From 4cda36c860e0a1a111525c63ae69851dc1506ef9 Mon Sep 17 00:00:00 2001 From: Erosika Date: Tue, 17 Mar 2026 15:18:32 -0400 Subject: [PATCH 01/46] Add SillyTavern integration docs New integration guide for the sillytavern-honcho extension covering install, global config, context architecture, enrichment modes, and troubleshooting. Added to v3 integrations nav. --- docs/docs.json | 3 +- docs/v3/guides/integrations/sillytavern.mdx | 177 ++++++++++++++++++++ 2 files changed, 179 insertions(+), 1 deletion(-) create mode 100644 docs/v3/guides/integrations/sillytavern.mdx diff --git a/docs/docs.json b/docs/docs.json index 27f6b633a..39b675bef 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -97,7 +97,8 @@ "v3/guides/integrations/langgraph", "v3/guides/integrations/mcp", "v3/guides/integrations/n8n", - "v3/guides/integrations/openclaw" + "v3/guides/integrations/openclaw", + "v3/guides/integrations/sillytavern" ] }, { diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx new file mode 100644 index 000000000..ce418907b --- /dev/null +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -0,0 +1,177 @@ +--- +title: "SillyTavern" +icon: 'comments' +description: "Add persistent, personalized memory to SillyTavern AI characters with Honcho" +sidebarTitle: 'SillyTavern' +--- + +Give your SillyTavern characters long-term memory. Honcho remembers who you are, what you've talked about, and how to talk to you -- across sessions, characters, and restarts. + +The extension has two parts: a **client extension** (browser) that hooks into SillyTavern events, and a **server plugin** (Node.js) that proxies requests to the Honcho API. + +## Quick Start + +### Step 1: Get Your Honcho API Key + +1. Go to **[app.honcho.dev](https://app.honcho.dev)** +2. Sign up or log in +3. Copy your API key + +### Step 2: Install + +From your **SillyTavern directory**: + +**macOS / Linux:** +```bash +bash <(curl -s https://raw.githubusercontent.com/plastic-labs/sillytavern-honcho/main/install.sh) +``` + +**Windows (PowerShell):** +```powershell +irm https://raw.githubusercontent.com/plastic-labs/sillytavern-honcho/main/install.ps1 | iex +``` + + +Server plugins must be enabled. The installer checks for this, but if you haven't already, add `enableServerPlugins: true` to your `config.yaml`. + + +The installer: +1. Clones the extension into `public/scripts/extensions/third-party/sillytavern-honcho` +2. Symlinks the server plugin to `plugins/honcho-proxy` +3. Installs the `@honcho-ai/sdk` dependency +4. Detects your `~/.honcho/config.json` if it exists + +### Step 3: Restart SillyTavern + +Stop and restart SillyTavern so the server plugin loads. + +### Step 4: Configure + +Open **Extensions** (puzzle piece icon) and expand **Honcho Memory**: + +1. Check **Enable Honcho Memory** +2. Click the API key field to set your key +3. Enter your workspace ID +4. Status indicator should show **Ready** + +## Global Config (Multi-Tool Setups) + +If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the extension auto-populates settings from `~/.honcho/config.json`: + +```json +{ + "apiKey": "your-honcho-api-key", + "peerName": "your-name", + "workspace": "sillytavern", + "enabled": true +} +``` + +Config reads fall back from `hosts.sillytavern` to root-level globals. Writes are always scoped to `hosts.sillytavern` -- the extension never mutates settings for other tools. 
+
+```jsonc
+{
+  "apiKey": "hch-v2-...",
+  "peerName": "alice",
+  "hosts": {
+    "sillytavern": {
+      "workspace": "sillytavern",
+      "aiPeer": "Assistant" // Updated automatically per character
+    },
+    "claude_code": { "..." : "..." },
+    "cursor": { "..." : "..." }
+  }
+}
+```
+
+## How It Works
+
+### Context Architecture
+
+Every generation injects a **base context layer** from `session.context()` -- the peer representation (what Honcho knows about you) and session summary. This uses stale-while-revalidate caching: the first turn blocks to populate the cache, then every subsequent turn serves the cached result instantly while refreshing in the background.
+
+The **enrichment mode** controls what layers on top of the base context:
+
+| Mode | Behavior |
+| --- | --- |
+| **Context only** | Base layer only -- peer representation + session summary |
+| **Reasoning** (default) | Base layer + dialectic `peer.chat()` queries on a configurable interval |
+| **Tool call** | Base layer + function tools the LLM can call on demand |
+
+Both the context and reasoning layers use stale-while-revalidate with configurable refresh intervals. After the first turn of a session, there is zero added latency.
+
+### Tool Call Mode
+
+In tool call mode, the extension registers three function tools that the LLM can invoke:
+
+| Tool | Description |
+| --- | --- |
+| `honcho_query_memory` | Dialectic chat query -- ask Honcho what it knows |
+| `honcho_save_observation` | Save an insight about the user to memory |
+| `honcho_search_history` | Semantic search across session messages |
+
+This mode works best with models that support function calling. The LLM decides when to query memory rather than firing on every turn.
+
+### Peer Modes
+
+| Mode | Behavior |
+| --- | --- |
+| **Single peer** | One user peer shared across all characters |
+| **Per-persona** | Each SillyTavern persona gets its own isolated user peer and memory |
+
+### Session Naming
+
+| Mode | Behavior |
+| --- | --- |
+| **Auto** | Per-chat hash (unique per conversation) |
+| **Per-character** | One session per character (persistent) |
+| **Custom** | User-defined session name |
+
+### Event Flow
+
+| SillyTavern Event | Action |
+| --- | --- |
+| Chat opened | Creates or retrieves Honcho session + peers |
+| Before generation | Injects memory context into the prompt |
+| User sends message | Stores message in Honcho session |
+| AI responds | Stores response in Honcho session |
+
+## Architecture
+
+```
+Browser (Client Extension)              Server (Plugin)
++-----------------------+               +------------------------------+
+| index.js              |    fetch()    | plugin/index.js              |
+|                       | ------------> |                              |
+| - Settings UI         | /api/plugins/ | - Express router (7 routes)  |
+| - Event hooks         | honcho-proxy  | - Honcho SDK (@honcho-ai/sdk)|
+| - Prompt injection    |               | - API key from ST secrets or |
+| - Tool registration   |               |   ~/.honcho/config.json      |
++-----------------------+               +------------------------------+
+```
+
+The server plugin reads API credentials from SillyTavern's secrets store first, falling back to `~/.honcho/config.json`. It re-reads the global config before every write to prevent race conditions with concurrent tools.
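+
+For reference, the base context and reasoning layers described above map onto two Honcho SDK calls: `session.context()` and `peer.chat()`. The extension itself is JavaScript, but a minimal Python sketch (the peer, session, and workspace names are illustrative assumptions) shows the equivalent calls the server plugin makes:
+
+```python
+from honcho import Honcho
+
+honcho = Honcho(api_key="your-honcho-api-key", workspace_id="sillytavern")
+user = honcho.peer("alice")          # the user peer
+session = honcho.session("chat-1")   # one SillyTavern chat
+
+# Base context layer: peer representation + session summary
+context = session.context(tokens=4000)
+
+# Reasoning layer: a dialectic query against long-term memory
+insight = user.chat(query="How does this user like to be spoken to?", session="chat-1")
+```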
+
+## Troubleshooting
+
+| Symptom | Fix |
+| --- | --- |
+| No "Honcho Memory" in Extensions | Check symlink exists: `ls public/scripts/extensions/third-party/sillytavern-honcho/manifest.json` |
+| Plugin not initializing | Ensure `enableServerPlugins: true` in `config.yaml`, then restart ST |
+| 403 on plugin requests | Set Honcho API key in extension settings or `~/.honcho/config.json` |
+| SDK import error | Run `cd plugins/honcho-proxy && npm install` |
+| Extension loads but nothing happens | Enable the checkbox and ensure workspace ID is set |
+
+---
+
+## Next Steps
+
+- **[sillytavern-honcho on GitHub](https://github.com/plastic-labs/sillytavern-honcho)** -- Source code, issues, and install scripts.
+- **Honcho core concepts** -- Learn about peers, sessions, and dialectic reasoning.

From fa8f0b1a19959049b0639d1091eb073bfe5dd849 Mon Sep 17 00:00:00 2001
From: Luba Kaper <55723620+LubaKaper@users.noreply.github.com>
Date: Mon, 6 Apr 2026 17:06:08 -0400
Subject: [PATCH 02/46] feat(examples): add Honcho memory skill for Zo
 Computer (#495)

* chore: add .worktrees/ to .gitignore

* feat(examples): add Zo Computer memory skill integration

* feat(examples): add Zo Computer memory skill integration

* fix(examples): address CodeRabbit review on Zo skill integration

- Fix version inconsistency: SKILL.md matches pyproject.toml (>=2.1.0)
- Move client.py into tools/ package and use relative imports
- Add assistant_id parameter to save_memory() for consistency with get_context()
- Use UUID-based IDs in tests to prevent state leakage between runs
- Add pytest.mark.skipif guard on integration tests (requires HONCHO_API_KEY)
- Fix import ordering, move pytest to module level, sort __all__ alphabetically
- Fix markdown blank lines around fenced code blocks (MD031)
- Add rate limit delay fixture to avoid hitting Honcho free tier limits

* fix(examples): validate HONCHO_API_KEY early in client initialization

* docs(examples): note cross-peer memory behavior in shared workspaces

* docs(examples): fix save_memory and query_memory signatures in README

* docs(examples): fix markdown linting issues in README

* docs(examples): add assistant_id parameter to save_memory example in SKILL.md

---------

Co-authored-by: Luba Kaper
---
 .gitignore                        |   1 +
 examples/zo/README.md             | 148 +++++++++
 examples/zo/SKILL.md              | 118 +++++++
 examples/zo/pyproject.toml        |  25 ++
 examples/zo/tests/test_basic.py   |  69 ++++
 examples/zo/tests/test_tools.py   | 199 ++++++++++++
 examples/zo/tools/__init__.py     |   7 +
 examples/zo/tools/client.py       |  32 ++
 examples/zo/tools/get_context.py  |  42 +++
 examples/zo/tools/query_memory.py |  37 +++
 examples/zo/tools/save_memory.py  |  45 +++
 examples/zo/uv.lock               | 503 ++++++++++++++++++++++++++++++
 12 files changed, 1226 insertions(+)
 create mode 100644 examples/zo/README.md
 create mode 100644 examples/zo/SKILL.md
 create mode 100644 examples/zo/pyproject.toml
 create mode 100644 examples/zo/tests/test_basic.py
 create mode 100644 examples/zo/tests/test_tools.py
 create mode 100644 examples/zo/tools/__init__.py
 create mode 100644 examples/zo/tools/client.py
 create mode 100644 examples/zo/tools/get_context.py
 create mode 100644 examples/zo/tools/query_memory.py
 create mode 100644 examples/zo/tools/save_memory.py
 create mode 100644 examples/zo/uv.lock

diff --git a/.gitignore b/.gitignore
index 9a31fc383..ea5ccca18 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.worktrees/
 api/**/*.db
 api/data
 api/docker-compose.yml
diff --git a/examples/zo/README.md b/examples/zo/README.md
new file mode 100644
index 000000000..b8c8b2179
--- /dev/null
+++ b/examples/zo/README.md
@@ -0,0 +1,148 @@
+# Honcho Memory Skill for Zo Computer
+
+Give your AI persistent memory across conversations using [Honcho](https://honcho.dev).
+
+## Features
+
+- **Auto-Memory**: Save user and assistant messages to Honcho with one call
+- **Query Memory**: Ask natural language questions about what Honcho remembers ("What are my hobbies?")
+- **Context Injection**: Retrieve conversation context formatted for direct LLM use
+- **Multi-Workspace Support**: Manage separate memory spaces via `HONCHO_WORKSPACE_ID`
+
+## Installation
+
+```bash
+pip install honcho-ai python-dotenv
+```
+
+Or with uv:
+
+```bash
+uv add honcho-ai python-dotenv
+```
+
+## Environment Variables
+
+Create a `.env` file:
+
+```env
+HONCHO_API_KEY=your-api-key-here
+HONCHO_WORKSPACE_ID=default
+```
+
+Get your API key at [honcho.dev](https://honcho.dev).
+
+## Quick Start
+
+```python
+from tools.save_memory import save_memory
+from tools.query_memory import query_memory
+from tools.get_context import get_context
+
+# Save a conversation turn
+save_memory("alice", "I love hiking in the mountains", "user", "session-1")
+save_memory("alice", "That sounds wonderful!", "assistant", "session-1")
+
+# Query what Honcho remembers
+answer = query_memory("alice", "What are my hobbies?", "session-1")
+print(answer)  # "Alice enjoys hiking in the mountains."
+
+# Get context ready for an LLM call
+messages = get_context("alice", "session-1", "assistant", tokens=4000)
+# messages is a list of {"role": ..., "content": ...} dicts
+```
+
+## Tool Reference
+
+### `save_memory(user_id, content, role, session_id, assistant_id="assistant")`
+
+Saves a message to Honcho memory.
+
+| Param | Type | Description |
+|---|---|---|
+| `user_id` | `str` | Unique user identifier |
+| `content` | `str` | Message text |
+| `role` | `str` | `"user"` or `"assistant"` |
+| `session_id` | `str` | Session/conversation identifier |
+| `assistant_id` | `str` | Peer ID for the assistant. Defaults to `"assistant"` |
+
+Returns a confirmation string.
+
+---
+
+### `query_memory(user_id, query, session_id=None)`
+
+Queries stored memory using Honcho's Dialectic API.
+
+| Param | Type | Description |
+|---|---|---|
+| `user_id` | `str` | Unique user identifier |
+| `query` | `str` | Natural language question |
+| `session_id` | `str \| None` | Optional: scope to a specific session. Defaults to `None` (global memory) |
+
+Returns a natural language answer.
+
+> **Note:** In shared workspaces, `query_memory` may return data from other peers if the queried user has no stored memory yet. The Dialectic API draws from workspace-level context as a fallback. Use unique `HONCHO_WORKSPACE_ID` values per user group in production to prevent cross-peer data leakage.
+
+---
+
+### `get_context(user_id, session_id, assistant_id, tokens=4000)`
+
+Retrieves conversation context in OpenAI message format.
+
+| Param | Type | Description |
+|---|---|---|
+| `user_id` | `str` | Unique user identifier |
+| `session_id` | `str` | Session/conversation identifier |
+| `assistant_id` | `str` | Peer ID for the assistant |
+| `tokens` | `int` | Max tokens to include (default: 4000) |
+
+Returns a list of `{"role": ..., "content": ...}` dicts.
+
+## Concept Mapping
+
+| Zo Computer | Honcho |
+|---|---|
+| Account | Workspace |
+| User | Peer |
+| Conversation | Session |
+| Message | Message |
+
+## Running Tests
+
+The integration tests require a Honcho API key (`HONCHO_API_KEY`) and run against the Honcho cloud API by default; they are skipped automatically when the key is not set. See the [main repo](../../README.md) for setup instructions.
+
+```bash
+uv run pytest tests/ -v
+```
+
+## Submitting to the Zo Skill Marketplace
+
+To publish this skill to the [Zo Skills Registry](https://github.com/zocomputer/skills):
+
+1. **Fork** the `zocomputer/skills` repository.
+2. **Copy** this directory into the `/Community` folder of your fork, naming it `honcho-memory`:
+
+   ```
+   Community/
+   └── honcho-memory/
+       ├── SKILL.md
+       ├── README.md
+       ├── pyproject.toml
+       └── tools/
+   ```
+
+3. **Validate** your skill:
+
+   ```bash
+   bun validate
+   ```
+
+4. **Submit a pull request** to the upstream registry repository.
+
+Once merged, the skill will be automatically added to the Zo marketplace `manifest.json`.
+
+## License
+
+AGPL-3.0-or-later
diff --git a/examples/zo/SKILL.md b/examples/zo/SKILL.md
new file mode 100644
index 000000000..7c2d532b2
--- /dev/null
+++ b/examples/zo/SKILL.md
@@ -0,0 +1,118 @@
+---
+name: honcho-memory
+description: Gives AI agents persistent memory across conversations using Honcho. Automatically saves and retrieves user context so the AI remembers preferences, history, and facts between sessions. Use when you need the AI to remember past conversations, recall what a user has told it, inject relevant context into prompts, or manage separate memory spaces for different topics.
+license: AGPL-3.0
+compatibility: Requires Python 3.9+, honcho-ai>=2.1.0, and a Honcho API key from honcho.dev. Set HONCHO_API_KEY and optionally HONCHO_WORKSPACE_ID in your environment.
+metadata:
+  author: plastic-labs
+  version: "0.1.0"
+  honcho-sdk: "2.1.0"
+---
+
+# Honcho Memory Skill
+
+This skill provides three tools for storing and retrieving AI memory using [Honcho](https://honcho.dev).
+
+## Setup
+
+1. Get a Honcho API key at [honcho.dev](https://honcho.dev).
+2. Set environment variables:
+
+   ```
+   HONCHO_API_KEY=your-api-key
+   HONCHO_WORKSPACE_ID=default  # optional, defaults to "default"
+   ```
+
+3. Install dependencies:
+
+   ```
+   pip install honcho-ai python-dotenv
+   ```
+
+## Tools
+
+### `save_memory`
+
+Saves a conversation turn (user or assistant message) to Honcho.
+
+**When to use:** After every message exchange to build up the user's memory.
+
+```python
+from tools.save_memory import save_memory
+
+save_memory(
+    user_id="alice",            # unique user identifier
+    content="I love hiking",    # message text
+    role="user",                # "user" or "assistant"
+    session_id="chat-1",        # conversation session ID
+    assistant_id="assistant"    # optional: assistant peer ID (default: "assistant")
+)
+```
+
+### `query_memory`
+
+Asks a natural language question against stored memory using Honcho's Dialectic API.
+
+**When to use:** When the user asks "do you remember...?", or when you need to recall facts about the user before responding.
+
+```python
+from tools.query_memory import query_memory
+
+answer = query_memory(
+    user_id="alice",
+    query="What are Alice's hobbies?",
+    session_id="chat-1"  # optional: scope to a session
+)
+# Returns: "Alice enjoys hiking."
+```
+
+### `get_context`
+
+Retrieves recent conversation history formatted for direct use in an LLM API call.
+
+**When to use:** At the start of each LLM call to inject relevant context from past conversations.
+
+```python
+from tools.get_context import get_context
+
+messages = get_context(
+    user_id="alice",
+    session_id="chat-1",
+    assistant_id="assistant",
+    tokens=4000  # max tokens to include
+)
+# Returns: [{"role": "user", "content": "..."}, ...]
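+#
+# A hedged follow-up sketch -- `openai_client` and the model name are
+# assumptions, not part of this skill; any chat-completions-style API
+# accepts the list directly:
+#
+#   response = openai_client.chat.completions.create(
+#       model="gpt-4o-mini", messages=messages
+#   )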
+``` + +## Concept Mapping + +| Zo Computer | Honcho | +|---|---| +| Account | Workspace | +| User | Peer | +| Conversation | Session | +| Message | Message | + +## Example: Full Conversation Flow + +```python +from tools.save_memory import save_memory +from tools.query_memory import query_memory +from tools.get_context import get_context + +user_id = "alice" +session_id = "session-1" + +# 1. Save user message +save_memory(user_id, "I'm learning Rust and love rock climbing", "user", session_id) + +# 2. Save assistant reply +save_memory(user_id, "That's great! Both require patience.", "assistant", session_id) + +# 3. In a later session, recall what you know +print(query_memory(user_id, "What does Alice do in her free time?")) +# → "Alice is learning Rust and enjoys rock climbing." + +# 4. Get context window for next LLM call +messages = get_context(user_id, session_id, "assistant", tokens=4000) +``` diff --git a/examples/zo/pyproject.toml b/examples/zo/pyproject.toml new file mode 100644 index 000000000..68c0a616a --- /dev/null +++ b/examples/zo/pyproject.toml @@ -0,0 +1,25 @@ +[project] +name = "honcho-zo-skill" +version = "0.1.0" +description = "Honcho persistent memory skill for Zo Computer" +readme = "README.md" +requires-python = ">=3.9" +dependencies = [ + "honcho-ai>=2.1.0", + "python-dotenv>=1.0.0", +] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", +] + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["tools"] + +[tool.pytest.ini_options] +pythonpath = ["."] diff --git a/examples/zo/tests/test_basic.py b/examples/zo/tests/test_basic.py new file mode 100644 index 000000000..1afe00d48 --- /dev/null +++ b/examples/zo/tests/test_basic.py @@ -0,0 +1,69 @@ +"""Basic import and structure tests for honcho-zo-skill. + +These tests validate package structure and imports without requiring +a running Honcho server. 
+""" + +import os +import sys + +import pytest + +# Add parent directory to path so tools/ can be imported +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + + +def test_save_memory_import(): + """Test that save_memory can be imported.""" + from tools.save_memory import save_memory + + assert callable(save_memory) + + +def test_query_memory_import(): + """Test that query_memory can be imported.""" + from tools.query_memory import query_memory + + assert callable(query_memory) + + +def test_get_context_import(): + """Test that get_context can be imported.""" + from tools.get_context import get_context + + assert callable(get_context) + + +def test_tools_package_import(): + """Test that the tools package exports all three functions.""" + import tools + + assert hasattr(tools, "save_memory") + assert hasattr(tools, "query_memory") + assert hasattr(tools, "get_context") + + +def test_tools_all_exports(): + """Test that __all__ contains expected exports.""" + import tools + + assert hasattr(tools, "__all__") + expected = ["get_context", "query_memory", "save_memory"] + for name in expected: + assert name in tools.__all__, f"{name} not in __all__" + + +def test_save_memory_raises_on_empty_content(): + """Test that save_memory raises ValueError for empty content.""" + from tools.save_memory import save_memory + + with pytest.raises(ValueError, match="content must not be empty"): + save_memory("user1", "", "user", "session1") + + +def test_query_memory_raises_on_empty_query(): + """Test that query_memory raises ValueError for empty query.""" + from tools.query_memory import query_memory + + with pytest.raises(ValueError, match="query must not be empty"): + query_memory("user1", "") diff --git a/examples/zo/tests/test_tools.py b/examples/zo/tests/test_tools.py new file mode 100644 index 000000000..00c34b78b --- /dev/null +++ b/examples/zo/tests/test_tools.py @@ -0,0 +1,199 @@ +"""Functional tests for Honcho Zo skill tools. + +These tests require a Honcho API key set in the HONCHO_API_KEY environment +variable. They run against the Honcho cloud API (honcho.dev) by default. +Set HONCHO_WORKSPACE_ID to scope tests to a specific workspace. 
+""" + +import os +import sys +import time +import uuid + +import pytest + +sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) + +from tools.get_context import get_context +from tools.query_memory import query_memory +from tools.save_memory import save_memory + +pytestmark = pytest.mark.skipif( + not os.getenv("HONCHO_API_KEY"), + reason="HONCHO_API_KEY not set — skipping integration tests", +) + + +@pytest.fixture(autouse=True) +def rate_limit_delay(): + """Pause between tests to stay under the Honcho API rate limit (5 req/sec).""" + yield + time.sleep(0.5) + + +def unique_id(prefix: str) -> str: + """Generate a unique ID with a prefix to avoid test state leakage.""" + return f"{prefix}_{uuid.uuid4().hex[:8]}" + + +class TestSaveMemory: + """Tests for save_memory tool.""" + + def test_returns_confirmation_string(self): + """Test that save_memory returns a non-empty confirmation string.""" + result = save_memory(unique_id("user"), "Hello, I love hiking!", "user", unique_id("session")) + + assert isinstance(result, str) + assert len(result) > 0 + + def test_saves_user_message(self): + """Test saving a user-role message.""" + user_id = unique_id("user") + result = save_memory(user_id, "I enjoy Python programming", "user", unique_id("session")) + + assert isinstance(result, str) + assert "user" in result.lower() or user_id in result + + def test_saves_assistant_message(self): + """Test saving an assistant-role message.""" + result = save_memory(unique_id("user"), "That sounds great!", "assistant", unique_id("session")) + + assert isinstance(result, str) + assert len(result) > 0 + + def test_saves_multiple_turns(self): + """Test saving multiple turns in the same session.""" + user_id = unique_id("user") + session_id = unique_id("session") + + result1 = save_memory(user_id, "I love mountains", "user", session_id) + result2 = save_memory(user_id, "That's wonderful!", "assistant", session_id) + + assert isinstance(result1, str) and len(result1) > 0 + assert isinstance(result2, str) and len(result2) > 0 + + def test_non_assistant_role_treated_as_user(self): + """Test that any role other than 'assistant' is treated as user.""" + result = save_memory(unique_id("user"), "Testing role fallback", "human", unique_id("session")) + + assert isinstance(result, str) + assert len(result) > 0 + + def test_custom_assistant_id(self): + """Test that a custom assistant_id is accepted.""" + result = save_memory( + unique_id("user"), "Hello!", "assistant", unique_id("session"), assistant_id="my-bot" + ) + + assert isinstance(result, str) + assert len(result) > 0 + + +class TestQueryMemory: + """Tests for query_memory tool.""" + + def test_returns_string(self): + """Test that query_memory returns a string response.""" + user_id = unique_id("user") + session_id = unique_id("session") + save_memory(user_id, "I love pizza and Italian food", "user", session_id) + + result = query_memory(user_id, "What does the user enjoy?") + + assert isinstance(result, str) + assert len(result) > 0 + + def test_returns_string_with_session_scope(self): + """Test query_memory scoped to a specific session.""" + user_id = unique_id("user") + session_id = unique_id("session") + save_memory(user_id, "My favorite color is blue", "user", session_id) + + result = query_memory(user_id, "What is the user's favorite color?", session_id) + + assert isinstance(result, str) + assert len(result) > 0 + + def test_returns_fallback_for_unknown_user(self): + """Test that query_memory returns a non-empty string even for new 
users."""
+        result = query_memory(unique_id("user"), "What do I like?")
+
+        assert isinstance(result, str)
+        assert len(result) > 0
+
+
+class TestGetContext:
+    """Tests for get_context tool."""
+
+    def test_returns_list(self):
+        """Test that get_context returns a list."""
+        user_id = unique_id("user")
+        session_id = unique_id("session")
+        save_memory(user_id, "Hello there!", "user", session_id)
+
+        result = get_context(user_id, session_id, "assistant")
+
+        assert isinstance(result, list)
+
+    def test_returns_openai_format(self):
+        """Test that returned messages are in OpenAI format."""
+        user_id = unique_id("user")
+        session_id = unique_id("session")
+        save_memory(user_id, "My name is Alex", "user", session_id)
+        save_memory(user_id, "Nice to meet you, Alex!", "assistant", session_id)
+
+        result = get_context(user_id, session_id, "assistant")
+
+        assert isinstance(result, list)
+        for msg in result:
+            assert "role" in msg
+            assert "content" in msg
+            assert msg["role"] in ("user", "assistant", "system")
+            assert isinstance(msg["content"], str)
+
+    def test_respects_token_limit(self):
+        """Test that context respects the token limit parameter."""
+        user_id = unique_id("user")
+        session_id = unique_id("session")
+        for i in range(5):
+            save_memory(user_id, f"Message number {i} with some content", "user", session_id)
+
+        result_small = get_context(user_id, session_id, "assistant", tokens=100)
+        result_large = get_context(user_id, session_id, "assistant", tokens=8000)
+
+        assert isinstance(result_small, list)
+        assert isinstance(result_large, list)
+        assert len(result_large) >= len(result_small)
+
+    def test_empty_session_returns_list(self):
+        """Test that get_context returns an empty list for a session with no messages."""
+        result = get_context(unique_id("user"), unique_id("session"), "assistant")
+
+        assert isinstance(result, list)
+
+
+class TestToolsWorkTogether:
+    """Integration tests using all three tools in sequence."""
+
+    def test_save_query_roundtrip(self):
+        """Test saving a message and then querying it."""
+        user_id = unique_id("user")
+        session_id = unique_id("session")
+        save_memory(user_id, "I am a software engineer who loves Rust", "user", session_id)
+
+        result = query_memory(user_id, "What is the user's profession?", session_id)
+
+        assert isinstance(result, str)
+        assert len(result) > 0
+
+    def test_save_then_get_context(self):
+        """Test that saved messages appear in context."""
+        user_id = unique_id("user")
+        session_id = unique_id("session")
+        save_memory(user_id, "Hello!", "user", session_id)
+        save_memory(user_id, "Hi there!", "assistant", session_id)
+
+        messages = get_context(user_id, session_id, "assistant")
+
+        assert isinstance(messages, list)
+        assert len(messages) >= 1
diff --git a/examples/zo/tools/__init__.py b/examples/zo/tools/__init__.py
new file mode 100644
index 000000000..4d8b48304
--- /dev/null
+++ b/examples/zo/tools/__init__.py
@@ -0,0 +1,7 @@
+"""Honcho memory tools for Zo Computer."""
+
+from tools.get_context import get_context
+from tools.query_memory import query_memory
+from tools.save_memory import save_memory
+
+__all__ = ["get_context", "query_memory", "save_memory"]
diff --git a/examples/zo/tools/client.py b/examples/zo/tools/client.py
new file mode 100644
index 000000000..ad1257b29
--- /dev/null
+++ b/examples/zo/tools/client.py
@@ -0,0 +1,32 @@
+"""Honcho client initialization for Zo Computer skill."""
+
+from __future__ import annotations
+
+import os
+
+from dotenv import load_dotenv
+from honcho import Honcho
+
+load_dotenv()
+
+
+def get_client(workspace_id: str | None = None)
-> Honcho: + """Initialize and return a Honcho client. + + Reads HONCHO_API_KEY and HONCHO_WORKSPACE_ID from environment variables. + The workspace_id parameter overrides the environment variable if provided. + + Args: + workspace_id: Optional workspace ID override. Falls back to the + HONCHO_WORKSPACE_ID env var, then to "default". + + Returns: + Configured Honcho client instance. + """ + api_key = os.getenv("HONCHO_API_KEY") + if not api_key: + raise ValueError( + "HONCHO_API_KEY is required. Set it in your environment or .env file." + ) + + env_workspace = os.getenv("HONCHO_WORKSPACE_ID") + resolved_workspace = workspace_id or env_workspace or "default" + return Honcho(api_key=api_key, workspace_id=resolved_workspace) diff --git a/examples/zo/tools/get_context.py b/examples/zo/tools/get_context.py new file mode 100644 index 000000000..fdf3cb10b --- /dev/null +++ b/examples/zo/tools/get_context.py @@ -0,0 +1,42 @@ +"""Retrieve conversation context from Honcho formatted for LLM use.""" + +from __future__ import annotations + +from .client import get_client + + +def get_context( + user_id: str, + session_id: str, + assistant_id: str, + tokens: int = 4000, +) -> list[dict[str, str]]: + """Retrieve conversation context ready for injection into an LLM prompt. + + Fetches recent messages from a Honcho session within the given token + budget and converts them to OpenAI-compatible message format. Use the + returned list directly as the ``messages`` parameter in an LLM API call. + + Args: + user_id: Unique identifier for the user peer. Used to ensure the + peer is registered in the session before fetching context. + session_id: Identifier for the conversation session. + assistant_id: Peer ID representing the assistant. This determines + which role is mapped to ``"assistant"`` in the output. + tokens: Maximum number of tokens to include in the context window. + Defaults to 4000. + + Returns: + A list of message dicts in OpenAI format: + ``[{"role": "user" | "assistant", "content": "..."}]``. + Returns an empty list if the session has no messages. + """ + honcho = get_client() + user_peer = honcho.peer(user_id) + assistant_peer = honcho.peer(assistant_id) + session = honcho.session(session_id) + + session.add_peers([user_peer, assistant_peer]) + + context = session.context(tokens=tokens) + return context.to_openai(assistant=assistant_id) diff --git a/examples/zo/tools/query_memory.py b/examples/zo/tools/query_memory.py new file mode 100644 index 000000000..01836a09d --- /dev/null +++ b/examples/zo/tools/query_memory.py @@ -0,0 +1,37 @@ +"""Query a user's Honcho memory using the Dialectic API.""" + +from __future__ import annotations + +from .client import get_client + + +def query_memory(user_id: str, query: str, session_id: str | None = None) -> str: + """Query stored memory for a user using Honcho's Dialectic API. + + Sends a natural language question to Honcho and returns an answer + grounded in the peer's long-term representation and stored observations. + + Args: + user_id: Unique identifier for the user peer. + query: Natural language question, e.g. "What are my hobbies?". + session_id: Optional session ID to scope the query to a specific + conversation. If omitted, the query draws from global memory. + + Returns: + A natural language answer from Honcho's Dialectic API, or a + default message if no relevant information was found. + + Raises: + ValueError: If query is empty. 
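+
+    Example (requires HONCHO_API_KEY; the peer ID is illustrative):
+        answer = query_memory("alice", "What are my hobbies?")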
+ """ + if not query: + raise ValueError("query must not be empty") + + honcho = get_client() + peer = honcho.peer(user_id) + + response = peer.chat(query=query, session=session_id) + + if response: + return str(response) + return "No relevant information found in memory." diff --git a/examples/zo/tools/save_memory.py b/examples/zo/tools/save_memory.py new file mode 100644 index 000000000..f78180665 --- /dev/null +++ b/examples/zo/tools/save_memory.py @@ -0,0 +1,45 @@ +"""Save a conversation message to Honcho memory.""" + +from .client import get_client + + +def save_memory( + user_id: str, + content: str, + role: str, + session_id: str, + assistant_id: str = "assistant", +) -> str: + """Save a single conversation turn to Honcho memory. + + Creates the peer and session if they do not already exist. Registers + the peer in the session on first use, then persists the message. + + Args: + user_id: Unique identifier for the user peer. + content: Text content of the message to save. + role: Either "user" or "assistant". Determines which peer sends + the message. Any value other than "assistant" is treated as "user". + session_id: Identifier for the conversation session. + assistant_id: Peer ID for the assistant. Defaults to "assistant". + + Returns: + A confirmation string describing what was saved. + + Raises: + ValueError: If content is empty. + """ + if not content: + raise ValueError("content must not be empty") + + honcho = get_client() + user_peer = honcho.peer(user_id) + assistant_peer = honcho.peer(assistant_id) + session = honcho.session(session_id) + + session.add_peers([user_peer, assistant_peer]) + + sender = assistant_peer if role == "assistant" else user_peer + session.add_messages([sender.message(content)]) + + return f"Saved {role} message to session '{session_id}' for user '{user_id}'." 
diff --git a/examples/zo/uv.lock b/examples/zo/uv.lock new file mode 100644 index 000000000..902f8fdd1 --- /dev/null +++ b/examples/zo/uv.lock @@ -0,0 +1,503 @@ +version = 1 +revision = 3 +requires-python = ">=3.9" +resolution-markers = [ + "python_full_version >= '3.10'", + "python_full_version < '3.10'", +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version < '3.10'" }, + { name = "idna", marker = "python_full_version < '3.10'" }, + { name = "typing-extensions", marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + +[[package]] +name = "anyio" +version = "4.13.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.10'", +] +dependencies = [ + { name = "exceptiongroup", marker = "python_full_version == '3.10.*'" }, + { name = "idna", marker = "python_full_version >= '3.10'" }, + { name = "typing-extensions", marker = "python_full_version >= '3.10' and python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/19/14/2c5dd9f512b66549ae92767a9c7b330ae88e1932ca57876909410251fe13/anyio-4.13.0.tar.gz", hash = "sha256:334b70e641fd2221c1505b3890c69882fe4a2df910cba14d97019b90b24439dc", size = 231622, upload-time = "2026-03-24T12:59:09.671Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/da/42/e921fccf5015463e32a3cf6ee7f980a6ed0f395ceeaa45060b61d86486c2/anyio-4.13.0-py3-none-any.whl", hash = "sha256:08b310f9e24a9594186fd75b4f73f4a4152069e3853f1ed8bfbf58369f4ad708", size = 114353, upload-time = "2026-03-24T12:59:08.246Z" }, +] + +[[package]] +name = "certifi" +version = "2026.2.25" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "exceptiongroup" +version = "1.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "honcho-ai" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5e/07/fb2a6654a9f44ff1070d88feb269113a865923e0aa91acf7864459179a1b/honcho_ai-2.1.0.tar.gz", hash = "sha256:c1988bbbf61492c2db168c2f0aa4317c489e18ea9867f74cb318a5f1b83289c8", size = 48050, upload-time = "2026-03-30T14:59:56.731Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dd/34/b814ea3bed1d96807377814461d58294f0d6c5c66e29f06625c0ac6069b6/honcho_ai-2.1.0-py3-none-any.whl", hash = "sha256:c07389036ef839ff31dc66e4757fa451da25ce976830bce108372e0756daf500", size = 58295, upload-time = "2026-03-30T14:59:55.774Z" }, +] + +[[package]] +name = "honcho-zo-skill" +version = "0.1.0" +source = { editable = "." 
} +dependencies = [ + { name = "honcho-ai" }, + { name = "python-dotenv", version = "1.2.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "python-dotenv", version = "1.2.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] + +[package.optional-dependencies] +dev = [ + { name = "pytest", version = "8.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "pytest", version = "9.0.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, +] + +[package.metadata] +requires-dist = [ + { name = "honcho-ai", specifier = ">=2.1.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, + { name = "python-dotenv", specifier = ">=1.0.0" }, +] +provides-extras = ["dev"] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio", version = "4.12.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "anyio", version = "4.13.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.1.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < 
'3.10'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f2/97/ebf4da567aa6827c909642694d71c9fcf53e5b504f2d96afea02718862f3/iniconfig-2.1.0.tar.gz", hash = "sha256:3abbd2e30b36733fee78f9c7f7308f2d0050e88f0087fd25c2645f63c773e1c7", size = 4793, upload-time = "2025-03-19T20:09:59.721Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2c/e1/e6716421ea10d38022b952c159d5161ca1193197fb744506875fbb87ea7b/iniconfig-2.1.0-py3-none-any.whl", hash = "sha256:9deba5723312380e77435581c6bf4935c94cbfab9b1ed33ef8d238ea168eb760", size = 6050, upload-time = "2025-03-19T20:10:01.071Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.10'", +] +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = 
"sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, + { url = "https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, + { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, + { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, + { url = "https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, + { url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = "2025-11-04T13:39:17.403Z" }, + { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, upload-time = "2025-11-04T13:39:19.351Z" }, + { url = "https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", 
size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, + { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 2341833, upload-time = "2025-11-04T13:39:22.606Z" }, + { url = "https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, + { url = "https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, + { url = "https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, + { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, + { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = "2025-11-04T13:39:42.523Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, + { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = 
"https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = 
"sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = 
"https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = 
"https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/54/db/160dffb57ed9a3705c4cbcbff0ac03bdae45f1ca7d58ab74645550df3fbd/pydantic_core-2.41.5-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:8bfeaf8735be79f225f3fefab7f941c712aaca36f1128c9d7e2352ee1aa87bdf", size = 2107999, upload-time = "2025-11-04T13:42:03.885Z" 
}, + { url = "https://files.pythonhosted.org/packages/a3/7d/88e7de946f60d9263cc84819f32513520b85c0f8322f9b8f6e4afc938383/pydantic_core-2.41.5-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:346285d28e4c8017da95144c7f3acd42740d637ff41946af5ce6e5e420502dd5", size = 1929745, upload-time = "2025-11-04T13:42:06.075Z" }, + { url = "https://files.pythonhosted.org/packages/d5/c2/aef51e5b283780e85e99ff19db0f05842d2d4a8a8cd15e63b0280029b08f/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a75dafbf87d6276ddc5b2bf6fae5254e3d0876b626eb24969a574fff9149ee5d", size = 1920220, upload-time = "2025-11-04T13:42:08.457Z" }, + { url = "https://files.pythonhosted.org/packages/c7/97/492ab10f9ac8695cd76b2fdb24e9e61f394051df71594e9bcc891c9f586e/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:7b93a4d08587e2b7e7882de461e82b6ed76d9026ce91ca7915e740ecc7855f60", size = 2067296, upload-time = "2025-11-04T13:42:10.817Z" }, + { url = "https://files.pythonhosted.org/packages/ec/23/984149650e5269c59a2a4c41d234a9570adc68ab29981825cfaf4cfad8f4/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e8465ab91a4bd96d36dde3263f06caa6a8a6019e4113f24dc753d79a8b3a3f82", size = 2231548, upload-time = "2025-11-04T13:42:13.843Z" }, + { url = "https://files.pythonhosted.org/packages/71/0c/85bcbb885b9732c28bec67a222dbed5ed2d77baee1f8bba2002e8cd00c5c/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:299e0a22e7ae2b85c1a57f104538b2656e8ab1873511fd718a1c1c6f149b77b5", size = 2362571, upload-time = "2025-11-04T13:42:16.208Z" }, + { url = "https://files.pythonhosted.org/packages/c0/4a/412d2048be12c334003e9b823a3fa3d038e46cc2d64dd8aab50b31b65499/pydantic_core-2.41.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:707625ef0983fcfb461acfaf14de2067c5942c6bb0f3b4c99158bed6fedd3cf3", size = 2068175, upload-time = "2025-11-04T13:42:18.911Z" }, + { url = "https://files.pythonhosted.org/packages/73/f4/c58b6a776b502d0a5540ad02e232514285513572060f0d78f7832ca3c98b/pydantic_core-2.41.5-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f41eb9797986d6ebac5e8edff36d5cef9de40def462311b3eb3eeded1431e425", size = 2177203, upload-time = "2025-11-04T13:42:22.578Z" }, + { url = "https://files.pythonhosted.org/packages/ed/ae/f06ea4c7e7a9eead3d165e7623cd2ea0cb788e277e4f935af63fc98fa4e6/pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:0384e2e1021894b1ff5a786dbf94771e2986ebe2869533874d7e43bc79c6f504", size = 2148191, upload-time = "2025-11-04T13:42:24.89Z" }, + { url = "https://files.pythonhosted.org/packages/c1/57/25a11dcdc656bf5f8b05902c3c2934ac3ea296257cc4a3f79a6319e61856/pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_armv7l.whl", hash = "sha256:f0cd744688278965817fd0839c4a4116add48d23890d468bc436f78beb28abf5", size = 2343907, upload-time = "2025-11-04T13:42:27.683Z" }, + { url = "https://files.pythonhosted.org/packages/96/82/e33d5f4933d7a03327c0c43c65d575e5919d4974ffc026bc917a5f7b9f61/pydantic_core-2.41.5-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:753e230374206729bf0a807954bcc6c150d3743928a73faffee51ac6557a03c3", size = 2322174, upload-time = "2025-11-04T13:42:30.776Z" }, + { url = "https://files.pythonhosted.org/packages/81/45/4091be67ce9f469e81656f880f3506f6a5624121ec5eb3eab37d7581897d/pydantic_core-2.41.5-cp39-cp39-win32.whl", hash = "sha256:873e0d5b4fb9b89ef7c2d2a963ea7d02879d9da0da8d9d4933dee8ee86a8b460", size 
= 1990353, upload-time = "2025-11-04T13:42:33.111Z" }, + { url = "https://files.pythonhosted.org/packages/44/8a/a98aede18db6e9cd5d66bcacd8a409fcf8134204cdede2e7de35c5a2c5ef/pydantic_core-2.41.5-cp39-cp39-win_amd64.whl", hash = "sha256:e4f4a984405e91527a0d62649ee21138f8e3d0ef103be488c1dc11a80d7f184b", size = 2015698, upload-time = "2025-11-04T13:42:35.484Z" }, + { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, + { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, + { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, + { url = 
"https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, + { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, + { url = "https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, + { url = "https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, + { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, + { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, + { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, + { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, +] + +[[package]] +name = "pygments" +version = "2.20.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c3/b2/bc9c9196916376152d655522fdcebac55e66de6603a76a02bca1b6414f6c/pygments-2.20.0.tar.gz", hash = "sha256:6757cd03768053ff99f3039c1a36d6c0aa0b263438fcab17520b30a303a82b5f", size = 4955991, upload-time = "2026-03-29T13:29:33.898Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/7e/a72dd26f3b0f4f2bf1dd8923c85f7ceb43172af56d63c7383eb62b332364/pygments-2.20.0-py3-none-any.whl", hash = "sha256:81a9e26dd42fd28a23a2d169d86d7ac03b46e2f8b59ed4698fb4785f946d0176", size = 1231151, upload-time = "2026-03-29T13:29:30.038Z" }, +] + +[[package]] +name = "pytest" +version = "8.4.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version < '3.10' and sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version < '3.10'" }, + { name = "iniconfig", version = "2.1.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.10'" }, + { name = "packaging", marker = "python_full_version < '3.10'" }, + { name = "pluggy", marker = "python_full_version < '3.10'" }, + { name = "pygments", marker = "python_full_version < '3.10'" }, + { name = "tomli", marker = "python_full_version < '3.10'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a3/5c/00a0e072241553e1a7496d638deababa67c5058571567b92a7eaa258397c/pytest-8.4.2.tar.gz", hash = "sha256:86c0d0b93306b961d58d62a4db4879f27fe25513d4b969df351abdddb3c30e01", size = 1519618, upload-time = "2025-09-04T14:34:22.711Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/a4/20da314d277121d6534b3a980b29035dcd51e6744bd79075a6ce8fa4eb8d/pytest-8.4.2-py3-none-any.whl", hash = "sha256:872f880de3fc3a5bdc88a11b39c9710c3497a547cfa9320bc3c5e62fbf272e79", size = 365750, upload-time = "2025-09-04T14:34:20.226Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry 
= "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.10'", +] +dependencies = [ + { name = "colorama", marker = "python_full_version >= '3.10' and sys_platform == 'win32'" }, + { name = "exceptiongroup", marker = "python_full_version == '3.10.*'" }, + { name = "iniconfig", version = "2.3.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.10'" }, + { name = "packaging", marker = "python_full_version >= '3.10'" }, + { name = "pluggy", marker = "python_full_version >= '3.10'" }, + { name = "pygments", marker = "python_full_version >= '3.10'" }, + { name = "tomli", marker = "python_full_version == '3.10.*'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.1" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version < '3.10'", +] +sdist = { url = "https://files.pythonhosted.org/packages/f0/26/19cadc79a718c5edbec86fd4919a6b6d3f681039a2f6d66d14be94e75fb9/python_dotenv-1.2.1.tar.gz", hash = "sha256:42667e897e16ab0d66954af0e60a9caa94f0fd4ecf3aaf6d2d260eec1aa36ad6", size = 44221, upload-time = "2025-10-26T15:12:10.434Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/1b/a298b06749107c305e1fe0f814c6c74aea7b2f1e10989cb30f544a1b3253/python_dotenv-1.2.1-py3-none-any.whl", hash = "sha256:b81ee9561e9ca4004139c6cbba3a238c32b03e4894671e181b671e8cb8425d61", size = 21230, upload-time = "2025-10-26T15:12:09.109Z" }, +] + +[[package]] +name = "python-dotenv" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +resolution-markers = [ + "python_full_version >= '3.10'", +] +sdist = { url = "https://files.pythonhosted.org/packages/82/ed/0301aeeac3e5353ef3d94b6ec08bbcabd04a72018415dcb29e588514bba8/python_dotenv-1.2.2.tar.gz", hash = "sha256:2c371a91fbd7ba082c2c1dc1f8bf89ca22564a087c2c287cd9b662adde799cf3", size = 50135, upload-time = "2026-03-01T16:00:26.196Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0b/d7/1959b9648791274998a9c3526f6d0ec8fd2233e4d4acce81bbae76b44b2a/python_dotenv-1.2.2-py3-none-any.whl", hash = "sha256:1d8214789a24de455a8b8bd8ae6fe3c6b69a5e3d64aa8a8e5d68e694bbcb285a", size = 22101, upload-time = "2026-03-01T16:00:25.09Z" }, +] + +[[package]] +name = "tomli" +version = "2.4.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/22/de/48c59722572767841493b26183a0d1cc411d54fd759c5607c4590b6563a6/tomli-2.4.1.tar.gz", hash = "sha256:7c7e1a961a0b2f2472c1ac5b69affa0ae1132c39adcb67aba98568702b9cc23f", size = 17543, upload-time = "2026-03-25T20:22:03.828Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f4/11/db3d5885d8528263d8adc260bb2d28ebf1270b96e98f0e0268d32b8d9900/tomli-2.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f8f0fc26ec2cc2b965b7a3b87cd19c5c6b8c5e5f436b984e85f486d652285c30", size = 154704, upload-time = "2026-03-25T20:21:10.473Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/f7/675db52c7e46064a9aa928885a9b20f4124ecb9bc2e1ce74c9106648d202/tomli-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4ab97e64ccda8756376892c53a72bd1f964e519c77236368527f758fbc36a53a", size = 149454, upload-time = "2026-03-25T20:21:12.036Z" }, + { url = "https://files.pythonhosted.org/packages/61/71/81c50943cf953efa35bce7646caab3cf457a7d8c030b27cfb40d7235f9ee/tomli-2.4.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:96481a5786729fd470164b47cdb3e0e58062a496f455ee41b4403be77cb5a076", size = 237561, upload-time = "2026-03-25T20:21:13.098Z" }, + { url = "https://files.pythonhosted.org/packages/48/c1/f41d9cb618acccca7df82aaf682f9b49013c9397212cb9f53219e3abac37/tomli-2.4.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:5a881ab208c0baf688221f8cecc5401bd291d67e38a1ac884d6736cbcd8247e9", size = 243824, upload-time = "2026-03-25T20:21:14.569Z" }, + { url = "https://files.pythonhosted.org/packages/22/e4/5a816ecdd1f8ca51fb756ef684b90f2780afc52fc67f987e3c61d800a46d/tomli-2.4.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:47149d5bd38761ac8be13a84864bf0b7b70bc051806bc3669ab1cbc56216b23c", size = 242227, upload-time = "2026-03-25T20:21:15.712Z" }, + { url = "https://files.pythonhosted.org/packages/6b/49/2b2a0ef529aa6eec245d25f0c703e020a73955ad7edf73e7f54ddc608aa5/tomli-2.4.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ec9bfaf3ad2df51ace80688143a6a4ebc09a248f6ff781a9945e51937008fcbc", size = 247859, upload-time = "2026-03-25T20:21:17.001Z" }, + { url = "https://files.pythonhosted.org/packages/83/bd/6c1a630eaca337e1e78c5903104f831bda934c426f9231429396ce3c3467/tomli-2.4.1-cp311-cp311-win32.whl", hash = "sha256:ff2983983d34813c1aeb0fa89091e76c3a22889ee83ab27c5eeb45100560c049", size = 97204, upload-time = "2026-03-25T20:21:18.079Z" }, + { url = "https://files.pythonhosted.org/packages/42/59/71461df1a885647e10b6bb7802d0b8e66480c61f3f43079e0dcd315b3954/tomli-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:5ee18d9ebdb417e384b58fe414e8d6af9f4e7a0ae761519fb50f721de398dd4e", size = 108084, upload-time = "2026-03-25T20:21:18.978Z" }, + { url = "https://files.pythonhosted.org/packages/b8/83/dceca96142499c069475b790e7913b1044c1a4337e700751f48ed723f883/tomli-2.4.1-cp311-cp311-win_arm64.whl", hash = "sha256:c2541745709bad0264b7d4705ad453b76ccd191e64aa6f0fc66b69a293a45ece", size = 95285, upload-time = "2026-03-25T20:21:20.309Z" }, + { url = "https://files.pythonhosted.org/packages/c1/ba/42f134a3fe2b370f555f44b1d72feebb94debcab01676bf918d0cb70e9aa/tomli-2.4.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c742f741d58a28940ce01d58f0ab2ea3ced8b12402f162f4d534dfe18ba1cd6a", size = 155924, upload-time = "2026-03-25T20:21:21.626Z" }, + { url = "https://files.pythonhosted.org/packages/dc/c7/62d7a17c26487ade21c5422b646110f2162f1fcc95980ef7f63e73c68f14/tomli-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:7f86fd587c4ed9dd76f318225e7d9b29cfc5a9d43de44e5754db8d1128487085", size = 150018, upload-time = "2026-03-25T20:21:23.002Z" }, + { url = "https://files.pythonhosted.org/packages/5c/05/79d13d7c15f13bdef410bdd49a6485b1c37d28968314eabee452c22a7fda/tomli-2.4.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ff18e6a727ee0ab0388507b89d1bc6a22b138d1e2fa56d1ad494586d61d2eae9", size = 244948, upload-time = "2026-03-25T20:21:24.04Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/90/d62ce007a1c80d0b2c93e02cab211224756240884751b94ca72df8a875ca/tomli-2.4.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:136443dbd7e1dee43c68ac2694fde36b2849865fa258d39bf822c10e8068eac5", size = 253341, upload-time = "2026-03-25T20:21:25.177Z" }, + { url = "https://files.pythonhosted.org/packages/1a/7e/caf6496d60152ad4ed09282c1885cca4eea150bfd007da84aea07bcc0a3e/tomli-2.4.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:5e262d41726bc187e69af7825504c933b6794dc3fbd5945e41a79bb14c31f585", size = 248159, upload-time = "2026-03-25T20:21:26.364Z" }, + { url = "https://files.pythonhosted.org/packages/99/e7/c6f69c3120de34bbd882c6fba7975f3d7a746e9218e56ab46a1bc4b42552/tomli-2.4.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5cb41aa38891e073ee49d55fbc7839cfdb2bc0e600add13874d048c94aadddd1", size = 253290, upload-time = "2026-03-25T20:21:27.46Z" }, + { url = "https://files.pythonhosted.org/packages/d6/2f/4a3c322f22c5c66c4b836ec58211641a4067364f5dcdd7b974b4c5da300c/tomli-2.4.1-cp312-cp312-win32.whl", hash = "sha256:da25dc3563bff5965356133435b757a795a17b17d01dbc0f42fb32447ddfd917", size = 98141, upload-time = "2026-03-25T20:21:28.492Z" }, + { url = "https://files.pythonhosted.org/packages/24/22/4daacd05391b92c55759d55eaee21e1dfaea86ce5c571f10083360adf534/tomli-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:52c8ef851d9a240f11a88c003eacb03c31fc1c9c4ec64a99a0f922b93874fda9", size = 108847, upload-time = "2026-03-25T20:21:29.386Z" }, + { url = "https://files.pythonhosted.org/packages/68/fd/70e768887666ddd9e9f5d85129e84910f2db2796f9096aa02b721a53098d/tomli-2.4.1-cp312-cp312-win_arm64.whl", hash = "sha256:f758f1b9299d059cc3f6546ae2af89670cb1c4d48ea29c3cacc4fe7de3058257", size = 95088, upload-time = "2026-03-25T20:21:30.677Z" }, + { url = "https://files.pythonhosted.org/packages/07/06/b823a7e818c756d9a7123ba2cda7d07bc2dd32835648d1a7b7b7a05d848d/tomli-2.4.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:36d2bd2ad5fb9eaddba5226aa02c8ec3fa4f192631e347b3ed28186d43be6b54", size = 155866, upload-time = "2026-03-25T20:21:31.65Z" }, + { url = "https://files.pythonhosted.org/packages/14/6f/12645cf7f08e1a20c7eb8c297c6f11d31c1b50f316a7e7e1e1de6e2e7b7e/tomli-2.4.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:eb0dc4e38e6a1fd579e5d50369aa2e10acfc9cace504579b2faabb478e76941a", size = 149887, upload-time = "2026-03-25T20:21:33.028Z" }, + { url = "https://files.pythonhosted.org/packages/5c/e0/90637574e5e7212c09099c67ad349b04ec4d6020324539297b634a0192b0/tomli-2.4.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c7f2c7f2b9ca6bdeef8f0fa897f8e05085923eb091721675170254cbc5b02897", size = 243704, upload-time = "2026-03-25T20:21:34.51Z" }, + { url = "https://files.pythonhosted.org/packages/10/8f/d3ddb16c5a4befdf31a23307f72828686ab2096f068eaf56631e136c1fdd/tomli-2.4.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f3c6818a1a86dd6dca7ddcaaf76947d5ba31aecc28cb1b67009a5877c9a64f3f", size = 251628, upload-time = "2026-03-25T20:21:36.012Z" }, + { url = "https://files.pythonhosted.org/packages/e3/f1/dbeeb9116715abee2485bf0a12d07a8f31af94d71608c171c45f64c0469d/tomli-2.4.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:d312ef37c91508b0ab2cee7da26ec0b3ed2f03ce12bd87a588d771ae15dcf82d", size = 247180, upload-time = "2026-03-25T20:21:37.136Z" }, + { url = 
"https://files.pythonhosted.org/packages/d3/74/16336ffd19ed4da28a70959f92f506233bd7cfc2332b20bdb01591e8b1d1/tomli-2.4.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:51529d40e3ca50046d7606fa99ce3956a617f9b36380da3b7f0dd3dd28e68cb5", size = 251674, upload-time = "2026-03-25T20:21:38.298Z" }, + { url = "https://files.pythonhosted.org/packages/16/f9/229fa3434c590ddf6c0aa9af64d3af4b752540686cace29e6281e3458469/tomli-2.4.1-cp313-cp313-win32.whl", hash = "sha256:2190f2e9dd7508d2a90ded5ed369255980a1bcdd58e52f7fe24b8162bf9fedbd", size = 97976, upload-time = "2026-03-25T20:21:39.316Z" }, + { url = "https://files.pythonhosted.org/packages/6a/1e/71dfd96bcc1c775420cb8befe7a9d35f2e5b1309798f009dca17b7708c1e/tomli-2.4.1-cp313-cp313-win_amd64.whl", hash = "sha256:8d65a2fbf9d2f8352685bc1364177ee3923d6baf5e7f43ea4959d7d8bc326a36", size = 108755, upload-time = "2026-03-25T20:21:40.248Z" }, + { url = "https://files.pythonhosted.org/packages/83/7a/d34f422a021d62420b78f5c538e5b102f62bea616d1d75a13f0a88acb04a/tomli-2.4.1-cp313-cp313-win_arm64.whl", hash = "sha256:4b605484e43cdc43f0954ddae319fb75f04cc10dd80d830540060ee7cd0243cd", size = 95265, upload-time = "2026-03-25T20:21:41.219Z" }, + { url = "https://files.pythonhosted.org/packages/3c/fb/9a5c8d27dbab540869f7c1f8eb0abb3244189ce780ba9cd73f3770662072/tomli-2.4.1-cp314-cp314-macosx_10_15_x86_64.whl", hash = "sha256:fd0409a3653af6c147209d267a0e4243f0ae46b011aa978b1080359fddc9b6cf", size = 155726, upload-time = "2026-03-25T20:21:42.23Z" }, + { url = "https://files.pythonhosted.org/packages/62/05/d2f816630cc771ad836af54f5001f47a6f611d2d39535364f148b6a92d6b/tomli-2.4.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:a120733b01c45e9a0c34aeef92bf0cf1d56cfe81ed9d47d562f9ed591a9828ac", size = 149859, upload-time = "2026-03-25T20:21:43.386Z" }, + { url = "https://files.pythonhosted.org/packages/ce/48/66341bdb858ad9bd0ceab5a86f90eddab127cf8b046418009f2125630ecb/tomli-2.4.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:559db847dc486944896521f68d8190be1c9e719fced785720d2216fe7022b662", size = 244713, upload-time = "2026-03-25T20:21:44.474Z" }, + { url = "https://files.pythonhosted.org/packages/df/6d/c5fad00d82b3c7a3ab6189bd4b10e60466f22cfe8a08a9394185c8a8111c/tomli-2.4.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01f520d4f53ef97964a240a035ec2a869fe1a37dde002b57ebc4417a27ccd853", size = 252084, upload-time = "2026-03-25T20:21:45.62Z" }, + { url = "https://files.pythonhosted.org/packages/00/71/3a69e86f3eafe8c7a59d008d245888051005bd657760e96d5fbfb0b740c2/tomli-2.4.1-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:7f94b27a62cfad8496c8d2513e1a222dd446f095fca8987fceef261225538a15", size = 247973, upload-time = "2026-03-25T20:21:46.937Z" }, + { url = "https://files.pythonhosted.org/packages/67/50/361e986652847fec4bd5e4a0208752fbe64689c603c7ae5ea7cb16b1c0ca/tomli-2.4.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:ede3e6487c5ef5d28634ba3f31f989030ad6af71edfb0055cbbd14189ff240ba", size = 256223, upload-time = "2026-03-25T20:21:48.467Z" }, + { url = "https://files.pythonhosted.org/packages/8c/9a/b4173689a9203472e5467217e0154b00e260621caa227b6fa01feab16998/tomli-2.4.1-cp314-cp314-win32.whl", hash = "sha256:3d48a93ee1c9b79c04bb38772ee1b64dcf18ff43085896ea460ca8dec96f35f6", size = 98973, upload-time = "2026-03-25T20:21:49.526Z" }, + { url = 
"https://files.pythonhosted.org/packages/14/58/640ac93bf230cd27d002462c9af0d837779f8773bc03dee06b5835208214/tomli-2.4.1-cp314-cp314-win_amd64.whl", hash = "sha256:88dceee75c2c63af144e456745e10101eb67361050196b0b6af5d717254dddf7", size = 109082, upload-time = "2026-03-25T20:21:50.506Z" }, + { url = "https://files.pythonhosted.org/packages/d5/2f/702d5e05b227401c1068f0d386d79a589bb12bf64c3d2c72ce0631e3bc49/tomli-2.4.1-cp314-cp314-win_arm64.whl", hash = "sha256:b8c198f8c1805dc42708689ed6864951fd2494f924149d3e4bce7710f8eb5232", size = 96490, upload-time = "2026-03-25T20:21:51.474Z" }, + { url = "https://files.pythonhosted.org/packages/45/4b/b877b05c8ba62927d9865dd980e34a755de541eb65fffba52b4cc495d4d2/tomli-2.4.1-cp314-cp314t-macosx_10_15_x86_64.whl", hash = "sha256:d4d8fe59808a54658fcc0160ecfb1b30f9089906c50b23bcb4c69eddc19ec2b4", size = 164263, upload-time = "2026-03-25T20:21:52.543Z" }, + { url = "https://files.pythonhosted.org/packages/24/79/6ab420d37a270b89f7195dec5448f79400d9e9c1826df982f3f8e97b24fd/tomli-2.4.1-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:7008df2e7655c495dd12d2a4ad038ff878d4ca4b81fccaf82b714e07eae4402c", size = 160736, upload-time = "2026-03-25T20:21:53.674Z" }, + { url = "https://files.pythonhosted.org/packages/02/e0/3630057d8eb170310785723ed5adcdfb7d50cb7e6455f85ba8a3deed642b/tomli-2.4.1-cp314-cp314t-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1d8591993e228b0c930c4bb0db464bdad97b3289fb981255d6c9a41aedc84b2d", size = 270717, upload-time = "2026-03-25T20:21:55.129Z" }, + { url = "https://files.pythonhosted.org/packages/7a/b4/1613716072e544d1a7891f548d8f9ec6ce2faf42ca65acae01d76ea06bb0/tomli-2.4.1-cp314-cp314t-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:734e20b57ba95624ecf1841e72b53f6e186355e216e5412de414e3c51e5e3c41", size = 278461, upload-time = "2026-03-25T20:21:56.228Z" }, + { url = "https://files.pythonhosted.org/packages/05/38/30f541baf6a3f6df77b3df16b01ba319221389e2da59427e221ef417ac0c/tomli-2.4.1-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:8a650c2dbafa08d42e51ba0b62740dae4ecb9338eefa093aa5c78ceb546fcd5c", size = 274855, upload-time = "2026-03-25T20:21:57.653Z" }, + { url = "https://files.pythonhosted.org/packages/77/a3/ec9dd4fd2c38e98de34223b995a3b34813e6bdadf86c75314c928350ed14/tomli-2.4.1-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:504aa796fe0569bb43171066009ead363de03675276d2d121ac1a4572397870f", size = 283144, upload-time = "2026-03-25T20:21:59.089Z" }, + { url = "https://files.pythonhosted.org/packages/ef/be/605a6261cac79fba2ec0c9827e986e00323a1945700969b8ee0b30d85453/tomli-2.4.1-cp314-cp314t-win32.whl", hash = "sha256:b1d22e6e9387bf4739fbe23bfa80e93f6b0373a7f1b96c6227c32bef95a4d7a8", size = 108683, upload-time = "2026-03-25T20:22:00.214Z" }, + { url = "https://files.pythonhosted.org/packages/12/64/da524626d3b9cc40c168a13da8335fe1c51be12c0a63685cc6db7308daae/tomli-2.4.1-cp314-cp314t-win_amd64.whl", hash = "sha256:2c1c351919aca02858f740c6d33adea0c5deea37f9ecca1cc1ef9e884a619d26", size = 121196, upload-time = "2026-03-25T20:22:01.169Z" }, + { url = "https://files.pythonhosted.org/packages/5a/cd/e80b62269fc78fc36c9af5a6b89c835baa8af28ff5ad28c7028d60860320/tomli-2.4.1-cp314-cp314t-win_arm64.whl", hash = "sha256:eab21f45c7f66c13f2a9e0e1535309cee140182a9cdae1e041d02e47291e8396", size = 100393, upload-time = "2026-03-25T20:22:02.137Z" }, + { url = 
"https://files.pythonhosted.org/packages/7b/61/cceae43728b7de99d9b847560c262873a1f6c98202171fd5ed62640b494b/tomli-2.4.1-py3-none-any.whl", hash = "sha256:0d85819802132122da43cb86656f8d1f8c6587d54ae7dcaf30e90533028b49fe", size = 14583, upload-time = "2026-03-25T20:22:03.012Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] From 95c72d76d3f41303e407997009792db77a7473ec Mon Sep 17 00:00:00 2001 From: ajspig <46900795+ajspig@users.noreply.github.com> Date: Tue, 7 Apr 2026 10:45:00 -0400 Subject: [PATCH 03/46] docs: add Zo Computer integration page (#504) --- docs/docs.json | 3 +- docs/v3/guides/integrations/zo-computer.mdx | 134 ++++++++++++++++++++ docs/v3/guides/overview.mdx | 3 + 3 files changed, 139 insertions(+), 1 deletion(-) create mode 100644 docs/v3/guides/integrations/zo-computer.mdx diff --git a/docs/docs.json b/docs/docs.json index 05053e447..02f7f27fe 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -107,7 +107,8 @@ "v3/guides/integrations/mcp", "v3/guides/integrations/n8n", "v3/guides/integrations/openclaw", - "v3/guides/integrations/hermes" + "v3/guides/integrations/hermes", + "v3/guides/integrations/zo-computer" ] }, { diff --git a/docs/v3/guides/integrations/zo-computer.mdx b/docs/v3/guides/integrations/zo-computer.mdx new file mode 100644 index 000000000..3e1626a92 --- /dev/null +++ b/docs/v3/guides/integrations/zo-computer.mdx @@ -0,0 +1,134 @@ +--- +title: "Zo Computer" +icon: 'bolt' +description: "Add persistent memory to Zo Computer skills using Honcho" +sidebarTitle: 'Zo Computer' +--- + +[Zo Computer](https://zo.computer) is a cloud AI platform where users build reusable workflows called skills. The Honcho memory skill gives any Zo workflow persistent memory — saving conversations, answering questions about past interactions, and injecting context into LLM prompts. + + +The full source code is available on [GitHub](https://github.com/plastic-labs/honcho/tree/main/examples/zo) with working tests and Zo marketplace submission instructions. 
+ + +## What It Does + +The skill provides three tools that any Zo workflow can call: + +| Tool | Description | +| ---- | ----------- | +| `save_memory` | Save user or assistant messages to a Honcho session | +| `query_memory` | Ask natural language questions about what Honcho remembers | +| `get_context` | Retrieve conversation history formatted for LLM use (OpenAI message format) | + +## Setup + +Install dependencies: + +```bash +pip install honcho-ai python-dotenv +``` + +Set your environment variables: + +```bash +HONCHO_API_KEY=your-api-key +HONCHO_WORKSPACE_ID=default # optional, defaults to "default" +``` + +Get your API key at [app.honcho.dev](https://app.honcho.dev). + +## Quick Start + +```python +from tools.save_memory import save_memory +from tools.query_memory import query_memory +from tools.get_context import get_context + +# Save conversation turns +save_memory("alice", "I love hiking in the mountains", "user", "session-1") +save_memory("alice", "That sounds wonderful!", "assistant", "session-1") + +# Query what Honcho remembers +answer = query_memory("alice", "What are my hobbies?", "session-1") +print(answer) # "Alice enjoys hiking in the mountains." + +# Get context ready for an LLM call +messages = get_context("alice", "session-1", "assistant", tokens=4000) +# Returns [{"role": "user", "content": "..."}, ...] +``` + +## Saving Messages + +`save_memory` creates peers and sessions automatically on first use and persists the message. + +```python +save_memory( + user_id="alice", # unique user identifier + content="Hello!", # message text + role="user", # "user" or "assistant" + session_id="session-1", # conversation identifier + assistant_id="assistant", # optional, defaults to "assistant" +) +``` + +## Querying Memory + +`query_memory` uses Honcho's Dialectic API to answer natural language questions grounded in stored memory. + +```python +answer = query_memory( + user_id="alice", + query="What are my interests?", + session_id="session-1", # optional — omit to query global memory +) +``` + +## Retrieving Context + +`get_context` fetches recent conversation history within a token budget and returns it in OpenAI message format — ready to pass directly to an LLM. + +```python +messages = get_context( + user_id="alice", + session_id="session-1", + assistant_id="assistant", + tokens=4000, # max tokens to include +) +# Use directly: llm.chat.completions.create(messages=messages) +``` + +## Concept Mapping + +| Zo Computer | Honcho | +| --- | --- | +| Account | Workspace | +| User | Peer | +| Conversation | Session | +| Message | Message | + +## Publishing to the Zo Marketplace + +To submit the skill to the [Zo Skills Registry](https://github.com/zocomputer/skills): + +1. Fork the `zocomputer/skills` repository +2. Copy the `examples/zo` directory into `/Community/honcho-memory/` in your fork +3. Run `bun validate` to check the skill format +4. 
Submit a pull request + +## Next Steps + + + + Full source, tests, and SKILL.md for the Zo integration + + + Understand peers, sessions, and how memory works + + + Learn more about querying peer memory with the Dialectic API + + + Details on retrieving and formatting conversation context + + diff --git a/docs/v3/guides/overview.mdx b/docs/v3/guides/overview.mdx index e86688e8d..71f31aefb 100644 --- a/docs/v3/guides/overview.mdx +++ b/docs/v3/guides/overview.mdx @@ -59,6 +59,9 @@ Use Honcho as a memory layer in your agent orchestration stack: Give CrewAI agents memory that persists across sessions + + Persistent memory skill for Zo Computer AI workflows + Build intelligent automation workflows with persistent memory From ff116b0601b9d4095b7787f77c6413b97415cf06 Mon Sep 17 00:00:00 2001 From: Eri Barrett Date: Tue, 7 Apr 2026 22:49:57 -0400 Subject: [PATCH 04/46] Self-hosting docs overhaul: single-provider default, restructured config guide (#510) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Inconsistencies in Docs, health endpoint, troubleshooting guide * fix: (docs) maintain consistency on postgres db name * chore: (docs) update v2 contributing docs with updates db paths * docs: overhaul self-hosting docs for provider-agnostic setup - .env.template: lead with provider options (custom, vllm, google, anthropic, openai, groq) instead of baking in vendor-specific keys. All provider/model settings commented out so server fails fast until configured. Separate endpoint config from per-feature provider+model from tuning knobs. - docker-compose.yml.example: fix healthcheck -d honcho -> -d postgres to match POSTGRES_DB=postgres. - config.toml.example: reorder and document LLM key section with OpenRouter and vLLM examples. - self-hosting.mdx: replace multi-vendor key table with provider options table. Add examples for OpenRouter, vLLM/Ollama, and direct vendor keys. Remove duplicated key lists from Docker/manual setup sections. - configuration.mdx: replace scattered provider docs with provider types table. Fix Docker Compose snippet to match actual compose file. Note code defaults as fallback, not recommended path. - troubleshooting.mdx: add alternative provider issues section (custom provider config, model name format, Docker localhost, structured output failures). * docs: add Docker build troubleshooting for permission errors - Document BuildKit requirement (RUN --mount syntax) - AppArmor/SELinux blocking Docker builds on Linux - Volume mount UID mismatch between host and container app user - Note in self-hosting docs that Docker path builds from source * docs: reframe self-hosting as contributor/dev path, point to cloud service * Revert "docs: reframe self-hosting as contributor/dev path, point to cloud service" This reverts commit 3e766eb1a9e7febec11b402bc3963fb525de189a. * docs: add production compose, model guidance, thinking budget docs - Add docker-compose.prod.yml for VM/server deployment: no source mounts, restart policies, 127.0.0.1-bound ports, cache enabled - Add model tier guidance and community quick-start link to self-hosting - Document THINKING_BUDGET_TOKENS gotcha for non-Anthropic providers - Add reverse proxy examples (Caddy + nginx) to production section - Add backup/restore commands to production considerations * docs: simplify self-hosting to single provider, restructure config guide Self-hosting page now defaults to one OpenAI-compatible endpoint with one model for all features. 
Moved model tiers, alternative providers, and per-feature tuning into the configuration guide. Eliminated duplicate config priority sections, dev/prod split, and redundant TOML examples. * docs: merge compose files, restore provider/model to feature sections in .env.template Single docker-compose.yml.example with dev sections commented out. Moved PROVIDER and MODEL back alongside each feature in .env.template so settings stay colocated with their module. Updated self-hosting docs to reference single compose file. * fix: broken anchor links, redundant migration step, minor inconsistencies Fix 4 broken internal links (#llm-provider-setup, #llm-api-keys, #which-api-keys-do-i-need, #alternative-providers) to point to correct headings. Remove redundant Docker migration step (entrypoint already runs alembic). Fix cache URL missing ?suppress=true in reference config. Fix uv install command to use official method. * docs: env template ready to use, simplify self-hosting flow .env.template now has provider/model lines uncommented with placeholder values — user just sets endpoint, key, and model name. Thinking budgets default to 0 for non-Anthropic providers. Self-hosting page: removed 30-line env var wall, LLM setup now points to the template. Merged duplicate verify sections. Removed api_key from SDK examples (auth off by default). * docs: reorder next steps, configuration guide first * fix: default embedding provider to openrouter for single-endpoint setup Without this, embeddings default to openai which requires a separate LLM_OPENAI_API_KEY. Setting to openrouter routes embeddings through the same OpenAI-compatible endpoint as everything else. * fix: review issues — hermes page, thinking budget, production wording Hermes integration page: replaced inline Docker/manual setup with link to self-hosting guide, added elkimek community link. Removed old env var names (OPENAI_API_KEY without LLM_ prefix). Troubleshooting: removed "or 1" from thinking budget guidance. Self-hosting: softened "production-ready" to "production-oriented" since auth is disabled by default. * docs: model examples in template, expanded LLM setup, better verify flow .env.template: added "e.g. google/gemini-2.5-flash" hints next to model placeholders so users know the expected format. Self-hosting: expanded LLM Setup to show the 3 things users need to set (endpoint, key, model name) with find-replace tip. Added build time note, deriver log check, and real smoke test (create workspace) to verify section. Health check now notes it doesn't verify DB/LLM. * fix: smoke test uses v3 API path, not v1 * docs: clarify deriver metrics port vs Prometheus host port * fix: remove deprecated memoryMode from hermes config example * docs: update hermes page to match current memory provider config Updated config to match hermes-agent docs: removed apiKey (not needed for self-hosted), added hermes memory setup CLI command, added config fields table (recallMode, writeFrequency, sessionStrategy, etc.). Better verification tests: store-and-recall across sessions, direct tool calling test. Links to upstream hermes docs for full field list. * fix: invalid THINKING_BUDGET_TOKENS=0 and missing docker/ in image Comment out THINKING_BUDGET_TOKENS=0 in .env.template — deriver, summary, and dream validators require gt=0. Dialectic levels also commented out since non-thinking models don't need the override. Add COPY for docker/ directory in Dockerfile so entrypoint.sh is available when docker-compose.yml.example references it. 
* chore: Additional troubleshooting step

---------

Co-authored-by: Vineeth Voruganti <13438633+VVoruganti@users.noreply.github.com>
---
 .env.template                            | 175 +++--
 CONTRIBUTING.md                          |   2 +-
 Dockerfile                               |   3 +-
 README.md                                |  18 +-
 config.toml.example                      |  22 +-
 docker-compose.yml.example               | 106 +--
 docs/docs.json                           |   3 +-
 docs/v2/contributing/configuration.mdx   |   6 +-
 docs/v2/contributing/self-hosting.mdx    |  11 +-
 docs/v3/contributing/configuration.mdx   | 804 +++++++----------------
 docs/v3/contributing/self-hosting.mdx    | 262 +++++---
 docs/v3/contributing/troubleshooting.mdx | 299 +++++++++
 docs/v3/guides/integrations/hermes.mdx   | 116 +---
 src/main.py                              |   6 +
 14 files changed, 911 insertions(+), 922 deletions(-)
 create mode 100644 docs/v3/contributing/troubleshooting.mdx

diff --git a/.env.template b/.env.template
index ed456e915..56a000abc 100644
--- a/.env.template
+++ b/.env.template
@@ -57,160 +57,135 @@ AUTH_USE_AUTH=false
 # AUTH_JWT_SECRET=your-secret-key-here
 
 # =============================================================================
-# LLM API Keys (REQUIRED for full functionality)
+# LLM Provider (REQUIRED)
 # =============================================================================
-# OpenAI API key for embeddings
-LLM_OPENAI_API_KEY=your-openai-api-key-here
-
-# Anthropic API key for dialectic and deriver functionality
-LLM_ANTHROPIC_API_KEY=your-anthropic-api-key-here
-
-# Google API key for summarization (if using Gemini)
-# LLM_GEMINI_API_KEY=your-google-api-key-here
-
-# Groq API key for query generation (if using Groq)
-# LLM_GROQ_API_KEY=your-groq-api-key-here
-
-# Base URL for OpenAI Compatible Requests if you want to use a different provider
-# LLM_OPENAI_COMPATIBLE_BASE_URL=
-# LLM_OPENAI_COMPATIBLE_API_KEY=
-
-# Separate vLLM endpoint (for local models)
-# LLM_VLLM_API_KEY=
-# LLM_VLLM_BASE_URL=
-
-# =============================================================================
-# LLM Configuration
-# =============================================================================
-# Global LLM settings
+# Honcho uses LLMs for memory extraction, summarization, dialectic chat, and
+# dream consolidation. The server will fail to start without a provider configured.
+#
+# Quick start: set your endpoint and API key below, then fill in the model name
+# on the pre-filled provider/model lines in each feature section below.
+# Any OpenAI-compatible endpoint works (OpenRouter, Together, Fireworks, etc.).
+# Models must support tool calling (function calling).
+#
+LLM_OPENAI_COMPATIBLE_BASE_URL=https://openrouter.ai/api/v1
+LLM_OPENAI_COMPATIBLE_API_KEY=your-api-key-here
+#
+# Provider options for each feature: custom, vllm, google, anthropic, openai, groq
+# "custom" routes through the OpenAI-compatible endpoint above.
+# Model name format depends on your provider (e.g., OpenRouter: vendor/model-name).
+#
+# ---- Alternative: vLLM self-hosted ------------------------------------------
+# LLM_VLLM_BASE_URL=http://localhost:8000/v1
+# LLM_VLLM_API_KEY=not-needed
+#
+# ---- Alternative: direct vendor keys (no endpoint needed) -------------------
+# LLM_GEMINI_API_KEY=
+# LLM_ANTHROPIC_API_KEY=
+# LLM_OPENAI_API_KEY=
+# LLM_GROQ_API_KEY=
+#
+# ---- General LLM settings ---------------------------------------------------
+# Embedding provider — defaults to openai (requires LLM_OPENAI_API_KEY).
+# Set to openrouter to route embeddings through your custom endpoint instead. 
+LLM_EMBEDDING_PROVIDER=openrouter # LLM_DEFAULT_MAX_TOKENS=2500 -# LLM_EMBEDDING_PROVIDER=openai -# LLM_MAX_TOOL_OUTPUT_CHARS=10000 # Max chars for tool output (~2500 tokens) -# LLM_MAX_MESSAGE_CONTENT_CHARS=2000 # Max chars per message in tool results +# LLM_MAX_TOOL_OUTPUT_CHARS=10000 +# LLM_MAX_MESSAGE_CONTENT_CHARS=2000 # ============================================================================= -# Deriver (Background Worker) Settings +# Deriver (Background Worker) # ============================================================================= # DERIVER_ENABLED=true +DERIVER_PROVIDER=custom +DERIVER_MODEL=your-model-here # e.g. google/gemini-2.5-flash +# DERIVER_THINKING_BUDGET_TOKENS=1024 # gt=0 required; omit for non-thinking models # DERIVER_WORKERS=1 # DERIVER_POLLING_SLEEP_INTERVAL_SECONDS=1.0 # DERIVER_STALE_SESSION_TIMEOUT_MINUTES=5 -# DERIVER_QUEUE_ERROR_RETENTION_SECONDS=2592000 # 30 days -# DERIVER_PROVIDER=google -# DERIVER_MODEL=gemini-2.5-flash-lite +# DERIVER_QUEUE_ERROR_RETENTION_SECONDS=2592000 # DERIVER_TEMPERATURE= # DERIVER_DEDUPLICATE=true # DERIVER_MAX_OUTPUT_TOKENS=4096 -# DERIVER_THINKING_BUDGET_TOKENS=1024 # DERIVER_LOG_OBSERVATIONS=false # DERIVER_MAX_INPUT_TOKENS=23000 # DERIVER_WORKING_REPRESENTATION_MAX_OBSERVATIONS=100 # DERIVER_REPRESENTATION_BATCH_MAX_TOKENS=1024 -# DERIVER_FLUSH_ENABLED=false # Bypass batch token threshold, process work immediately -# DERIVER_BACKUP_PROVIDER= -# DERIVER_BACKUP_MODEL= +# DERIVER_FLUSH_ENABLED=false # ============================================================================= -# Peer Card Configuration +# Peer Card # ============================================================================= # PEER_CARD_ENABLED=true # ============================================================================= -# Dialectic Settings +# Dialectic # ============================================================================= -# Global dialectic settings # DIALECTIC_MAX_OUTPUT_TOKENS=8192 # DIALECTIC_MAX_INPUT_TOKENS=100000 # DIALECTIC_HISTORY_TOKEN_LIMIT=8192 # DIALECTIC_SESSION_HISTORY_MAX_TOKENS=4096 - -# Per-level settings (reasoning_level parameter in API) -# Each level can have its own provider, model, thinking budget, tool iterations, and max output tokens -# MAX_OUTPUT_TOKENS is optional per level; if not set, uses global DIALECTIC_MAX_OUTPUT_TOKENS - -# Minimal level -# DIALECTIC_LEVELS__minimal__PROVIDER=google -# DIALECTIC_LEVELS__minimal__MODEL=gemini-2.5-flash-lite +# +# Per-level provider, model, and tuning: +DIALECTIC_LEVELS__minimal__PROVIDER=custom +DIALECTIC_LEVELS__minimal__MODEL=your-model-here # e.g. 
google/gemini-2.5-flash # DIALECTIC_LEVELS__minimal__THINKING_BUDGET_TOKENS=0 # DIALECTIC_LEVELS__minimal__MAX_TOOL_ITERATIONS=1 -# DIALECTIC_LEVELS__minimal__MAX_OUTPUT_TOKENS=250 # Reduced output for cost savings - -# Low level -# DIALECTIC_LEVELS__low__PROVIDER=google -# DIALECTIC_LEVELS__low__MODEL=gemini-2.5-flash-lite +# DIALECTIC_LEVELS__minimal__MAX_OUTPUT_TOKENS=250 +DIALECTIC_LEVELS__low__PROVIDER=custom +DIALECTIC_LEVELS__low__MODEL=your-model-here # DIALECTIC_LEVELS__low__THINKING_BUDGET_TOKENS=0 # DIALECTIC_LEVELS__low__MAX_TOOL_ITERATIONS=5 -# DIALECTIC_LEVELS__low__MAX_OUTPUT_TOKENS=8192 # Optional: override global default - -# Medium level -# DIALECTIC_LEVELS__medium__PROVIDER=anthropic -# DIALECTIC_LEVELS__medium__MODEL=claude-haiku-4-5 -# DIALECTIC_LEVELS__medium__THINKING_BUDGET_TOKENS=1024 +DIALECTIC_LEVELS__medium__PROVIDER=custom +DIALECTIC_LEVELS__medium__MODEL=your-model-here +# DIALECTIC_LEVELS__medium__THINKING_BUDGET_TOKENS=0 # DIALECTIC_LEVELS__medium__MAX_TOOL_ITERATIONS=2 -# DIALECTIC_LEVELS__medium__MAX_OUTPUT_TOKENS=8192 # Optional: override global default -# DIALECTIC_LEVELS__medium__TOOL_CHOICE= - -# High level -# DIALECTIC_LEVELS__high__PROVIDER=anthropic -# DIALECTIC_LEVELS__high__MODEL=claude-haiku-4-5 -# DIALECTIC_LEVELS__high__THINKING_BUDGET_TOKENS=1024 +DIALECTIC_LEVELS__high__PROVIDER=custom +DIALECTIC_LEVELS__high__MODEL=your-model-here +# DIALECTIC_LEVELS__high__THINKING_BUDGET_TOKENS=0 # DIALECTIC_LEVELS__high__MAX_TOOL_ITERATIONS=4 -# DIALECTIC_LEVELS__high__MAX_OUTPUT_TOKENS=8192 # Optional: override global default - -# Max level -# DIALECTIC_LEVELS__max__PROVIDER=anthropic -# DIALECTIC_LEVELS__max__MODEL=claude-haiku-4-5 -# DIALECTIC_LEVELS__max__THINKING_BUDGET_TOKENS=2048 +DIALECTIC_LEVELS__max__PROVIDER=custom +DIALECTIC_LEVELS__max__MODEL=your-model-here +# DIALECTIC_LEVELS__max__THINKING_BUDGET_TOKENS=0 # DIALECTIC_LEVELS__max__MAX_TOOL_ITERATIONS=10 -# DIALECTIC_LEVELS__max__MAX_OUTPUT_TOKENS=8192 # Optional: override global default -# Optional backup per level (must set both or neither): -# DIALECTIC_LEVELS__max__BACKUP_PROVIDER=google -# DIALECTIC_LEVELS__max__BACKUP_MODEL=gemini-2.5-pro # ============================================================================= -# Summary Settings +# Summary # ============================================================================= # SUMMARY_ENABLED=true +SUMMARY_PROVIDER=custom +SUMMARY_MODEL=your-model-here # e.g. google/gemini-2.5-flash +# SUMMARY_THINKING_BUDGET_TOKENS=512 # gt=0 required; omit for non-thinking models # SUMMARY_MESSAGES_PER_SHORT_SUMMARY=20 # SUMMARY_MESSAGES_PER_LONG_SUMMARY=60 -# SUMMARY_PROVIDER=google -# SUMMARY_MODEL=gemini-2.5-flash # SUMMARY_MAX_TOKENS_SHORT=1000 # SUMMARY_MAX_TOKENS_LONG=4000 -# SUMMARY_THINKING_BUDGET_TOKENS=512 -# SUMMARY_BACKUP_PROVIDER= -# SUMMARY_BACKUP_MODEL= # ============================================================================= -# Dream Settings +# Dream # ============================================================================= # DREAM_ENABLED=true +DREAM_PROVIDER=custom +DREAM_MODEL=your-model-here # e.g. 
google/gemini-2.5-flash +DREAM_DEDUCTION_MODEL=your-model-here +DREAM_INDUCTION_MODEL=your-model-here +# DREAM_THINKING_BUDGET_TOKENS=8192 # gt=0 required; omit for non-thinking models # DREAM_DOCUMENT_THRESHOLD=50 # DREAM_IDLE_TIMEOUT_MINUTES=60 # DREAM_MIN_HOURS_BETWEEN_DREAMS=8 # DREAM_ENABLED_TYPES=["omni"] -# DREAM_PROVIDER=anthropic -# DREAM_MODEL=claude-sonnet-4-20250514 # DREAM_MAX_OUTPUT_TOKENS=16384 -# DREAM_THINKING_BUDGET_TOKENS=8192 # DREAM_MAX_TOOL_ITERATIONS=20 # DREAM_HISTORY_TOKEN_LIMIT=16384 -# DREAM_BACKUP_PROVIDER= -# DREAM_BACKUP_MODEL= - -# Specialist models (use same provider as main model) -# DREAM_DEDUCTION_MODEL=claude-haiku-4-5 -# DREAM_INDUCTION_MODEL=claude-haiku-4-5 - -# Dream Surprisal Settings (Tree-based observation sampling for targeted reasoning) +# +# Surprisal sampling (advanced): # DREAM_SURPRISAL__ENABLED=false -# DREAM_SURPRISAL__TREE_TYPE=kdtree # Options: kdtree, balltree, rptree, covertree, lsh, graph, prototype -# DREAM_SURPRISAL__TREE_K=5 # Number of neighbors for kNN-based trees -# DREAM_SURPRISAL__SAMPLING_STRATEGY=recent # Options: recent, random, all -# DREAM_SURPRISAL__SAMPLE_SIZE=200 # Number of observations to sample for tree building -# DREAM_SURPRISAL__TOP_PERCENT_SURPRISAL=0.10 # Top percentage of observations (0.10 = top 10%) -# DREAM_SURPRISAL__MIN_HIGH_SURPRISAL_FOR_REPLACE=10 # Hybrid mode: min observations to replace standard questions -# DREAM_SURPRISAL__INCLUDE_LEVELS=["explicit","deductive"] # Observation levels to include +# DREAM_SURPRISAL__TREE_TYPE=kdtree +# DREAM_SURPRISAL__TREE_K=5 +# DREAM_SURPRISAL__SAMPLING_STRATEGY=recent +# DREAM_SURPRISAL__SAMPLE_SIZE=200 +# DREAM_SURPRISAL__TOP_PERCENT_SURPRISAL=0.10 +# DREAM_SURPRISAL__MIN_HIGH_SURPRISAL_FOR_REPLACE=10 +# DREAM_SURPRISAL__INCLUDE_LEVELS=["explicit","deductive"] # ============================================================================= # Webhook Settings diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 2297b658e..9817dd1e0 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -106,7 +106,7 @@ git commit -m "docs(readme): update installation instructions" ### Python Code Style - Follow [PEP 8](https://www.python.org/dev/peps/pep-0008/) style guidelines -- Use [Black](https://black.readthedocs.io/) for code formatting (we may add this to CI in the future) +- Use [ruff](https://docs.astral.sh/ruff/) for linting and code formatting - Use type hints where possible - Write docstrings for functions and classes using Google style docstrings diff --git a/Dockerfile b/Dockerfile index 4a68d6171..c116775e1 100644 --- a/Dockerfile +++ b/Dockerfile @@ -41,6 +41,7 @@ RUN addgroup --system app && adduser --system --group app && mkdir -p /tmp/uv-ca COPY --chown=app:app src/ /app/src/ COPY --chown=app:app migrations/ /app/migrations/ COPY --chown=app:app scripts/ /app/scripts/ +COPY --chown=app:app docker/ /app/docker/ COPY --chown=app:app alembic.ini /app/alembic.ini # Copy config files - this will copy config.toml if it exists, and config.toml.example COPY --chown=app:app config.toml* /app/ @@ -51,6 +52,6 @@ USER app EXPOSE 8000 HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ - CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/openapi.json')" || exit 1 + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1 CMD ["fastapi", "run", "--host", "0.0.0.0", "src/main.py"] diff --git a/README.md b/README.md index 1a909d0fb..990f7cc32 100644 --- a/README.md +++ b/README.md 
@@ -162,8 +162,8 @@ Server. Honcho is developed using [python](https://www.python.org/) and [uv](https://docs.astral.sh/uv/). -The minimum python version is `3.9` -The minimum uv version is `0.4.9` +The minimum python version is `3.10` +The minimum uv version is `0.5.0` ### Setup @@ -221,11 +221,11 @@ Below are the required configurations: ```env DB_CONNECTION_URI= # Connection uri for a postgres database (with postgresql+psycopg prefix) -# LLM Provider API Keys (at least one required depending on your configuration) -LLM_ANTHROPIC_API_KEY= # API Key for Anthropic (used for dialectic by default) -LLM_OPENAI_API_KEY= # API Key for OpenAI (optional, for embeddings if EMBED_MESSAGES=true) -LLM_GEMINI_API_KEY= # API Key for Google Gemini (used for summary/deriver by default) -LLM_GROQ_API_KEY= # API Key for Groq (used for query generation by default) +# LLM Provider API Keys +LLM_GEMINI_API_KEY= # API Key for Google Gemini (used for deriver, summary, and dialectic minimal/low by default) +LLM_ANTHROPIC_API_KEY= # API Key for Anthropic (used for dialectic medium/high/max and dream by default) +LLM_OPENAI_API_KEY= # API Key for OpenAI (used for embeddings when EMBED_MESSAGES=true) +LLM_GROQ_API_KEY= # API Key for Groq (optional) ``` > Note that the `DB_CONNECTION_URI` must have the prefix `postgresql+psycopg` to @@ -455,14 +455,14 @@ If you have this in `config.toml`: ```toml [db] -CONNECTION_URI = "postgresql://localhost/honcho_dev" +CONNECTION_URI = "postgresql+psycopg://localhost/honcho_dev" POOL_SIZE = 10 ``` You can override just the connection URI in production: ```bash -export DB_CONNECTION_URI="postgresql://prod-server/honcho_prod" +export DB_CONNECTION_URI="postgresql+psycopg://prod-server/honcho_prod" ``` The application will use the production connection URI while keeping the pool size from config.toml. diff --git a/config.toml.example b/config.toml.example index b6b407dcf..b9cf84c0e 100644 --- a/config.toml.example +++ b/config.toml.example @@ -55,17 +55,21 @@ EMBEDDING_PROVIDER = "openai" MAX_TOOL_OUTPUT_CHARS = 10000 # Max chars for tool output (~2500 tokens) MAX_MESSAGE_CONTENT_CHARS = 2000 # Max chars per message in tool results -# API Keys for LLM providers -# ANTHROPIC_API_KEY = "your-api-key" -# OPENAI_API_KEY = "your-api-key" +# API Keys for LLM providers (set the ones you need) +# GEMINI_API_KEY = "your-api-key" # Default: deriver, summary, dialectic minimal/low +# ANTHROPIC_API_KEY = "your-api-key" # Default: dialectic medium/high/max, dream +# OPENAI_API_KEY = "your-api-key" # Default: embeddings +# GROQ_API_KEY = "your-api-key" # Not used by default + +# OpenAI-compatible endpoint (OpenRouter, Together, Fireworks, LiteLLM, etc.) +# Set provider to "custom" in feature config to route calls through this endpoint. +# OPENAI_COMPATIBLE_BASE_URL = "https://openrouter.ai/api/v1" # OPENAI_COMPATIBLE_API_KEY = "your-api-key" -# GEMINI_API_KEY = "your-api-key" -# GROQ_API_KEY = "your-api-key" -# OPENAI_COMPATIBLE_BASE_URL = "your-base-url" -# Separate vLLM endpoint (for local models) -# VLLM_API_KEY = "your-api-key" -# VLLM_BASE_URL = "your-base-url" +# vLLM endpoint (for self-hosted models) +# Set provider to "vllm" in feature config to route calls through this endpoint. 
+# VLLM_BASE_URL = "http://localhost:8000/v1" +# VLLM_API_KEY = "not-needed" # Deriver settings [deriver] diff --git a/docker-compose.yml.example b/docker-compose.yml.example index 8bb8507fd..d59f1cee7 100644 --- a/docker-compose.yml.example +++ b/docker-compose.yml.example @@ -1,6 +1,15 @@ +# Honcho Docker Compose +# +# Usage: +# cp docker-compose.yml.example docker-compose.yml +# cp .env.template .env # edit with your provider config +# docker compose up -d --build +# +# By default, ports are bound to 127.0.0.1 (localhost only). +# For development, uncomment the source mounts and monitoring services below. + services: api: - image: honcho:latest build: context: . dockerfile: Dockerfile @@ -11,16 +20,20 @@ services: redis: condition: service_healthy ports: - - 8000:8000 - volumes: - - .:/app - - venv:/app/.venv + - "127.0.0.1:8000:8000" + # -- Development: mount source for live reload -- + # volumes: + # - .:/app + # - venv:/app/.venv environment: - DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@database:5432/postgres - CACHE_URL=redis://redis:6379/0?suppress=true + - CACHE_ENABLED=true env_file: - path: .env required: false + restart: unless-stopped + deriver: build: context: . @@ -31,27 +44,29 @@ services: condition: service_healthy redis: condition: service_healthy - volumes: - - .:/app - - venv:/app/.venv + # -- Development: mount source for live reload -- + # volumes: + # - .:/app + # - venv:/app/.venv environment: - DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@database:5432/postgres - CACHE_URL=redis://redis:6379/0?suppress=true - - METRICS_ENABLED=true + - CACHE_ENABLED=true env_file: - path: .env required: false + restart: unless-stopped + database: image: pgvector/pgvector:pg15 - restart: always + restart: unless-stopped ports: - - 5432:5432 - command: ["postgres", "-c", "max_connections=800"] + - "127.0.0.1:5432:5432" + command: ["postgres", "-c", "max_connections=200"] environment: - POSTGRES_DB=postgres - POSTGRES_USER=postgres - POSTGRES_PASSWORD=postgres - - POSTGRES_HOST_AUTH_METHOD=trust - PGDATA=/var/lib/postgresql/data/pgdata volumes: - ./database/init.sql:/docker-entrypoint-initdb.d/init.sql @@ -61,44 +76,49 @@ services: interval: 5s timeout: 5s retries: 5 + redis: image: redis:8.2 - restart: always + restart: unless-stopped ports: - - 6379:6379 + - "127.0.0.1:6379:6379" volumes: - - ./redis-data:/data + - redis-data:/data healthcheck: test: ["CMD-SHELL", "redis-cli ping"] interval: 5s timeout: 5s retries: 5 - prometheus: - image: prom/prometheus:v3.2.1 - ports: - - 9090:9090 - volumes: - - ./docker/prometheus.yml:/etc/prometheus/prometheus.yml:ro - - prometheus-data:/prometheus - depends_on: - api: - condition: service_started - grafana: - image: grafana/grafana:11.4.0 - ports: - - 3000:3000 - environment: - - GF_SECURITY_ADMIN_USER=admin - - GF_SECURITY_ADMIN_PASSWORD=admin - - GF_AUTH_ANONYMOUS_ENABLED=true - - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer - volumes: - - ./grafana-data:/var/lib/grafana - - ./docker/grafana-datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml:ro - depends_on: - prometheus: - condition: service_started + + # -- Development: monitoring stack (uncomment to enable) -- + # prometheus: + # image: prom/prometheus:v3.2.1 + # ports: + # - "127.0.0.1:9090:9090" + # volumes: + # - ./docker/prometheus.yml:/etc/prometheus/prometheus.yml:ro + # - prometheus-data:/prometheus + # depends_on: + # api: + # condition: service_started + # grafana: + # image: grafana/grafana:11.4.0 + # ports: + # - "127.0.0.1:3000:3000" + # 
environment: + # - GF_SECURITY_ADMIN_USER=admin + # - GF_SECURITY_ADMIN_PASSWORD=admin + # - GF_AUTH_ANONYMOUS_ENABLED=true + # - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer + # volumes: + # - ./docker/grafana-datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml:ro + # depends_on: + # prometheus: + # condition: service_started + volumes: pgdata: - venv: - prometheus-data: + redis-data: + # -- Development: uncomment if using source mounts -- + # venv: + # prometheus-data: diff --git a/docs/docs.json b/docs/docs.json index 02f7f27fe..a81217cc2 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -143,7 +143,8 @@ "group": "Self-Hosting", "pages": [ "v3/contributing/self-hosting", - "v3/contributing/configuration" + "v3/contributing/configuration", + "v3/contributing/troubleshooting" ] }, { diff --git a/docs/v2/contributing/configuration.mdx b/docs/v2/contributing/configuration.mdx index 59cf5a737..c172369c3 100644 --- a/docs/v2/contributing/configuration.mdx +++ b/docs/v2/contributing/configuration.mdx @@ -96,14 +96,14 @@ If you have this in `config.toml`: ```toml [db] -CONNECTION_URI = "postgresql://localhost/honcho_dev" +CONNECTION_URI = "postgresql+psycopg://localhost/honcho_dev" POOL_SIZE = 10 ``` You can override just the connection URI in production: ```bash -export DB_CONNECTION_URI="postgresql://prod-server/honcho_prod" +export DB_CONNECTION_URI="postgresql+psycopg://prod-server/honcho_prod" ``` The application will use the production connection URI while keeping the pool size from config.toml. @@ -149,7 +149,7 @@ LOCAL_METRICS_FILE=metrics.jsonl DB_CONNECTION_URI=postgresql+psycopg://username:password@host:port/database # Example for local development -DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/honcho +DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/postgres # Example for production DB_CONNECTION_URI=postgresql+psycopg://honcho_user:secure_password@db.example.com:5432/honcho_prod diff --git a/docs/v2/contributing/self-hosting.mdx b/docs/v2/contributing/self-hosting.mdx index 99a22528e..eda94e7fe 100644 --- a/docs/v2/contributing/self-hosting.mdx +++ b/docs/v2/contributing/self-hosting.mdx @@ -135,24 +135,21 @@ Download from [postgresql.org](https://www.postgresql.org/download/windows/) ```bash docker run --name honcho-db \ - -e POSTGRES_DB=honcho \ -e POSTGRES_USER=postgres \ -e POSTGRES_PASSWORD=postgres \ -p 5432:5432 \ -d pgvector/pgvector:pg15 ``` -### 3. Create Database and Enable Extensions +### 3. 
Enable Extensions -Connect to PostgreSQL and set up the database: +Connect to PostgreSQL and enable pgvector: ```bash # Connect to PostgreSQL psql -U postgres -# Create database and enable extensions -CREATE DATABASE honcho; -\c honcho +# Enable extensions on the default database CREATE EXTENSION IF NOT EXISTS vector; CREATE EXTENSION IF NOT EXISTS pg_trgm; \q @@ -170,7 +167,7 @@ Edit `.env` with your configuration: ```bash # Database connection -DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/honcho +DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/postgres # Optional API keys (required for LLM features) OPENAI_API_KEY=your-openai-api-key diff --git a/docs/v3/contributing/configuration.mdx b/docs/v3/contributing/configuration.mdx index 57b77e400..5ae99cc98 100644 --- a/docs/v3/contributing/configuration.mdx +++ b/docs/v3/contributing/configuration.mdx @@ -1,285 +1,145 @@ --- title: "Configuration Guide" -description: "Complete guide to configuring Honcho for development and production" +description: "Complete reference for configuring Honcho providers, features, and infrastructure" icon: "gear" --- -Honcho uses a flexible configuration system that supports both TOML files and environment variables. Configuration values are loaded in the following priority order (highest to lowest): + +Most users only need the setup from the [Self-Hosting Guide](./self-hosting#llm-setup). This page is the full reference for customizing providers, tuning features, and hardening your deployment. + -1. Environment variables (always take precedence) -2. `.env` file (for local development) -3. `config.toml` file (base configuration) -4. Default values +Honcho loads configuration in this priority order (highest wins): -## Recommended Configuration Approaches +1. **Environment variables** (always take precedence) +2. **`.env` file** +3. **`config.toml` file** +4. **Built-in defaults** -### Option 1: Environment Variables Only (Production) -- Use environment variables for all configuration -- No config files needed -- Ideal for containerized deployments (Docker, Kubernetes) -- Secrets managed by your deployment platform - -### Option 2: config.toml (Development/Simple Deployments) -- Use config.toml for base configuration -- Override sensitive values with environment variables -- Good for development and simple deployments - -### Option 3: Hybrid Approach -- Use config.toml for non-sensitive base settings -- Use .env file for sensitive values (API keys, secrets) -- Good for development teams - -### Option 4: .env Only (Local Development) -- Use .env file for all configuration -- Simple for local development -- Never commit .env files to version control - -## Configuration Methods - -### Using config.toml - -Copy the example configuration file to get started: +Use `.env` for secrets and overrides, `config.toml` for base settings. Or use environment variables exclusively — whatever fits your deployment. Copy the examples to get started: ```bash +cp .env.template .env cp config.toml.example config.toml ``` -Then modify the values as needed. 
The TOML file is organized into sections: - -- `[app]` - Application-level settings (log level, session limits, embedding settings, Langfuse integration, local metrics collection, namespace) -- `[db]` - Database connection and pool settings (connection URI, pool size, timeouts, connection recycling) -- `[auth]` - Authentication configuration (enable/disable auth, JWT secret) -- `[cache]` - Redis cache configuration (enable/disable caching, Redis URL, TTL settings, lock configuration for cache stampede prevention) -- `[llm]` - LLM provider API keys (Anthropic, OpenAI, Gemini, Groq, vLLM, OpenAI-compatible endpoints) and general LLM settings -- `[dialectic]` - Dialectic API configuration with per-level reasoning settings (minimal, low, medium, high, max) -- `[deriver]` - Background worker settings (worker count, polling intervals, queue management) and theory of mind configuration (model, tokens, observation limits) -- `[peer_card]` - Peer card generation settings (enable/disable) -- `[summary]` - Session summarization settings (frequency thresholds, provider, model, token limits for short and long summaries) -- `[dream]` - Dream processing configuration (enable/disable, thresholds, idle timeouts, dream types, LLM settings, surprisal sampling) -- `[webhook]` - Webhook configuration (webhook secret, workspace limits) -- `[metrics]` - Prometheus pull-based metrics settings -- `[telemetry]` - CloudEvents telemetry settings for analytics -- `[vector_store]` - Vector store configuration (pgvector, Turbopuffer, LanceDB) -- `[sentry]` - Error tracking and monitoring settings (enable/disable, DSN, environment, sample rates) - -### Using Environment Variables - -All configuration values can be overridden using environment variables. The environment variable names follow this pattern: - -- `{SECTION}_{KEY}` for nested settings -- Just `{KEY}` for app-level settings -- `{SECTION}__{NESTED}__{KEY}` for deeply nested settings (double underscore) - -Examples: - -- `DB_CONNECTION_URI` → `[db].CONNECTION_URI` -- `DB_POOL_SIZE` → `[db].POOL_SIZE` -- `AUTH_JWT_SECRET` → `[auth].JWT_SECRET` -- `DERIVER_MODEL` → `[deriver].MODEL` -- `LOG_LEVEL` (no section) → `[app].LOG_LEVEL` -- `DIALECTIC_LEVELS__minimal__PROVIDER` → `[dialectic.levels.minimal].PROVIDER` -- `DREAM_SURPRISAL__ENABLED` → `[dream.surprisal].ENABLED` - -### Configuration Priority - -When a configuration value is set in multiple places, Honcho uses this priority: - -1. **Environment variables** - Always take precedence -2. **.env file** - Loaded for local development -3. **config.toml** - Base configuration -4. **Default values** - Built-in defaults - -This allows you to: - -- Use `config.toml` for base configuration -- Override specific values with environment variables in production -- Use `.env` files for local development without modifying config.toml - -### Example - -If you have this in `config.toml`: - -```toml -[db] -CONNECTION_URI = "postgresql://localhost/honcho_dev" -POOL_SIZE = 10 -``` - -You can override just the connection URI in production: - -```bash -export DB_CONNECTION_URI="postgresql://prod-server/honcho_prod" -``` - -The application will use the production connection URI while keeping the pool size from config.toml. - -## Core Configuration - -### Application Settings +### Environment Variable Naming -Application-level settings control core behavior of the Honcho server including logging, session limits, message handling, and optional integrations. 
+All config values map to environment variables:

-**Basic Application Configuration:**
-```bash
-# Logging and server settings
-LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR, CRITICAL
+- `{SECTION}_{KEY}` for section settings (e.g., `DB_CONNECTION_URI` → `[db].CONNECTION_URI`)
+- `{KEY}` for app-level settings (e.g., `LOG_LEVEL` → `[app].LOG_LEVEL`)
+- `{SECTION}__{NESTED}__{KEY}` for deeply nested settings (double underscore, e.g., `DIALECTIC_LEVELS__minimal__PROVIDER`)

-# Session and context limits
-SESSION_OBSERVERS_LIMIT=10 # Maximum number of observers per session
-GET_CONTEXT_MAX_TOKENS=100000 # Maximum tokens for context retrieval
-MAX_MESSAGE_SIZE=25000 # Maximum message size in characters
-MAX_FILE_SIZE=5242880 # Maximum file size in bytes (5MB)
+## LLM Configuration

-# Embedding settings
-EMBED_MESSAGES=true # Enable vector embeddings for messages
-MAX_EMBEDDING_TOKENS=8192 # Maximum tokens per embedding
-MAX_EMBEDDING_TOKENS_PER_REQUEST=300000 # Batch embedding limit
+The [Self-Hosting Guide](./self-hosting#llm-setup) covers the basic setup: one OpenAI-compatible endpoint, one model for all features. This section covers recommended model tiers, using multiple providers, and per-feature tuning.

-# Global namespace (propagated to nested settings if not explicitly set)
-NAMESPACE=honcho
-```
+
+All Honcho agents (deriver, dialectic, dream) require tool calling. Your models must support the OpenAI tool calling format.
+

-**Optional Integrations:**
-```bash
-# Langfuse integration for LLM observability
-LANGFUSE_HOST=https://cloud.langfuse.com
-LANGFUSE_PUBLIC_KEY=your-langfuse-public-key
+### Choosing Models

-# Local metrics collection
-COLLECT_METRICS_LOCAL=false
-LOCAL_METRICS_FILE=metrics.jsonl
+When picking models, reliable tool use matters more than raw intelligence:

-# Reasoning traces (for debugging)
-REASONING_TRACES_FILE=traces.jsonl
-```
+| Tier | Example models | Use case | Notes |
+|---|---|---|---|
+| **Light** | Gemini 2.5 Flash, GLM-4.7-Flash | Deriver, summary, dialectic minimal/low | High throughput, cheap, reliable tool use |
+| **Medium** | Claude Haiku 4.5, Grok 4.1 Fast | Dialectic medium/high | Good reasoning + tool use balance |
+| **Heavy** | Claude Sonnet 4, GLM-5 | Dream, dialectic max | Best quality for rare/complex tasks |

-### Database Configuration
+You can mix providers freely — for example, use Gemini for the deriver and Claude for dreaming. 
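+
+As a minimal sketch of that mix (a hypothetical split; both models come from the tier table above, and both vendor keys must be set):
+
+```bash
+LLM_GEMINI_API_KEY=...
+LLM_ANTHROPIC_API_KEY=...
+
+# Light, high-volume work goes to Gemini
+DERIVER_PROVIDER=google
+DERIVER_MODEL=gemini-2.5-flash
+
+# Rare, heavy consolidation goes to Claude
+DREAM_PROVIDER=anthropic
+DREAM_MODEL=claude-sonnet-4-20250514
+```
+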
-**Required Database Settings:** -```bash -# PostgreSQL connection string (required) -DB_CONNECTION_URI=postgresql+psycopg://username:password@host:port/database +### Provider Types -# Example for local development -DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/honcho +| Provider value | What it connects to | Key env var | +|---|---|---| +| `custom` | Any OpenAI-compatible endpoint (OpenRouter, Together, Fireworks, LiteLLM, Ollama) | `LLM_OPENAI_COMPATIBLE_API_KEY` + `LLM_OPENAI_COMPATIBLE_BASE_URL` | +| `vllm` | vLLM self-hosted models | `LLM_VLLM_API_KEY` + `LLM_VLLM_BASE_URL` | +| `google` | Google Gemini (direct) | `LLM_GEMINI_API_KEY` | +| `anthropic` | Anthropic Claude (direct) | `LLM_ANTHROPIC_API_KEY` | +| `openai` | OpenAI (direct) | `LLM_OPENAI_API_KEY` | +| `groq` | Groq (direct) | `LLM_GROQ_API_KEY` | -# Example for production -DB_CONNECTION_URI=postgresql+psycopg://honcho_user:secure_password@db.example.com:5432/honcho_prod -``` +### Tiered Model Setup -**Database Pool Settings:** -```bash -# Connection pool configuration -DB_SCHEMA=public -DB_POOL_CLASS=default -DB_POOL_PRE_PING=true # Health check before reusing connections -DB_POOL_SIZE=10 -DB_MAX_OVERFLOW=20 -DB_POOL_TIMEOUT=30 # seconds (max 5 minutes) -DB_POOL_RECYCLE=300 # seconds (max 2 hours) -DB_POOL_USE_LIFO=true # Use LIFO for connection reuse -DB_SQL_DEBUG=false # Echo SQL queries -DB_TRACING=false # Enable query tracing -``` - -**Docker Compose for PostgreSQL:** -```yaml -# docker-compose.yml -version: '3.8' -services: - database: - image: pgvector/pgvector:pg15 - environment: - POSTGRES_USER: postgres - POSTGRES_PASSWORD: postgres - POSTGRES_DB: honcho - ports: - - "5432:5432" - volumes: - - postgres_data:/var/lib/postgresql/data - - ./init.sql:/docker-entrypoint-initdb.d/init.sql - -volumes: - postgres_data: -``` - -### Authentication Configuration +Once you're past initial setup, you can assign different models per feature for better cost/quality tradeoffs. This example uses OpenRouter with light/medium/heavy tiers: -**JWT Authentication:** ```bash -# Enable/disable authentication -AUTH_USE_AUTH=false # Set to true for production +LLM_OPENAI_COMPATIBLE_BASE_URL=https://openrouter.ai/api/v1 +LLM_OPENAI_COMPATIBLE_API_KEY=sk-or-v1-... + +# Light tier — high throughput, cheap +DERIVER_PROVIDER=custom +DERIVER_MODEL=google/gemini-2.5-flash-lite +SUMMARY_PROVIDER=custom +SUMMARY_MODEL=google/gemini-2.5-flash +DIALECTIC_LEVELS__minimal__PROVIDER=custom +DIALECTIC_LEVELS__minimal__MODEL=google/gemini-2.5-flash-lite +DIALECTIC_LEVELS__low__PROVIDER=custom +DIALECTIC_LEVELS__low__MODEL=google/gemini-2.5-flash-lite + +# Medium tier — better reasoning +DIALECTIC_LEVELS__medium__PROVIDER=custom +DIALECTIC_LEVELS__medium__MODEL=anthropic/claude-haiku-4-5 +DIALECTIC_LEVELS__high__PROVIDER=custom +DIALECTIC_LEVELS__high__MODEL=anthropic/claude-haiku-4-5 +DIALECTIC_LEVELS__max__PROVIDER=custom +DIALECTIC_LEVELS__max__MODEL=anthropic/claude-haiku-4-5 + +# Heavy tier — best quality for complex tasks +DREAM_PROVIDER=custom +DREAM_MODEL=anthropic/claude-sonnet-4-20250514 +DREAM_DEDUCTION_MODEL=anthropic/claude-haiku-4-5 +DREAM_INDUCTION_MODEL=anthropic/claude-haiku-4-5 +``` + +### Direct Vendor Keys + +Instead of an OpenAI-compatible proxy, you can use vendor APIs directly. 
Leave `PROVIDER` overrides unset and the code defaults route per feature: -# JWT settings (required if AUTH_USE_AUTH is true) -AUTH_JWT_SECRET=your-super-secret-jwt-key -``` - -**Generate JWT Secret:** ```bash -# Generate a secure JWT secret -python scripts/generate_jwt_secret.py +LLM_GEMINI_API_KEY=... # deriver, summary, dialectic minimal/low +LLM_ANTHROPIC_API_KEY=... # dialectic medium/high/max, dream +LLM_OPENAI_API_KEY=... # embeddings ``` -### Cache Configuration +### Self-Hosted (vLLM / Ollama) -Honcho supports Redis caching to improve performance by caching frequently accessed data like peers, sessions, and working representations. Caching also includes lock mechanisms to prevent cache stampede scenarios. - -**Redis Cache Settings:** ```bash -# Enable/disable Redis caching -CACHE_ENABLED=false # Set to true to enable caching - -# Redis connection -CACHE_URL=redis://localhost:6379/0?suppress=true - -# Cache namespace (inherits from app.NAMESPACE if not set) -CACHE_NAMESPACE=honcho +# vLLM +LLM_VLLM_BASE_URL=http://localhost:8000/v1 +LLM_VLLM_API_KEY=not-needed +DERIVER_PROVIDER=vllm +DERIVER_MODEL=your-model-name -# Cache TTL -CACHE_DEFAULT_TTL_SECONDS=300 # How long items stay in cache (5 minutes) - -# Lock settings for preventing cache stampede -CACHE_DEFAULT_LOCK_TTL_SECONDS=5 # Lock duration when fetching from DB on cache miss -``` - -**When to Enable Caching:** -- High-traffic production environments -- Applications with many repeated reads of the same data -- When you need to reduce database load - -**Note:** Caching requires a Redis instance. You can run Redis locally with Docker: -```bash -docker run -d -p 6379:6379 redis:latest +# Ollama (uses custom provider) +LLM_OPENAI_COMPATIBLE_BASE_URL=http://localhost:11434/v1 +LLM_OPENAI_COMPATIBLE_API_KEY=ollama +DERIVER_PROVIDER=custom +DERIVER_MODEL=llama3.3:70b ``` -## LLM Provider Configuration - -Honcho supports multiple LLM providers for different tasks. API keys are configured in the `[llm]` section, while specific features use their own configuration sections. +Set `PROVIDER` and `MODEL` for each feature the same way. -### API Keys +### Thinking Budget -All provider API keys use the `LLM_` prefix: +Default configs use `THINKING_BUDGET_TOKENS` tuned for Anthropic models. Non-Anthropic providers don't support extended thinking and will error or silently fail. The [Self-Hosting Guide](./self-hosting#llm-setup) sets these to `0` by default. 
If you switch to Anthropic models, you can re-enable them: ```bash -# Provider API Keys -LLM_ANTHROPIC_API_KEY=your-anthropic-api-key -LLM_OPENAI_API_KEY=your-openai-api-key -LLM_GEMINI_API_KEY=your-gemini-api-key -LLM_GROQ_API_KEY=your-groq-api-key - -# OpenAI-compatible endpoints -LLM_OPENAI_COMPATIBLE_API_KEY=your-api-key -LLM_OPENAI_COMPATIBLE_BASE_URL=https://your-openai-compatible-endpoint.com - -# vLLM endpoint (for local models) -LLM_VLLM_API_KEY=your-vllm-api-key -LLM_VLLM_BASE_URL=http://localhost:8000 +# Anthropic models — enable thinking +DERIVER_THINKING_BUDGET_TOKENS=1024 +SUMMARY_THINKING_BUDGET_TOKENS=512 +DREAM_THINKING_BUDGET_TOKENS=8192 +DIALECTIC_LEVELS__medium__THINKING_BUDGET_TOKENS=1024 +DIALECTIC_LEVELS__high__THINKING_BUDGET_TOKENS=1024 +DIALECTIC_LEVELS__max__THINKING_BUDGET_TOKENS=2048 +# minimal and low stay at 0 ``` ### General LLM Settings ```bash -# Default settings for all LLM calls LLM_DEFAULT_MAX_TOKENS=2500 # Embedding provider (used when EMBED_MESSAGES=true) @@ -292,23 +152,23 @@ LLM_MAX_MESSAGE_CONTENT_CHARS=2000 # Max chars per message in tool results ### Feature-Specific Model Configuration -Different features can use different providers and models: +Each feature can use a different provider and model. Below are all the tuning knobs. **Dialectic API:** -The Dialectic API provides theory-of-mind informed responses by integrating long-term facts with current context. It uses a tiered reasoning system with five levels: +The Dialectic API provides theory-of-mind informed responses. It uses a tiered reasoning system with five levels: ```bash # Global dialectic settings DIALECTIC_MAX_OUTPUT_TOKENS=8192 DIALECTIC_MAX_INPUT_TOKENS=100000 -DIALECTIC_HISTORY_TOKEN_LIMIT=8192 # Token limit for get_recent_history tool -DIALECTIC_SESSION_HISTORY_MAX_TOKENS=4096 # Max tokens of recent messages to include +DIALECTIC_HISTORY_TOKEN_LIMIT=8192 +DIALECTIC_SESSION_HISTORY_MAX_TOKENS=4096 ``` **Per-Level Configuration:** -Each reasoning level (minimal, low, medium, high, max) has its own provider, model, and settings: +Each reasoning level has its own provider, model, and settings: ```toml # config.toml example @@ -317,8 +177,8 @@ PROVIDER = "google" MODEL = "gemini-2.5-flash-lite" THINKING_BUDGET_TOKENS = 0 MAX_TOOL_ITERATIONS = 1 -MAX_OUTPUT_TOKENS = 250 # Optional: overrides global MAX_OUTPUT_TOKENS -TOOL_CHOICE = "any" # Options: null/auto, "any", "required" +MAX_OUTPUT_TOKENS = 250 +TOOL_CHOICE = "any" [dialectic.levels.low] PROVIDER = "google" @@ -344,12 +204,9 @@ PROVIDER = "anthropic" MODEL = "claude-haiku-4-5" THINKING_BUDGET_TOKENS = 2048 MAX_TOOL_ITERATIONS = 10 -# Backup provider (optional, must set both or neither) -# BACKUP_PROVIDER = "google" -# BACKUP_MODEL = "gemini-2.5-pro" ``` -**Environment variables for nested dialectic levels:** +Environment variables for nested levels use double underscores: ```bash DIALECTIC_LEVELS__minimal__PROVIDER=google DIALECTIC_LEVELS__minimal__MODEL=gemini-2.5-flash-lite @@ -359,103 +216,67 @@ DIALECTIC_LEVELS__minimal__MAX_TOOL_ITERATIONS=1 **Deriver (Theory of Mind):** -The Deriver is a background processing system that extracts facts from messages and builds theory-of-mind representations of peers. +The Deriver extracts facts from messages and builds theory-of-mind representations of peers. 
```bash -# Enable/disable deriver DERIVER_ENABLED=true -# LLM settings for deriver +# LLM settings DERIVER_PROVIDER=google DERIVER_MODEL=gemini-2.5-flash-lite DERIVER_MAX_OUTPUT_TOKENS=4096 DERIVER_THINKING_BUDGET_TOKENS=1024 -DERIVER_MAX_INPUT_TOKENS=23000 # Maximum input tokens for deriver -DERIVER_TEMPERATURE= # Optional temperature override (unset by default) - -# Backup provider (optional, must set both or neither) -# DERIVER_BACKUP_PROVIDER=anthropic -# DERIVER_BACKUP_MODEL=claude-haiku-4-5 +DERIVER_MAX_INPUT_TOKENS=23000 +DERIVER_TEMPERATURE= # Optional override (unset by default) # Worker settings -DERIVER_WORKERS=1 # Number of background worker processes -DERIVER_POLLING_SLEEP_INTERVAL_SECONDS=1.0 # Time between queue checks -DERIVER_STALE_SESSION_TIMEOUT_MINUTES=5 # Timeout for stale sessions +DERIVER_WORKERS=1 # Increase for higher throughput +DERIVER_POLLING_SLEEP_INTERVAL_SECONDS=1.0 +DERIVER_STALE_SESSION_TIMEOUT_MINUTES=5 # Queue management -DERIVER_QUEUE_ERROR_RETENTION_SECONDS=2592000 # Keep errored items for 30 days - -# Document settings -DERIVER_DEDUPLICATE=true # Deduplicate documents when creating +DERIVER_QUEUE_ERROR_RETENTION_SECONDS=2592000 # 30 days # Observation settings -DERIVER_LOG_OBSERVATIONS=false # Log all observations -DERIVER_WORKING_REPRESENTATION_MAX_OBSERVATIONS=100 # Max observations stored -DERIVER_REPRESENTATION_BATCH_MAX_TOKENS=1024 # Max tokens per batch (must be <= MAX_INPUT_TOKENS) +DERIVER_DEDUPLICATE=true +DERIVER_LOG_OBSERVATIONS=false +DERIVER_WORKING_REPRESENTATION_MAX_OBSERVATIONS=100 +DERIVER_REPRESENTATION_BATCH_MAX_TOKENS=1024 ``` **Peer Card:** -Peer cards are short, structured summaries of peer identity and characteristics. - ```bash -# Enable/disable peer card generation PEER_CARD_ENABLED=true ``` **Summary Generation:** -Session summaries provide compressed context for long conversations. Honcho creates two types: short summaries (frequent) and long summaries (comprehensive). +Session summaries provide compressed context for long conversations — short summaries (frequent) and long summaries (comprehensive). ```bash -# Enable/disable summarization SUMMARY_ENABLED=true - -# LLM settings for summary generation SUMMARY_PROVIDER=google SUMMARY_MODEL=gemini-2.5-flash -SUMMARY_MAX_TOKENS_SHORT=1000 # Max tokens for short summaries -SUMMARY_MAX_TOKENS_LONG=4000 # Max tokens for long summaries +SUMMARY_MAX_TOKENS_SHORT=1000 +SUMMARY_MAX_TOKENS_LONG=4000 SUMMARY_THINKING_BUDGET_TOKENS=512 - -# Backup provider (optional, must set both or neither) -# SUMMARY_BACKUP_PROVIDER=anthropic -# SUMMARY_BACKUP_MODEL=claude-haiku-4-5 - -# Summary frequency thresholds -SUMMARY_MESSAGES_PER_SHORT_SUMMARY=20 # Create short summary every N messages -SUMMARY_MESSAGES_PER_LONG_SUMMARY=60 # Create long summary every N messages +SUMMARY_MESSAGES_PER_SHORT_SUMMARY=20 +SUMMARY_MESSAGES_PER_LONG_SUMMARY=60 ``` -### Default Provider Usage +**Dream Processing:** -By default, Honcho uses: -- **Google** (Gemini) for dialectic API (minimal/low levels), deriver, and summarization -- **Anthropic** (Claude) for dialectic API (medium/high/max levels) and dream processing -- **OpenAI** for embeddings (if `EMBED_MESSAGES=true`) +Dream processing consolidates and refines peer representations during idle periods. -You only need to set the API keys for the providers you plan to use. All providers are configurable per feature. 
- -## Additional Features Configuration - -### Dream Processing - -Dream processing consolidates and refines peer representations during idle periods, similar to how human memory consolidation works during sleep. - -**Dream Settings:** ```bash -# Enable/disable dream processing DREAM_ENABLED=true +DREAM_DOCUMENT_THRESHOLD=50 +DREAM_IDLE_TIMEOUT_MINUTES=60 +DREAM_MIN_HOURS_BETWEEN_DREAMS=8 +DREAM_ENABLED_TYPES=["omni"] -# Trigger thresholds -DREAM_DOCUMENT_THRESHOLD=50 # Minimum documents to trigger a dream -DREAM_IDLE_TIMEOUT_MINUTES=60 # Minutes of inactivity before dream can start -DREAM_MIN_HOURS_BETWEEN_DREAMS=8 # Minimum hours between dreams for a peer - -# Dream types to enable -DREAM_ENABLED_TYPES=["omni"] # Currently supported: omni - -# LLM settings for dream processing +# LLM settings DREAM_PROVIDER=anthropic DREAM_MODEL=claude-sonnet-4-20250514 DREAM_MAX_OUTPUT_TOKENS=16384 @@ -463,10 +284,6 @@ DREAM_THINKING_BUDGET_TOKENS=8192 DREAM_MAX_TOOL_ITERATIONS=20 DREAM_HISTORY_TOKEN_LIMIT=16384 -# Backup provider (optional, must set both or neither) -# DREAM_BACKUP_PROVIDER=google -# DREAM_BACKUP_MODEL=gemini-2.5-flash - # Specialist models (use same provider as main model) DREAM_DEDUCTION_MODEL=claude-haiku-4-5 DREAM_INDUCTION_MODEL=claude-haiku-4-5 @@ -474,155 +291,163 @@ DREAM_INDUCTION_MODEL=claude-haiku-4-5 **Surprisal-Based Sampling (Advanced):** -The dream system includes an optional surprisal-based sampling subsystem for identifying unusual or surprising observations: +Optional subsystem for identifying unusual observations during dreaming: ```bash -# Enable/disable surprisal sampling DREAM_SURPRISAL__ENABLED=false +DREAM_SURPRISAL__TREE_TYPE=kdtree +DREAM_SURPRISAL__TREE_K=5 +DREAM_SURPRISAL__SAMPLING_STRATEGY=recent +DREAM_SURPRISAL__SAMPLE_SIZE=200 +DREAM_SURPRISAL__TOP_PERCENT_SURPRISAL=0.10 +DREAM_SURPRISAL__MIN_HIGH_SURPRISAL_FOR_REPLACE=10 +DREAM_SURPRISAL__INCLUDE_LEVELS=["explicit", "deductive"] +``` -# Tree configuration for similarity search -DREAM_SURPRISAL__TREE_TYPE=kdtree # Options: kdtree, balltree, rptree, covertree, lsh, graph, prototype -DREAM_SURPRISAL__TREE_K=5 # k for kNN-based trees +## Core Configuration -# Sampling strategy -DREAM_SURPRISAL__SAMPLING_STRATEGY=recent # Options: recent, random, all -DREAM_SURPRISAL__SAMPLE_SIZE=200 +### Application Settings -# Surprisal filtering (normalized scores: 0.0 = lowest, 1.0 = highest) -DREAM_SURPRISAL__TOP_PERCENT_SURPRISAL=0.10 # Top 10% of observations -DREAM_SURPRISAL__MIN_HIGH_SURPRISAL_FOR_REPLACE=10 +```bash +LOG_LEVEL=INFO # DEBUG, INFO, WARNING, ERROR, CRITICAL +SESSION_OBSERVERS_LIMIT=10 +GET_CONTEXT_MAX_TOKENS=100000 +MAX_MESSAGE_SIZE=25000 +MAX_FILE_SIZE=5242880 # 5MB +EMBED_MESSAGES=true +MAX_EMBEDDING_TOKENS=8192 +MAX_EMBEDDING_TOKENS_PER_REQUEST=300000 +NAMESPACE=honcho +``` -# Observation level filtering -DREAM_SURPRISAL__INCLUDE_LEVELS=["explicit", "deductive"] +**Optional Integrations:** +```bash +LANGFUSE_HOST=https://cloud.langfuse.com +LANGFUSE_PUBLIC_KEY=your-langfuse-public-key +COLLECT_METRICS_LOCAL=false +LOCAL_METRICS_FILE=metrics.jsonl +REASONING_TRACES_FILE=traces.jsonl ``` -### Webhook Configuration +### Database -Webhooks allow you to receive real-time notifications when events occur in Honcho (e.g., new messages, session updates). 
+```bash +# Connection (required) +DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/postgres + +# Pool settings +DB_SCHEMA=public +DB_POOL_PRE_PING=true +DB_POOL_SIZE=10 +DB_MAX_OVERFLOW=20 +DB_POOL_TIMEOUT=30 +DB_POOL_RECYCLE=300 +DB_POOL_USE_LIFO=true +DB_SQL_DEBUG=false +``` + +### Authentication -**Webhook Settings:** ```bash -# Webhook secret for signing payloads (optional but recommended) -WEBHOOK_SECRET=your-webhook-signing-secret +AUTH_USE_AUTH=false # Set to true to require JWT tokens +AUTH_JWT_SECRET=your-super-secret-jwt-key # Required when auth is enabled +``` -# Limit on webhooks per workspace -WEBHOOK_MAX_WORKSPACE_LIMIT=10 +Generate a secret: `python scripts/generate_jwt_secret.py` + +### Cache (Redis) + +Redis caching is optional. Honcho works without it but benefits from caching in high-traffic scenarios. + +```bash +CACHE_ENABLED=false +CACHE_URL=redis://localhost:6379/0?suppress=true +CACHE_NAMESPACE=honcho +CACHE_DEFAULT_TTL_SECONDS=300 +CACHE_DEFAULT_LOCK_TTL_SECONDS=5 # Cache stampede prevention ``` -### Vector Store Configuration +### Webhooks + +```bash +WEBHOOK_SECRET=your-webhook-signing-secret +WEBHOOK_MAX_WORKSPACE_LIMIT=10 +``` -Honcho supports multiple vector store backends for storing embeddings. +### Vector Store -**Vector Store Settings:** ```bash -# Vector store type VECTOR_STORE_TYPE=pgvector # Options: pgvector, turbopuffer, lancedb - -# Migration flag (set to true when migration from pgvector is complete) VECTOR_STORE_MIGRATED=false - -# Global namespace prefix for all vector namespaces VECTOR_STORE_NAMESPACE=honcho - -# Embedding dimensions (default for OpenAI text-embedding-3-small) VECTOR_STORE_DIMENSIONS=1536 -# Reconciliation interval for syncing -VECTOR_STORE_RECONCILIATION_INTERVAL_SECONDS=300 # 5 minutes - -# Turbopuffer-specific settings (required if TYPE=turbopuffer) +# Turbopuffer-specific VECTOR_STORE_TURBOPUFFER_API_KEY=your-turbopuffer-api-key VECTOR_STORE_TURBOPUFFER_REGION=us-east-1 -# LanceDB-specific settings (local embedded mode) +# LanceDB-specific VECTOR_STORE_LANCEDB_PATH=./lancedb_data ``` -## Monitoring Configuration +## Monitoring -### Prometheus Metrics (Pull-based) +### Prometheus Metrics -Honcho exposes Prometheus metrics via `/metrics` endpoints for scraping: -- **API process**: Port 8000 at `/metrics` -- **Deriver process**: Port 9090 at `/metrics` +Honcho exposes `/metrics` endpoints for scraping: +- **API process**: Port 8000 +- **Deriver process**: Port 9090 -**Metrics Settings:** ```bash -# Enable/disable Prometheus metrics METRICS_ENABLED=false - -# Namespace label for all metrics (inherits from app.NAMESPACE if not set) METRICS_NAMESPACE=honcho ``` -### CloudEvents Telemetry (Analytics) +### CloudEvents Telemetry -Honcho can emit structured CloudEvents for analytics purposes. 
- -**Telemetry Settings:** ```bash -# Enable/disable CloudEvents emission TELEMETRY_ENABLED=false - -# CloudEvents HTTP endpoint TELEMETRY_ENDPOINT=https://telemetry.honcho.dev/v1/events - -# Optional auth headers (JSON format in env var) TELEMETRY_HEADERS='{"Authorization": "Bearer your-token"}' - -# Batching configuration TELEMETRY_BATCH_SIZE=100 TELEMETRY_FLUSH_INTERVAL_SECONDS=1.0 -TELEMETRY_FLUSH_THRESHOLD=50 - -# Retry configuration TELEMETRY_MAX_RETRIES=3 - -# Buffer configuration TELEMETRY_MAX_BUFFER_SIZE=10000 - -# Namespace for instance identification (inherits from app.NAMESPACE if not set) -TELEMETRY_NAMESPACE=honcho ``` -### Sentry Error Tracking +### Sentry -**Sentry Settings:** ```bash -# Enable/disable Sentry error tracking SENTRY_ENABLED=false - -# Sentry configuration SENTRY_DSN=https://your-sentry-dsn@sentry.io/project-id -SENTRY_RELEASE=2.4.0 # Optional: track which version errors come from -SENTRY_ENVIRONMENT=production # Environment name (development, staging, production) - -# Sampling rates (0.0 to 1.0) -SENTRY_TRACES_SAMPLE_RATE=0.1 # 10% of transactions tracked -SENTRY_PROFILES_SAMPLE_RATE=0.1 # 10% of transactions profiled +SENTRY_ENVIRONMENT=production +SENTRY_TRACES_SAMPLE_RATE=0.1 +SENTRY_PROFILES_SAMPLE_RATE=0.1 ``` -## Environment-Specific Examples +## Reference config.toml -### Development Configuration +A complete config.toml with all defaults. Copy and modify what you need: -**config.toml for development:** ```toml [app] -LOG_LEVEL = "DEBUG" +LOG_LEVEL = "INFO" SESSION_OBSERVERS_LIMIT = 10 -EMBED_MESSAGES = false -NAMESPACE = "honcho-dev" +EMBED_MESSAGES = true +NAMESPACE = "honcho" [db] -CONNECTION_URI = "postgresql+psycopg://postgres:postgres@localhost:5432/honcho_dev" -POOL_SIZE = 5 +CONNECTION_URI = "postgresql+psycopg://postgres:postgres@localhost:5432/postgres" +POOL_SIZE = 10 +MAX_OVERFLOW = 20 [auth] USE_AUTH = false [cache] ENABLED = false +URL = "redis://localhost:6379/0?suppress=true" +DEFAULT_TTL_SECONDS = 300 [deriver] ENABLED = true @@ -670,8 +495,6 @@ MAX_TOOL_ITERATIONS = 10 ENABLED = true PROVIDER = "google" MODEL = "gemini-2.5-flash" -MAX_TOKENS_SHORT = 1000 -MAX_TOKENS_LONG = 4000 [dream] ENABLED = true @@ -694,194 +517,25 @@ TYPE = "pgvector" ENABLED = false ``` -**Environment variables for development:** -```bash -# .env.development -LOG_LEVEL=DEBUG -DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/honcho_dev -AUTH_USE_AUTH=false -CACHE_ENABLED=false - -# LLM Provider API Keys -LLM_ANTHROPIC_API_KEY=your-dev-anthropic-key -LLM_OPENAI_API_KEY=your-dev-openai-key -LLM_GEMINI_API_KEY=your-dev-gemini-key -``` - -### Production Configuration - -**config.toml for production:** -```toml -[app] -LOG_LEVEL = "WARNING" -SESSION_OBSERVERS_LIMIT = 10 -EMBED_MESSAGES = true -NAMESPACE = "honcho-prod" - -[db] -CONNECTION_URI = "postgresql+psycopg://honcho_user:secure_password@prod-db:5432/honcho_prod" -POOL_SIZE = 20 -MAX_OVERFLOW = 40 - -[auth] -USE_AUTH = true - -[cache] -ENABLED = true -URL = "redis://redis:6379/0" -DEFAULT_TTL_SECONDS = 300 - -[deriver] -ENABLED = true -WORKERS = 4 -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" - -[peer_card] -ENABLED = true - -[dialectic] -MAX_OUTPUT_TOKENS = 8192 - -[dialectic.levels.minimal] -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -THINKING_BUDGET_TOKENS = 0 -MAX_TOOL_ITERATIONS = 1 - -[dialectic.levels.low] -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -THINKING_BUDGET_TOKENS = 0 -MAX_TOOL_ITERATIONS = 5 - -[dialectic.levels.medium] -PROVIDER = "anthropic" 
-MODEL = "claude-haiku-4-5"
-THINKING_BUDGET_TOKENS = 1024
-MAX_TOOL_ITERATIONS = 2
-
-[dialectic.levels.high]
-PROVIDER = "anthropic"
-MODEL = "claude-haiku-4-5"
-THINKING_BUDGET_TOKENS = 1024
-MAX_TOOL_ITERATIONS = 4
-
-[dialectic.levels.max]
-PROVIDER = "anthropic"
-MODEL = "claude-haiku-4-5"
-THINKING_BUDGET_TOKENS = 2048
-MAX_TOOL_ITERATIONS = 10
-
-[summary]
-ENABLED = true
-PROVIDER = "google"
-MODEL = "gemini-2.5-flash"
-MAX_TOKENS_SHORT = 1000
-MAX_TOKENS_LONG = 4000
-
-[dream]
-ENABLED = true
-PROVIDER = "anthropic"
-MODEL = "claude-sonnet-4-20250514"
-
-[webhook]
-MAX_WORKSPACE_LIMIT = 10
-
-[metrics]
-ENABLED = true
-
-[telemetry]
-ENABLED = true
-
-[vector_store]
-TYPE = "pgvector"
-
-[sentry]
-ENABLED = true
-ENVIRONMENT = "production"
-TRACES_SAMPLE_RATE = 0.1
-PROFILES_SAMPLE_RATE = 0.1
-```
-
-**Environment variables for production:**
-```bash
-# .env.production
-LOG_LEVEL=WARNING
-DB_CONNECTION_URI=postgresql+psycopg://honcho_user:secure_password@prod-db:5432/honcho_prod
-
-# Authentication
-AUTH_USE_AUTH=true
-AUTH_JWT_SECRET=your-super-secret-jwt-key
+## Database Migrations

-# Cache
-CACHE_ENABLED=true
-CACHE_URL=redis://redis:6379/0
-
-# LLM Provider API Keys
-LLM_ANTHROPIC_API_KEY=your-prod-anthropic-key
-LLM_OPENAI_API_KEY=your-prod-openai-key
-LLM_GEMINI_API_KEY=your-prod-gemini-key
-LLM_GROQ_API_KEY=your-prod-groq-key
-
-# Webhooks
-WEBHOOK_SECRET=your-webhook-signing-secret
-
-# Monitoring
-METRICS_ENABLED=true
-TELEMETRY_ENDPOINT=https://telemetry.honcho.dev/v1/events
-SENTRY_DSN=https://your-sentry-dsn@sentry.io/project-id
-SENTRY_ENVIRONMENT=production
-```
-
-## Migration Management
-
-**Running Database Migrations:**
 ```bash
-# Check current migration status
-uv run alembic current
-
-# Upgrade to latest
-uv run alembic upgrade head
-
-# Downgrade to specific revision
-uv run alembic downgrade revision_id
-
-# Create new migration
-uv run alembic revision --autogenerate -m "Description of changes"
+uv run alembic current # Check status
+uv run alembic upgrade head # Upgrade to latest
+uv run alembic downgrade revision_id # Downgrade to specific revision
+uv run alembic revision --autogenerate -m "Description" # Create new migration
 ```
 
 ## Troubleshooting
 
-**Common Configuration Issues:**
-
-1. **Database Connection Errors**
-   - Ensure `DB_CONNECTION_URI` uses `postgresql+psycopg://` prefix
-   - Verify database is running and accessible
-   - Check pgvector extension is installed
-
-2. **Authentication Issues**
-   - Set `AUTH_USE_AUTH=true` for production
-   - Generate and set `AUTH_JWT_SECRET` if authentication is enabled
-   - Use `python scripts/generate_jwt_secret.py` to create a secure secret
+1. **Database connection errors** — Ensure `DB_CONNECTION_URI` uses `postgresql+psycopg://` prefix. Verify database is running and pgvector extension is installed.
 
-3. **LLM Provider Issues**
-   - Verify API keys are set correctly
-   - Check model names match provider specifications
-   - Ensure provider is enabled in configuration
+2. **Authentication issues** — Generate and set `AUTH_JWT_SECRET` when `AUTH_USE_AUTH=true`. Use `python scripts/generate_jwt_secret.py`.
 
-4. **Deriver Issues**
-   - Increase `DERIVER_WORKERS` for better performance
-   - Check `DERIVER_STALE_SESSION_TIMEOUT_MINUTES` for session cleanup
-   - Monitor background processing logs
+3. **LLM provider errors** — Verify API keys are set. Check model names match your provider's format. Ensure models support tool calling.
 
+4. **Deriver not processing** — Check logs. Increase `DERIVER_WORKERS` for throughput. Verify database and LLM connectivity.
 
-5. 
**Dialectic Level Configuration** - - Ensure all five reasoning levels are configured (minimal, low, medium, high, max) - - For Anthropic provider, `THINKING_BUDGET_TOKENS` must be >= 1024 when enabled - - `MAX_OUTPUT_TOKENS` must be greater than `THINKING_BUDGET_TOKENS` for all levels +4. **Deriver not processing** — Check logs. Increase `DERIVER_WORKERS` for throughput. Verify database and LLM connectivity. -6. **Vector Store Issues** - - For Turbopuffer, ensure `VECTOR_STORE_TURBOPUFFER_API_KEY` is set - - Check `VECTOR_STORE_DIMENSIONS` matches your embedding model +5. **Dialectic level issues** — All five levels must be configured. For Anthropic, `THINKING_BUDGET_TOKENS` must be >= 1024. For non-Anthropic providers, set to `0`. `MAX_OUTPUT_TOKENS` must exceed `THINKING_BUDGET_TOKENS`. -This configuration guide covers all the settings available in Honcho. Always use environment-specific configuration files and never commit sensitive values like API keys or JWT secrets to version control. +6. **Vector store issues** — For Turbopuffer, set the API key. Check `VECTOR_STORE_DIMENSIONS` matches your embedding model. diff --git a/docs/v3/contributing/self-hosting.mdx b/docs/v3/contributing/self-hosting.mdx index 4c9c4f22b..fc298bd80 100644 --- a/docs/v3/contributing/self-hosting.mdx +++ b/docs/v3/contributing/self-hosting.mdx @@ -20,9 +20,9 @@ By the end of this guide, you'll have: Before you begin, ensure you have the following installed: ### Required Software -- **uv** - Python package manager: `pip install uv` (manages Python installations automatically) +- **uv** - Python package manager: `curl -LsSf https://astral.sh/uv/install.sh | sh` or `brew install uv` - **Git** - [Download from git-scm.com](https://git-scm.com/downloads) -- **Docker** (optional) - [Download from docker.com](https://www.docker.com/products/docker-desktop/) +- **Docker** (required for Docker setup, not needed for manual setup) - [Download from docker.com](https://www.docker.com/products/docker-desktop/) ### Database Options You'll need a PostgreSQL database with the pgvector extension. Choose one: @@ -32,9 +32,48 @@ You'll need a PostgreSQL database with the pgvector extension. Choose one: - **Railway** - Simple cloud PostgreSQL hosting - **Your own PostgreSQL server** +## LLM Setup + +Honcho uses LLMs for memory extraction, summarization, dialectic chat, and dreaming. The server will **fail to start** without a provider configured. + +You need one API key and one model. Any OpenAI-compatible endpoint works — OpenRouter, Together, Fireworks, Ollama, vLLM, or a direct vendor API. Models must support tool calling (function calling). + +The `.env.template` has provider and model lines ready for each feature. After copying it to `.env`, you need to set three things: + +```bash +# 1. Your endpoint and API key (already uncommented in the template) +LLM_OPENAI_COMPATIBLE_BASE_URL=https://openrouter.ai/api/v1 +LLM_OPENAI_COMPATIBLE_API_KEY=sk-or-v1-... + +# 2. Replace "your-model-here" everywhere with your model +# (these are spread across the Deriver, Dialectic, Summary, and Dream sections) +DERIVER_MODEL=google/gemini-2.5-flash # e.g. google/gemini-2.5-flash +SUMMARY_MODEL=google/gemini-2.5-flash +DREAM_MODEL=google/gemini-2.5-flash +DIALECTIC_LEVELS__minimal__MODEL=google/gemini-2.5-flash +# ... same for low, medium, high, max + +# 3. 
Everything else is already configured: +# - PROVIDER=custom for all features (routes through your endpoint) +# - THINKING_BUDGET_TOKENS=0 (correct for non-Anthropic models) +# - LLM_EMBEDDING_PROVIDER=openrouter (uses same endpoint for embeddings) +``` + +Use find-and-replace to swap all `your-model-here` with your chosen model in one step. + + +For recommended model tiers per feature, using multiple providers, or direct vendor API keys, see the [Configuration Guide](./configuration#llm-configuration). + + + +**Community quick-start**: [elkimek/honcho-self-hosted](https://github.com/elkimek/honcho-self-hosted) provides a one-command installer with pre-configured model tiers, interactive provider setup, and Hermes Agent integration. + + ## Docker Setup (Recommended) -The easiest way to get started is using Docker Compose, which handles both the database and Honcho server. +Docker Compose handles the database, Redis, and Honcho server. The compose file **builds the image from source** (there is no pre-built image on Docker Hub). This requires Docker with BuildKit enabled — see [Troubleshooting](./troubleshooting#docker-build-fails-with-permission-errors) if the build fails. + +The compose file is production-oriented by default (ports bound to `127.0.0.1`, restart policies, caching enabled). For development, uncomment the source mounts and monitoring services inside the file. ### 1. Clone the Repository @@ -51,45 +90,37 @@ Copy the example environment file and configure it: cp .env.template .env ``` -Edit `.env` and set your API keys (if using LLM features): - -```bash -# Optional API keys (required for LLM features) -OPENAI_API_KEY=your-openai-api-key -ANTHROPIC_API_KEY=your-anthropic-api-key - -# Database will be created automatically by Docker -DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@database:5432/postgres - -# Disable auth for local development -AUTH_USE_AUTH=false -``` +Edit `.env` and configure your LLM provider — see [LLM Setup](#llm-setup) above. The database connection is set in the compose file. Auth is disabled by default (`AUTH_USE_AUTH=false`). ### 3. Start the Services ```bash -# Copy the example docker-compose file cp docker-compose.yml.example docker-compose.yml - -# Start PostgreSQL and Honcho -docker compose up -d +docker compose up -d --build ``` -### 4. Verify It's Working +The first build takes a few minutes (compiling from source). Subsequent starts are fast. -Check that both services are running: +This starts four services: **api** (port 8000), **deriver** (background worker), **database** (PostgreSQL with pgvector, port 5432), and **redis** (port 6379). All ports are bound to `127.0.0.1`. Redis caching is enabled by default. -```bash -docker compose ps -``` +For development, uncomment the source mount and monitoring sections inside `docker-compose.yml` to enable live reload, Prometheus, and Grafana. + +### 4. Verify -Test the Honcho API: +Migrations run automatically on startup. ```bash +# Check all containers are running +docker compose ps + +# Health check (confirms the process is up) curl http://localhost:8000/health + +# Check the deriver is processing (look for "polling" or "processing" in logs) +docker compose logs deriver --tail 20 ``` -You should see a response indicating the service is healthy. +For a full end-to-end test, see [Verify Your Setup](#verify-your-setup) below. 
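+
+If you script your setup, you can block until the API is ready before running further checks. A minimal sketch, assuming the default `127.0.0.1:8000` binding:
+
+```bash
+# Poll the health endpoint until the API responds (Ctrl+C to abort)
+until curl -sf http://localhost:8000/health >/dev/null; do
+  echo "waiting for Honcho..."
+  sleep 2
+done
+echo "Honcho is up"
+```
+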
## Manual Setup @@ -134,26 +165,22 @@ Download from [postgresql.org](https://www.postgresql.org/download/windows/) ```bash docker run --name honcho-db \ - -e POSTGRES_DB=honcho \ -e POSTGRES_USER=postgres \ -e POSTGRES_PASSWORD=postgres \ -p 5432:5432 \ -d pgvector/pgvector:pg15 ``` -### 3. Create Database and Enable Extensions +### 3. Enable Extensions -Connect to PostgreSQL and set up the database: +Connect to PostgreSQL and enable pgvector: ```bash # Connect to PostgreSQL psql -U postgres -# Create database and enable extensions -CREATE DATABASE honcho; -\c honcho +# Enable the pgvector extension on the default database CREATE EXTENSION IF NOT EXISTS vector; -CREATE EXTENSION IF NOT EXISTS pg_trgm; \q ``` @@ -165,17 +192,10 @@ Create a `.env` file with your settings: cp .env.template .env ``` -Edit `.env` with your configuration: +Edit `.env` — configure your LLM provider (see [LLM Setup](#llm-setup) above) and set the database connection: ```bash -# Database connection -DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/honcho - -# Optional API keys (required for LLM features) -OPENAI_API_KEY=your-openai-api-key -ANTHROPIC_API_KEY=your-anthropic-api-key - -# Development settings +DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/postgres AUTH_USE_AUTH=false LOG_LEVEL=DEBUG ``` @@ -191,11 +211,21 @@ uv run alembic upgrade head ```bash # Start the development server -fastapi dev src/main.py +uv run fastapi dev src/main.py ``` The server will be available at `http://localhost:8000`. +### 7. Start the Background Worker (Deriver) + +In a **separate terminal**, start the deriver background worker: + +```bash +uv run python -m src.deriver +``` + +The deriver is essential for Honcho's core functionality. It processes incoming messages to extract observations, build peer representations, generate session summaries, and run dream consolidation. Without it, messages will be stored but no memory or reasoning will occur. + ## Cloud Database Setup If you prefer to use a managed PostgreSQL service: @@ -206,7 +236,6 @@ If you prefer to use a managed PostgreSQL service: 2. **Enable pgvector extension** in the SQL editor: ```sql CREATE EXTENSION IF NOT EXISTS vector; - CREATE EXTENSION IF NOT EXISTS pg_trgm; ``` 3. **Get your connection string** from Settings > Database 4. **Update your `.env` file** with the connection string @@ -227,23 +256,38 @@ Once your Honcho server is running, verify everything is working: ```bash curl http://localhost:8000/health +# {"status":"ok"} ``` -### 2. API Documentation +Note: `/health` only confirms the process is running. It does not check database or LLM connectivity. -Visit `http://localhost:8000/docs` to see the interactive API documentation. +### 2. Smoke Test (database + API) + +This confirms the database connection, migrations, and API are all working: + +```bash +# Create a workspace +curl -s -X POST http://localhost:8000/v3/workspaces \ + -H "Content-Type: application/json" \ + -d '{"name": "test"}' | python3 -m json.tool +``` + +If you get back a workspace object with an `id`, your database is connected and migrations ran correctly. + +### 3. API Documentation -### 3. Test with SDK +Visit `http://localhost:8000/docs` to see the interactive API documentation. -Create a simple test script: +### 4. 
Test with SDK ```python from honcho import Honcho -# Connect to your local instance -client = Honcho(base_url="http://localhost:8000") +client = Honcho( + base_url="http://localhost:8000", + workspace_id="test" +) -# Create a test peer peer = client.peer("test-user") print(f"Created peer: {peer.id}") ``` @@ -259,8 +303,7 @@ Now that Honcho is running locally, you can connect your applications: from honcho import Honcho client = Honcho( - base_url="http://localhost:8000", # Your local instance - api_key="your-api-key" # If auth is enabled + base_url="http://localhost:8000", ) ``` @@ -269,56 +312,93 @@ client = Honcho( import { Honcho } from '@honcho-ai/sdk'; const client = new Honcho({ - baseUrl: 'http://localhost:8000', // Your local instance - apiKey: 'your-api-key' // If auth is enabled + baseUrl: 'http://localhost:8000', }); ``` ### Next Steps +- **Configure Honcho**: Visit the [Configuration Guide](./configuration) for model tiers, provider options, and tuning - **Explore the API**: Check out the [API Reference](../api-reference/introduction) - **Try the SDKs**: See our [guides](../guides) for examples -- **Configure Honcho**: Visit the [Configuration Guide](./configuration) for detailed settings - **Join the community**: [Discord](https://discord.gg/honcho) ## Troubleshooting -### Common Issues +Running into issues? See the [Troubleshooting Guide](./troubleshooting) for detailed solutions to common problems including: -**Database Connection Errors** -- Ensure PostgreSQL is running -- Verify the connection string format: `postgresql+psycopg://...` -- Check that pgvector extension is installed +- Startup failures (missing API keys, database issues) +- Runtime errors ("An unexpected error occurred" on every request) +- Deriver not processing messages +- Database connection and migration issues +- Docker and Redis problems -**API Key Issues** -- Verify your OpenAI and Anthropic API keys are valid -- Check that the keys have sufficient credits/quota - -**Port Already in Use** -- Pass a different port to FastAPI or stop other services using port 8000 - -**Docker Issues** -- Ensure Docker is running -- Check container logs: `docker compose logs` -- Restart containers: `docker compose down && docker compose up -d` - -**Migration Errors** -- Ensure the database exists and pgvector is enabled -- Check database permissions -- Run migrations manually: `uv run alembic upgrade head` - -### Getting Help - -- **GitHub Issues**: [Report bugs](https://github.com/plastic-labs/honcho/issues) -- **Discord**: [Join our community](https://discord.gg/honcho) -- **Documentation**: Check the [Configuration Guide](./configuration) for detailed settings +**Quick checks:** +- Verify the server is running: `curl http://localhost:8000/health` +- Check logs: `docker compose logs api` (Docker) or check terminal output (manual setup) +- Ensure migrations ran: `uv run alembic upgrade head` ## Production Considerations -When self-hosting for production, consider: - -- **Security**: Enable authentication, use HTTPS, secure your database -- **Scaling**: Use connection pooling, consider load balancing -- **Monitoring**: Set up logging, error tracking, health checks -- **Backups**: Regular database backups, disaster recovery plan -- **Updates**: Keep Honcho and dependencies updated +The default compose file is already production-oriented — ports bound to `127.0.0.1`, restart policies, caching enabled. 
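+
+Before adding a reverse proxy, you can confirm that nothing is published beyond the loopback interface. A quick check, assuming the default compose services are running:
+
+```bash
+# List the published ports for each running container
+docker ps --format '{{.Names}}: {{.Ports}}'
+# Expect every mapping to be prefixed with 127.0.0.1, e.g. 127.0.0.1:8000->8000/tcp
+```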
+ +### Security +- Set `AUTH_USE_AUTH=true` and generate a JWT secret with `python scripts/generate_jwt_secret.py` +- Use HTTPS via a reverse proxy in front of Honcho. Example with Caddy (automatic TLS): + ``` + honcho.example.com { + reverse_proxy localhost:8000 + } + ``` + Or with nginx: + ```nginx + server { + listen 443 ssl; + server_name honcho.example.com; + ssl_certificate /etc/letsencrypt/live/honcho.example.com/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/honcho.example.com/privkey.pem; + location / { + proxy_pass http://127.0.0.1:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + } + ``` +- Secure your database with strong credentials and restrict network access +- The production compose binds PostgreSQL and Redis to `127.0.0.1` only — they are not accessible from the network + +### Scaling the Deriver +- Increase `DERIVER_WORKERS` (default: 1) for higher message throughput +- You can also run multiple deriver processes across machines — they coordinate via the database queue +- Monitor deriver logs for processing backlog + +### Caching +- The production compose enables Redis caching by default (`CACHE_ENABLED=true`) +- For the development compose, enable manually: `CACHE_ENABLED=true` +- Configure `CACHE_URL` to point to your Redis instance (or use a managed Redis service) + +### Database Migrations +- Always run `uv run alembic upgrade head` after updating Honcho before starting the server +- Check current migration status with `uv run alembic current` + +### LLM Providers +- Ensure your API keys are configured (see [LLM Setup](#llm-setup)) +- For alternative providers or per-feature model overrides, see the [Configuration Guide](./configuration#llm-configuration) + +### Monitoring +- Enable Prometheus metrics with `METRICS_ENABLED=true`. The API exposes `/metrics` on port 8000, the deriver on port 9090 (internal to its container — not published to the host by default). +- Enable Sentry error tracking with `SENTRY_ENABLED=true` +- The development compose includes Prometheus (host port 9090) and Grafana (host port 3000) for scraping and dashboards. Uncomment those services to enable them. + +### Backups +- Set up regular PostgreSQL backups: + ```bash + # One-off backup + docker compose exec database pg_dump -U postgres postgres > backup-$(date +%Y%m%d).sql + + # Restore + cat backup.sql | docker compose exec -T database psql -U postgres postgres + ``` +- Back up your `.env` or `config.toml` configuration files diff --git a/docs/v3/contributing/troubleshooting.mdx b/docs/v3/contributing/troubleshooting.mdx new file mode 100644 index 000000000..f041e2db4 --- /dev/null +++ b/docs/v3/contributing/troubleshooting.mdx @@ -0,0 +1,299 @@ +--- +title: 'Troubleshooting' +sidebarTitle: 'Troubleshooting' +description: 'Common issues and solutions when self-hosting Honcho' +icon: 'wrench' +--- + +This page covers common issues you may encounter when self-hosting Honcho, what causes them, and how to fix them. + +## Startup Failures + +### Server won't start: "Missing client for ..." + +``` +ValueError: Missing client for Deriver: google +``` + +**Cause:** The server validates at startup that all configured LLM providers have API keys. If a provider is referenced in your configuration but the corresponding API key isn't set, the server refuses to start. + +**Fix:** Set the API keys for your configured providers. 
With default configuration, you need: + +```bash +LLM_GEMINI_API_KEY=... # Used by deriver, summary, dialectic minimal/low +LLM_ANTHROPIC_API_KEY=... # Used by dialectic medium/high/max, dream +LLM_OPENAI_API_KEY=... # Used by embeddings (when EMBED_MESSAGES=true) +``` + +See the [LLM Setup](/v3/contributing/self-hosting#llm-setup) section for provider configuration. You can change which providers are used in your `.env` or `config.toml` (see [Configuration Guide](./configuration#llm-configuration)). + +### Server won't start: "JWT_SECRET must be set" + +``` +ValueError: JWT_SECRET must be set if USE_AUTH is true +``` + +**Cause:** You enabled authentication (`AUTH_USE_AUTH=true`) but didn't provide a JWT secret. + +**Fix:** Generate a secret and set it: + +```bash +python scripts/generate_jwt_secret.py +# Then set the output as: +AUTH_JWT_SECRET= +``` + +Or disable authentication for local development: `AUTH_USE_AUTH=false` + +## Runtime Errors + +### API returns "An unexpected error occurred" on every request + +**Cause:** This is almost always a database issue. The health endpoint (`/health`) will return `{"status": "ok"}` even when the database is unreachable because it doesn't check the database connection. The actual error appears in the server logs. + +**Common causes and fixes:** + +1. **Database is unreachable** — Check that PostgreSQL is running and the `DB_CONNECTION_URI` is correct +2. **Migrations haven't been run** — The server starts successfully without tables, but every API call will fail. Run: + ```bash + uv run alembic upgrade head + ``` + In Docker: + ```bash + docker compose exec api uv run alembic upgrade head + ``` +3. **pgvector extension not installed** — The `vector` extension must be enabled in your database: + ```sql + CREATE EXTENSION IF NOT EXISTS vector; + ``` + +**How to diagnose:** Check the server logs for the actual error. Look for: +- `sqlalchemy.exc.OperationalError` — database connection issue +- `sqlalchemy.exc.ProgrammingError` with "relation does not exist" — migrations not run +- `psycopg.OperationalError` — connection refused or authentication failed + +### Health check passes but API calls fail + +The `/health` endpoint is a lightweight check that confirms the server process is running. It does **not** verify: +- Database connectivity +- That migrations have been run +- That LLM providers are reachable + +To verify full functionality, try creating a workspace: + +```bash +curl -X POST http://localhost:8000/v3/workspaces \ + -H "Content-Type: application/json" \ + -d '{"name": "test"}' +``` + +If this succeeds, your database connection and migrations are working. + +### Deriver not processing messages + +Messages are stored but no observations, summaries, or representations are being generated. + +**Common causes:** + +1. **Deriver isn't running** — In manual setup, the deriver is a separate process: + ```bash + uv run python -m src.deriver + ``` + In Docker, it starts automatically via `docker compose up`. + +2. **Deriver can't reach the database** — Check deriver logs for connection errors. The deriver uses the same `DB_CONNECTION_URI` as the API server. + +3. **Missing LLM API key for deriver provider** — By default the deriver uses Google Gemini (`LLM_GEMINI_API_KEY`). Check deriver logs for API errors. + +4. **Processing backlog** — With `DERIVER_WORKERS=1` (default), high message volume can cause a backlog. Increase workers: + ```bash + DERIVER_WORKERS=4 + ``` +5. 
**Representation Batch Max** — By default, the deriver buffers its operations until a session has accumulated enough tokens for a given representation. This threshold is set via the `REPRESENTATION_BATCH_MAX_TOKENS` environment variable. If tasks don't seem to be progressing, the batch size may be set too high, or not enough data may have flowed into the session yet. See [token batching](/v3/documentation/core-concepts/reasoning#token-batching) for more details.
+
+## Alternative Provider Issues
+
+### OpenRouter / custom provider not working
+
+If you set `PROVIDER=custom` but calls fail:
+
+1. **Verify the endpoint and key are set:**
+   ```bash
+   LLM_OPENAI_COMPATIBLE_BASE_URL=https://openrouter.ai/api/v1
+   LLM_OPENAI_COMPATIBLE_API_KEY=sk-or-v1-...
+   ```
+
+2. **Check model names match the provider's format.** OpenRouter uses `vendor/model` format (e.g., `anthropic/claude-haiku-4-5`), not the raw model ID.
+
+3. **Ensure your model supports tool calling.** The deriver, dialectic, and dream agents require tool use. Check the provider's model page for tool calling support.
+
+4. **Check server logs for the actual error.** API errors from the upstream provider will appear in Honcho's logs with the HTTP status code and message body.
+
+### vLLM / Ollama not responding
+
+1. **Verify the model server is running** and accessible from the Honcho process (or container):
+   ```bash
+   curl http://localhost:8000/v1/models # vLLM
+   curl http://localhost:11434/v1/models # Ollama
+   ```
+
+2. **In Docker**, `localhost` inside a container doesn't reach the host. Use `host.docker.internal` (macOS/Windows) or the host's network IP:
+   ```bash
+   LLM_VLLM_BASE_URL=http://host.docker.internal:8000/v1
+   ```
+
+3. **Structured output failures** — vLLM's structured output support is limited to certain response formats. If you see JSON parsing errors, check the deriver/dream logs for the raw response.
+
+### Thinking budget errors with non-Anthropic providers
+
+If you see errors like `thinking budget not supported`, `invalid parameter`, or silent failures where agents produce no output, your `THINKING_BUDGET_TOKENS` is likely set to a value > 0 with a provider that doesn't support Anthropic-style extended thinking.
+
+**Fix:** Set `THINKING_BUDGET_TOKENS=0` for every component when using non-Anthropic providers:
+
+```bash
+DERIVER_THINKING_BUDGET_TOKENS=0
+SUMMARY_THINKING_BUDGET_TOKENS=0
+DREAM_THINKING_BUDGET_TOKENS=0
+DIALECTIC_LEVELS__minimal__THINKING_BUDGET_TOKENS=0
+DIALECTIC_LEVELS__low__THINKING_BUDGET_TOKENS=0
+DIALECTIC_LEVELS__medium__THINKING_BUDGET_TOKENS=0
+DIALECTIC_LEVELS__high__THINKING_BUDGET_TOKENS=0
+DIALECTIC_LEVELS__max__THINKING_BUDGET_TOKENS=0
+```
+
+This applies to OpenRouter (with non-Anthropic models), vLLM, Ollama, Groq, Google, and OpenAI providers. Only Anthropic models support the thinking budget parameter.
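+
+A quick way to catch a stray non-zero budget before restarting is to grep your `.env`. A minimal check, assuming env-style configuration:
+
+```bash
+# Any line printed here still has a non-zero thinking budget
+grep -n "THINKING_BUDGET_TOKENS" .env | grep -v "=0"
+```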
+ +## Database Issues + +### Connection string format + +The connection URI **must** use the `postgresql+psycopg` prefix: + +```bash +# Correct +DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@localhost:5432/postgres + +# Wrong - will fail +DB_CONNECTION_URI=postgresql://postgres:postgres@localhost:5432/postgres +DB_CONNECTION_URI=postgres://postgres:postgres@localhost:5432/postgres +``` + +### Checking migration status + +```bash +# See current migration version +uv run alembic current + +# See migration history +uv run alembic history + +# Upgrade to latest +uv run alembic upgrade head +``` + +## Cache & Redis + +### Redis is optional + +Redis is used for caching when `CACHE_ENABLED=true` (default: `false`). If Redis is unreachable, Honcho **gracefully falls back to in-memory caching** and logs a warning. This means: + +- The server and deriver will still start and function normally +- Performance may be reduced under high load without Redis +- You do not need Redis for local development or testing + +### Redis connection issues + +If you see Redis connection warnings in logs but `CACHE_ENABLED=false`, they can be safely ignored. If you want caching: + +```bash +# Start Redis via Docker +docker run -d -p 6379:6379 redis:latest + +# Configure Honcho +CACHE_ENABLED=true +CACHE_URL=redis://localhost:6379/0 +``` + +## Docker Issues + +### Docker build fails with permission errors + +The Honcho Dockerfile uses BuildKit mount syntax and creates a non-root `app` user. Common build failures: + +**1. BuildKit not enabled** + +The Dockerfile uses `RUN --mount=type=cache` which requires Docker BuildKit. If you see syntax errors during build: + +```bash +# Ensure BuildKit is enabled +DOCKER_BUILDKIT=1 docker compose build +``` + +Or add to your Docker daemon config (`/etc/docker/daemon.json`): +```json +{ "features": { "buildkit": true } } +``` + +**2. Permission denied during build or at runtime (Linux)** + +On Linux, AppArmor or SELinux can block Docker build operations and volume mounts. Symptoms include permission denied errors during `COPY`, `RUN`, or when the container tries to access mounted volumes. + +```bash +# Check if AppArmor is blocking Docker +sudo aa-status | grep docker + +# Temporarily test without AppArmor (for diagnosis only) +docker compose down +sudo aa-remove-unknown +docker compose up -d +``` + +For SELinux, add `:z` to volume mounts in `docker-compose.yml`: +```yaml +volumes: + - .:/app:z +``` + +**3. Volume mount UID mismatch** + +The Dockerfile creates a non-root `app` user, but `docker-compose.yml.example` mounts `.:/app` which overlays the container filesystem with host-owned files. The `app` user inside the container may not have permission to read them. + +If you see permission errors at runtime (not build time), you can either: +- Run without the source mount (remove `- .:/app` from volumes — the image already contains the code) +- Or fix ownership: `sudo chown -R 100:101 .` (matches the `app` user inside the container) + +### Containers start but API fails + +1. Check container status: `docker compose ps` +2. Check API logs: `docker compose logs api` +3. Check database logs: `docker compose logs database` +4. 
Ensure migrations ran: `docker compose exec api uv run alembic upgrade head` + +### Port conflicts + +If port 8000 is already in use: + +```bash +# Check what's using the port +lsof -i :8000 + +# Or change the port mapping in docker-compose.yml +ports: + - "8001:8000" # Map to a different host port +``` + +### Rebuilding after code changes + +```bash +docker compose build --no-cache +docker compose up -d +``` + +## Getting Help + +If your issue isn't covered here: + +- **Check the logs** — most issues are diagnosed from server or deriver logs +- **GitHub Issues** — [Report bugs](https://github.com/plastic-labs/honcho/issues) +- **Discord** — [Join our community](https://discord.gg/plasticlabs) +- **Configuration** — See the [Configuration Guide](./configuration) for all available settings diff --git a/docs/v3/guides/integrations/hermes.mdx b/docs/v3/guides/integrations/hermes.mdx index e19d2d64d..9fe469736 100644 --- a/docs/v3/guides/integrations/hermes.mdx +++ b/docs/v3/guides/integrations/hermes.mdx @@ -41,130 +41,82 @@ Hermes exposes four Honcho tools to the agent: | `honcho_context` | Dialectic Q&A powered by Honcho's LLM. Synthesizes answers from conversation history. | | `honcho_conclude` | Writes durable facts to Honcho when the user states preferences, corrections, or important context. | -## Two memory layers - -When Honcho is enabled, Hermes operates with two layer memory by default (`hybrid`): - -**Local session history** -- the immediate transcript for the current chat, thread, or CLI session. Use it for recent turns, short-lived task context, and follow-up questions. - -**Honcho memory** -- the semantic, cross-session layer. Use it for user preferences, durable project facts, cross-session continuity, and synthesized peer context. - ## Running Honcho locally with Hermes -If you want to point Hermes at a local Honcho instance instead of the hosted API: - -### Docker (quickest) - -```bash -git clone https://github.com/plastic-labs/honcho.git -cd honcho -cp .env.template .env -cp docker-compose.yml.example docker-compose.yml -``` - -Edit `.env`: - -```bash -OPENAI_API_KEY=your-openai-api-key -ANTHROPIC_API_KEY=your-anthropic-api-key -DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@database:5432/honcho -AUTH_USE_AUTH=false -``` - -```bash -docker compose up -d -curl http://localhost:8000/health -``` - -### Manual - -```bash -git clone https://github.com/plastic-labs/honcho.git -cd honcho -uv sync -cp .env.template .env -``` - -Edit `.env` with a local or cloud Postgres connection string and API keys, then: +Follow the [Self-Hosting Guide](/v3/contributing/self-hosting) to get Honcho running locally. Once it's up, point Hermes at your instance: ```bash -uv run alembic upgrade head -uv run fastapi dev src/main.py +hermes memory setup # select "honcho", enter http://localhost:8000 as the base URL ``` -Then update `~/.honcho/config.json` to point at your local instance: +Or manually create/edit the config file (checked in order: `$HERMES_HOME/honcho.json` > `~/.hermes/honcho.json` > `~/.honcho/config.json`): ```json { - "apiKey": "not-needed-with-auth-disabled", "baseUrl": "http://localhost:8000", "hosts": { "hermes": { - "workspace": "hermes", - "peerName": "your-name", + "enabled": true, "aiPeer": "hermes", - "memoryMode": "hybrid", - "enabled": true + "peerName": "your-name", + "workspace": "hermes" } } } ``` -The `baseUrl` field overrides the default hosted API. With `AUTH_USE_AUTH=false` on the server, the `apiKey` value is ignored but the field must still be present. 
+For the full list of config fields (`recallMode`, `writeFrequency`, `sessionStrategy`, `dialecticReasoningLevel`, etc.), see the [Hermes memory provider docs](https://hermes-agent.nousresearch.com/docs/user-guide/features/memory-providers#honcho). -See the full [self-hosting guide](/v3/contributing/self-hosting) for database options, cloud setup, and troubleshooting. + +**Community quick-start**: [elkimek/honcho-self-hosted](https://github.com/elkimek/honcho-self-hosted) provides a one-command installer with pre-configured model tiers and Hermes Agent integration. + ## Verifying the integration -Steps to test the integration via CLI and agentically by speaking to Hermes agent in natural language. - -### 1. Check configuration +### 1. Check status ```bash -hermes honcho status +hermes memory status ``` -### 2. Test cross-session recall +This should show Honcho as the active memory provider with your base URL. -In one conversation: +### 2. Store a fact and recall it across sessions + +In one conversation, tell Hermes something specific: ```text -Remember that my test phrase is velvet circuit. +My favorite programming language is Rust and I always use dark mode. ``` -In a fresh conversation (different thread, new CLI session): +Start a **new session** (different thread, new CLI invocation, or a different platform). Ask: ```text -What is my test phrase? +What do you know about my preferences? ``` -If Hermes recalls "velvet circuit" after short-term context is gone, Honcho is working. +If Hermes mentions Rust and dark mode without being told again, cross-session memory is working. The deriver processed your messages, extracted observations, and the dialectic recalled them. -### 3. Test writeback +### 3. Test tool calling directly -Tell Hermes a preference: +Ask Hermes to use a specific Honcho tool: ```text -Remember that I prefer terse answers. +Use your honcho_search tool to find anything you know about me. ``` -Wait briefly if writes are asynchronous. Open a fresh conversation: - -```text -How should you respond to me? -``` - -If Hermes answers with the stored preference, writeback is functioning. - - -## Session strategy +If Hermes calls the tool and returns results, the full tool pipeline (API connection, vector search, embedding) is functional. -| Scope | When to use | -|----------------------|---------------------------------------------------------| -| Per-Session | A honcho session starts fresh each time a new Hermes session is created. Hermes remembers the user across sessions. | -| Per Directory | One honcho session per project directory. Context is scoped to each directory. Coding/project memory scoped to each repository/workspace. | -| Global (per user) | Continuity across all chats, threads, and projects. One honcho session globally for the user and Hermes agent. | +## Configuration options +| Field | Default | Description | +|---|---|---| +| `recallMode` | `hybrid` | `hybrid` (auto-inject + tools), `context` (inject only), `tools` (tools only) | +| `writeFrequency` | `async` | `async`, `turn`, `session`, or integer N | +| `sessionStrategy` | `per-directory` | `per-directory`, `per-repo`, `per-session`, `global` | +| `dialecticReasoningLevel` | `low` | `minimal`, `low`, `medium`, `high`, `max` | +| `dialecticDynamic` | `true` | Auto-bump reasoning level by query complexity | +| `messageMaxChars` | `25000` | Max chars per message (chunked if exceeded) | ## Next steps @@ -182,6 +134,6 @@ If Hermes answers with the stored preference, writeback is functioning. 
- Full local environment setup, database options, and troubleshooting. + Full local environment setup, provider configuration, and troubleshooting. diff --git a/src/main.py b/src/main.py index 571bbb544..46dc5bc9a 100644 --- a/src/main.py +++ b/src/main.py @@ -196,6 +196,12 @@ async def lifespan(_: FastAPI): app.add_route("/metrics", metrics_endpoint, methods=["GET"]) +@app.get("/health") +async def health_check(): + """Health check endpoint for monitoring and container orchestration.""" + return {"status": "ok"} + + # Global exception handlers @app.exception_handler(HonchoException) async def honcho_exception_handler(_request: Request, exc: HonchoException): From 5b6bd59030faca74794360624e98e0534ecd05e0 Mon Sep 17 00:00:00 2001 From: Vineeth Voruganti <13438633+VVoruganti@users.noreply.github.com> Date: Wed, 8 Apr 2026 11:14:50 -0400 Subject: [PATCH 05/46] Tighten Transaction Scopes (#525) * fix: further remove extraneous transactions * fix: (search) use 2 phase function to reduce un-needed transaction * fix: refactor agent search to perform external operations before making a transaction * fix: reduce scope of queue manager transaction * fix: (bench) add concurrency to test bench * fix: address review findings for search dedup, webhook idempotency, and bench throttling * Fix Leakage in non-session-scoped chat call (#526) * fix: (search) reduce scope for peer based searches * fix: tests * fix: (test) address coderabbit comment * fix: drop db param from deliver_webhook --------- Co-authored-by: Rajat Ahuja --- src/crud/message.py | 375 +++++++++++++------ src/crud/peer.py | 12 +- src/crud/session.py | 77 +++- src/crud/webhook.py | 20 +- src/crud/workspace.py | 11 +- src/deriver/consumer.py | 3 +- src/deriver/queue_manager.py | 106 +++--- src/routers/peers.py | 3 +- src/routers/sessions.py | 2 - src/routers/workspaces.py | 3 +- src/utils/agent_tools.py | 215 ++++++----- src/utils/search.py | 276 ++++++++------ src/webhooks/webhook_delivery.py | 53 ++- tests/bench/runner_common.py | 187 +++++---- tests/conftest.py | 3 + tests/integration/test_message_embeddings.py | 213 ++++++++++- tests/sdk_typescript/conftest.py | 3 + tests/test_search.py | 308 ++++++++++++++- tests/utils/test_agent_tools.py | 133 ++++++- tests/webhooks/test_webhook_delivery.py | 8 +- 20 files changed, 1472 insertions(+), 539 deletions(-) diff --git a/src/crud/message.py b/src/crud/message.py index 41e0053b9..08c4c8610 100644 --- a/src/crud/message.py +++ b/src/crud/message.py @@ -9,6 +9,7 @@ from src import models, schemas from src.config import settings +from src.dependencies import tracked_db from src.embedding_client import embedding_client from src.utils.filter import apply_filter from src.utils.formatting import ILIKE_ESCAPE_CHAR, escape_ilike_pattern @@ -34,6 +35,40 @@ def _deduplicate_messages( return result +def _expunge_snippets( + db: AsyncSession, snippets: list[tuple[list[models.Message], list[models.Message]]] +) -> None: + """Detach snippet messages from the session, guarding against duplicates.""" + seen: set[int] = set() + for matches, context in snippets: + for msg in [*matches, *context]: + obj_id = id(msg) + if obj_id in seen: + continue + db.expunge(msg) + seen.add(obj_id) + + +async def get_peer_session_names( + db: AsyncSession, + workspace_name: str, + peer_name: str, +) -> list[str]: + """Get all session names where a peer has any membership record. + + Any membership record (regardless of joined_at/left_at) grants visibility + to all messages in that session. 
+ """ + stmt = ( + select(models.session_peers_table.c.session_name) + .where(models.session_peers_table.c.workspace_name == workspace_name) + .where(models.session_peers_table.c.peer_name == peer_name) + .distinct() + ) + result = await db.execute(stmt) + return [row[0] for row in result.all()] + + def _apply_token_limit( base_conditions: list[ColumnElement[Any]], token_limit: int ) -> Select[tuple[models.Message]]: @@ -595,22 +630,19 @@ async def update_message( async def _search_messages_external( - db: AsyncSession, workspace_name: str, query_embedding: list[float], limit: int, *, session_name: str | None = None, + allowed_session_names: list[str] | None = None, after_date: datetime | None = None, before_date: datetime | None = None, -) -> list[models.Message]: - """Query the external vector store for messages and fetch them from the DB. +) -> list[str]: + """Query the external vector store and return ordered message IDs. Multiple vector records can map to the same message (chunked embeddings), so we oversample from the vector store and deduplicate by message_id. - - Date filters are applied at the DB level since external vector stores - don't support temporal filtering. """ external_vector_store = get_external_vector_store() if external_vector_store is None: @@ -621,6 +653,8 @@ async def _search_messages_external( vector_filters: dict[str, Any] = {} if session_name: vector_filters["session_name"] = session_name + elif allowed_session_names is not None: + vector_filters["session_name"] = {"in": allowed_session_names} # Oversample: chunks can map to the same message, and date filters are # applied post-fetch (vector stores don't support temporal filtering), @@ -648,7 +682,18 @@ async def _search_messages_external( if not message_ids: return [] - # Fetch from DB with optional date filtering + return message_ids + + +async def _fetch_messages_by_ids( + db: AsyncSession, + workspace_name: str, + message_ids: list[str], + *, + after_date: datetime | None = None, + before_date: datetime | None = None, +) -> list[models.Message]: + """Fetch messages by ID, preserving the supplied ordering.""" fetch_stmt = ( select(models.Message) .where(models.Message.public_id.in_(message_ids)) @@ -662,18 +707,139 @@ async def _search_messages_external( result = await db.execute(fetch_stmt) messages_by_id = {msg.public_id: msg for msg in result.scalars().all()} - # Preserve vector store similarity order, apply limit - return [messages_by_id[mid] for mid in message_ids if mid in messages_by_id][:limit] + return [messages_by_id[mid] for mid in message_ids if mid in messages_by_id] -async def search_messages( +async def _search_messages_pgvector( db: AsyncSession, + workspace_name: str, + session_name: str | None, + *, + query_embedding: list[float], + allowed_session_names: list[str] | None = None, + after_date: datetime | None = None, + before_date: datetime | None = None, + limit: int = 10, + context_window: int = 2, +) -> list[tuple[list[models.Message], list[models.Message]]]: + """Run semantic message search against pgvector-backed embeddings.""" + # pgvector path: cosine distance in SQL + # Oversample because a message with multiple embedding chunks can + # produce duplicate rows; we deduplicate in Python to preserve HNSW + # index usage (a DISTINCT ON subquery would prevent the index scan). 
+ match_stmt = ( + select(models.Message) + .join( + models.MessageEmbedding, + models.Message.public_id == models.MessageEmbedding.message_id, + ) + .where(models.MessageEmbedding.workspace_name == workspace_name) + .order_by(models.MessageEmbedding.embedding.cosine_distance(query_embedding)) + .limit(limit * 2) + ) + + if session_name: + match_stmt = match_stmt.where( + models.MessageEmbedding.session_name == session_name + ) + elif allowed_session_names is not None: + match_stmt = match_stmt.where( + models.MessageEmbedding.session_name.in_(allowed_session_names) + ) + + if after_date: + match_stmt = match_stmt.where(models.Message.created_at >= after_date) + if before_date: + match_stmt = match_stmt.where(models.Message.created_at <= before_date) + + result = await db.execute(match_stmt) + matched_messages = _deduplicate_messages(result.scalars().all(), limit) + + return await _build_merged_snippets( + db, workspace_name, matched_messages, context_window + ) + + +async def _semantic_search_messages( + workspace_name: str, + session_name: str | None, + *, + query_embedding: list[float], + limit: int = 10, + context_window: int = 2, + operation_name: str, + after_date: datetime | None = None, + before_date: datetime | None = None, + observer: str | None = None, +) -> list[tuple[list[models.Message], list[models.Message]]]: + """Run semantic message search with optional temporal filters. + + When observer is provided and session_name is None, results are + scoped to sessions the observer has any membership record in. + """ + # Pre-fetch peer session scope if needed (short-lived DB session) + allowed_session_names: list[str] | None = None + if observer and not session_name: + async with tracked_db(f"{operation_name}.peer_scope") as db: + allowed_session_names = await get_peer_session_names( + db, workspace_name, observer + ) + if not allowed_session_names: + return [] + + if settings.VECTOR_STORE.TYPE != "pgvector" and settings.VECTOR_STORE.MIGRATED: + message_ids = await _search_messages_external( + workspace_name, + query_embedding, + limit, + session_name=session_name, + allowed_session_names=allowed_session_names, + after_date=after_date, + before_date=before_date, + ) + if not message_ids: + return [] + + async with tracked_db(operation_name) as db: + matched_messages = ( + await _fetch_messages_by_ids( + db, + workspace_name, + message_ids, + after_date=after_date, + before_date=before_date, + ) + )[:limit] + snippets = await _build_merged_snippets( + db, workspace_name, matched_messages, context_window + ) + _expunge_snippets(db, snippets) + return snippets + + async with tracked_db(operation_name) as db: + snippets = await _search_messages_pgvector( + db, + workspace_name, + session_name, + query_embedding=query_embedding, + allowed_session_names=allowed_session_names, + after_date=after_date, + before_date=before_date, + limit=limit, + context_window=context_window, + ) + _expunge_snippets(db, snippets) + return snippets + + +async def search_messages( workspace_name: str, session_name: str | None, query: str, limit: int = 10, context_window: int = 2, embedding: list[float] | None = None, + observer: str | None = None, ) -> list[tuple[list[models.Message], list[models.Message]]]: """ Search for messages using semantic similarity and return conversation snippets. @@ -682,67 +848,78 @@ async def search_messages( snippets within the same session are merged to avoid repetition. 
Args: - db: Database session workspace_name: Name of the workspace session_name: Name of the session (optional) query: Search query text limit: Maximum number of matching messages to return context_window: Number of messages before/after each match to include embedding: Optional pre-computed embedding + observer: When provided and session_name is None, scope results + to sessions this peer belongs to Returns: List of tuples: (matched_messages, context_messages) Each snippet may contain multiple matches if they were close together. Context messages are ordered chronologically and include the matched messages. """ - # Use provided embedding or generate one query_embedding = ( embedding if embedding is not None else await embedding_client.embed(query) ) + return await _semantic_search_messages( + workspace_name, + session_name, + query_embedding=query_embedding, + limit=limit, + context_window=context_window, + operation_name="message.search_messages", + observer=observer, + ) - if settings.VECTOR_STORE.TYPE == "pgvector" or not settings.VECTOR_STORE.MIGRATED: - # pgvector path: cosine distance in SQL - # Oversample because a message with multiple embedding chunks can - # produce duplicate rows; we deduplicate in Python to preserve HNSW - # index usage (a DISTINCT ON subquery would prevent the index scan). - match_stmt = ( - select(models.Message) - .join( - models.MessageEmbedding, - models.Message.public_id == models.MessageEmbedding.message_id, - ) - .where(models.MessageEmbedding.workspace_name == workspace_name) - .order_by( - models.MessageEmbedding.embedding.cosine_distance(query_embedding) - ) - .limit(limit * 2) - ) - if session_name: - match_stmt = match_stmt.where( - models.MessageEmbedding.session_name == session_name - ) +async def _grep_messages_internal( + db: AsyncSession, + workspace_name: str, + session_name: str | None, + text: str, + limit: int = 10, + context_window: int = 2, + allowed_session_names: list[str] | None = None, +) -> list[tuple[list[models.Message], list[models.Message]]]: + """Internal implementation of exact-text message search.""" + # Build the base query with ILIKE for case-insensitive text search + escaped_text = escape_ilike_pattern(text) + match_stmt = ( + select(models.Message) + .where(models.Message.workspace_name == workspace_name) + .where( + models.Message.content.ilike(f"%{escaped_text}%", escape=ILIKE_ESCAPE_CHAR) + ) + .order_by(models.Message.created_at.desc()) + .limit(limit) + ) - result = await db.execute(match_stmt) - matched_messages = _deduplicate_messages(result.scalars().all(), limit) - else: - # External vector store path - matched_messages = await _search_messages_external( - db, workspace_name, query_embedding, limit, session_name=session_name + if session_name: + match_stmt = match_stmt.where(models.Message.session_name == session_name) + elif allowed_session_names is not None: + match_stmt = match_stmt.where( + models.Message.session_name.in_(allowed_session_names) ) + result = await db.execute(match_stmt) + matched_messages = list(result.scalars().all()) + return await _build_merged_snippets( db, workspace_name, matched_messages, context_window ) async def grep_messages( - db: AsyncSession, workspace_name: str, session_name: str | None, text: str, limit: int = 10, context_window: int = 2, + observer: str | None = None, ) -> list[tuple[list[models.Message], list[models.Message]]]: """ Search for messages containing specific text (case-insensitive substring match). 
@@ -751,38 +928,39 @@ async def grep_messages( specific names, dates, phrases, or keywords. Args: - db: Database session workspace_name: Name of the workspace session_name: Name of the session (optional - searches all sessions if None) text: Text to search for (case-insensitive) limit: Maximum number of matching messages to return context_window: Number of messages before/after each match to include + observer: When provided and session_name is None, scope results + to sessions this peer belongs to Returns: List of tuples: (matched_messages, context_messages) Each snippet may contain multiple matches if they were close together. """ - # Build the base query with ILIKE for case-insensitive text search - escaped_text = escape_ilike_pattern(text) - match_stmt = ( - select(models.Message) - .where(models.Message.workspace_name == workspace_name) - .where( - models.Message.content.ilike(f"%{escaped_text}%", escape=ILIKE_ESCAPE_CHAR) - ) - .order_by(models.Message.created_at.desc()) - .limit(limit) - ) - - if session_name: - match_stmt = match_stmt.where(models.Message.session_name == session_name) - - result = await db.execute(match_stmt) - matched_messages = list(result.scalars().all()) + async with tracked_db("message.grep_messages") as db: + # Pre-fetch peer session scope if needed + allowed_session_names = None + if observer and not session_name: + allowed_session_names = await get_peer_session_names( + db, workspace_name, observer + ) + if not allowed_session_names: + return [] - return await _build_merged_snippets( - db, workspace_name, matched_messages, context_window - ) + snippets = await _grep_messages_internal( + db, + workspace_name, + session_name, + text, + limit, + context_window, + allowed_session_names=allowed_session_names, + ) + _expunge_snippets(db, snippets) + return snippets async def get_messages_by_date_range( @@ -793,6 +971,7 @@ async def get_messages_by_date_range( before_date: datetime | None = None, limit: int = 20, order: str = "desc", + observer: str | None = None, ) -> list[models.Message]: """ Get messages within a date range. 
@@ -805,14 +984,27 @@ async def get_messages_by_date_range( before_date: Return messages before this datetime limit: Maximum messages to return order: Sort order - 'asc' for oldest first, 'desc' for newest first + observer: When provided and session_name is None, scope results + to sessions this peer belongs to Returns: List of messages within the date range """ + # Pre-fetch peer session scope if needed + allowed_session_names = None + if observer and not session_name: + allowed_session_names = await get_peer_session_names( + db, workspace_name, observer + ) + if not allowed_session_names: + return [] + stmt = select(models.Message).where(models.Message.workspace_name == workspace_name) if session_name: stmt = stmt.where(models.Message.session_name == session_name) + elif allowed_session_names is not None: + stmt = stmt.where(models.Message.session_name.in_(allowed_session_names)) if after_date: stmt = stmt.where(models.Message.created_at >= after_date) if before_date: @@ -830,7 +1022,6 @@ async def get_messages_by_date_range( async def search_messages_temporal( - db: AsyncSession, workspace_name: str, session_name: str | None, query: str, @@ -839,6 +1030,7 @@ async def search_messages_temporal( limit: int = 10, context_window: int = 2, embedding: list[float] | None = None, + observer: str | None = None, ) -> list[tuple[list[models.Message], list[models.Message]]]: """ Search for messages using semantic similarity with optional date filtering. @@ -847,7 +1039,6 @@ async def search_messages_temporal( to find recent mentions, or before_date to find what was said before a certain point. Args: - db: Database session workspace_name: Name of the workspace session_name: Name of the session (optional) query: Search query text @@ -856,58 +1047,24 @@ async def search_messages_temporal( limit: Maximum number of matching messages to return context_window: Number of messages before/after each match to include embedding: Optional pre-computed embedding for the query + observer: When provided and session_name is None, scope results + to sessions this peer belongs to Returns: List of tuples: (matched_messages, context_messages) Each snippet may contain multiple matches if they were close together. 
""" - # Use provided embedding or generate one query_embedding = ( embedding if embedding is not None else await embedding_client.embed(query) ) - - if settings.VECTOR_STORE.TYPE == "pgvector" or not settings.VECTOR_STORE.MIGRATED: - # pgvector path: cosine distance in SQL with date filters - # Oversample to handle chunk duplicates (see search_messages comment) - match_stmt = ( - select(models.Message) - .join( - models.MessageEmbedding, - models.Message.public_id == models.MessageEmbedding.message_id, - ) - .where(models.MessageEmbedding.workspace_name == workspace_name) - ) - - if session_name: - match_stmt = match_stmt.where( - models.MessageEmbedding.session_name == session_name - ) - - # Apply date filters on the Message table - if after_date: - match_stmt = match_stmt.where(models.Message.created_at >= after_date) - if before_date: - match_stmt = match_stmt.where(models.Message.created_at <= before_date) - - # Order by similarity and limit - match_stmt = match_stmt.order_by( - models.MessageEmbedding.embedding.cosine_distance(query_embedding) - ).limit(limit * 2) - - result = await db.execute(match_stmt) - matched_messages = _deduplicate_messages(result.scalars().all(), limit) - else: - # External vector store path with post-fetch date filtering - matched_messages = await _search_messages_external( - db, - workspace_name, - query_embedding, - limit, - session_name=session_name, - after_date=after_date, - before_date=before_date, - ) - - return await _build_merged_snippets( - db, workspace_name, matched_messages, context_window + return await _semantic_search_messages( + workspace_name, + session_name, + query_embedding=query_embedding, + after_date=after_date, + before_date=before_date, + limit=limit, + context_window=context_window, + operation_name="message.search_messages_temporal", + observer=observer, ) diff --git a/src/crud/peer.py b/src/crud/peer.py index 0088ebebf..4c144b9b0 100644 --- a/src/crud/peer.py +++ b/src/crud/peer.py @@ -223,7 +223,11 @@ async def update_peer( db: AsyncSession, workspace_name: str, peer_name: str, peer: schemas.PeerUpdate ) -> models.Peer: """ - Update a peer. + Get or create a peer, then apply metadata and configuration updates. + + If the peer does not exist, the workspace and peer are created first. + Provided metadata and configuration replace the existing values when + present. Args: db: Database session @@ -235,9 +239,8 @@ async def update_peer( The updated peer Raises: - ResourceNotFoundException: If the peer does not exist - ValidationException: If the update data is invalid - ConflictException: If the update violates a unique constraint + ConflictException: If concurrent creation prevents fetching or creating + the peer """ peers_result = await get_or_create_peers( db, workspace_name, [schemas.PeerCreate(name=peer_name)] @@ -269,7 +272,6 @@ async def update_peer( return honcho_peer await db.commit() - await db.refresh(honcho_peer) await peers_result.post_commit() cache_key = peer_cache_key(workspace_name, honcho_peer.name) diff --git a/src/crud/session.py b/src/crud/session.py index 6ce73db7a..9580c16d5 100644 --- a/src/crud/session.py +++ b/src/crud/session.py @@ -137,21 +137,30 @@ async def get_or_create_session( _retry: bool = False, ) -> GetOrCreateResult[models.Session]: """ - Get or create a session in a workspace with specified peers. - If the session already exists, the peers are added to the session. + Get an active session in a workspace or create it if it does not exist. 
+ + If the session already exists, provided metadata replaces the current + metadata, provided configuration keys are merged into the existing + configuration, and any provided peers are ensured to be members of the + session. If the session does not exist, the workspace and peers are created + as needed before the session is created. Args: db: Database session - session: Session creation schema + session: Session creation payload, including optional metadata, + configuration, and session-peer configuration workspace_name: Name of the workspace - peer_names: List of peer names to add to the session - _retry: Whether to retry the operation + _retry: Whether to retry after a concurrent create conflict + Returns: GetOrCreateResult containing the session and whether it was created Raises: - ResourceNotFoundException: If the session does not exist and create is false - ConflictException: If we fail to get or create the session + ValueError: If session.name is empty + ResourceNotFoundException: If the named session exists but is inactive + ObserverException: If adding peers would exceed the observer limit + ConflictException: If concurrent creation prevents fetching or creating + the session """ if not session.name: @@ -247,10 +256,10 @@ async def get_or_create_session( workspace_name=workspace_name, session_name=session.name, peer_names=session.peer_names, + fetch_after_upsert=False, ) await db.commit() - await db.refresh(honcho_session) # Run deferred cache operations from workspace/peer creation if ws_result is not None: @@ -334,7 +343,11 @@ async def update_session( session_name: str, ) -> models.Session: """ - Update a session. + Get or create a session, then apply metadata and configuration updates. + + Provided metadata replaces the current metadata when present. Provided + configuration keys are merged into the existing configuration instead of + replacing it wholesale. 
Args: db: Database session @@ -346,7 +359,9 @@ async def update_session( The updated session Raises: - ResourceNotFoundException: If the session does not exist or peer is not in session + ResourceNotFoundException: If the named session exists but is inactive + ConflictException: If concurrent creation prevents fetching or creating + the session """ honcho_session: models.Session = ( await get_or_create_session( @@ -381,7 +396,6 @@ async def update_session( return honcho_session await db.commit() - await db.refresh(honcho_session) # Only invalidate if we actually updated cache_key = session_cache_key(workspace_name, session_name) @@ -729,7 +743,6 @@ async def clone_session( db.add(new_session_peer) await db.commit() - await db.refresh(new_session) logger.debug("Session %s cloned successfully", original_session_name) # Cache will be populated on next read - read-through pattern @@ -795,7 +808,13 @@ async def get_peers_from_session( # Get all active peers in the session (where left_at is NULL) return ( select(models.Peer) - .join(models.SessionPeer, models.Peer.name == models.SessionPeer.peer_name) + .join( + models.SessionPeer, + and_( + models.Peer.name == models.SessionPeer.peer_name, + models.Peer.workspace_name == models.SessionPeer.workspace_name, + ), + ) .where(models.SessionPeer.session_name == session_name) .where(models.Peer.workspace_name == workspace_name) .where(models.SessionPeer.left_at.is_(None)) # Only active peers @@ -825,7 +844,13 @@ async def get_session_peer_configuration( models.SessionPeer.configuration.label("session_peer_configuration"), (models.SessionPeer.left_at.is_(None)).label("is_active"), ) - .join(models.SessionPeer, models.Peer.name == models.SessionPeer.peer_name) + .join( + models.SessionPeer, + and_( + models.Peer.name == models.SessionPeer.peer_name, + models.Peer.workspace_name == models.SessionPeer.workspace_name, + ), + ) .where(models.SessionPeer.session_name == session_name) .where(models.Peer.workspace_name == workspace_name) .where(models.SessionPeer.workspace_name == workspace_name) @@ -912,24 +937,35 @@ async def _get_or_add_peers_to_session( workspace_name: str, session_name: str, peer_names: dict[str, schemas.SessionPeerConfig], + *, + fetch_after_upsert: bool = True, ) -> list[models.SessionPeer]: """ - Add multiple peers to an existing session. If a peer already exists in the session, - it will be skipped gracefully. + Upsert session-peer memberships for a session and optionally fetch the + active memberships afterward. + + New peers are inserted, peers that previously left the session are rejoined, + and already-active peers keep their existing session-level configuration. Args: db: Database session + workspace_name: Name of the workspace session_name: Name of the session - peer_names: Set of peer names to add to the session + peer_names: Mapping of peer names to session-level configuration + fetch_after_upsert: If True, query and return the active session peers + after the upsert. If False, skip that read and return an empty list. 
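The new `fetch_after_upsert` flag exists so `get_or_create_session` can skip a SELECT whose result it never reads. A toy sketch of the contract, with a set standing in for the session_peers table and its idempotent upsert:

```python
def add_peers_to_session(
    memberships: set[tuple[str, str]],      # (session_name, peer_name) rows
    session_name: str,
    peer_names: list[str],
    *,
    fetch_after_upsert: bool = True,
) -> list[tuple[str, str]]:
    for peer in peer_names:
        memberships.add((session_name, peer))   # idempotent, like the upsert
    if not fetch_after_upsert:
        return []                               # caller ignores the rows anyway
    return sorted(m for m in memberships if m[0] == session_name)


table: set[tuple[str, str]] = set()
assert add_peers_to_session(table, "s1", ["alice"], fetch_after_upsert=False) == []
assert add_peers_to_session(table, "s1", ["bob"]) == [("s1", "alice"), ("s1", "bob")]
```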
Returns: - List of all SessionPeer objects (both existing and newly created) + Active SessionPeer objects after the upsert, or an empty list when the + post-upsert fetch is skipped Raises: - ValueError: If adding peers would exceed the maximum limit + ObserverException: If adding peers would exceed the observer limit """ # If no peers to add, skip the insert and just return existing active session peers if not peer_names: + if not fetch_after_upsert: + return [] select_stmt = select(models.SessionPeer).where( models.SessionPeer.session_name == session_name, models.SessionPeer.workspace_name == workspace_name, @@ -994,6 +1030,9 @@ async def _get_or_add_peers_to_session( ) await db.execute(stmt) + if not fetch_after_upsert: + return [] + # Return all active session peers after the upsert select_stmt = select(models.SessionPeer).where( models.SessionPeer.session_name == session_name, diff --git a/src/crud/webhook.py b/src/crud/webhook.py index 7dc567eb4..b607ed08d 100644 --- a/src/crud/webhook.py +++ b/src/crud/webhook.py @@ -18,17 +18,20 @@ async def get_or_create_webhook_endpoint( webhook: schemas.WebhookEndpointCreate, ) -> GetOrCreateResult[schemas.WebhookEndpoint]: """ - Get or create a webhook endpoint, optionally for a workspace. + Get an existing webhook endpoint for a workspace or create it if missing. Args: db: Database session + workspace_name: Name of the workspace webhook: Webhook endpoint creation schema Returns: GetOrCreateResult containing the webhook endpoint and whether it was created Raises: - ResourceNotFoundException: If the workspace is specified and does not exist + ResourceNotFoundException: If the workspace does not exist + ValueError: If the workspace already has the maximum number of webhook + endpoints """ # Verify workspace exists await get_workspace(db, workspace_name=workspace_name) @@ -39,12 +42,6 @@ async def get_or_create_webhook_endpoint( result = await db.execute(stmt) endpoints = result.scalars().all() - # No more than WORKSPACE_LIMIT webhooks per workspace - if len(endpoints) >= settings.WEBHOOK.MAX_WORKSPACE_LIMIT: - raise ValueError( - f"Maximum number of webhook endpoints ({settings.WEBHOOK.MAX_WORKSPACE_LIMIT}) reached for this workspace." - ) - # Check if webhook already exists for this workspace for endpoint in endpoints: if endpoint.url == webhook.url: @@ -52,6 +49,12 @@ async def get_or_create_webhook_endpoint( schemas.WebhookEndpoint.model_validate(endpoint), created=False ) + # No more than WORKSPACE_LIMIT webhooks per workspace + if len(endpoints) >= settings.WEBHOOK.MAX_WORKSPACE_LIMIT: + raise ValueError( + f"Maximum number of webhook endpoints ({settings.WEBHOOK.MAX_WORKSPACE_LIMIT}) reached for this workspace." + ) + # Create new webhook endpoint webhook_endpoint = models.WebhookEndpoint( workspace_name=workspace_name, @@ -59,7 +62,6 @@ async def get_or_create_webhook_endpoint( ) db.add(webhook_endpoint) await db.commit() - await db.refresh(webhook_endpoint) logger.debug("Webhook endpoint created: %s", webhook.url) return GetOrCreateResult( diff --git a/src/crud/workspace.py b/src/crud/workspace.py index b59a99b11..3df2bb46d 100644 --- a/src/crud/workspace.py +++ b/src/crud/workspace.py @@ -202,7 +202,11 @@ async def update_workspace( db: AsyncSession, workspace_name: str, workspace: schemas.WorkspaceUpdate ) -> models.Workspace: """ - Update a workspace. + Get or create a workspace, then apply metadata and configuration updates. + + Provided metadata replaces the current metadata when present. 
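The reordering in `get_or_create_webhook_endpoint` above is behavioral, not cosmetic: checking for an existing URL before enforcing the cap means a get-or-create of an already-registered endpoint succeeds even when the workspace sits at its limit. A self-contained sketch (the constant and names are illustrative; the real limit lives in settings):

```python
MAX_WORKSPACE_LIMIT = 2  # assumed value for illustration


def get_or_create_endpoint(endpoints: list[str], url: str) -> tuple[str, bool]:
    if url in endpoints:                       # 1. existing URL: the cap is moot
        return url, False
    if len(endpoints) >= MAX_WORKSPACE_LIMIT:  # 2. cap applies to new URLs only
        raise ValueError("Maximum number of webhook endpoints reached")
    endpoints.append(url)
    return url, True


urls = ["https://a.example/hook", "https://b.example/hook"]  # already at cap
assert get_or_create_endpoint(urls, "https://a.example/hook") == (
    "https://a.example/hook",
    False,
)  # the pre-patch ordering raised ValueError here
```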
Provided + configuration keys are merged into the existing configuration instead of + replacing it wholesale. Args: db: Database session @@ -211,6 +215,10 @@ async def update_workspace( Returns: The updated workspace + + Raises: + ConflictException: If concurrent creation prevents fetching or creating + the workspace """ ws_result = await get_or_create_workspace( db, @@ -250,7 +258,6 @@ async def update_workspace( return honcho_workspace await db.commit() - await db.refresh(honcho_workspace) await ws_result.post_commit() # Only invalidate if we actually updated diff --git a/src/deriver/consumer.py b/src/deriver/consumer.py index fa2b92595..d4fd2a045 100644 --- a/src/deriver/consumer.py +++ b/src/deriver/consumer.py @@ -70,8 +70,7 @@ async def process_item(queue_item: models.QueueItem) -> None: queue_payload, ) raise ValueError(f"Invalid payload structure: {str(e)}") from e - async with tracked_db() as db: - await webhook_delivery.deliver_webhook(db, validated, workspace_name) + await webhook_delivery.deliver_webhook(validated, workspace_name) elif task_type == "summary": try: diff --git a/src/deriver/queue_manager.py b/src/deriver/queue_manager.py index 5e885255e..cda6decd3 100644 --- a/src/deriver/queue_manager.py +++ b/src/deriver/queue_manager.py @@ -56,6 +56,48 @@ class WorkerOwnership(NamedTuple): aqs_id: str # The ID of the ActiveQueueSession that the worker is processing +def _detach_queue_batch_objects( + db: AsyncSession, + messages_context: list[models.Message], + items_to_process: list[QueueItem], +) -> None: + """Detach loaded batch objects so they remain usable after tracked_db exits.""" + seen: set[int] = set() + for obj in [*messages_context, *items_to_process]: + obj_id = id(obj) + if obj_id in seen: + continue + db.expunge(obj) + seen.add(obj_id) + + +def _resolve_batch_configuration( + items_to_process: list[QueueItem], +) -> tuple[list[QueueItem], ResolvedConfiguration | None]: + """Keep only the initial homogeneous configuration prefix for a batch.""" + if not items_to_process: + return [], None + + raw_config = items_to_process[0].payload.get("configuration") + resolved_config = ( + None if raw_config is None else ResolvedConfiguration.model_validate(raw_config) + ) + + valid_items: list[QueueItem] = [] + for item in items_to_process: + item_raw_config = item.payload.get("configuration") + item_config = ( + None + if item_raw_config is None + else ResolvedConfiguration.model_validate(item_raw_config) + ) + if item_config != resolved_config: + break + valid_items.append(item) + + return valid_items, resolved_config + + class QueueManager: def __init__(self): self.shutdown_event: asyncio.Event = asyncio.Event() @@ -608,21 +650,19 @@ async def get_queue_item_batch( ) batch_max_tokens = settings.DERIVER.REPRESENTATION_BATCH_MAX_TOKENS + parsed_key = parse_work_unit_key(work_unit_key) + messages_context: list[models.Message] = [] + items_to_process: list[QueueItem] = [] async with tracked_db("get_queue_item_batch") as db: # For batch tasks, get messages based on token limit. - # Step 1: Parse work_unit_key to get session context and focused sender - parsed_key = parse_work_unit_key(work_unit_key) - - # Verify worker still owns the work_unit_key + # Step 1: Verify worker still owns the work_unit_key. 
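`_detach_queue_batch_objects` above leans on a standard SQLAlchemy idiom: instances loaded inside a managed session expire when it closes, so detaching them first keeps them readable afterwards. A runnable sketch of just that idiom (SQLite in-memory, synchronous API for brevity):

```python
from sqlalchemy import Column, Integer, String, create_engine
from sqlalchemy.orm import Session, declarative_base

Base = declarative_base()


class Item(Base):
    __tablename__ = "items"
    id = Column(Integer, primary_key=True)
    content = Column(String)


engine = create_engine("sqlite://")
Base.metadata.create_all(engine)

with Session(engine) as db:
    db.add(Item(id=1, content="hello"))
    db.commit()
    item = db.get(Item, 1)  # re-loads attributes after the commit expired them
    db.expunge(item)        # detach: the instance keeps its loaded state

print(item.content)         # safe -- no lazy refresh against a closed session
```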
ownership_check = await db.execute( select(models.ActiveQueueSession.id) .where(models.ActiveQueueSession.work_unit_key == work_unit_key) .where(models.ActiveQueueSession.id == aqs_id) ) if not ownership_check.scalar_one_or_none(): - # Worker lost ownership, return empty - await db.commit() return [], [], None # Step 2: Build a single SQL query that: @@ -716,11 +756,8 @@ async def get_queue_item_batch( result = await db.execute(query) rows = result.all() if not rows: - await db.commit() return [], [], None - messages_context: list[models.Message] = [] - items_to_process: list[QueueItem] = [] seen_messages: set[int] = set() for m, qi in rows: if m.id not in seen_messages: @@ -729,48 +766,21 @@ async def get_queue_item_batch( if qi is not None: items_to_process.append(qi) - if items_to_process: - # Enforce homogeneous peer_card_config in the batch - # We stop collecting items as soon as we encounter a different configuration - payload = items_to_process[0].payload - - raw_config = payload.get("configuration") - if raw_config is None: - resolved_config = None - else: - resolved_config = ResolvedConfiguration.model_validate(raw_config) + _detach_queue_batch_objects(db, messages_context, items_to_process) - valid_items: list[QueueItem] = [] - for item in items_to_process: - item_raw_config = item.payload.get("configuration") - if item_raw_config is None: - item_config = None - else: - item_config = ResolvedConfiguration.model_validate( - item_raw_config - ) - if item_config != resolved_config: - break - valid_items.append(item) - items_to_process = valid_items - else: - resolved_config = None - - if items_to_process: - max_queue_item_message_id = max( - [ - qi.message_id - for qi in items_to_process - if qi.message_id is not None - ] - ) - messages_context = [ # remove any messages that are after the last message_id from queue items - m for m in messages_context if m.id <= max_queue_item_message_id - ] + items_to_process, resolved_config = _resolve_batch_configuration( + items_to_process + ) - await db.commit() + if items_to_process: + max_queue_item_message_id = max( + qi.message_id for qi in items_to_process if qi.message_id is not None + ) + messages_context = [ + m for m in messages_context if m.id <= max_queue_item_message_id + ] - return messages_context, items_to_process, resolved_config + return messages_context, items_to_process, resolved_config async def mark_queue_items_as_processed( self, items: list[QueueItem], work_unit_key: str diff --git a/src/routers/peers.py b/src/routers/peers.py index 01aa0f71a..fb765737e 100644 --- a/src/routers/peers.py +++ b/src/routers/peers.py @@ -446,11 +446,10 @@ async def search_peer( ..., description="Message search parameters. Use `limit` to control the number of results returned.", ), - db: AsyncSession = db, ): """Search a Peer's messages, optionally filtered by various criteria.""" # take user-provided filter and add workspace_id and peer_id to it filters = body.filters or {} filters["workspace_id"] = workspace_id filters["peer_id"] = peer_id - return await search(db, body.query, filters=filters, limit=body.limit) + return await search(body.query, filters=filters, limit=body.limit) diff --git a/src/routers/sessions.py b/src/routers/sessions.py index f68f93ffd..9071aef1c 100644 --- a/src/routers/sessions.py +++ b/src/routers/sessions.py @@ -794,7 +794,6 @@ async def search_session( body: schemas.MessageSearchOptions = Body( ..., description="Message search parameters" ), - db: AsyncSession = db, ): """ Search a Session with optional filters. 
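The router hunks below all make the same move: the endpoint no longer declares a `db` dependency because `search` now owns its session lifecycle end to end. A schematic FastAPI route in that shape (hypothetical names; the stub `search` marks where the real one opens `tracked_db`):

```python
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class SearchBody(BaseModel):
    query: str
    limit: int = 10


async def search(query: str, *, filters: dict, limit: int) -> list[dict]:
    # Stub: the real function wraps its queries in `async with tracked_db(...)`
    # and expunges each result before the session closes.
    return [{"query": query, "filters": filters, "limit": limit}]


@app.post("/workspaces/{workspace_id}/search")
async def search_workspace(workspace_id: str, body: SearchBody):
    filters = {"workspace_id": workspace_id}  # note: no db param anywhere
    return await search(body.query, filters=filters, limit=body.limit)
```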
Use `limit` to control the number of results returned. @@ -804,7 +803,6 @@ async def search_session( filters["workspace_id"] = workspace_id filters["session_id"] = session_id return await search( - db, body.query, filters=filters, limit=body.limit, diff --git a/src/routers/workspaces.py b/src/routers/workspaces.py index 3402c7231..90530e92a 100644 --- a/src/routers/workspaces.py +++ b/src/routers/workspaces.py @@ -142,7 +142,6 @@ async def search_workspace( body: schemas.MessageSearchOptions = Body( ..., description="Message search parameters" ), - db: AsyncSession = db, ): """ Search messages in a Workspace using optional filters. Use `limit` to control the number of @@ -151,7 +150,7 @@ async def search_workspace( # take user-provided filter and add workspace_id to it filters = body.filters or {} filters["workspace_id"] = workspace_id - return await search(db, body.query, filters=filters, limit=body.limit) + return await search(body.query, filters=filters, limit=body.limit) @router.get( diff --git a/src/utils/agent_tools.py b/src/utils/agent_tools.py index a6c3009df..211323976 100644 --- a/src/utils/agent_tools.py +++ b/src/utils/agent_tools.py @@ -854,6 +854,7 @@ async def get_observation_context( workspace_name: str, session_name: str | None, message_ids: list[str], + observer: str | None = None, ) -> list[models.Message]: """ Retrieve messages for given message IDs along with surrounding context. @@ -867,6 +868,8 @@ async def get_observation_context( workspace_name: Workspace identifier session_name: Session identifier (optional) message_ids: List of message IDs to retrieve + observer: When provided and session_name is None, scope results + to sessions this peer belongs to Returns: List of messages in chronological order, including the requested messages and surrounding context @@ -874,6 +877,17 @@ async def get_observation_context( if not message_ids: return [] + # Pre-fetch peer session scope if needed + allowed_session_names: list[str] | None = None + if observer and not session_name: + from src.crud.message import get_peer_session_names + + allowed_session_names = await get_peer_session_names( + db, workspace_name, observer + ) + if not allowed_session_names: + return [] + # Use a CTE to get seq_in_session values for target messages stmt = ( select(models.Message.seq_in_session) @@ -883,6 +897,8 @@ async def get_observation_context( if session_name: stmt = stmt.where(models.Message.session_name == session_name) + elif allowed_session_names is not None: + stmt = stmt.where(models.Message.session_name.in_(allowed_session_names)) target_seqs_cte = stmt.cte("target_seqs") @@ -905,6 +921,8 @@ async def get_observation_context( if session_name: stmt = stmt.where(models.Message.session_name == session_name) + elif allowed_session_names is not None: + stmt = stmt.where(models.Message.session_name.in_(allowed_session_names)) result = await db.execute(stmt) messages = list(result.scalars().all()) @@ -916,6 +934,7 @@ async def extract_preferences( workspace_name: str, session_name: str | None, observed: str, + observer: str | None = None, ) -> dict[str, list[str]]: """ Extract user preferences and standing instructions from conversation history. 
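The `observer` parameter threaded through `get_observation_context` above follows a resolve-then-filter shape: look up the observer's sessions first, return nothing if there are none, and otherwise constrain the query with an IN filter. The same logic over in-memory data (the real code applies `.in_(allowed_session_names)` to a SQL statement):

```python
from dataclasses import dataclass


@dataclass
class Msg:
    session_name: str
    content: str


def scoped_messages(
    messages: list[Msg],
    memberships: dict[str, set[str]],  # observer -> session names
    observer: str | None,
    session_name: str | None,
) -> list[Msg]:
    if session_name:  # explicit session wins; observer is ignored
        return [m for m in messages if m.session_name == session_name]
    if observer:
        allowed = memberships.get(observer, set())
        if not allowed:
            return []  # observer belongs to no sessions: nothing is visible
        return [m for m in messages if m.session_name in allowed]
    return messages


msgs = [Msg("s1", "visible"), Msg("s2", "hidden")]
assert [m.content for m in scoped_messages(msgs, {"obs": {"s1"}}, "obs", None)] == [
    "visible"
]
```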
@@ -927,6 +946,8 @@ async def extract_preferences( workspace_name: Workspace identifier session_name: Session identifier (optional) observed: The peer whose preferences to extract + observer: When provided and session_name is None, scope results + to sessions this peer belongs to Returns: Dict with 'messages' list containing potentially relevant messages @@ -959,27 +980,26 @@ async def extract_preferences( for query in semantic_queries: try: - async with tracked_db("extract_preferences") as db: - snippets = await crud.search_messages( - db, - workspace_name=workspace_name, - session_name=session_name, - query=query, - limit=10, - context_window=0, - embedding=( - query_embeddings_by_query.get(query) - if query_embeddings_by_query is not None - else None - ), - ) - for matches, _ in snippets: - for msg in matches: - if msg.peer_name == observed: - content_key = msg.content[:100].lower() - if content_key not in seen_content: - seen_content.add(content_key) - messages.append(f"'{msg.content.strip()}'") + snippets = await crud.search_messages( + workspace_name=workspace_name, + session_name=session_name, + query=query, + limit=10, + context_window=0, + embedding=( + query_embeddings_by_query.get(query) + if query_embeddings_by_query is not None + else None + ), + observer=observer, + ) + for matches, _ in snippets: + for msg in matches: + if msg.peer_name == observed: + content_key = msg.content[:100].lower() + if content_key not in seen_content: + seen_content.add(content_key) + messages.append(f"'{msg.content.strip()}'") except Exception as e: logger.warning("Error in semantic search for '%s': %s", query, e) @@ -1265,20 +1285,19 @@ async def _handle_search_memory(ctx: ToolContext, tool_input: dict[str, Any]) -> if ctx.agent_type == "dialectic": limit = min(_safe_int(tool_input.get("top_k"), 20), 20) message_output = None - async with tracked_db("tool.search_memory.fallback") as db: - snippets = await crud.search_messages( - db, - workspace_name=ctx.workspace_name, - session_name=ctx.session_name, - query=query, - limit=limit, - context_window=0, - embedding=query_embedding, + snippets = await crud.search_messages( + workspace_name=ctx.workspace_name, + session_name=ctx.session_name, + query=query, + limit=limit, + context_window=0, + embedding=query_embedding, + observer=ctx.observer, + ) + if snippets: + message_output = _format_message_snippets( + snippets, f"for query '{query}'" ) - if snippets: - message_output = _format_message_snippets( - snippets, f"for query '{query}'" - ) if message_output: return ( f"No observations yet. Message search results:\n\n{message_output}" @@ -1302,6 +1321,7 @@ async def _handle_get_observation_context( workspace_name=ctx.workspace_name, session_name=ctx.session_name, message_ids=tool_input["message_ids"], + observer=ctx.observer, ) if not messages: return f"No messages found for IDs {tool_input['message_ids']}" @@ -1326,19 +1346,18 @@ async def _handle_search_messages(ctx: ToolContext, tool_input: dict[str, Any]) # Pre-compute embedding outside DB session to avoid holding a connection # during the external API call (same pattern as _handle_search_memory). 
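The comment above names the pattern; here is the timing argument it rests on, as a runnable toy: the embedding call is a slow network round trip, so running it before acquiring a connection keeps the pool free during the wait.

```python
import asyncio


async def embed(text: str) -> list[float]:
    await asyncio.sleep(0.2)              # stands in for the remote embedding API
    return [float(len(text))]


async def search_messages(query: str) -> list[str]:
    query_embedding = await embed(query)  # 1. slow call, no connection held
    # 2. only now open the session ("async with tracked_db(...)" in the real
    #    code); the connection is held just for the fast SQL query
    return [f"match near {query_embedding}"]


print(asyncio.run(search_messages("hello")))
```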
query_embedding = await embedding_client.embed(query) - async with tracked_db("tool.search_messages") as db: - snippets = await crud.search_messages( - db, - workspace_name=ctx.workspace_name, - session_name=ctx.session_name, - query=query, - limit=limit, - context_window=2, - embedding=query_embedding, - ) - if not snippets: - return f"No messages found for query '{query}'" - formatted = _format_message_snippets(snippets, f"for query '{query}'") + snippets = await crud.search_messages( + workspace_name=ctx.workspace_name, + session_name=ctx.session_name, + query=query, + limit=limit, + context_window=2, + embedding=query_embedding, + observer=ctx.observer, + ) + if not snippets: + return f"No messages found for query '{query}'" + formatted = _format_message_snippets(snippets, f"for query '{query}'") return formatted @@ -1352,35 +1371,32 @@ async def _handle_grep_messages(ctx: ToolContext, tool_input: dict[str, Any]) -> _safe_int(tool_input.get("context_window"), 2), 2 ) # Cap context - async with tracked_db("tool.grep_messages") as db: - snippets = await crud.grep_messages( - db, - workspace_name=ctx.workspace_name, - session_name=ctx.session_name, - text=text, - limit=limit, - context_window=context_window, - ) - if not snippets: - return f"No messages found containing '{text}'" - - # Format with pattern-based snippet extraction - snippet_texts: list[str] = [] - total_matches = sum(len(matches) for matches, _ in snippets) - for i, (matches, context) in enumerate(snippets, 1): - lines: list[str] = [] - for msg in context: - truncated = _extract_pattern_snippet(msg.content, text) - lines.append( - format_new_turn_with_timestamp( - truncated, msg.created_at, msg.peer_name - ) - ) - sess = context[0].session_name if context else "unknown" - snippet_texts.append( - f"--- Snippet {i} (session: {sess}, {len(matches)} match(es)) ---\n" - + "\n".join(lines) + snippets = await crud.grep_messages( + workspace_name=ctx.workspace_name, + session_name=ctx.session_name, + text=text, + limit=limit, + context_window=context_window, + observer=ctx.observer, + ) + if not snippets: + return f"No messages found containing '{text}'" + + # Format with pattern-based snippet extraction + snippet_texts: list[str] = [] + total_matches = sum(len(matches) for matches, _ in snippets) + for i, (matches, context) in enumerate(snippets, 1): + lines: list[str] = [] + for msg in context: + truncated = _extract_pattern_snippet(msg.content, text) + lines.append( + format_new_turn_with_timestamp(truncated, msg.created_at, msg.peer_name) ) + sess = context[0].session_name if context else "unknown" + snippet_texts.append( + f"--- Snippet {i} (session: {sess}, {len(matches)} match(es)) ---\n" + + "\n".join(lines) + ) output = ( f"Found {total_matches} messages containing '{text}' in {len(snippets)} conversation snippets:\n\n" @@ -1425,6 +1441,7 @@ async def _handle_get_messages_by_date_range( before_date=before_date, limit=limit, order=order, + observer=ctx.observer, ) msg_count = len(messages) messages_text = ( @@ -1483,31 +1500,28 @@ async def _handle_search_messages_temporal( # Pre-compute embedding outside DB session to avoid holding a connection # during the external API call. 
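The de-indentation above is the visible half of a larger refactor: handlers stop opening `tracked_db` because the crud layer now acquires, uses, and closes its own session, returning detached objects. A schematic sketch of that ownership split (`FakeSession` and the handler are stand-ins, not the real classes):

```python
import asyncio
from contextlib import asynccontextmanager


class FakeSession:
    def expunge(self, obj: object) -> None:
        pass  # the real code detaches ORM instances here


@asynccontextmanager
async def tracked_db(operation_name: str):
    db = FakeSession()
    try:
        yield db
    finally:
        pass  # session closed here in the real code


async def grep_messages(text: str) -> list[str]:
    # crud owns the session: open, query, detach, close -- callers never see db
    async with tracked_db("message.grep_messages") as db:
        rows = [f"row containing {text!r}"]
        for row in rows:
            db.expunge(row)
        return rows


async def handler() -> str:  # the tool handler is pure orchestration now
    rows = await grep_messages("keyword")
    return "\n".join(rows)


print(asyncio.run(handler()))
```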
query_embedding = await embedding_client.embed(query) - async with tracked_db("tool.search_messages_temporal") as db: - snippets = await crud.search_messages_temporal( - db, - workspace_name=ctx.workspace_name, - session_name=ctx.session_name, - query=query, - after_date=after_date, - before_date=before_date, - limit=limit, - context_window=context_window, - embedding=query_embedding, - ) - date_filter: list[str] = [] - if after_date_str: - date_filter.append(f"after {after_date_str}") - if before_date_str: - date_filter.append(f"before {before_date_str}") - filter_desc = f" ({' and '.join(date_filter)})" if date_filter else "" - - if not snippets: - return f"No messages found for query '{query}'{filter_desc}" - - formatted = _format_message_snippets( - snippets, f"for query '{query}'{filter_desc}" - ) + snippets = await crud.search_messages_temporal( + workspace_name=ctx.workspace_name, + session_name=ctx.session_name, + query=query, + after_date=after_date, + before_date=before_date, + limit=limit, + context_window=context_window, + embedding=query_embedding, + observer=ctx.observer, + ) + date_filter: list[str] = [] + if after_date_str: + date_filter.append(f"after {after_date_str}") + if before_date_str: + date_filter.append(f"before {before_date_str}") + filter_desc = f" ({' and '.join(date_filter)})" if date_filter else "" + + if not snippets: + return f"No messages found for query '{query}'{filter_desc}" + + formatted = _format_message_snippets(snippets, f"for query '{query}'{filter_desc}") return formatted @@ -1659,6 +1673,7 @@ async def _handle_extract_preferences( workspace_name=ctx.workspace_name, session_name=ctx.session_name, observed=ctx.observed, + observer=ctx.observer, ) messages = results.get("messages", []) diff --git a/src/utils/search.py b/src/utils/search.py index fcc77273b..67a0d355e 100644 --- a/src/utils/search.py +++ b/src/utils/search.py @@ -13,6 +13,7 @@ from src import models from src.config import settings +from src.dependencies import tracked_db from src.embedding_client import embedding_client from src.exceptions import ValidationException from src.models import session_peers_table @@ -23,6 +24,13 @@ T = TypeVar("T") +def _uses_pgvector_message_search() -> bool: + """Return True when semantic message search can stay entirely in Postgres.""" + return ( + settings.VECTOR_STORE.TYPE == "pgvector" or not settings.VECTOR_STORE.MIGRATED + ) + + def reciprocal_rank_fusion(*ranked_lists: list[T], k: int = 60, limit: int) -> list[T]: """ Combine multiple ranked lists using Reciprocal Rank Fusion (RRF). @@ -65,122 +73,115 @@ def reciprocal_rank_fusion(*ranked_lists: list[T], k: int = 60, limit: int) -> l return result[:limit] -async def _semantic_search( - db: AsyncSession, - query: str, +async def query_external_vector_message_ids( workspace_name: str, + embedding_query: list[float], limit: int, filters: dict[str, Any] | None = None, -) -> list[models.Message]: - """ - Perform semantic search using external vector store for message embeddings. - - Args: - db: Database session - query: Search query - workspace_name: Name of the workspace to search in - limit: Maximum number of results to return - filters: Optional filters to apply at vector store level (supports: session_id, peer_id) - - Returns: - list of messages ordered by semantic similarity - """ - try: - embedding_query = await embedding_client.embed(query) - except ValueError as e: - raise ValidationException( - f"Query exceeds maximum token limit of {settings.MAX_EMBEDDING_TOKENS}." 
- ) from e - - # Query Postgres / pgvector directly - if settings.EMBED_MESSAGES and ( - settings.VECTOR_STORE.TYPE == "pgvector" or not settings.VECTOR_STORE.MIGRATED - ): - # Join message_embeddings with messages to get full message objects - distance_expr = models.MessageEmbedding.embedding.cosine_distance( - embedding_query - ) - - stmt = ( - select(models.Message) - .join( - models.MessageEmbedding, - models.Message.public_id == models.MessageEmbedding.message_id, - ) - .where(models.MessageEmbedding.embedding.isnot(None)) - .where(models.MessageEmbedding.workspace_name == workspace_name) - ) - - # Apply all additional filters using the standard filter utility - # filters dict uses external names (session_id, peer_id) which apply_filter will map - # to internal column names (session_name, peer_name) - if filters: - # Create a copy with workspace added - internal_filters = filters.copy() - internal_filters["workspace_id"] = workspace_name - stmt = apply_filter(stmt, models.Message, internal_filters) - - # Order by cosine distance and limit - stmt = stmt.order_by(distance_expr).limit(limit) - - result = await db.execute(stmt) - return list(result.scalars().all()) - - # FALLBACK: Use external vector store (Turbopuffer, LanceDB) +) -> list[str]: + """Query the external vector store and return ordered message IDs.""" external_vector_store = get_external_vector_store() if external_vector_store is None: return [] namespace = external_vector_store.get_vector_namespace("message", workspace_name) - # Build vector store filters from the provided filters vector_filters: dict[str, Any] = {} if filters: - # Map external filter keys to vector store metadata keys if "session_id" in filters: vector_filters["session_name"] = filters["session_id"] if "peer_id" in filters: vector_filters["peer_name"] = filters["peer_id"] - # Query external vector store for similar message embeddings - # Since all filters are applied at the vector store level, we don't need to oversample + # Oversample: multiple chunk-level hits can map to the same message, + # so fetch extra to ensure enough unique messages after deduplication. 
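Concretely, the oversampling the comment above describes pairs with an order-preserving dedup: chunk-level hits arrive sorted by similarity, and dict keys keep that order while collapsing repeats, so fetching `limit * 3` leaves enough unique messages. A small sketch with toy hit payloads:

```python
def dedupe_hits(hits: list[dict], limit: int) -> list[str]:
    seen: dict[str, None] = {}
    for hit in hits:                    # hits arrive sorted by similarity
        message_id = hit.get("message_id")
        if message_id and message_id not in seen:
            seen[message_id] = None     # dict keys preserve insertion order
    return list(seen)[:limit]


hits = [{"message_id": "m1"}, {"message_id": "m1"}, {"message_id": "m2"}]
assert dedupe_hits(hits, limit=2) == ["m1", "m2"]
```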
vector_results = await external_vector_store.query( namespace, embedding_query, - top_k=limit, + top_k=limit * 3, filters=vector_filters if vector_filters else None, ) if not vector_results: return [] - # Extract message IDs from vector metadata - # Use dict to deduplicate while preserving order (dict keys maintain insertion order in Python 3.7+) seen_message_ids: dict[str, None] = {} - for result in vector_results: message_id = result.metadata.get("message_id") if message_id and message_id not in seen_message_ids: seen_message_ids[message_id] = None - message_ids = list(seen_message_ids.keys()) + return list(seen_message_ids.keys()) - # Fetch messages from database by the IDs from vector search and reapply filters - semantic_query = select(models.Message).where( - models.Message.public_id.in_(message_ids) - ) - semantic_query = apply_filter(semantic_query, models.Message, filters) - result = await db.execute(semantic_query) +async def fetch_messages_by_ids( + db: AsyncSession, + message_ids: list[str], + filters: dict[str, Any] | None = None, +) -> list[models.Message]: + """Fetch messages by ID and preserve the input ordering.""" + if not message_ids: + return [] + + stmt = select(models.Message).where(models.Message.public_id.in_(message_ids)) + stmt = apply_filter(stmt, models.Message, filters) + + result = await db.execute(stmt) messages = {msg.public_id: msg for msg in result.scalars().all()} - # Return messages in order of similarity (preserving vector store order) - ordered_messages: list[models.Message] = [] - for msg_id in message_ids: - if msg_id in messages: - ordered_messages.append(messages[msg_id]) + return [messages[msg_id] for msg_id in message_ids if msg_id in messages] + - return ordered_messages +async def _semantic_search_pgvector( + db: AsyncSession, + workspace_name: str, + embedding_query: list[float], + limit: int, + filters: dict[str, Any] | None = None, +) -> list[models.Message]: + """ + Perform semantic message search using pgvector in Postgres. + + Args: + db: Database session + workspace_name: Name of the workspace to search in + embedding_query: Pre-computed embedding for the search query + limit: Maximum number of results to return + filters: Optional filters to apply to the message query + + Returns: + list of messages ordered by semantic similarity + """ + distance_expr = models.MessageEmbedding.embedding.cosine_distance(embedding_query) + + stmt = ( + select(models.Message) + .join( + models.MessageEmbedding, + models.Message.public_id == models.MessageEmbedding.message_id, + ) + .where(models.MessageEmbedding.embedding.isnot(None)) + .where(models.MessageEmbedding.workspace_name == workspace_name) + ) + + if filters: + internal_filters = filters.copy() + internal_filters["workspace_id"] = workspace_name + stmt = apply_filter(stmt, models.Message, internal_filters) + + # Oversample because a message with multiple embedding chunks can + # produce duplicate rows; we deduplicate in Python to preserve HNSW + # index usage (a DISTINCT ON subquery would prevent the index scan). 
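`fetch_messages_by_ids` above has one subtle job beyond the lookup: `WHERE public_id IN (...)` returns rows in arbitrary order, so the result is re-sequenced against the similarity-ordered ID list. The core of that, with dicts standing in for ORM rows:

```python
def order_by_ids(rows: list[dict], ids: list[str]) -> list[dict]:
    by_id = {row["public_id"]: row for row in rows}
    # Walk the similarity-ordered IDs; skip any row the SQL filters removed.
    return [by_id[i] for i in ids if i in by_id]


rows = [{"public_id": "b"}, {"public_id": "a"}]  # arbitrary DB order
assert order_by_ids(rows, ["a", "b", "c"]) == [
    {"public_id": "a"},
    {"public_id": "b"},
]
```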
+ stmt = stmt.order_by(distance_expr).limit(limit * 2) + + result = await db.execute(stmt) + seen: set[str] = set() + deduped: list[models.Message] = [] + for msg in result.scalars().all(): + if msg.public_id not in seen: + seen.add(msg.public_id) + deduped.append(msg) + return deduped[:limit] async def _filter_by_peer_perspective( @@ -308,7 +309,6 @@ async def _fulltext_search( async def search( - db: AsyncSession, query: str, *, filters: dict[str, Any] | None = None, @@ -321,7 +321,6 @@ async def search( are available, providing better search results than either method alone. Args: - db: Database session query: Search query to match against message content filters: Optional filters to scope search (must include workspace_id for semantic search). Special filter 'peer_perspective' will search across all messages from sessions that the peer is/was a member of, @@ -368,50 +367,81 @@ async def search( stmt = apply_filter(stmt, models.Message, filters) - search_results: list[list[models.Message]] = [] + workspace_name: str | None = None + if filters: + workspace_value = filters.get("workspace_id") or filters.get("workspace_name") + if isinstance(workspace_value, str): + workspace_name = workspace_value - # Perform semantic search if enabled and we have workspace context - # workspace_id is required for semantic search to determine the vector namespace - workspace_name: str | None = filters.get("workspace_id") if filters else None - if settings.EMBED_MESSAGES and isinstance(workspace_name, str): - # Type narrowing: workspace_name is guaranteed to be str in this block - # Get more results for fusion (increase if peer_perspective filtering is applied post-search) - semantic_limit = limit * 4 if peer_perspective_name else limit * 2 - semantic_results = await _semantic_search( - db=db, - query=query, - workspace_name=workspace_name, - limit=semantic_limit, - filters=filters, - ) + semantic_limit = limit * 4 if peer_perspective_name else limit * 2 + query_embedding: list[float] | None = None + semantic_message_ids: list[str] | None = None - # Apply peer_perspective filtering to semantic results if needed - # Vector store can't handle temporal filtering (joined_at/left_at), so filter post-search - if peer_perspective_name: - semantic_results = await _filter_by_peer_perspective( - db, semantic_results, workspace_name, peer_perspective_name + if settings.EMBED_MESSAGES and isinstance(workspace_name, str): + try: + query_embedding = await embedding_client.embed(query) + except ValueError as e: + raise ValidationException( + f"Query exceeds maximum token limit of {settings.MAX_EMBEDDING_TOKENS}." 
+ ) from e + + if not _uses_pgvector_message_search(): + semantic_message_ids = await query_external_vector_message_ids( + workspace_name=workspace_name, + embedding_query=query_embedding, + limit=semantic_limit, + filters=filters, ) - search_results.append(semantic_results) + async def _run_search(active_db: AsyncSession) -> list[models.Message]: + search_results: list[list[models.Message]] = [] - # Perform full-text search - # Get more results for fusion - fulltext_limit = limit * 2 - fulltext_results = await _fulltext_search( - db=db, query=query, stmt=stmt, limit=fulltext_limit - ) - search_results.append(fulltext_results) - - # Combine results using RRF if we have multiple search methods - if len(search_results) > 1: - # Use RRF to combine semantic and full-text results - combined_results = reciprocal_rank_fusion(*search_results, limit=limit) - elif len(search_results) == 1: - # Single search method - apply limit directly - combined_results = search_results[0] - combined_results = combined_results[:limit] - else: - # No search results - combined_results = [] + if ( + settings.EMBED_MESSAGES + and isinstance(workspace_name, str) + and query_embedding is not None + ): + if _uses_pgvector_message_search(): + semantic_results = await _semantic_search_pgvector( + db=active_db, + workspace_name=workspace_name, + embedding_query=query_embedding, + limit=semantic_limit, + filters=filters, + ) + else: + semantic_results = await fetch_messages_by_ids( + db=active_db, + message_ids=semantic_message_ids or [], + filters=filters, + ) + + if peer_perspective_name: + semantic_results = await _filter_by_peer_perspective( + active_db, + semantic_results, + workspace_name, + peer_perspective_name, + ) + + search_results.append(semantic_results) + + fulltext_results = await _fulltext_search( + db=active_db, + query=query, + stmt=stmt, + limit=limit * 2, + ) + search_results.append(fulltext_results) + + if len(search_results) > 1: + return reciprocal_rank_fusion(*search_results, limit=limit) + if len(search_results) == 1: + return search_results[0][:limit] + return [] - return combined_results + async with tracked_db("search.messages") as managed_db: + combined_results = await _run_search(managed_db) + for message in combined_results: + managed_db.expunge(message) + return combined_results diff --git a/src/webhooks/webhook_delivery.py b/src/webhooks/webhook_delivery.py index d26aa404a..3d2df830b 100644 --- a/src/webhooks/webhook_delivery.py +++ b/src/webhooks/webhook_delivery.py @@ -9,42 +9,41 @@ from src.config import settings from src.crud.webhook import list_webhook_endpoints +from src.dependencies import tracked_db from src.utils.formatting import utc_now_iso from src.utils.queue_payload import WebhookPayload logger = logging.getLogger(__name__) -async def deliver_webhook( - db: AsyncSession, payload: WebhookPayload, workspace_name: str -) -> None: +async def deliver_webhook(payload: WebhookPayload, workspace_name: str) -> None: """ Deliver a single webhook event to its configured endpoints. """ - async with httpx.AsyncClient(timeout=30.0) as client: - try: + try: + async with tracked_db("webhook.deliver") as db: webhook_urls = await _get_webhook_urls(db, workspace_name) - if not webhook_urls: - logger.debug( - f"No webhook endpoints for workspace {workspace_name}, skipping." 
- ) - return - - event_payload = { - "type": payload.event_type, - "data": payload.data, - "timestamp": utc_now_iso(), - } - event_json = json.dumps( - event_payload, separators=(",", ":"), sort_keys=True + + if not webhook_urls: + logger.debug( + f"No webhook endpoints for workspace {workspace_name}, skipping." ) + return - try: - signature = _generate_webhook_signature(event_json) - except ValueError: - logger.exception("Failed to generate webhook signature") - return + event_payload = { + "type": payload.event_type, + "data": payload.data, + "timestamp": utc_now_iso(), + } + event_json = json.dumps(event_payload, separators=(",", ":"), sort_keys=True) + try: + signature = _generate_webhook_signature(event_json) + except ValueError: + logger.exception("Failed to generate webhook signature") + return + + async with httpx.AsyncClient(timeout=30.0) as client: tasks = [ client.post( url=url, @@ -73,10 +72,10 @@ async def deliver_webhook( f"Failed delivery for {payload.event_type} to {url}. Exception: {result}" ) - except httpx.RequestError: - logger.exception(f"Error sending webhook for {workspace_name}.") - except Exception: - logger.exception("Unexpected error delivering webhook.") + except httpx.RequestError: + logger.exception(f"Error sending webhook for {workspace_name}.") + except Exception: + logger.exception("Unexpected error delivering webhook.") async def _get_webhook_urls(db: AsyncSession, workspace_name: str) -> list[str]: diff --git a/tests/bench/runner_common.py b/tests/bench/runner_common.py index 093027df3..0f9840e73 100644 --- a/tests/bench/runner_common.py +++ b/tests/bench/runner_common.py @@ -441,10 +441,6 @@ def __init__(self, config: RunnerConfig): f"{self.get_metrics_prefix()}_{datetime.now().strftime('%Y%m%d_%H%M%S')}" ) self.logger: Logger = configure_logging() - # Semaphore for rate limiting concurrent item execution - self._concurrency_semaphore: asyncio.Semaphore | None = ( - asyncio.Semaphore(config.max_concurrent) if config.max_concurrent else None - ) # ------------------------------------------------------------------------- # Abstract methods - must be implemented by subclasses @@ -559,59 +555,64 @@ async def run(self) -> tuple[list[ResultT], float]: print(f"Limiting to {self.config.max_concurrent} concurrent item(s)") overall_start = time.time() - all_results: list[ResultT] = [] - - # Process in batches - batch_size = self.config.batch_size - for i in range(0, len(items), batch_size): - batch = items[i : i + batch_size] - batch_num = (i // batch_size) + 1 - total_batches = (len(items) + batch_size - 1) // batch_size - - print(f"\n{'=' * 60}") - print(f"Processing batch {batch_num}/{total_batches} ({len(batch)} items)") - print(f"{'=' * 60}") - - # Run items in batch concurrently (with optional rate limiting) - batch_results = await asyncio.gather( - *[ - self._execute_item_with_limit(item, self._get_honcho_url(i + idx)) - for idx, item in enumerate(batch) - ] - ) + all_results: list[ResultT | None] = [None] * len(items) + + # Two-level concurrency: + # - inflight_sem limits how many items may be in the pipeline at once + # - active_sem limits how many items may actively hit Honcho at once + # Items release active_sem while waiting on queue polling so other work + # can progress, but inflight_sem prevents an unlimited thundering herd. 
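The two-semaphore scheme the comment above describes can be shown in isolation: `inflight` caps pipeline admission while `active` caps real work, and an item gives up its active slot during idle polling so waiting never starves others of active capacity. A runnable toy with assumed counts:

```python
import asyncio


async def run_item(i: int, inflight: asyncio.Semaphore, active: asyncio.Semaphore) -> int:
    async with inflight:          # pipeline admission: bounds total in-flight items
        async with active:        # active phase 1: setup + ingest
            await asyncio.sleep(0.01)
        await asyncio.sleep(0.05)  # idle poll: active slot released meanwhile
        async with active:        # active phase 2: queries
            await asyncio.sleep(0.01)
        return i


async def main() -> None:
    inflight, active = asyncio.Semaphore(4), asyncio.Semaphore(4)
    results = await asyncio.gather(*(run_item(i, inflight, active) for i in range(16)))
    assert sorted(results) == list(range(16))


asyncio.run(main())
```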
+ concurrency = self.config.max_concurrent or self.config.batch_size + inflight_sem = asyncio.Semaphore(concurrency) + active_sem = asyncio.Semaphore(concurrency) + + async def _run_item(index: int, item: Any) -> None: + async with inflight_sem: + result = await self.execute_item( + item, + self._get_honcho_url(index), + active_sem=active_sem, + ) + all_results[index] = result - all_results.extend(batch_results) + tasks = [ + asyncio.create_task(_run_item(index, item)) + for index, item in enumerate(items) + ] + await asyncio.gather(*tasks) overall_duration = time.time() - overall_start # Finalize metrics self.metrics_collector.finalize_collection() - return all_results, overall_duration + missing_indexes = [ + index for index, result in enumerate(all_results) if result is None + ] + if missing_indexes: + raise RuntimeError( + f"Missing benchmark results for item indexes: {missing_indexes}" + ) - async def _execute_item_with_limit(self, item: Any, honcho_url: str) -> ResultT: - """Wrapper that applies concurrency limiting if configured.""" - if self._concurrency_semaphore: - async with self._concurrency_semaphore: - return await self.execute_item(item, honcho_url) - return await self.execute_item(item, honcho_url) + return [cast(ResultT, result) for result in all_results], overall_duration - async def execute_item(self, item: Any, honcho_url: str) -> ResultT: + async def execute_item( + self, + item: Any, + honcho_url: str, + active_sem: asyncio.Semaphore | None = None, + ) -> ResultT: """ Execute a single benchmark item. - This method orchestrates the standard flow: - 1. Create workspace and client - 2. Setup peers and session - 3. Ingest messages - 4. Wait for queue to empty - 5. Trigger dreams - 6. Execute questions - 7. Cleanup (if configured) + Active work (setup, ingest, dream scheduling, query execution) acquires + ``active_sem`` when provided. Idle queue polling releases that slot so + other items can continue making forward progress. Args: item: The item to process honcho_url: URL of the Honcho instance to use + active_sem: Optional semaphore limiting active I/O phases Returns: Result for this item @@ -635,21 +636,22 @@ async def execute_item(self, item: Any, honcho_url: str) -> ResultT: start_time = time.time() try: - # Setup peers - await self.setup_peers(ctx, item) - - # Setup session - await self.setup_session(ctx, item) + # Setup peers/session and ingest under the active semaphore. + if active_sem: + await active_sem.acquire() + try: + await self.setup_peers(ctx, item) + await self.setup_session(ctx, item) - # Ingest messages - print(f"[{workspace_id}] Ingesting messages...") - message_count = await self.ingest_messages(ctx, item) - print(f"[{workspace_id}] Ingested {message_count} messages") + print(f"[{workspace_id}] Ingesting messages...") + message_count = await self.ingest_messages(ctx, item) + print(f"[{workspace_id}] Ingested {message_count} messages") + finally: + if active_sem: + active_sem.release() # Wait for deriver queue print(f"[{workspace_id}] Waiting for deriver queue to empty...") - await asyncio.sleep(1) # Give time for tasks to be queued - queue_empty = await self._wait_for_queue_empty(ctx.honcho_client) if not queue_empty: raise TimeoutError( @@ -670,20 +672,72 @@ async def execute_item(self, item: Any, honcho_url: str) -> ResultT: + f"{len(dream_observers)} observer(s) across " + f"{len(dream_session_ids)} session(s)..." 
) - for observer in dream_observers: - for dream_session_id in dream_session_ids: - success = await self._trigger_dream( - ctx.honcho_client, workspace_id, observer, dream_session_id - ) - if not success: + + if self.config.skip_dream: + print(f"[{workspace_id}] Skipping dreams (--skip-dream)") + else: + + async def _schedule_dream( + observer: str, + session_id: str, + ) -> bool: + try: + if active_sem: + await active_sem.acquire() + try: + await ctx.honcho_client.aio.schedule_dream( + observer=observer, + session=session_id, + observed=observer, + ) + finally: + if active_sem: + active_sem.release() + print( + f"[{workspace_id}] Dream triggered for " + + f"{observer}/{observer} in {session_id}" + ) + return True + except Exception as e: print( - f"[{workspace_id}] Warning: Dream for {observer} in " - + f"session {dream_session_id} did not complete" + f"[{workspace_id}] ERROR: Dream trigger exception " + + f"for {observer} in {session_id}: {e}" ) + return False + + dream_results = await asyncio.gather( + *[ + _schedule_dream(observer, dream_session_id) + for observer in dream_observers + for dream_session_id in dream_session_ids + ] + ) + + if all(dream_results): + success = await self._wait_for_queue_empty(ctx.honcho_client) + if success: + print(f"[{workspace_id}] All dreams completed") + else: + print(f"[{workspace_id}] Dreams timed out") + elif any(dream_results): + failed = [i for i, ok in enumerate(dream_results) if not ok] + print( + f"[{workspace_id}] Warning: {len(failed)} of " + + f"{len(dream_results)} dream schedules failed" + ) + await self._wait_for_queue_empty(ctx.honcho_client) + else: + print(f"[{workspace_id}] Warning: No dreams were scheduled") # Execute questions print(f"[{workspace_id}] Executing questions...") - result = await self.execute_questions(ctx, item) + if active_sem: + await active_sem.acquire() + try: + result = await self.execute_questions(ctx, item) + finally: + if active_sem: + active_sem.release() # Cleanup if self.config.cleanup_workspace: @@ -765,13 +819,15 @@ def _get_session_configuration(self) -> SessionConfiguration: async def _wait_for_queue_empty( self, honcho_client: Honcho, session_id: str | None = None ) -> bool: - """Wait for the deriver queue to be empty.""" + """Wait for the deriver queue to be empty with exponential backoff.""" start_time = time.time() + delay = 0.2 while True: try: status = await honcho_client.aio.queue_status(session=session_id) except Exception: - await asyncio.sleep(1) + await asyncio.sleep(delay) + delay = min(delay * 1.5, 2.0) if time.time() - start_time >= self.config.timeout_seconds: return False continue @@ -781,7 +837,8 @@ async def _wait_for_queue_empty( if time.time() - start_time >= self.config.timeout_seconds: return False - await asyncio.sleep(1) + await asyncio.sleep(delay) + delay = min(delay * 1.5, 2.0) async def _trigger_dream( self, @@ -815,8 +872,6 @@ async def _trigger_dream( print(f"[{workspace_id}] Dream triggered for {observer}/{observed}") - # Wait for dream to complete - await asyncio.sleep(2) success = await self._wait_for_queue_empty(honcho_client) if success: print(f"[{workspace_id}] Dream for {observer} completed") diff --git a/tests/conftest.py b/tests/conftest.py index bd426e897..2ef7086b6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -752,8 +752,11 @@ async def mock_tracked_db_context(_: str | None = None): patch("src.dialectic.chat.tracked_db", mock_tracked_db_context), patch("src.utils.summarizer.tracked_db", mock_tracked_db_context), patch("src.webhooks.events.tracked_db", 
mock_tracked_db_context), + patch("src.webhooks.webhook_delivery.tracked_db", mock_tracked_db_context), patch("src.utils.agent_tools.tracked_db", mock_tracked_db_context), + patch("src.utils.search.tracked_db", mock_tracked_db_context), patch("src.crud.document.tracked_db", mock_tracked_db_context), + patch("src.crud.message.tracked_db", mock_tracked_db_context), patch("src.dialectic.core.tracked_db", mock_tracked_db_context), patch("src.dreamer.specialists.tracked_db", mock_tracked_db_context), patch("src.dreamer.surprisal.tracked_db", mock_tracked_db_context), diff --git a/tests/integration/test_message_embeddings.py b/tests/integration/test_message_embeddings.py index de544ee01..ef0450496 100644 --- a/tests/integration/test_message_embeddings.py +++ b/tests/integration/test_message_embeddings.py @@ -4,6 +4,8 @@ These tests verify that message embeddings are created, stored, and can be searched. """ +from contextlib import asynccontextmanager +from datetime import datetime, timezone from typing import Any import pytest @@ -12,7 +14,9 @@ from sqlalchemy.ext.asyncio import AsyncSession from src import models +from src.config import settings from src.crud import create_messages +from src.crud import message as message_crud from src.models import Peer, Workspace from src.schemas import MessageCreate from src.utils.search import search @@ -240,8 +244,7 @@ async def test_semantic_search_when_embeddings_enabled( initial_call_count: int = mock_openai_embeddings["embed"].call_count search_results = await search( - db=db_session, - query=search_query, + search_query, filters={ "workspace_id": test_workspace.name, "session_id": test_session.name, @@ -257,6 +260,212 @@ async def test_semantic_search_when_embeddings_enabled( assert created_message.public_id in found_message_ids +@pytest.mark.asyncio +async def test_search_messages_external_lookup_happens_before_tracked_db( + monkeypatch: pytest.MonkeyPatch, +): + """External semantic lookup should finish before opening tracked_db.""" + monkeypatch.setattr(settings.VECTOR_STORE, "MIGRATED", True) + monkeypatch.setattr(settings.VECTOR_STORE, "TYPE", "external") + + call_order: list[str] = [] + message = models.Message( + workspace_name="workspace", + session_name="session", + peer_name="peer", + content="Relevant external search result", + seq_in_session=1, + token_count=5, + created_at=datetime.now(timezone.utc), + ) + + class FakeDb: + def expunge(self, _obj: object) -> None: + call_order.append("expunge") + + fake_db = FakeDb() + + async def fake_search_messages_external( + workspace_name: str, + query_embedding: list[float], + limit: int, + *, + session_name: str | None = None, + allowed_session_names: list[str] | None = None, + after_date: datetime | None = None, + before_date: datetime | None = None, + ) -> list[str]: + _ = ( + workspace_name, + query_embedding, + limit, + session_name, + allowed_session_names, + after_date, + before_date, + ) + call_order.append("external") + return ["message-1"] + + async def fake_fetch_messages_by_ids( + db: FakeDb, + workspace_name: str, + message_ids: list[str], + *, + after_date: datetime | None = None, + before_date: datetime | None = None, + ) -> list[models.Message]: + _ = (workspace_name, message_ids, after_date, before_date) + assert db is fake_db + call_order.append("fetch") + return [message] + + async def fake_build_merged_snippets( + db: FakeDb, + workspace_name: str, + matched_messages: list[models.Message], + context_window: int, + ) -> list[tuple[list[models.Message], list[models.Message]]]: + _ 
= (workspace_name, context_window) + assert db is fake_db + assert matched_messages == [message] + call_order.append("build") + return [([message], [message])] + + @asynccontextmanager + async def fake_tracked_db(_operation_name: str | None = None): + call_order.append("enter") + yield fake_db + call_order.append("exit") + + monkeypatch.setattr( + message_crud, "_search_messages_external", fake_search_messages_external + ) + monkeypatch.setattr( + message_crud, "_fetch_messages_by_ids", fake_fetch_messages_by_ids + ) + monkeypatch.setattr( + message_crud, "_build_merged_snippets", fake_build_merged_snippets + ) + monkeypatch.setattr(message_crud, "tracked_db", fake_tracked_db) + + snippets = await message_crud.search_messages( + workspace_name="workspace", + session_name="session", + query="relevant query", + embedding=[0.1, 0.2, 0.3], + ) + + assert snippets == [([message], [message])] + assert call_order.index("external") < call_order.index("enter") + + +@pytest.mark.asyncio +async def test_search_messages_temporal_external_lookup_happens_before_tracked_db( + monkeypatch: pytest.MonkeyPatch, +): + """Temporal external semantic lookup should finish before opening tracked_db.""" + monkeypatch.setattr(settings.VECTOR_STORE, "MIGRATED", True) + monkeypatch.setattr(settings.VECTOR_STORE, "TYPE", "external") + + call_order: list[str] = [] + after_date = datetime(2024, 1, 1, tzinfo=timezone.utc) + before_date = datetime(2024, 12, 31, tzinfo=timezone.utc) + message = models.Message( + workspace_name="workspace", + session_name="session", + peer_name="peer", + content="Relevant temporal external search result", + seq_in_session=1, + token_count=5, + created_at=datetime.now(timezone.utc), + ) + + class FakeDb: + def expunge(self, _obj: object) -> None: + call_order.append("expunge") + + fake_db = FakeDb() + + async def fake_search_messages_external( + workspace_name: str, + query_embedding: list[float], + limit: int, + *, + session_name: str | None = None, + allowed_session_names: list[str] | None = None, + after_date: datetime | None = None, + before_date: datetime | None = None, + ) -> list[str]: + _ = ( + workspace_name, + query_embedding, + limit, + session_name, + allowed_session_names, + ) + assert after_date is not None + assert before_date is not None + call_order.append("external") + return ["message-1"] + + async def fake_fetch_messages_by_ids( + db: FakeDb, + workspace_name: str, + message_ids: list[str], + *, + after_date: datetime | None = None, + before_date: datetime | None = None, + ) -> list[models.Message]: + _ = (workspace_name, message_ids) + assert db is fake_db + assert after_date is not None + assert before_date is not None + call_order.append("fetch") + return [message] + + async def fake_build_merged_snippets( + db: FakeDb, + workspace_name: str, + matched_messages: list[models.Message], + context_window: int, + ) -> list[tuple[list[models.Message], list[models.Message]]]: + _ = (workspace_name, context_window) + assert db is fake_db + assert matched_messages == [message] + call_order.append("build") + return [([message], [message])] + + @asynccontextmanager + async def fake_tracked_db(_operation_name: str | None = None): + call_order.append("enter") + yield fake_db + call_order.append("exit") + + monkeypatch.setattr( + message_crud, "_search_messages_external", fake_search_messages_external + ) + monkeypatch.setattr( + message_crud, "_fetch_messages_by_ids", fake_fetch_messages_by_ids + ) + monkeypatch.setattr( + message_crud, "_build_merged_snippets", 
fake_build_merged_snippets + ) + monkeypatch.setattr(message_crud, "tracked_db", fake_tracked_db) + + snippets = await message_crud.search_messages_temporal( + workspace_name="workspace", + session_name="session", + query="relevant query", + after_date=after_date, + before_date=before_date, + embedding=[0.1, 0.2, 0.3], + ) + + assert snippets == [([message], [message])] + assert call_order.index("external") < call_order.index("enter") + + @pytest.mark.asyncio async def test_message_chunking_creates_multiple_embeddings( db_session: AsyncSession, diff --git a/tests/sdk_typescript/conftest.py b/tests/sdk_typescript/conftest.py index 15f2c98b6..8abf6848d 100644 --- a/tests/sdk_typescript/conftest.py +++ b/tests/sdk_typescript/conftest.py @@ -136,5 +136,8 @@ async def ts_tracked_db(_: str | None = None): patch("src.dialectic.chat.tracked_db", ts_tracked_db), patch("src.utils.summarizer.tracked_db", ts_tracked_db), patch("src.webhooks.events.tracked_db", ts_tracked_db), + patch("src.webhooks.webhook_delivery.tracked_db", ts_tracked_db), + patch("src.utils.search.tracked_db", ts_tracked_db), + patch("src.crud.message.tracked_db", ts_tracked_db), ): yield diff --git a/tests/test_search.py b/tests/test_search.py index 2881cfd53..84f3ffa3d 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -6,7 +6,7 @@ from nanoid import generate as generate_nanoid from sqlalchemy.ext.asyncio import AsyncSession -from src import models +from src import crud, models from src.utils.search import search @@ -62,11 +62,10 @@ async def test_peer_perspective_search_single_session( created_at=join_time + datetime.timedelta(seconds=2), ) db_session.add_all([msg1, msg2]) - await db_session.flush() + await db_session.commit() # Search with peer_perspective filter results = await search( - db_session, "Message", filters={"peer_perspective": peer1.name, "workspace_id": workspace.name}, limit=10, @@ -132,11 +131,10 @@ async def test_peer_perspective_search_multiple_sessions( created_at=join_time + datetime.timedelta(seconds=2), ) db_session.add_all([msg1, msg2]) - await db_session.flush() + await db_session.commit() # Search with peer_perspective filter results = await search( - db_session, "Message", filters={"peer_perspective": peer1.name, "workspace_id": workspace.name}, limit=10, @@ -212,11 +210,10 @@ async def test_peer_perspective_search_temporal_constraints( created_at=leave_time + datetime.timedelta(seconds=1), ) db_session.add_all([msg_before, msg_during, msg_after]) - await db_session.flush() + await db_session.commit() # Search with peer_perspective filter results = await search( - db_session, "Message", filters={"peer_perspective": peer1.name, "workspace_id": workspace.name}, limit=10, @@ -279,11 +276,10 @@ async def test_peer_perspective_search_active_member( created_at=join_time + datetime.timedelta(seconds=100), ) db_session.add_all([msg1, msg2]) - await db_session.flush() + await db_session.commit() # Search with peer_perspective filter results = await search( - db_session, "Message", filters={"peer_perspective": peer1.name, "workspace_id": workspace.name}, limit=10, @@ -339,11 +335,10 @@ async def test_peer_perspective_search_no_sessions( created_at=join_time + datetime.timedelta(seconds=1), ) db_session.add(msg) - await db_session.flush() + await db_session.commit() # Search with peer_perspective filter for peer1 (not in any sessions) results = await search( - db_session, "Message", filters={"peer_perspective": peer1.name, "workspace_id": workspace.name}, limit=10, @@ -408,11 +403,10 @@ async def 
test_peer_perspective_search_boundary_timestamps( created_at=leave_time, # Exact leave time ) db_session.add_all([msg_at_join, msg_at_leave]) - await db_session.flush() + await db_session.commit() # Search with peer_perspective filter results = await search( - db_session, "Message", filters={"peer_perspective": peer1.name, "workspace_id": workspace.name}, limit=10, @@ -422,3 +416,291 @@ async def test_peer_perspective_search_boundary_timestamps( assert len(results) == 2 assert msg_at_join.public_id in [m.public_id for m in results] assert msg_at_leave.public_id in [m.public_id for m in results] + + +# ============================================================================= +# Tests for observer scoping in CRUD message functions +# ============================================================================= + + +async def _setup_multi_session_workspace(db_session: AsyncSession): + """Helper: create workspace with 2 sessions, 2 peers. peer1 only in session1.""" + workspace = models.Workspace(name=generate_nanoid()) + db_session.add(workspace) + await db_session.flush() + + peer1 = models.Peer(name="observer", workspace_name=workspace.name) + peer2 = models.Peer(name="other", workspace_name=workspace.name) + db_session.add_all([peer1, peer2]) + await db_session.flush() + + session1 = models.Session(name="session_visible", workspace_name=workspace.name) + session2 = models.Session(name="session_hidden", workspace_name=workspace.name) + db_session.add_all([session1, session2]) + await db_session.flush() + + join_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( + minutes=10 + ) + + # peer1 is only in session1 + await db_session.execute( + models.session_peers_table.insert().values( + workspace_name=workspace.name, + session_name=session1.name, + peer_name=peer1.name, + joined_at=join_time, + left_at=None, + ) + ) + # peer2 is in both sessions + for s in [session1, session2]: + await db_session.execute( + models.session_peers_table.insert().values( + workspace_name=workspace.name, + session_name=s.name, + peer_name=peer2.name, + joined_at=join_time, + left_at=None, + ) + ) + await db_session.flush() + + msg_visible = models.Message( + content="visible message with keyword", + session_name=session1.name, + peer_name=peer2.name, + workspace_name=workspace.name, + seq_in_session=1, + created_at=join_time + datetime.timedelta(seconds=1), + ) + msg_hidden = models.Message( + content="hidden message with keyword", + session_name=session2.name, + peer_name=peer2.name, + workspace_name=workspace.name, + seq_in_session=1, + created_at=join_time + datetime.timedelta(seconds=2), + ) + db_session.add_all([msg_visible, msg_hidden]) + await db_session.commit() + + return workspace, peer1, peer2, session1, session2, msg_visible, msg_hidden + + +@pytest.mark.asyncio +async def test_grep_messages_observer_scoping_excludes_non_member_sessions( + db_session: AsyncSession, +): + """grep_messages with observer excludes messages from sessions the observer isn't in.""" + ( + workspace, + peer1, + _, + _, + _, + msg_visible, + msg_hidden, + ) = await _setup_multi_session_workspace(db_session) + + # Without scoping: both messages found + results_unscoped = await crud.grep_messages( + workspace_name=workspace.name, + session_name=None, + text="keyword", + ) + all_matched_ids = [m.public_id for matches, _ in results_unscoped for m in matches] + assert msg_visible.public_id in all_matched_ids + assert msg_hidden.public_id in all_matched_ids + + # With observer scoping: only visible message 
found + results_scoped = await crud.grep_messages( + workspace_name=workspace.name, + session_name=None, + text="keyword", + observer=peer1.name, + ) + scoped_ids = [m.public_id for matches, _ in results_scoped for m in matches] + assert msg_visible.public_id in scoped_ids + assert msg_hidden.public_id not in scoped_ids + + +@pytest.mark.asyncio +async def test_get_messages_by_date_range_observer_scoping( + db_session: AsyncSession, +): + """get_messages_by_date_range with observer excludes non-member sessions.""" + ( + workspace, + peer1, + _, + _, + _, + msg_visible, + msg_hidden, + ) = await _setup_multi_session_workspace(db_session) + + # Without scoping + results_unscoped = await crud.get_messages_by_date_range( + db_session, + workspace_name=workspace.name, + session_name=None, + ) + unscoped_ids = [m.public_id for m in results_unscoped] + assert msg_visible.public_id in unscoped_ids + assert msg_hidden.public_id in unscoped_ids + + # With observer scoping + results_scoped = await crud.get_messages_by_date_range( + db_session, + workspace_name=workspace.name, + session_name=None, + observer=peer1.name, + ) + scoped_ids = [m.public_id for m in results_scoped] + assert msg_visible.public_id in scoped_ids + assert msg_hidden.public_id not in scoped_ids + + +@pytest.mark.asyncio +async def test_grep_messages_observer_scoping_noop_when_session_provided( + db_session: AsyncSession, +): + """When session_name is provided, observer is ignored.""" + ( + workspace, + peer1, + _, + _, + session_hidden, + _, + msg_hidden, + ) = await _setup_multi_session_workspace(db_session) + + results = await crud.grep_messages( + workspace_name=workspace.name, + session_name=session_hidden.name, + text="keyword", + observer=peer1.name, + ) + matched_ids = [m.public_id for matches, _ in results for m in matches] + assert msg_hidden.public_id in matched_ids + + +@pytest.mark.asyncio +async def test_grep_messages_observer_scoping_empty_when_no_sessions( + db_session: AsyncSession, +): + """Observer not in any sessions returns empty results.""" + workspace = models.Workspace(name=generate_nanoid()) + db_session.add(workspace) + await db_session.flush() + + loner = models.Peer(name="loner", workspace_name=workspace.name) + other = models.Peer(name="other", workspace_name=workspace.name) + db_session.add_all([loner, other]) + await db_session.flush() + + session = models.Session(name="s1", workspace_name=workspace.name) + db_session.add(session) + await db_session.flush() + + await db_session.execute( + models.session_peers_table.insert().values( + workspace_name=workspace.name, + session_name=session.name, + peer_name=other.name, + joined_at=datetime.datetime.now(datetime.timezone.utc), + left_at=None, + ) + ) + await db_session.flush() + + msg = models.Message( + content="some keyword content", + session_name=session.name, + peer_name=other.name, + workspace_name=workspace.name, + seq_in_session=1, + created_at=datetime.datetime.now(datetime.timezone.utc), + ) + db_session.add(msg) + await db_session.commit() + + results = await crud.grep_messages( + workspace_name=workspace.name, + session_name=None, + text="keyword", + observer=loner.name, + ) + assert results == [] + + +@pytest.mark.asyncio +async def test_grep_messages_observer_scoping_left_session_still_visible( + db_session: AsyncSession, +): + """Observer who left a session still sees all messages in that session. + + Any membership record (regardless of left_at) grants full session visibility. 
+ """ + workspace = models.Workspace(name=generate_nanoid()) + db_session.add(workspace) + await db_session.flush() + + observer = models.Peer(name="obs", workspace_name=workspace.name) + other = models.Peer(name="other", workspace_name=workspace.name) + db_session.add_all([observer, other]) + await db_session.flush() + + session = models.Session(name="s1", workspace_name=workspace.name) + db_session.add(session) + await db_session.flush() + + base_time = datetime.datetime.now(datetime.timezone.utc) - datetime.timedelta( + minutes=10 + ) + join_time = base_time + leave_time = base_time + datetime.timedelta(minutes=5) + + await db_session.execute( + models.session_peers_table.insert().values( + workspace_name=workspace.name, + session_name=session.name, + peer_name=observer.name, + joined_at=join_time, + left_at=leave_time, + ) + ) + await db_session.flush() + + # Message during membership + msg_during = models.Message( + content="keyword during", + session_name=session.name, + peer_name=other.name, + workspace_name=workspace.name, + seq_in_session=1, + created_at=join_time + datetime.timedelta(minutes=2), + ) + # Message after observer left — still visible because any membership grants full access + msg_after = models.Message( + content="keyword after", + session_name=session.name, + peer_name=other.name, + workspace_name=workspace.name, + seq_in_session=2, + created_at=leave_time + datetime.timedelta(minutes=1), + ) + db_session.add_all([msg_during, msg_after]) + await db_session.commit() + + results = await crud.grep_messages( + workspace_name=workspace.name, + session_name=None, + text="keyword", + observer=observer.name, + ) + matched_ids = [m.public_id for matches, _ in results for m in matches] + assert msg_during.public_id in matched_ids + assert msg_after.public_id in matched_ids diff --git a/tests/utils/test_agent_tools.py b/tests/utils/test_agent_tools.py index 1387f6816..bb0ff9006 100644 --- a/tests/utils/test_agent_tools.py +++ b/tests/utils/test_agent_tools.py @@ -29,6 +29,7 @@ _handle_grep_messages, # pyright: ignore[reportPrivateUsage] _handle_search_memory, # pyright: ignore[reportPrivateUsage] _handle_search_messages, # pyright: ignore[reportPrivateUsage] + _handle_search_messages_temporal, # pyright: ignore[reportPrivateUsage] _handle_update_peer_card, # pyright: ignore[reportPrivateUsage] create_observations, create_tool_executor, @@ -528,15 +529,15 @@ async def fake_query_documents( return [] async def fake_search_messages( - db: AsyncSession, workspace_name: str, session_name: str | None, query: str, limit: int = 10, context_window: int = 2, embedding: list[float] | None = None, + observer: str | None = None, ) -> list[tuple[list[models.Message], list[models.Message]]]: - _ = (db, workspace_name, session_name, query, limit, context_window) + _ = (workspace_name, session_name, query, limit, context_window, observer) fallback_embeddings.append(embedding) msg = models.Message( workspace_name=ctx.workspace_name, @@ -610,6 +611,78 @@ async def test_missing_text_param_returns_error( assert "ERROR" in result +@pytest.mark.asyncio +class TestSearchMessagesTemporal: + """Tests for _handle_search_messages_temporal.""" + + async def test_reuses_precomputed_embedding( + self, + make_tool_context: Callable[..., ToolContext], + monkeypatch: pytest.MonkeyPatch, + ): + """Embeds once and forwards the precomputed embedding to CRUD search.""" + ctx = make_tool_context() + + embed_calls: list[str] = [] + forwarded_embeddings: list[list[float] | None] = [] + + async def fake_embed(query: 
str) -> list[float]: + embed_calls.append(query) + return [0.9, 0.1, 0.3] + + async def fake_search_messages_temporal( + workspace_name: str, + session_name: str | None, + query: str, + after_date: datetime | None = None, + before_date: datetime | None = None, + limit: int = 10, + context_window: int = 2, + embedding: list[float] | None = None, + observer: str | None = None, + ) -> list[tuple[list[models.Message], list[models.Message]]]: + _ = ( + workspace_name, + session_name, + query, + after_date, + before_date, + limit, + context_window, + observer, + ) + forwarded_embeddings.append(embedding) + msg = models.Message( + workspace_name=ctx.workspace_name, + session_name=ctx.session_name, + peer_name=ctx.observed, + content="Relevant temporal fallback message", + seq_in_session=1, + token_count=5, + created_at=datetime.now(timezone.utc), + ) + return [([msg], [msg])] + + monkeypatch.setattr("src.utils.agent_tools.embedding_client.embed", fake_embed) + monkeypatch.setattr( + "src.utils.agent_tools.crud.search_messages_temporal", + fake_search_messages_temporal, + ) + + result = await _handle_search_messages_temporal( + ctx, + { + "query": "when did this happen", + "after_date": "2024-01-01", + "before_date": "2024-12-31", + }, + ) + + assert "Found" in result + assert embed_calls == ["when did this happen"] + assert forwarded_embeddings == [[0.9, 0.1, 0.3]] + + @pytest.mark.asyncio class TestGetMessagesByDateRange: """Tests for _handle_get_messages_by_date_range.""" @@ -991,15 +1064,15 @@ async def unexpected_embed_call(_query: str) -> list[float]: embedding_args: list[list[float] | None] = [] async def fake_search_messages( - _db: AsyncSession, workspace_name: str, session_name: str | None, query: str, limit: int, context_window: int, embedding: list[float] | None, + observer: str | None = None, ) -> list[tuple[list[models.Message], list[models.Message]]]: - _ = (limit, context_window) + _ = (limit, context_window, observer) embedding_args.append(embedding) msg = models.Message( workspace_name=workspace_name, @@ -1292,3 +1365,55 @@ async def test_no_registry_growth_across_many_keys(self): # All 100 entries should be cleaned up remaining = sum(1 for k in _observation_locks if k[0].startswith("ws_growth_")) assert remaining == 0 + + +@pytest.mark.asyncio +class TestObserverPeerNameWiring: + """Tests that tool handlers pass observer to CRUD functions.""" + + async def test_grep_messages_passes_observer( + self, + make_tool_context: Callable[..., ToolContext], + monkeypatch: pytest.MonkeyPatch, + ): + """_handle_grep_messages passes ctx.observer as observer.""" + ctx = make_tool_context() + captured_kwargs: dict[str, Any] = {} + + async def fake_grep_messages( + **kwargs: Any, + ) -> list[tuple[list[models.Message], list[models.Message]]]: + captured_kwargs.update(kwargs) + return [] + + monkeypatch.setattr( + "src.utils.agent_tools.crud.grep_messages", fake_grep_messages + ) + + await _handle_grep_messages(ctx, {"text": "hello"}) + + assert captured_kwargs["observer"] == ctx.observer + + async def test_get_messages_by_date_range_passes_observer( + self, + make_tool_context: Callable[..., ToolContext], + monkeypatch: pytest.MonkeyPatch, + ): + """_handle_get_messages_by_date_range passes ctx.observer as observer.""" + ctx = make_tool_context() + captured_kwargs: dict[str, Any] = {} + + async def fake_get_messages_by_date_range( + _db: Any, **kwargs: Any + ) -> list[models.Message]: + captured_kwargs.update(kwargs) + return [] + + monkeypatch.setattr( + 
"src.utils.agent_tools.crud.get_messages_by_date_range", + fake_get_messages_by_date_range, + ) + + await _handle_get_messages_by_date_range(ctx, {"after_date": "2024-01-01"}) + + assert captured_kwargs["observer"] == ctx.observer diff --git a/tests/webhooks/test_webhook_delivery.py b/tests/webhooks/test_webhook_delivery.py index 3c4235f25..56198f550 100644 --- a/tests/webhooks/test_webhook_delivery.py +++ b/tests/webhooks/test_webhook_delivery.py @@ -121,7 +121,7 @@ def async_client_factory(*args: Any, **kwargs: Any) -> FakeAsyncClient: ) payload = WebhookPayload(event_type="peer.created", data={"id": "p_123"}) - await webhook_delivery.deliver_webhook(AsyncMock(), payload, "workspace-a") + await webhook_delivery.deliver_webhook(payload, "workspace-a") assert fake_client.calls == [] @@ -162,7 +162,7 @@ def async_client_factory(*args: Any, **kwargs: Any) -> FakeAsyncClient: event_type="message.created", data={"id": "m_1", "workspace": "workspace-a"}, ) - await webhook_delivery.deliver_webhook(AsyncMock(), payload, "workspace-a") + await webhook_delivery.deliver_webhook(payload, "workspace-a") expected_event_json = json.dumps( { @@ -210,7 +210,7 @@ def async_client_factory(*args: Any, **kwargs: Any) -> FakeAsyncClient: monkeypatch.setattr(httpx, "AsyncClient", async_client_factory) payload = WebhookPayload(event_type="workspace.updated", data={"id": "ws_1"}) - await webhook_delivery.deliver_webhook(AsyncMock(), payload, "workspace-a") + await webhook_delivery.deliver_webhook(payload, "workspace-a") assert fake_client.calls == [] @@ -233,4 +233,4 @@ def async_client_factory(*args: Any, **kwargs: Any) -> FakeAsyncClient: monkeypatch.setattr(httpx, "AsyncClient", async_client_factory) payload = WebhookPayload(event_type="workspace.updated", data={"id": "ws_1"}) - await webhook_delivery.deliver_webhook(AsyncMock(), payload, "workspace-a") + await webhook_delivery.deliver_webhook(payload, "workspace-a") From 317b4a6cbaa64773279368bc84e1e915bcee3090 Mon Sep 17 00:00:00 2001 From: Vineeth Voruganti <13438633+VVoruganti@users.noreply.github.com> Date: Fri, 10 Apr 2026 13:16:42 -0400 Subject: [PATCH 06/46] v3.0.6 Release Candidate (#550) * chore: (docs) Update changelogs and version numbers * chore: remove extraneous dep on mintlify --- CHANGELOG.md | 18 +++++++++++++++ README.md | 2 +- docs/bun.lock | 31 +++++--------------------- docs/changelog/compatibility-guide.mdx | 7 +++--- docs/changelog/introduction.mdx | 20 ++++++++++++++++- docs/package.json | 3 +-- pyproject.toml | 2 +- src/main.py | 2 +- uv.lock | 2 +- 9 files changed, 52 insertions(+), 35 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b0340917c..e5208a953 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,24 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). 
+## [3.0.6] - 2026-04-10 + +### Changed + +- Tightened transaction scopes across search, agent tools, queue manager, and webhook delivery to minimize DB connection hold time during external operations (#525) +- Search operations refactored to two-phase pattern — external work (embeddings, LLM calls) completes before opening a transaction (#525) +- Agent tool executor performs external operations before acquiring DB sessions (#525) +- Queue manager transaction scope reduced to only the critical section (#525) +- Webhook delivery no longer holds a DB session parameter (#525) + +### Fixed + +- Session leakage in non-session-scoped dialectic chat calls (#526) + +### Added + +- Health check endpoint (`/health`) for container orchestration and load balancer probes (#510) + ## [3.0.5] - 2026-04-03 ### Fixed diff --git a/README.md b/README.md index 990f7cc32..5ee20e582 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ --- -![Static Badge](https://img.shields.io/badge/Version-3.0.5-blue) +![Static Badge](https://img.shields.io/badge/Version-3.0.6-blue) [![PyPI version](https://img.shields.io/pypi/v/honcho-ai.svg)](https://pypi.org/project/honcho-ai/) [![NPM version](https://img.shields.io/npm/v/@honcho-ai/sdk.svg)](https://npmjs.org/package/@honcho-ai/sdk) [![Discord](https://img.shields.io/discord/1016845111637839922?style=flat&logo=discord&logoColor=23ffffff&label=Plastic%20Labs&labelColor=235865F2)](https://discord.gg/honcho) diff --git a/docs/bun.lock b/docs/bun.lock index 182c58c05..b30aeb653 100644 --- a/docs/bun.lock +++ b/docs/bun.lock @@ -6,7 +6,6 @@ "name": "honcho-docs", "dependencies": { "@mintlify/scraping": "^4.0.467", - "honcho-ai": "^0.0.11", }, "devDependencies": { "mint": "^4.2.204", @@ -298,8 +297,6 @@ "@types/node": ["@types/node@18.19.120", "", { "dependencies": { "undici-types": "~5.26.4" } }, "sha512-WtCGHFXnVI8WHLxDAt5TbnCM4eSE+nI0QN2NJtwzcgMhht2eNz6V9evJrk+lwC8bCY8OWV5Ym8Jz7ZEyGnKnMA=="], - "@types/node-fetch": ["@types/node-fetch@2.6.12", "", { "dependencies": { "@types/node": "*", "form-data": "^4.0.0" } }, "sha512-8nneRWKCg3rMtF69nLQJnOYUcbafYeFSjqkw3jCRLsqkWFlHaoQrr5mXmofFGOx3DKn7UfmBMyov8ySvLRVldA=="], - "@types/react": ["@types/react@19.1.8", "", { "dependencies": { "csstype": "^3.0.2" } }, "sha512-AwAfQ2Wa5bCx9WP8nZL2uMZWod7J7/JSplxbTmBQ5ms6QpqNYm672H0Vu9ZVKVngQ+ii4R/byguVEUZQyeg44g=="], "@types/unist": ["@types/unist@3.0.3", "", {}, "sha512-ko/gIFJRv177XgZsZcBwnqJN5x/Gien8qNOn0D5bQU/zAzVf9Zt3BlcUiLqhV9y4ARk0GbT3tnUiPNgnTXzc/Q=="], @@ -326,8 +323,6 @@ "agent-base": ["agent-base@7.1.4", "", {}, "sha512-MnA+YT8fwfJPgBx3m60MNqakm30XOkyIoH1y6huTQvC0PwZG7ki8NacLBcrPbNoo8vEZy7Jpuk7+jMO+CUovTQ=="], - "agentkeepalive": ["agentkeepalive@4.6.0", "", { "dependencies": { "humanize-ms": "^1.2.1" } }, "sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ=="], - "aggregate-error": ["aggregate-error@4.0.1", "", { "dependencies": { "clean-stack": "^4.0.0", "indent-string": "^5.0.0" } }, "sha512-0poP0T7el6Vq3rstR8Mn4V/IQrpBLO6POkUSrN7RhyY+GF/InCFShQzsQ39T25gkHhLgSLByyAz+Kjb+c2L98w=="], "ajv": ["ajv@8.17.1", "", { "dependencies": { "fast-deep-equal": "^3.1.3", "fast-uri": "^3.0.1", "json-schema-traverse": "^1.0.0", "require-from-string": "^2.0.2" } }, "sha512-B/gBuNg5SiMTrPkC+A2+cW0RszwxYmn6VYxB/inlBStS5nx6xHIt/ehKRhIMhqusl7a8LjQoZnjCs5vhwxOQ1g=="], @@ -682,12 +677,10 @@ "form-data": ["form-data@4.0.4", "", { "dependencies": { "asynckit": "^0.4.0", "combined-stream": "^1.0.8", "es-set-tostringtag": "^2.1.0", "hasown": "^2.0.2", 
"mime-types": "^2.1.12" } }, "sha512-KrGhL9Q4zjj0kiUt5OO4Mr/A/jlI2jDYs5eHBpYHPcBEVSiipAvn2Ko2HnPe20rmcuuvMHNdZFp+4IlGTMF0Ow=="], - "form-data-encoder": ["form-data-encoder@1.7.2", "", {}, "sha512-qfqtYan3rxrnCk1VYaA4H+Ms9xdpPqvLZa6xmMgFvhO32x7/3J/ExcTd6qpxM0vH2GdMI+poehyBZvqfMTto8A=="], + "form-data-encoder": ["form-data-encoder@2.1.4", "", {}, "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw=="], "format": ["format@0.2.2", "", {}, "sha512-wzsgA6WOq+09wrU1tsJ09udeR/YZRaeArL9e1wPbFg3GG2yDnC2ldKpxs4xunpFF9DgqCqOIra3bc1HWrJ37Ww=="], - "formdata-node": ["formdata-node@4.4.1", "", { "dependencies": { "node-domexception": "1.0.0", "web-streams-polyfill": "4.0.0-beta.3" } }, "sha512-0iirZp3uVDjVGt9p49aTaqjk84TrglENEDuqfdlZQ1roC9CWlPk6Avf8EEnZNcAqPonwkG35x4n3ww/1THYAeQ=="], - "forwarded": ["forwarded@0.2.0", "", {}, "sha512-buRG0fpBtRHSTCOASe6hD258tEubFoRLb4ZNA6NxMVHNw2gOcwHo9wyablzMzOA5z9xA9L1KNjk/Nt6MT9aYow=="], "fresh": ["fresh@0.5.2", "", {}, "sha512-zJ2mQYM18rEFOudeV4GShTGIQ7RbzA7ozbU9I/XBpm7kqgMywgmylMwXHxZJmkVoYkna9d2pVXVXPdYTP9ej8Q=="], @@ -788,8 +781,6 @@ "hex-rgb": ["hex-rgb@5.0.0", "", {}, "sha512-NQO+lgVUCtHxZ792FodgW0zflK+ozS9X9dwGp9XvvmPlH7pyxd588cn24TD3rmPm/N0AIRXF10Otah8yKqGw4w=="], - "honcho-ai": ["honcho-ai@0.0.11", "", { "dependencies": { "@types/node": "^18.11.18", "@types/node-fetch": "^2.6.4", "abort-controller": "^3.0.0", "agentkeepalive": "^4.2.1", "form-data-encoder": "1.7.2", "formdata-node": "^4.3.2", "node-fetch": "^2.6.7" } }, "sha512-SUl/PnMldTCz8G4S8faP00M2iFd9qWDkI5U8w0FQ7OC6SgKzTf1nJ/j3gyzctzR2IZ6LrOz/2d5OwO4f/PCMww=="], - "html-void-elements": ["html-void-elements@3.0.0", "", {}, "sha512-bEqo66MRXsUGxWHV5IP0PUiAWwoEjba4VCzg0LjFJBpchPaTfyfCKTG6bc5F8ucKec3q5y6qOdGyYTSBEvhCrg=="], "http-cache-semantics": ["http-cache-semantics@4.2.0", "", {}, "sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ=="], @@ -802,8 +793,6 @@ "https-proxy-agent": ["https-proxy-agent@7.0.6", "", { "dependencies": { "agent-base": "^7.1.2", "debug": "4" } }, "sha512-vK9P5/iUfdl95AI+JVyUuIcVtd4ofvtrOr3HNtM2yxC9bnMbEdp3x01OhQNnjb8IJYi38VlTE3mBXwcfvywuSw=="], - "humanize-ms": ["humanize-ms@1.2.1", "", { "dependencies": { "ms": "^2.0.0" } }, "sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ=="], - "ico-endec": ["ico-endec@0.1.6", "", {}, "sha512-ZdLU38ZoED3g1j3iEyzcQj+wAkY2xfWNkymszfJPoxucIUhK7NayQ+/C4Kv0nDFMIsbtbEHldv3V8PU494/ueQ=="], "iconv-lite": ["iconv-lite@0.7.0", "", { "dependencies": { "safer-buffer": ">= 2.1.2 < 3.0.0" } }, "sha512-cf6L2Ds3h57VVmkZe+Pn+5APsT7FpqJtEhhieDCvrE2MK5Qk9MyffgQyuxQTm6BChfeZNtcOLHp9IcWRVcIcBQ=="], @@ -1136,9 +1125,7 @@ "nlcst-to-string": ["nlcst-to-string@4.0.0", "", { "dependencies": { "@types/nlcst": "^2.0.0" } }, "sha512-YKLBCcUYKAg0FNlOBT6aI91qFmSiFKiluk655WzPF+DDMA02qIyy8uiRqI8QXtcFpEvll12LpL5MXqEmAZ+dcA=="], - "node-domexception": ["node-domexception@1.0.0", "", {}, "sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ=="], - - "node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], + "node-fetch": ["node-fetch@2.6.7", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, 
"sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ=="], "normalize-path": ["normalize-path@3.0.0", "", {}, "sha512-6eZs5Ls3WtCisHWp9S2GUy8dqkpGi4BVSz3GaqiE6ezub0512ESztXUwUB6C6IKbQkY2Pnb/mD4WYojCRwcwLA=="], @@ -1590,8 +1577,6 @@ "web-namespaces": ["web-namespaces@2.0.1", "", {}, "sha512-bKr1DkiNa2krS7qxNtdrtHAmzuYGFQLiQ13TsorsdT6ULTkPLKuu5+GsFpDlg6JFjUTwX2DyhMPG2be8uPrqsQ=="], - "web-streams-polyfill": ["web-streams-polyfill@4.0.0-beta.3", "", {}, "sha512-QW95TCTaHmsYfHDybGMwO5IJIM93I/6vTRk+daHTWFPhwh+C8Cg7j7XyKrwrj8Ib6vYXe0ocYNrmzY4xAAN6ug=="], - "webidl-conversions": ["webidl-conversions@3.0.1", "", {}, "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="], "whatwg-url": ["whatwg-url@5.0.0", "", { "dependencies": { "tr46": "~0.0.3", "webidl-conversions": "^3.0.0" } }, "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw=="], @@ -1646,8 +1631,6 @@ "@asyncapi/parser/ajv-formats": ["ajv-formats@2.1.1", "", { "dependencies": { "ajv": "^8.0.0" } }, "sha512-Wx0Kx52hxE7C18hkMEggYlEifqWZtYaRgouJor+WMdPnQyEK13vgEWyVNup7SoeeoLMsr4kf5h6dOW11I15MUA=="], - "@asyncapi/parser/node-fetch": ["node-fetch@2.6.7", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-ZjMPFEfVx5j+y2yF35Kzx5sF7kDzxuDj6ziH4FFbOp87zKDZNx8yExJIb05OGF4Nlt9IHFIMBkRl41VdvcNdbQ=="], - "@inquirer/checkbox/@inquirer/core": ["@inquirer/core@10.3.2", "", { "dependencies": { "@inquirer/ansi": "^1.0.2", "@inquirer/figures": "^1.0.15", "@inquirer/type": "^3.0.10", "cli-width": "^4.1.0", "mute-stream": "^2.0.0", "signal-exit": "^4.1.0", "wrap-ansi": "^6.2.0", "yoctocolors-cjs": "^2.1.3" }, "peerDependencies": { "@types/node": ">=18" }, "optionalPeers": ["@types/node"] }, "sha512-43RTuEbfP8MbKzedNqBrlhhNKVwoK//vUFNW3Q3vZ88BLcrs4kYpGg+B2mm5p2K/HfygoCxuKwJJiv8PbGmE0A=="], "@inquirer/checkbox/@inquirer/type": ["@inquirer/type@3.0.10", "", { "peerDependencies": { "@types/node": ">=18" }, "optionalPeers": ["@types/node"] }, "sha512-BvziSRxfz5Ov8ch0z/n3oijRSEcEsHnhggm4xFZe93DHcUCTlutlq9Ox4SVENAfcRD22UQq7T/atg9Wr3k09eA=="], @@ -1714,6 +1697,8 @@ "@stoplight/better-ajv-errors/leven": ["leven@3.1.0", "", {}, "sha512-qsda+H8jTaUaN/x5vzW2rzc+8Rw4TAQ/4KjB46IwK5VH+IlVeeeje/EoZRpiXvIqjFgK84QffqPztGI3VBLG1A=="], + "@stoplight/json-ref-readers/node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": "^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], + "@stoplight/json-ref-readers/tslib": ["tslib@1.14.1", "", {}, "sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg=="], "@stoplight/spectral-core/@stoplight/types": ["@stoplight/types@13.6.0", "", { "dependencies": { "@types/json-schema": "^7.0.4", "utility-types": "^3.10.0" } }, "sha512-dzyuzvUjv3m1wmhPfq82lCVYGcXG0xUYgqnWfCq3PCVR4BKFhjdkHrnJ+jIDoMKvXb05AZP/ObQF6+NpDo29IQ=="], @@ -1724,6 +1709,8 @@ "@stoplight/spectral-parsers/@stoplight/types": ["@stoplight/types@14.1.1", "", { "dependencies": { "@types/json-schema": "^7.0.4", "utility-types": "^3.10.0" } }, "sha512-/kjtr+0t0tjKr+heVfviO9FrU/uGLc+QNX3fHJc19xsCNYqU7lVhaXxDmEID9BZTjG+/r9pK9xP/xU02XGg65g=="], + "@stoplight/spectral-runtime/node-fetch": ["node-fetch@2.7.0", "", { "dependencies": { "whatwg-url": "^5.0.0" }, "peerDependencies": { "encoding": 
"^0.1.0" }, "optionalPeers": ["encoding"] }, "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A=="], + "@stoplight/yaml/@stoplight/types": ["@stoplight/types@14.1.1", "", { "dependencies": { "@types/json-schema": "^7.0.4", "utility-types": "^3.10.0" } }, "sha512-/kjtr+0t0tjKr+heVfviO9FrU/uGLc+QNX3fHJc19xsCNYqU7lVhaXxDmEID9BZTjG+/r9pK9xP/xU02XGg65g=="], "body-parser/debug": ["debug@2.6.9", "", { "dependencies": { "ms": "2.0.0" } }, "sha512-bC7ElrdJaJnPbAP+1EotYvqZsb3ecl5wi6Bfi6BJTUcNowp6cvspg0jXznRTKDjm/E7AdgFBVeAPVMNcKGsHMA=="], @@ -1772,8 +1759,6 @@ "glob/minipass": ["minipass@7.1.2", "", {}, "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw=="], - "got/form-data-encoder": ["form-data-encoder@2.1.4", "", {}, "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw=="], - "gray-matter/js-yaml": ["js-yaml@3.14.1", "", { "dependencies": { "argparse": "^1.0.7", "esprima": "^4.0.0" }, "bin": { "js-yaml": "bin/js-yaml.js" } }, "sha512-okMH7OXXJ7YrN9Ok3/SXrnu4iX9yOk+25nqX4imS2npuvTYDmo/QEZoqwZkYaIDk3jVvBOTOIEgEhaLOynBS9g=="], "ink/string-width": ["string-width@7.2.0", "", { "dependencies": { "emoji-regex": "^10.3.0", "get-east-asian-width": "^1.0.0", "strip-ansi": "^7.1.0" } }, "sha512-tsaTIkKW9b4N+AEj+SVA+WhJzV7/zMhcSu78mLKWSk7cXMOSHsBKFWUs0fWwq8QyK3MgJBQRX6Gbi4kYbdvGkQ=="], @@ -1940,10 +1925,6 @@ "inquirer/ansi-escapes/type-fest": ["type-fest@0.21.3", "", {}, "sha512-t0rzBq87m3fVcduHDUFhKmyyX+9eo6WQjZvf51Ea/M0Q7+T374Jp1aUiyUl0GKxp8M/OETVHSDvmkyPgvX+X2w=="], - "is-online/got/form-data-encoder": ["form-data-encoder@2.1.4", "", {}, "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw=="], - - "public-ip/got/form-data-encoder": ["form-data-encoder@2.1.4", "", {}, "sha512-yDYSgNMraqvnxiEXO4hi88+YZxaHC6QKzb5N84iRCTDeRO7ZALpir/lVmf/uXUhnwUr2O4HU8s/n6x+yNjQkHw=="], - "send/debug/ms": ["ms@2.0.0", "", {}, "sha512-Tpp60P6IUJDTuOq/5Z8cdskzJujfwqfOTkrwIwj7IRISpnkJnT6SyJ4PCPnGMoFjC9ddhal5KVIYtAt97ix05A=="], "widest-line/string-width/emoji-regex": ["emoji-regex@10.4.0", "", {}, "sha512-EC+0oUMY1Rqm4O6LLrgjtYDvcVYTy7chDnM4Q7030tP4Kwj3u/pR6gP9ygnp2CJMK5Gq+9Q2oqmrFJAz01DXjw=="], diff --git a/docs/changelog/compatibility-guide.mdx b/docs/changelog/compatibility-guide.mdx index 3c86ef499..17fd55c95 100644 --- a/docs/changelog/compatibility-guide.mdx +++ b/docs/changelog/compatibility-guide.mdx @@ -10,14 +10,14 @@ This guide helps you match the right SDK version to your Honcho API version. New - **Latest:** v2.1.0 + **Latest:** v2.1.1 ```bash npm install @honcho-ai/sdk ``` - **Latest:** v2.1.0 + **Latest:** v2.1.1 ```bash pip install honcho-ai @@ -30,7 +30,8 @@ This guide helps you match the right SDK version to your Honcho API version. New | Honcho API Version | TypeScript SDK | Python SDK | |-------------------|---------------|------------| -| v3.0.5 (Current) | v2.1.0 | v2.1.0 | +| v3.0.6 (Current) | v2.1.1 | v2.1.1 | +| v3.0.5 | v2.1.0 | v2.1.0 | | v3.0.4 | v2.1.0 | v2.1.0 | | v3.0.3 | v2.1.0 | v2.1.0 | | v3.0.2 | v2.0.0+ | v2.0.0+ | diff --git a/docs/changelog/introduction.mdx b/docs/changelog/introduction.mdx index 253203508..d2d31331a 100644 --- a/docs/changelog/introduction.mdx +++ b/docs/changelog/introduction.mdx @@ -27,7 +27,25 @@ Welcome to the Honcho changelog! 
This section documents all notable changes to t ### Honcho API and SDK Changelogs - + + ### Changed + + - Tightened transaction scopes across search, agent tools, queue manager, and webhook delivery to minimize DB connection hold time during external operations (#525) + - Search operations refactored to two-phase pattern — external work (embeddings, LLM calls) completes before opening a transaction (#525) + - Agent tool executor performs external operations before acquiring DB sessions (#525) + - Queue manager transaction scope reduced to only the critical section (#525) + - Webhook delivery no longer holds a DB session parameter (#525) + + ### Fixed + + - Session leakage in non-session-scoped dialectic chat calls (#526) + + ### Added + + - Health check endpoint (`/health`) for container orchestration and load balancer probes (#510) + + + ### Fixed - explicit rollback on all transactions to force connection closed diff --git a/docs/package.json b/docs/package.json index 1e7259a06..e905a4578 100644 --- a/docs/package.json +++ b/docs/package.json @@ -11,8 +11,7 @@ "author": "", "license": "ISC", "dependencies": { - "@mintlify/scraping": "^4.0.467", - "honcho-ai": "^0.0.11" + "@mintlify/scraping": "^4.0.467" }, "devDependencies": { "mint": "^4.2.204" diff --git a/pyproject.toml b/pyproject.toml index 4000f4b61..7b0fca890 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "honcho" -version = "3.0.5" +version = "3.0.6" description = "Honcho Server" authors = [ {name = "Plastic Labs", email = "hello@plasticlabs.ai"}, diff --git a/src/main.py b/src/main.py index 46dc5bc9a..64439beef 100644 --- a/src/main.py +++ b/src/main.py @@ -154,7 +154,7 @@ async def lifespan(_: FastAPI): title="Honcho API", summary="The Identity Layer for the Agentic World", description="""Honcho is a platform for giving agents user-centric memory and social cognition.""", - version="3.0.5", + version="3.0.6", contact={ "name": "Plastic Labs", "url": "https://honcho.dev", diff --git a/uv.lock b/uv.lock index 7c814b9b6..0d4647770 100644 --- a/uv.lock +++ b/uv.lock @@ -1282,7 +1282,7 @@ wheels = [ [[package]] name = "honcho" -version = "3.0.5" +version = "3.0.6" source = { virtual = "." } dependencies = [ { name = "alembic" }, From 58f9abba98c080a540ee168fe7a85b7d35652c0c Mon Sep 17 00:00:00 2001 From: adavyas <121313528+adavyas@users.noreply.github.com> Date: Fri, 10 Apr 2026 11:35:14 -0700 Subject: [PATCH 07/46] docs: add paperclip integration docs (#549) * Simplify Paperclip integration instructions Clarified instructions for local Honcho setup and removed unnecessary details. * Update docs.json * Update links in Paperclip integration guide * Revise memory initialization instructions in Paperclip guide Updated instructions for initializing memory and removed optional checks section. 
---
 docs/docs.json | 3 +-
 docs/v3/guides/integrations/paperclip.mdx | 135 ++++++++++++++++++++++
 docs/v3/guides/overview.mdx | 3 +
 3 files changed, 140 insertions(+), 1 deletion(-)
 create mode 100644 docs/v3/guides/integrations/paperclip.mdx

diff --git a/docs/docs.json b/docs/docs.json
index a81217cc2..0b3573678 100644
--- a/docs/docs.json
+++ b/docs/docs.json
@@ -108,7 +108,8 @@
         "v3/guides/integrations/n8n",
         "v3/guides/integrations/openclaw",
         "v3/guides/integrations/hermes",
-        "v3/guides/integrations/zo-computer"
+        "v3/guides/integrations/zo-computer",
+        "v3/guides/integrations/paperclip"
       ]
     },
     {
diff --git a/docs/v3/guides/integrations/paperclip.mdx b/docs/v3/guides/integrations/paperclip.mdx
new file mode 100644
index 000000000..072383a01
--- /dev/null
+++ b/docs/v3/guides/integrations/paperclip.mdx
@@ -0,0 +1,135 @@
+---
+title: "Paperclip"
+icon: "paperclip"
+description: "Add Honcho memory to Paperclip"
+sidebarTitle: "Paperclip"
+---
+
+Honcho for [Paperclip](https://paperclip.ing) adds persistent Honcho memory to Paperclip while keeping Paperclip as the system of record.
+
+
+This page covers the current public-host-compatible Paperclip plugin. It supports tools, sync, migration import, and manual prompt previews. It does not depend on automatic prompt-context injection hooks, run transcript import, or legacy workspace file import.
+
+
+## Install the Plugin
+
+1. In Paperclip, open `Instance Settings` -> `Plugins`.
+2. Click `Install Plugin`.
+3. Enter `@honcho-ai/paperclip-honcho`.
+4. Complete the install from the Paperclip UI.
+
+## Quick Setup
+
+### Minimal Path
+
+1. Create a Paperclip secret containing the Honcho API key.
+   - For a local Honcho, use whatever credential your local startup expects; `honchoApiKey` is not needed.
+2. Open the Honcho plugin settings page in Paperclip.
+3. If you are using Honcho Cloud, leave the deployment on the default cloud setting.
+4. If you are using a local Honcho instance, switch the deployment to `Self-hosted / local` and set `honchoApiBaseUrl`.
+5. Set `honchoApiKey`.
+6. Save the settings.
+7. Run `Initialize Honcho memory`.
+
+`honchoApiKey` is the only field required for the standard setup path. The other settings already have defaults.
+
+
+If you use a local Honcho deployment, `honchoApiBaseUrl` must be reachable from the Paperclip host runtime. If Paperclip is running in Docker, `localhost` may not point at your machine.
+
+
+## Multi-Agent Hierarchy
+
+### What Maps Where
+
+Paperclip memory is organized around company, issue, and agent boundaries:
+
+- **Company -> workspace**: each Paperclip company maps to one Honcho workspace.
+- **Issue -> session**: each Paperclip issue maps to one Honcho session inside that workspace.
+- **Humans and agents -> peers**: human actors and Paperclip agents map to Honcho peers.
+
+This gives the plugin a natural hierarchy: company-level memory lives at the workspace level, issue-level memory lives at the session level, and people or agents are modeled as peers that participate across those scopes.
+
+### How Agent Observation Works
+
+The current plugin gives agent peers explicit observation settings:
+
+- `observe_me` defaults to `true`
+- `observe_others` defaults to `true`
+
+In practice, that means agent peers can both be observed by Honcho and form representations of other peers they interact with.
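+
+As a sketch, the observation-related portion of a saved plugin configuration might look like this. The field names are the ones documented on this page; the values and the flat JSON shape are illustrative assumptions, not the plugin's actual storage format:
+
+```json
+{
+  "honchoApiKey": "<paperclip-secret-reference>",
+  "honchoApiBaseUrl": "https://api.honcho.dev",
+  "observe_me": true,
+  "observe_others": true
+}
+```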
+ +## How It Works + +### Identity And Scope + +The integration breaks down into four parts: + +- **Identity and scope** - each Paperclip company maps to a Honcho workspace, agents and human actors map to peers, and issues map to sessions. +- **What gets copied into Honcho** - issue comments and document revisions sync into Honcho, with document content sectioned and normalized message content capped before ingestion. +- **What operators get** - operators get a plugin settings page, migration preview/status data, repair tools, and an issue-level `Memory` tab. +- **What agents get** - agents get Honcho retrieval and peer-chat tools inside Paperclip. + +## Operator Actions + +The settings page exposes the main operator workflow directly: + +| Action | What it does | +| --- | --- | +| `Validate config` | Validates the current plugin configuration before any sync or import work runs. | +| `Test connection` | Resolves the API key secret, checks the Honcho connection, and returns the mapped workspace ID. | +| `Initialize memory for this company` | Connects Honcho, creates core mappings, imports baseline issue memory, and verifies manual prompt previews. | +| `Rescan migration sources` | Scans issue comments and issue documents and writes a fresh import preview. | +| `Import history` | Imports the approved historical preview into Honcho with idempotent ledger checks. | +| `Preview prompt context` | Builds a manual prompt-context preview for a company or issue without relying on automatic host hooks. | +| `Repair mappings` | Recreates missing workspace, peer, and session mappings for the current company. | +| `Resync this issue` | Replays sync for the current issue from the issue Memory tab. | + +## Configuration Defaults And Overrides + +### Default Behavior + +| Setting | Default | Use when | +| --- | --- | --- | +| `honchoApiKey` | — | Required. Points the plugin at the Paperclip secret containing your Honcho API key. | +| `honchoApiBaseUrl` | `https://api.honcho.dev` | Override this for self-hosted or non-default Honcho deployments. | +| `workspacePrefix` | `paperclip` | Change this if you want a different workspace namespace. | +| `syncIssueComments` | `true` | Turn this off if you do not want comment history imported into Honcho. | +| `syncIssueDocuments` | `true` | Turn this off if you do not want issue document revisions imported. | +| `enablePeerChat` | `true` | Required for the peer chat tool surface. | +| `enablePromptContext` | `false` | Keep this off on the public-host-compatible path and use manual prompt previews instead. | +| `observe_me` | `true` | Controls whether agent peers are observed by Honcho. | +| `observe_others` | `true` | Controls whether agent peers form representations of other peers they interact with. | + +The plugin also accepts additional advanced fields in the settings page, including noise-pattern and metadata-strip controls. Most setups can ignore those and start with the defaults above. + +## Agent Tools + +The plugin registers the following Honcho tools for Paperclip agents: + +| Tool | Description | +| --- | --- | +| `honcho_get_issue_context` | Retrieve compact Honcho context for the current issue session. | +| `honcho_search_memory` | Search Honcho memory within the current workspace, narrowing to the current issue by default. | +| `honcho_search_messages` | Search raw Honcho messages. | +| `honcho_search_conclusions` | Search high-signal summarized Honcho memory. | +| `honcho_get_workspace_context` | Retrieve broad workspace recall from Honcho. 
| +| `honcho_get_session` | Retrieve issue session context from Honcho. | +| `honcho_get_agent_context` | Retrieve peer context for a specific agent. | +| `honcho_get_hierarchy_context` | Retrieve delegated-work context when the host provides lineage metadata. | +| `honcho_ask_peer` | Query Honcho peer chat for a target peer. Requires peer chat to be enabled in plugin config. | + +## Next Steps + + + + Open the repository for source and setup details. + + + + Review how workspaces, peers, and sessions fit together. + + + + Review how `observe_me` and `observe_others` change what peers can model. + + diff --git a/docs/v3/guides/overview.mdx b/docs/v3/guides/overview.mdx index 71f31aefb..d3ca8c321 100644 --- a/docs/v3/guides/overview.mdx +++ b/docs/v3/guides/overview.mdx @@ -44,6 +44,9 @@ Connect external platforms to Honcho: Ingest meeting transcripts with speaker turns and participant data + + Add Honcho memory to Paperclip companies, agents, issues, and documents + Build an embodied voice robot with long-term memory From 952435848c45a9e797a73dbe0454e1e76cb12011 Mon Sep 17 00:00:00 2001 From: Maestra Date: Thu, 16 Apr 2026 10:48:36 -0400 Subject: [PATCH 08/46] fix(docker): remove api-specific healthcheck from shared image (#530) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The HEALTHCHECK directive probes an HTTP endpoint that only the API serves. The deriver service reuses this image but is a background queue worker with no HTTP server — the probe can never succeed, so Docker permanently marks the deriver container as unhealthy. Remove the HEALTHCHECK from the shared image. Service-level health checks belong in each service's own configuration (e.g. Kubernetes readiness/liveness probes on the API Deployment only). Closes #521 Co-authored-by: Claude Sonnet 4.6 --- Dockerfile | 3 --- 1 file changed, 3 deletions(-) diff --git a/Dockerfile b/Dockerfile index c116775e1..18f23a85d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -51,7 +51,4 @@ USER app EXPOSE 8000 -HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \ - CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1 - CMD ["fastapi", "run", "--host", "0.0.0.0", "src/main.py"] From 96765263f4d33ea97ed93dcb9c90d96aa710028d Mon Sep 17 00:00:00 2001 From: adavyas <121313528+adavyas@users.noreply.github.com> Date: Fri, 17 Apr 2026 13:25:08 -0700 Subject: [PATCH 09/46] docs: update Paperclip integration guide (#572) --- docs/v3/guides/integrations/paperclip.mdx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/v3/guides/integrations/paperclip.mdx b/docs/v3/guides/integrations/paperclip.mdx index 072383a01..de5792467 100644 --- a/docs/v3/guides/integrations/paperclip.mdx +++ b/docs/v3/guides/integrations/paperclip.mdx @@ -17,6 +17,7 @@ This page covers the current public-host-compatible Paperclip plugin. It support 2. Click `Install Plugin`. 3. Enter `@honcho-ai/paperclip-honcho`. 4. Complete the install from the Paperclip UI. + - Plugin download does not currently work on Windows because of a Paperclip host-side issue. ## Quick Setup @@ -66,7 +67,7 @@ The integration breaks down into four parts: - **Identity and scope** - each Paperclip company maps to a Honcho workspace, agents and human actors map to peers, and issues map to sessions. - **What gets copied into Honcho** - issue comments and document revisions sync into Honcho, with document content sectioned and normalized message content capped before ingestion. 
-- **What operators get** - operators get a plugin settings page, migration preview/status data, repair tools, and an issue-level `Memory` tab.
+- **What operators get** - operators get a plugin settings page, migration preview/status data, including a per-issue migration mapping preview, repair tools, and an issue-level `Memory` tab.
 - **What agents get** - agents get Honcho retrieval and peer-chat tools inside Paperclip.

 ## Operator Actions

From b65d03d2979bf83a01868149a5aed7d30fa87170 Mon Sep 17 00:00:00 2001
From: Vineeth Voruganti <13438633+VVoruganti@users.noreply.github.com>
Date: Mon, 20 Apr 2026 02:46:37 -0400
Subject: [PATCH 10/46] Refactor clients.py to add modern features and more flexible configuration (#459)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* fix: Add JSON repair for truncated LLM responses across all providers and Gemini thinking budget support

LengthFinishReasonError from OpenAI-compatible providers (custom, openai, groq) was crashing the deriver with 14k+ occurrences in production. The vLLM path already had repair logic but it was gated on provider=="vllm", unreachable when routing through litellm as a custom provider.

- Extract shared _repair_response_model_json() helper for all providers
- Catch LengthFinishReasonError in OpenAI/custom parse() path and repair truncated JSON
- Add repair fallback to Anthropic and Gemini response_model paths
- Add repair fallback to Groq response_model path
- Pass thinking_budget_tokens to Gemini 2.5 models via thinking_config
- Add 14 tests covering repair paths for all providers and Gemini thinking budget

Fixes HONCHO-YC

Co-Authored-By: Claude Opus 4.6 (1M context)

* feat: live llm integration tests
* feat: Consistent Model Config Protocol
* fix: migrate the remaining app callers off the legacy llm_settings path
* fix: Docs and regression tests
* fix: refactor llm runtime path to model-config-only API
* fix: refactor config to nested model-config source of truth
* fix: refactor llm streaming and tool dispatch through backends
* fix: cut over llm config to nested model_config only
* fix: collapse vllm and custom into openai_compatible transport
* feat: refactor llm config to explicit transports and bare model ids
* feat: (embed) Add configurability for embedding model
* fix: tests for embedding provider
* fix: Address Review Comments
* fix: (llm) remove Groq backend and per-vendor base URLs
* chore: move llm tests
* fix: (llm) address review findings — config regressions, backend bugs, dead code
* fix: address silly backend errors
* chore: (docs) update configuration and self-hosting guides
* chore: fix tests
* fix: address code rabbit comments
* fix: add validation to the dream settings
* fix: further address code rabbit comments
* fix: Address Code Rabbit Comments
* fix: Another round of code rabbit
* fix: Address Code Rabbit Nits
* fix: tests
* refactor: rename thinking validator to reflect transport scope

_validate_anthropic_thinking_minimum only enforces the >=1024 rule for Anthropic and no-ops for other transports, so the name was misleading now that it's shared across ConfiguredModelSettings, FallbackModelSettings, and ModelConfig. Renamed to _validate_thinking_constraints with a docstring clarifying per-transport behavior. No logic change.
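For reviewers skimming the rename: the enforced rule is small. A minimal sketch of its shape, assuming a Pydantic v2 model (the class and field container here are illustrative; only the _validate_thinking_constraints name, the Anthropic-only scope, and the >=1024 floor come from this change):

```python
from pydantic import BaseModel, model_validator


class ThinkingSettingsSketch(BaseModel):
    transport: str
    thinking_budget_tokens: int | None = None

    @model_validator(mode="after")
    def _validate_thinking_constraints(self) -> "ThinkingSettingsSketch":
        # Anthropic enforces a minimum extended-thinking budget; every
        # other transport passes through unchanged (a no-op).
        if (
            self.transport == "anthropic"
            and self.thinking_budget_tokens is not None
            and self.thinking_budget_tokens < 1024
        ):
            raise ValueError(
                "thinking_budget_tokens must be >= 1024 for the anthropic transport"
            )
        return self
```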
* fix(config): drop transport-specific thinking params when env override changes transport _fill_defaults_for_nested_field previously preserved the default MODEL_CONFIG's thinking_budget_tokens/thinking_effort across a transport override. This leaked Gemini-family defaults (e.g. thinking_budget_tokens=1024) into OpenAI-transport overrides, and the OpenAI backend then correctly rejected the unsupported param at call time (OpenAI uses reasoning.effort, not a token budget). The helper now strips thinking_budget_tokens and thinking_effort from the default dict when the env override supplies a transport different from the default's. Explicit thinking params in the override are preserved. * fix(config): apply thinking-param strip to dialectic level merge too DialecticSettings._merge_level_defaults does its own inline MODEL_CONFIG merge (parallel to _fill_defaults_for_nested_field), so the previous fix missed dialectic-level overrides. E.g. flipping DIALECTIC_LEVELS__minimal__MODEL_CONFIG__TRANSPORT from gemini (default) to openai still leaked the default thinking_budget_tokens=0 into the openai config, which the OpenAI backend then rejected at call time. The level-merge path now applies the same 'strip transport-specific thinking params when transport changes' rule as the generic helper. Added a regression test exercising the merge validator directly. * refactor(llm): wire ModelConfig knobs through, prune clients.py migration leftovers Three connected fixes to finish carving the LLM stack out of src/utils/clients.py and into src/llm/: 1. Propagate ModelConfig tuning knobs into backend calls. honcho_llm_call_inner built extra_params from only {json_mode, verbosity}, silently dropping top_p, top_k, frequency_penalty, presence_penalty, seed, and operator-supplied provider_params from any ModelConfig. Thread the selected config through ProviderSelection and merge build_config_extra_params(selected_config) into extra_params; per-call kwargs still win over provider_params defaults. Makes _build_config_extra_params public as build_config_extra_params so clients.py and request_builder.py share one translation. Adds TestModelConfigExtraParamsPropagation covering OpenAI/Anthropic knob propagation, provider_params passthrough, and per-call override precedence. 2. Drop dead extract_openai_* duplicates in clients.py. extract_openai_reasoning_content, extract_openai_reasoning_details, and extract_openai_cache_tokens had no callers outside their own definitions — the live implementations live in src/llm/backends/openai.py. -103 lines from clients.py. 3. Unify on ModelTransport, delete SupportedProviders. The "google" vs "gemini" split forced a _provider_for_model_config translation shim in two places. Replace all SupportedProviders usages with ModelTransport, rename CLIENTS["google"] → CLIENTS["gemini"], update provider branches + LLMError labels + reasoning-trace entries accordingly. Trace JSONL now writes "provider": "gemini" instead of "google" — consistent with the broader env-var rename cutover. Also tidies up pre-existing basedpyright findings in tests/llm/test_model_config.py (pydantic before-validator dict inputs + descriptor-proxy call). ruff: clean. basedpyright: 0 errors, 0 warnings. Tests: 153/153 pass across tests/utils/test_clients.py, tests/utils/test_length_finish_reason.py, tests/llm/, tests/dialectic/, tests/deriver/. 
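Sketching the precedence rule from item (1) above, assuming build_config_extra_params returns a plain dict (the helper name is the one made public in this PR; the wrapper and its signature are illustrative):

```python
def _merged_extra_params(selected_config, per_call_kwargs: dict) -> dict:
    # Config-derived knobs (top_p, top_k, seed, provider_params, ...)
    # form the base ...
    merged = dict(build_config_extra_params(selected_config))  # helper from this PR
    # ... and explicit per-call kwargs are applied last, so they win over
    # provider_params defaults on any key collision.
    merged.update(per_call_kwargs)
    return merged
```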
Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(llm): finish the src/utils/clients.py → src/llm/ migration honcho_llm_call_inner now delegates to request_builder.execute_completion and execute_stream instead of re-implementing backend call scaffolding inline. The new _effective_config_for_call helper carries per-call kwargs (temperature, stop_seqs, thinking_budget_tokens, reasoning_effort) onto the selected ModelConfig — or synthesizes a minimal config for the test-only callers that pass provider+model directly. max_output_tokens is zeroed on the effective config to preserve the current "per-call max_tokens wins" semantic; honoring ModelConfig.max_output_tokens is a separable correctness concern. Side effect of routing through the new path: ConfiguredModelSettings' thinking_budget_tokens validator now fires on synthesized configs. test_anthropic_thinking_budget was asserting that a sub-1024 budget propagated to Anthropic — bumped to 1024 to match what Anthropic actually accepts. Unified client construction. Promoted the cached client factories in src/llm/__init__.py (get_anthropic_client, get_openai_client, get_gemini_client, get_{anthropic,openai,gemini}_override_client) to public API and added them to __all__. Promoted credentials._default_transport_api_key → default_transport_api_key. Deleted the duplicate _build_client and _default_credentials_for_provider from clients.py; _client_for_model_config now falls through to the public factories. CLIENTS dict and _get_backend_for_provider stay as the mockable seam for the ~50 patch.dict(CLIENTS, {...}) test call sites. Wired operator-configurable Gemini cached-content reuse end-to-end. PromptCachePolicy moved from src/llm/caching.py into src/config.py so ModelConfig can reference it as a field without a circular import; caching.py re-exports the name for existing imports. Added cache_policy: PromptCachePolicy | None on ConfiguredModelSettings, FallbackModelSettings, ResolvedFallbackConfig, and ModelConfig. resolve_model_config, _resolve_fallback_config, and _select_model_config_for_attempt copy the field through. honcho_llm_call_inner passes effective_config.cache_policy into execute_completion / execute_stream, so operators opt in via e.g. DERIVER_MODEL_CONFIG__CACHE_POLICY__MODE=gemini_cached_content and the selection actually fires instead of sitting on a dead path. New regression test test_cache_policy_reaches_gemini_backend asserts the PromptCachePolicy object reaches the Gemini backend's extra_params. ruff + basedpyright: clean. Tests: 154/154 pass across tests/utils/test_clients.py, tests/utils/test_length_finish_reason.py, tests/llm/, tests/dialectic/, tests/deriver/. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(llm): move all LLM orchestration into src/llm/ and delete clients.py The 1624-line src/utils/clients.py has been carved up into focused modules under src/llm/ and deleted. There is now one golden path for LLM orchestration and no dual entrypoint. 
New module layout: src/llm/ __init__.py thin stable re-export surface api.py public honcho_llm_call with retry + fallback + tool loop delegation executor.py honcho_llm_call_inner (single-call executor); bridges to request_builder.execute_completion / execute_stream tool_loop.py execute_tool_loop + stream_final_response, plus assistant-tool-message and tool-result formatting runtime.py AttemptPlan dataclass (replaces the loose ProviderSelection NamedTuple), effective_config_for_call, plan_attempt, per-retry temperature bump, attempt ContextVar registry.py single owner of CLIENTS dict + cached default and override SDK-client factories + backend/history-adapter selection + high-level get_backend(config) conversation.py count_message_tokens, tool-aware message grouping, truncate_messages_to_fit types.py HonchoLLMCallResponse, HonchoLLMCallStreamChunk, StreamingResponseWithMetadata, IterationData, IterationCallback, ReasoningEffortType, VerbosityType, ProviderClient request_builder.py low-level request assembly (ModelConfig → backend complete/stream); no longer owns credential resolution credentials.py default_transport_api_key, resolve_credentials caching.py gemini_cache_store; re-exports PromptCachePolicy from src.config backend.py Protocol + normalized result types history_adapters.py provider-specific assistant/tool message shapes structured_output.py backends/ AnthropicBackend, OpenAIBackend, GeminiBackend handle_streaming_response had no production callers; it is deleted. The three tests that used it now drive honcho_llm_call_inner(stream=True, client_override=...) directly, which exercises the same code path the public API uses. Dead credential passthrough removed. The ProviderBackend Protocol and all three concrete backends no longer accept api_key / api_base — those are baked into the underlying SDK client at registry construction time and were being del'd everywhere they appeared. request_builder also stops resolving and forwarding them. Client construction is unified. The cached default-client factories (get_anthropic_client, get_openai_client, get_gemini_client) and override factories (get_*_override_client) are promoted to public API; the module-level CLIENTS dict populates from them and remains the patch.dict(CLIENTS, {...}) mocking seam tests rely on. Old duplicate helpers (_build_client, _default_credentials_for_provider) are gone. default_transport_api_key is promoted to public. Application imports now come from src.llm (dreamer, dialectic, deriver, summarizer, telemetry-adjacent tests). No code imports from src.utils.clients anywhere in the repo. ruff: clean. basedpyright: 0 errors, 0 warnings. Tests: 1013/1013 pass across the entire non-infra test suite (excluding tests/unified, tests/bench, tests/live_llm, tests/alembic). Co-Authored-By: Claude Opus 4.7 (1M context) * fix(llm): sanitize tool schemas for Gemini's function_declarations validator Gemini's native-transport function-declarations validator accepts a narrow subset of JSON-Schema / OpenAPI: type, format, description, nullable, enum, properties, required, items, minItems, maxItems, minimum, maximum, title. Anything else — additionalProperties, allOf, if/then/else, $ref, anyOf, oneOf, $defs, patternProperties — triggers an INVALID_ARGUMENT 400 at call time. Our agent tool schemas in src/utils/agent_tools.py use several of those (additionalProperties: false, allOf + if/then conditionals) because they were authored for OpenAI strict-mode + Anthropic, which need the richer vocabulary. 
GeminiBackend._convert_tools was passing them straight through. Add
_sanitize_schema(): walks the parameters tree and drops unsupported
keywords while preserving semantics for the keywords that hold user data
(properties maps field-name → sub-schema; required / enum are lists of
literals; items is a single sub-schema). Other backends are untouched and
continue to receive the full strict schemas.

Regression tests:
- test_gemini_sanitize_schema_strips_unsupported_keywords: confirms
  additionalProperties, allOf + if/then, and $defs are stripped at nested
  levels while legitimate fields survive.
- test_gemini_convert_tools_sanitizes_parameters_schema: end-to-end
  _convert_tools output has no forbidden keys.

Co-Authored-By: Claude Opus 4.7 (1M context)

* fix: fix tool calling syntax for gemini
* refactor(llm): normalize defaults, widen OpenAI reasoning-model routing
* chore: fix test
* fix(llm): address post-migration review feedback
* fix(llm): gemini robustness + dreamer specialist ergonomics
* chore: address review comments
* chore: (docs) unreleased changelog addition
* chore: (docs) merge commit changes

---------

Co-authored-by: Claude Opus 4.6 (1M context)
Co-authored-by: Erosika
---
 .env.template | 148 +-
 CHANGELOG.md | 33 +
 README.md | 10 +-
 config.toml.example | 148 +-
 docs/v3/contributing/configuration.mdx | 328 ++-
 docs/v3/contributing/self-hosting.mdx | 32 +-
 docs/v3/contributing/troubleshooting.mdx | 33 +-
 docs/v3/guides/integrations/paperclip.mdx | 2 +-
 pyproject.toml | 7 +-
 src/config.py | 867 +++++-
 src/crud/document.py | 3 +-
 src/crud/representation.py | 3 +-
 src/deriver/__main__.py | 1 -
 src/deriver/deriver.py | 22 +-
 src/dialectic/core.py | 24 +-
 src/dreamer/specialists.py | 68 +-
 src/embedding_client.py | 161 +-
 src/llm/__init__.py | 66 +
 src/llm/api.py | 359 +++
 src/llm/backend.py | 88 +
 src/llm/backends/__init__.py | 9 +
 src/llm/backends/anthropic.py | 347 +++
 src/llm/backends/gemini.py | 577 ++++
 src/llm/backends/openai.py | 427 +++
 src/llm/caching.py | 97 +
 src/llm/conversation.py | 185 ++
 src/llm/credentials.py | 25 +
 src/llm/executor.py | 226 ++
 src/llm/history_adapters.py | 137 +
 src/llm/registry.py | 185 ++
 src/llm/request_builder.py | 119 +
 src/llm/runtime.py | 207 ++
 src/llm/structured_output.py | 132 +
 src/llm/tool_loop.py | 491 ++++
 src/llm/types.py | 138 +
 src/schemas/api.py | 5 +-
 src/telemetry/reasoning_traces.py | 14 +-
 src/utils/agent_tools.py | 332 ++-
 src/utils/clients.py | 2575 ------------------
 src/utils/search.py | 2 +-
 src/utils/summarizer.py | 12 +-
 src/utils/types.py | 1 -
 src/vector_store/lancedb.py | 4 +-
 tests/__init__.py | 1 +
 tests/bench/harness.py | 76 +-
 tests/conftest.py | 65 +-
 tests/deriver/test_deriver_processing.py | 60 +-
 tests/deriver/test_queue_processing.py | 10 +-
 tests/dialectic/test_model_config_usage.py | 111 +
 tests/dreamer/test_model_config_usage.py | 56 +
 tests/integration/test_enqueue.py | 62 -
 tests/integration/test_message_embeddings.py | 2 +-
 tests/integration/test_token_metrics.py | 2 +-
 tests/live_llm/README.md | 59 +
 tests/live_llm/__init__.py | 1 +
 tests/live_llm/conftest.py | 120 +
 tests/live_llm/model_matrix.py | 184 ++
 tests/live_llm/test_live_anthropic.py | 154 ++
 tests/live_llm/test_live_gemini.py | 173 ++
 tests/live_llm/test_live_openai.py | 136 +
 tests/llm/conftest.py | 30 +
 tests/llm/test_agent_tool_schemas.py | 72 +
 tests/llm/test_backends/test_anthropic.py | 135 +
 tests/llm/test_backends/test_gemini.py | 391 +++
 tests/llm/test_backends/test_openai.py | 276 ++
 tests/llm/test_conversation.py | 103 +
tests/llm/test_credentials.py | 50 + tests/llm/test_embedding_client.py | 139 + tests/llm/test_history_adapters.py | 67 + tests/llm/test_model_config.py | 510 ++++ tests/llm/test_request_builder.py | 97 + tests/routes/test_peers.py | 19 - tests/routes/test_queue_status.py | 14 - tests/routes/test_scoped_api.py | 39 - tests/utils/test_clients.py | 811 +++--- tests/utils/test_length_finish_reason.py | 456 ++++ tests/utils/test_summarizer.py | 63 +- uv.lock | 19 - 78 files changed, 9499 insertions(+), 3714 deletions(-) create mode 100644 src/llm/__init__.py create mode 100644 src/llm/api.py create mode 100644 src/llm/backend.py create mode 100644 src/llm/backends/__init__.py create mode 100644 src/llm/backends/anthropic.py create mode 100644 src/llm/backends/gemini.py create mode 100644 src/llm/backends/openai.py create mode 100644 src/llm/caching.py create mode 100644 src/llm/conversation.py create mode 100644 src/llm/credentials.py create mode 100644 src/llm/executor.py create mode 100644 src/llm/history_adapters.py create mode 100644 src/llm/registry.py create mode 100644 src/llm/request_builder.py create mode 100644 src/llm/runtime.py create mode 100644 src/llm/structured_output.py create mode 100644 src/llm/tool_loop.py create mode 100644 src/llm/types.py delete mode 100644 src/utils/clients.py create mode 100644 tests/dialectic/test_model_config_usage.py create mode 100644 tests/dreamer/test_model_config_usage.py create mode 100644 tests/live_llm/README.md create mode 100644 tests/live_llm/__init__.py create mode 100644 tests/live_llm/conftest.py create mode 100644 tests/live_llm/model_matrix.py create mode 100644 tests/live_llm/test_live_anthropic.py create mode 100644 tests/live_llm/test_live_gemini.py create mode 100644 tests/live_llm/test_live_openai.py create mode 100644 tests/llm/conftest.py create mode 100644 tests/llm/test_agent_tool_schemas.py create mode 100644 tests/llm/test_backends/test_anthropic.py create mode 100644 tests/llm/test_backends/test_gemini.py create mode 100644 tests/llm/test_backends/test_openai.py create mode 100644 tests/llm/test_conversation.py create mode 100644 tests/llm/test_credentials.py create mode 100644 tests/llm/test_embedding_client.py create mode 100644 tests/llm/test_history_adapters.py create mode 100644 tests/llm/test_model_config.py create mode 100644 tests/llm/test_request_builder.py create mode 100644 tests/utils/test_length_finish_reason.py diff --git a/.env.template b/.env.template index 56a000abc..123af642c 100644 --- a/.env.template +++ b/.env.template @@ -15,8 +15,13 @@ LOG_LEVEL=INFO # Embedding settings # EMBED_MESSAGES=true -# MAX_EMBEDDING_TOKENS=8192 -# MAX_EMBEDDING_TOKENS_PER_REQUEST=300000 +# EMBEDDING_VECTOR_DIMENSIONS=1536 +# EMBEDDING_MAX_INPUT_TOKENS=8192 +# EMBEDDING_MAX_TOKENS_PER_REQUEST=300000 +# EMBEDDING_MODEL_CONFIG__TRANSPORT=openai +# EMBEDDING_MODEL_CONFIG__MODEL=text-embedding-3-small +# EMBEDDING_MODEL_CONFIG__OVERRIDES__BASE_URL= +# EMBEDDING_MODEL_CONFIG__OVERRIDES__API_KEY_ENV= # LANGFUSE_HOST= # LANGFUSE_PUBLIC_KEY= @@ -62,55 +67,59 @@ AUTH_USE_AUTH=false # Honcho uses LLMs for memory extraction, summarization, dialectic chat, and # dream consolidation. The server will fail to start without a provider configured. # -# Quick start: uncomment the two lines below, set your endpoint and API key, -# then uncomment the provider/model lines in each feature section below. -# Any OpenAI-compatible endpoint works (OpenRouter, Together, Fireworks, etc.). 
+# Quick start: set LLM_OPENAI_API_KEY below to use the built-in defaults. +# Text-generation features default to transport = "openai" and +# model = "gpt-5.4-mini". Embeddings default to transport = "openai" and +# model = "text-embedding-3-small". For OpenAI-compatible proxies +# (OpenRouter, Together, Fireworks, vLLM, Ollama, LiteLLM), override +# MODEL_CONFIG__MODEL and MODEL_CONFIG__OVERRIDES__BASE_URL on each feature +# section you want to route through that endpoint. # Models must support tool calling (function calling). # -LLM_OPENAI_COMPATIBLE_BASE_URL=https://openrouter.ai/api/v1 -LLM_OPENAI_COMPATIBLE_API_KEY=your-api-key-here +# Supported transports: openai, anthropic, gemini +# Each transport picks up its API key from the corresponding LLM_*_API_KEY. +# Base URLs are set per-module via MODEL_CONFIG__OVERRIDES__BASE_URL. # -# Provider options for each feature: custom, vllm, google, anthropic, openai, groq -# "custom" routes through the OpenAI-compatible endpoint above. -# Model name format depends on your provider (e.g., OpenRouter: vendor/model-name). -# -# ---- Alternative: vLLM self-hosted ------------------------------------------ -# LLM_VLLM_BASE_URL=http://localhost:8000/v1 -# LLM_VLLM_API_KEY=not-needed -# -# ---- Alternative: direct vendor keys (no endpoint needed) ------------------- -# LLM_GEMINI_API_KEY= +LLM_OPENAI_API_KEY=your-api-key-here # LLM_ANTHROPIC_API_KEY= -# LLM_OPENAI_API_KEY= -# LLM_GROQ_API_KEY= -# -# ---- General LLM settings --------------------------------------------------- -# Embedding provider — defaults to openai (requires LLM_OPENAI_API_KEY). -# Set to openrouter to route embeddings through your custom endpoint instead. -LLM_EMBEDDING_PROVIDER=openrouter +# LLM_GEMINI_API_KEY= + +# ============================================================================= +# LLM Configuration +# ============================================================================= +# Global LLM settings # LLM_DEFAULT_MAX_TOKENS=2500 -# LLM_MAX_TOOL_OUTPUT_CHARS=10000 -# LLM_MAX_MESSAGE_CONTENT_CHARS=2000 +# LLM_MAX_TOOL_OUTPUT_CHARS=10000 # Max chars for tool output (~2500 tokens) +# LLM_MAX_MESSAGE_CONTENT_CHARS=2000 # Max chars per message in tool results # ============================================================================= # Deriver (Background Worker) # ============================================================================= # DERIVER_ENABLED=true -DERIVER_PROVIDER=custom -DERIVER_MODEL=your-model-here # e.g. 
google/gemini-2.5-flash -# DERIVER_THINKING_BUDGET_TOKENS=1024 # gt=0 required; omit for non-thinking models +# Defaults: +# DERIVER_MODEL_CONFIG__TRANSPORT=openai +# DERIVER_MODEL_CONFIG__MODEL=gpt-5.4-mini +# Optional overrides: +# DERIVER_MODEL_CONFIG__MODEL=your-model-here +# DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL=https://openrouter.ai/api/v1 # DERIVER_WORKERS=1 # DERIVER_POLLING_SLEEP_INTERVAL_SECONDS=1.0 # DERIVER_STALE_SESSION_TIMEOUT_MINUTES=5 -# DERIVER_QUEUE_ERROR_RETENTION_SECONDS=2592000 -# DERIVER_TEMPERATURE= +# DERIVER_QUEUE_ERROR_RETENTION_SECONDS=2592000 # 30 days +# DERIVER_MODEL_CONFIG__TEMPERATURE= +# DERIVER_MODEL_CONFIG__THINKING_EFFORT=minimal +# DERIVER_MODEL_CONFIG__THINKING_BUDGET_TOKENS=1024 # Gemini/Anthropic only # DERIVER_DEDUPLICATE=true -# DERIVER_MAX_OUTPUT_TOKENS=4096 +# DERIVER_MODEL_CONFIG__MAX_OUTPUT_TOKENS=4096 # DERIVER_LOG_OBSERVATIONS=false # DERIVER_MAX_INPUT_TOKENS=23000 # DERIVER_WORKING_REPRESENTATION_MAX_OBSERVATIONS=100 # DERIVER_REPRESENTATION_BATCH_MAX_TOKENS=1024 -# DERIVER_FLUSH_ENABLED=false +# DERIVER_FLUSH_ENABLED=false # Bypass batch token threshold, process work immediately +# DERIVER_MODEL_CONFIG__FALLBACK__MODEL= +# DERIVER_MODEL_CONFIG__FALLBACK__TRANSPORT= +# DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL= +# DERIVER_MODEL_CONFIG__OVERRIDES__API_KEY_ENV= # ============================================================================= # Peer Card @@ -125,58 +134,79 @@ DERIVER_MODEL=your-model-here # e.g. google/gemini-2.5-flash # DIALECTIC_HISTORY_TOKEN_LIMIT=8192 # DIALECTIC_SESSION_HISTORY_MAX_TOKENS=4096 # -# Per-level provider, model, and tuning: -DIALECTIC_LEVELS__minimal__PROVIDER=custom -DIALECTIC_LEVELS__minimal__MODEL=your-model-here # e.g. google/gemini-2.5-flash -# DIALECTIC_LEVELS__minimal__THINKING_BUDGET_TOKENS=0 +# Per-level settings (reasoning_level parameter in API) +# Each level has its own nested MODEL_CONFIG, tool iterations, and max output tokens. +# MAX_OUTPUT_TOKENS is optional per level; if not set, uses global DIALECTIC_MAX_OUTPUT_TOKENS. 
+# Defaults: +# DIALECTIC_LEVELS__minimal__MODEL_CONFIG__TRANSPORT=openai +# DIALECTIC_LEVELS__minimal__MODEL_CONFIG__MODEL=gpt-5.4-mini # DIALECTIC_LEVELS__minimal__MAX_TOOL_ITERATIONS=1 # DIALECTIC_LEVELS__minimal__MAX_OUTPUT_TOKENS=250 -DIALECTIC_LEVELS__low__PROVIDER=custom -DIALECTIC_LEVELS__low__MODEL=your-model-here -# DIALECTIC_LEVELS__low__THINKING_BUDGET_TOKENS=0 +# DIALECTIC_LEVELS__minimal__TOOL_CHOICE=any +# DIALECTIC_LEVELS__low__MODEL_CONFIG__TRANSPORT=openai +# DIALECTIC_LEVELS__low__MODEL_CONFIG__MODEL=gpt-5.4-mini # DIALECTIC_LEVELS__low__MAX_TOOL_ITERATIONS=5 -DIALECTIC_LEVELS__medium__PROVIDER=custom -DIALECTIC_LEVELS__medium__MODEL=your-model-here -# DIALECTIC_LEVELS__medium__THINKING_BUDGET_TOKENS=0 +# DIALECTIC_LEVELS__low__TOOL_CHOICE=any +# DIALECTIC_LEVELS__medium__MODEL_CONFIG__TRANSPORT=openai +# DIALECTIC_LEVELS__medium__MODEL_CONFIG__MODEL=gpt-5.4-mini # DIALECTIC_LEVELS__medium__MAX_TOOL_ITERATIONS=2 -DIALECTIC_LEVELS__high__PROVIDER=custom -DIALECTIC_LEVELS__high__MODEL=your-model-here -# DIALECTIC_LEVELS__high__THINKING_BUDGET_TOKENS=0 +# DIALECTIC_LEVELS__high__MODEL_CONFIG__TRANSPORT=openai +# DIALECTIC_LEVELS__high__MODEL_CONFIG__MODEL=gpt-5.4-mini # DIALECTIC_LEVELS__high__MAX_TOOL_ITERATIONS=4 -DIALECTIC_LEVELS__max__PROVIDER=custom -DIALECTIC_LEVELS__max__MODEL=your-model-here -# DIALECTIC_LEVELS__max__THINKING_BUDGET_TOKENS=0 +# DIALECTIC_LEVELS__max__MODEL_CONFIG__TRANSPORT=openai +# DIALECTIC_LEVELS__max__MODEL_CONFIG__MODEL=gpt-5.4-mini # DIALECTIC_LEVELS__max__MAX_TOOL_ITERATIONS=10 +# Optional overrides: +# DIALECTIC_LEVELS__minimal__MODEL_CONFIG__MODEL=your-model-here +# DIALECTIC_LEVELS__low__MODEL_CONFIG__MODEL=your-model-here +# DIALECTIC_LEVELS__medium__MODEL_CONFIG__MODEL=your-model-here +# DIALECTIC_LEVELS__high__MODEL_CONFIG__MODEL=your-model-here +# DIALECTIC_LEVELS__max__MODEL_CONFIG__MODEL=your-model-here +# DIALECTIC_LEVELS__max__MODEL_CONFIG__THINKING_EFFORT=medium +# DIALECTIC_LEVELS__max__MODEL_CONFIG__THINKING_BUDGET_TOKENS=1024 +# Optional backup per level (must set both or neither): +# DIALECTIC_LEVELS__max__MODEL_CONFIG__FALLBACK__MODEL=gemini-2.5-pro +# DIALECTIC_LEVELS__max__MODEL_CONFIG__FALLBACK__TRANSPORT=gemini # ============================================================================= # Summary # ============================================================================= # SUMMARY_ENABLED=true -SUMMARY_PROVIDER=custom -SUMMARY_MODEL=your-model-here # e.g. google/gemini-2.5-flash -# SUMMARY_THINKING_BUDGET_TOKENS=512 # gt=0 required; omit for non-thinking models +# Defaults: +# SUMMARY_MODEL_CONFIG__TRANSPORT=openai +# SUMMARY_MODEL_CONFIG__MODEL=gpt-5.4-mini +# Optional overrides: +# SUMMARY_MODEL_CONFIG__MODEL=your-model-here +# SUMMARY_MODEL_CONFIG__OVERRIDES__BASE_URL=https://openrouter.ai/api/v1 +# SUMMARY_MODEL_CONFIG__THINKING_EFFORT=minimal +# SUMMARY_MODEL_CONFIG__THINKING_BUDGET_TOKENS=1024 # Gemini/Anthropic only # SUMMARY_MESSAGES_PER_SHORT_SUMMARY=20 # SUMMARY_MESSAGES_PER_LONG_SUMMARY=60 # SUMMARY_MAX_TOKENS_SHORT=1000 # SUMMARY_MAX_TOKENS_LONG=4000 +# SUMMARY_MODEL_CONFIG__FALLBACK__MODEL= # ============================================================================= # Dream # ============================================================================= # DREAM_ENABLED=true -DREAM_PROVIDER=custom -DREAM_MODEL=your-model-here # e.g. 
google/gemini-2.5-flash -DREAM_DEDUCTION_MODEL=your-model-here -DREAM_INDUCTION_MODEL=your-model-here -# DREAM_THINKING_BUDGET_TOKENS=8192 # gt=0 required; omit for non-thinking models +# Defaults: +# DREAM_DEDUCTION_MODEL_CONFIG__TRANSPORT=openai +# DREAM_DEDUCTION_MODEL_CONFIG__MODEL=gpt-5.4-mini +# DREAM_INDUCTION_MODEL_CONFIG__TRANSPORT=openai +# DREAM_INDUCTION_MODEL_CONFIG__MODEL=gpt-5.4-mini +# Optional overrides: +# DREAM_DEDUCTION_MODEL_CONFIG__MODEL=your-model-here +# DREAM_DEDUCTION_MODEL_CONFIG__OVERRIDES__BASE_URL=https://openrouter.ai/api/v1 +# DREAM_INDUCTION_MODEL_CONFIG__MODEL=your-model-here +# DREAM_INDUCTION_MODEL_CONFIG__OVERRIDES__BASE_URL=https://openrouter.ai/api/v1 # DREAM_DOCUMENT_THRESHOLD=50 # DREAM_IDLE_TIMEOUT_MINUTES=60 # DREAM_MIN_HOURS_BETWEEN_DREAMS=8 # DREAM_ENABLED_TYPES=["omni"] -# DREAM_MAX_OUTPUT_TOKENS=16384 # DREAM_MAX_TOOL_ITERATIONS=20 # DREAM_HISTORY_TOKEN_LIMIT=16384 -# + # Surprisal sampling (advanced): # DREAM_SURPRISAL__ENABLED=false # DREAM_SURPRISAL__TREE_TYPE=kdtree diff --git a/CHANGELOG.md b/CHANGELOG.md index e5208a953..ab9302c62 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,39 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +## [Unreleased] + +### Added + +- New `src/llm/` package as the single owner of provider runtime: clients, backends, history adapters, tool loop, request builder, credentials, and caching policy +- `AttemptPlan` dataclass captures per-retry provider selection (client, model, reasoning_effort, thinking_budget_tokens, selected_config) and pins it across stream-final retries so streaming doesn't bounce back to primary after the tool loop has settled on fallback +- Gemini JSON-schema sanitizer for `function_declarations` — strips keywords Gemini's validator rejects (`additionalProperties`, `allOf`, etc.) 
while preserving semantics for all other backends +- Dreamer specialists derive `effective_max_tokens` from `model_config.max_output_tokens` with a per-specialist default fallback +- Regression tests covering fallback-config thinking-param reach, provider_params → extra_params boundary, OpenAI reasoning-model parameter routing, Gemini blocked finish_reason handling, and fail-fast `max_tool_iterations` validation + +### Changed + +- All LLM orchestration moved out of `src/utils/clients.py` into `src/llm/` with modules split by responsibility (api, executor, tool_loop, runtime, registry, conversation, request_builder, credentials, caching, backends, history_adapters) +- Default `ModelConfig` factories (deriver, summary, dreamer specialists, dialectic levels) normalized to `openai/gpt-5.4-mini` with no extra parameters set by default; operators add transport/thinking overrides explicitly +- OpenAI reasoning-model routing widened via `_uses_max_completion_tokens` heuristic covering `gpt-5.x` and `o1/o3/o4` — these models receive `max_completion_tokens` instead of `max_tokens` +- Override client factories switched from unbounded `@cache` to `@lru_cache(maxsize=128)` for predictable memory growth on long-running processes +- `get_backend` now delegates to `client_for_model_config`, so the live-test path and production path share one missing-API-key validation +- Blocked Gemini responses (`SAFETY`, `RECITATION`, `PROHIBITED_CONTENT`, `BLOCKLIST`) raise `LLMError` in the streaming path too (previously only the non-streaming path), ensuring retry/fallback logic fires uniformly +- Transport-change env overrides now strip transport-specific thinking params (thinking_budget_tokens vs. reasoning_effort) during config merge, including at the dialectic-level merge, so switching from Anthropic → OpenAI doesn't leave orphaned Anthropic-only params that the OpenAI backend would reject +- `max_tool_iterations` out-of-range inputs now raise `ValidationException` instead of being silently clamped +- Troubleshooting docs updated to reflect nested-env-var form for per-component thinking-budget overrides + +### Fixed + +- Fallback `ModelConfig` temperature and `thinking_budget_tokens` reach the backend on the final retry — previously the primary's values were pre-populated into caller kwargs early and clobbered fallback values via `effective_config_for_call(update=...)` +- Stream-final retries pin to the `AttemptPlan` that succeeded rather than re-running provider selection through the outer `current_attempt` ContextVar (which could roll streaming back to primary after the tool loop had already switched to fallback) +- OpenAI structured-output calls continue to use `chat.completions.parse()` with strict schema enforcement, while tool-calling paths use `chat.completions.create()` without `strict:True` for broader proxy compatibility (OpenRouter, vLLM, Ollama) +- Gemini `cached_content` reuse keys now include `system_instruction` and `tool_config` so cache hits don't cross configurations that differ only in those fields + +### Removed + +- `src/utils/clients.py` deleted; its responsibilities are split across `src/llm/registry.py`, `src/llm/credentials.py`, and the backend-specific modules + ## [3.0.6] - 2026-04-10 ### Changed diff --git a/README.md b/README.md index 5ee20e582..9a7cf541e 100644 --- a/README.md +++ b/README.md @@ -225,7 +225,6 @@ DB_CONNECTION_URI= # Connection uri for a postgres database (with postgresql+psy LLM_GEMINI_API_KEY= # API Key for Google Gemini (used for deriver, summary, and dialectic 
minimal/low by default) LLM_ANTHROPIC_API_KEY= # API Key for Anthropic (used for dialectic medium/high/max and dream by default) LLM_OPENAI_API_KEY= # API Key for OpenAI (used for embeddings when EMBED_MESSAGES=true) -LLM_GROQ_API_KEY= # API Key for Groq (optional) ``` > Note that the `DB_CONNECTION_URI` must have the prefix `postgresql+psycopg` to @@ -420,16 +419,17 @@ Then modify the values as needed. The TOML file is organized into sections: All configuration values can be overridden using environment variables. The environment variable names follow this pattern: -- `{SECTION}_{KEY}` for nested settings +- `{SECTION}_{KEY}` for top-level section settings +- Use `__` inside `{KEY}` for nested settings - Just `{KEY}` for app-level settings Examples: - `DB_CONNECTION_URI` - Database connection string - `AUTH_JWT_SECRET` - JWT secret key -- `DIALECTIC_LEVELS__low__MODEL` - Model for low reasoning level -- `DERIVER_PROVIDER` - Provider for background deriver -- `SUMMARY_PROVIDER` - Summary generation provider +- `DERIVER_MODEL_CONFIG__TRANSPORT` - Transport for the background deriver +- `SUMMARY_MODEL_CONFIG__MODEL` - Summary model override +- `DIALECTIC_LEVELS__low__MODEL_CONFIG__MODEL` - Model for low reasoning level - `LOG_LEVEL` - Application log level - `METRICS_ENABLED` - Enable Prometheus metrics - `TELEMETRY_ENABLED` - Enable CloudEvents telemetry diff --git a/config.toml.example b/config.toml.example index b9cf84c0e..236f34025 100644 --- a/config.toml.example +++ b/config.toml.example @@ -11,8 +11,6 @@ GET_CONTEXT_MAX_TOKENS = 100000 MAX_FILE_SIZE = 5242880 # 5MB MAX_MESSAGE_SIZE = 25000 # Characters EMBED_MESSAGES = true -MAX_EMBEDDING_TOKENS = 8192 -MAX_EMBEDDING_TOKENS_PER_REQUEST = 300000 # LANGFUSE_HOST = "https://api.langfuse.com" # LANGFUSE_PUBLIC_KEY = "your-public-key-here" # COLLECT_METRICS_LOCAL = false @@ -51,25 +49,32 @@ PROFILES_SAMPLE_RATE = 0.1 # LLM settings [llm] DEFAULT_MAX_TOKENS = 2500 -EMBEDDING_PROVIDER = "openai" MAX_TOOL_OUTPUT_CHARS = 10000 # Max chars for tool output (~2500 tokens) MAX_MESSAGE_CONTENT_CHARS = 2000 # Max chars per message in tool results # API Keys for LLM providers (set the ones you need) -# GEMINI_API_KEY = "your-api-key" # Default: deriver, summary, dialectic minimal/low -# ANTHROPIC_API_KEY = "your-api-key" # Default: dialectic medium/high/max, dream -# OPENAI_API_KEY = "your-api-key" # Default: embeddings -# GROQ_API_KEY = "your-api-key" # Not used by default +# Supported transports: openai, anthropic, gemini +# Base URLs are set per-module via model_config.overrides.base_url +# Built-in text-generation defaults use openai / gpt-5.4-mini. +# Embeddings default to openai / text-embedding-3-small. +OPENAI_API_KEY = "your-api-key-here" +# ANTHROPIC_API_KEY = "your-api-key" +# GEMINI_API_KEY = "your-api-key" -# OpenAI-compatible endpoint (OpenRouter, Together, Fireworks, LiteLLM, etc.) -# Set provider to "custom" in feature config to route calls through this endpoint. -# OPENAI_COMPATIBLE_BASE_URL = "https://openrouter.ai/api/v1" -# OPENAI_COMPATIBLE_API_KEY = "your-api-key" +# Embedding settings +[embedding] +VECTOR_DIMENSIONS = 1536 +MAX_INPUT_TOKENS = 8192 +MAX_TOKENS_PER_REQUEST = 300000 -# vLLM endpoint (for self-hosted models) -# Set provider to "vllm" in feature config to route calls through this endpoint. 
-# VLLM_BASE_URL = "http://localhost:8000/v1" -# VLLM_API_KEY = "not-needed" +[embedding.model_config] +transport = "openai" +model = "text-embedding-3-small" + +# Optional module-level endpoint overrides +# [embedding.model_config.overrides] +# base_url = "https://embedding-proxy.internal.example/v1" +# api_key_env = "EMBEDDING_CUSTOM_API_KEY" # Deriver settings [deriver] @@ -78,20 +83,38 @@ WORKERS = 1 POLLING_SLEEP_INTERVAL_SECONDS = 1.0 STALE_SESSION_TIMEOUT_MINUTES = 5 # QUEUE_ERROR_RETENTION_SECONDS = 2592000 # 30 days -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -# TEMPERATURE = 0.0 -# BACKUP_PROVIDER = "anthropic" -# BACKUP_MODEL = "claude-haiku-4-5" DEDUPLICATE = true -MAX_OUTPUT_TOKENS = 4096 -THINKING_BUDGET_TOKENS = 1024 LOG_OBSERVATIONS = false MAX_INPUT_TOKENS = 23000 WORKING_REPRESENTATION_MAX_OBSERVATIONS = 100 REPRESENTATION_BATCH_MAX_TOKENS = 1024 FLUSH_ENABLED = false # Bypass batch token threshold, process work immediately +[deriver.model_config] +transport = "openai" +model = "gpt-5.4-mini" +# temperature = 0.0 +# thinking_effort = "minimal" +# thinking_budget_tokens = 1024 +# max_output_tokens = 4096 + +# Optional module-level endpoint overrides +# transport = "openai" +# model = "my-local-model" +# [deriver.model_config.overrides] +# base_url = "https://llm.internal.example/v1" +# api_key_env = "DERIVER_CUSTOM_API_KEY" + +# Optional fallback model +# [deriver.model_config.fallback] +# transport = "anthropic" +# model = "claude-haiku-4-5" +# [deriver.model_config.fallback.overrides] +# base_url = "https://llm-backup.internal.example/v1" +# api_key_env = "DERIVER_CUSTOM_BACKUP_API_KEY" +# [deriver.model_config.overrides.provider_params] +# verbosity = "low" + # Peer card settings [peer_card] ENABLED = true @@ -106,55 +129,64 @@ SESSION_HISTORY_MAX_TOKENS = 4096 # Per-level settings for reasoning levels # MAX_OUTPUT_TOKENS is optional per level; if not set, uses global MAX_OUTPUT_TOKENS [dialectic.levels.minimal] -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -THINKING_BUDGET_TOKENS = 0 MAX_TOOL_ITERATIONS = 1 MAX_OUTPUT_TOKENS = 250 +TOOL_CHOICE = "any" + +[dialectic.levels.minimal.model_config] +transport = "openai" +model = "gpt-5.4-mini" [dialectic.levels.low] -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -THINKING_BUDGET_TOKENS = 0 MAX_TOOL_ITERATIONS = 5 -# MAX_OUTPUT_TOKENS = 8192 # Optional: override global default +TOOL_CHOICE = "any" + +[dialectic.levels.low.model_config] +transport = "openai" +model = "gpt-5.4-mini" [dialectic.levels.medium] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 1024 MAX_TOOL_ITERATIONS = 2 -# MAX_OUTPUT_TOKENS = 8192 # Optional: override global default + +[dialectic.levels.medium.model_config] +transport = "openai" +model = "gpt-5.4-mini" [dialectic.levels.high] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 1024 MAX_TOOL_ITERATIONS = 4 -# MAX_OUTPUT_TOKENS = 8192 # Optional: override global default + +[dialectic.levels.high.model_config] +transport = "openai" +model = "gpt-5.4-mini" [dialectic.levels.max] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 2048 MAX_TOOL_ITERATIONS = 10 -# MAX_OUTPUT_TOKENS = 8192 # Optional: override global default -# Backup provider example (optional, must set both or neither): -# BACKUP_PROVIDER = "google" -# BACKUP_MODEL = "gemini-2.5-pro" + +[dialectic.levels.max.model_config] +transport = "openai" +model = "gpt-5.4-mini" + +# [dialectic.levels.max.model_config.fallback] +# transport 
= "gemini" +# model = "gemini-2.5-pro" # Summary settings [summary] ENABLED = true MESSAGES_PER_SHORT_SUMMARY = 20 MESSAGES_PER_LONG_SUMMARY = 60 -PROVIDER = "google" -MODEL = "gemini-2.5-flash" MAX_TOKENS_SHORT = 1000 MAX_TOKENS_LONG = 4000 -THINKING_BUDGET_TOKENS = 512 -# BACKUP_PROVIDER = "google" -# BACKUP_MODEL = "gemini-2.5-flash" + +[summary.model_config] +transport = "openai" +model = "gpt-5.4-mini" +# thinking_effort = "minimal" +# thinking_budget_tokens = 1024 + +# [summary.model_config.fallback] +# transport = "anthropic" +# model = "claude-haiku-4-5" # Dream settings [dream] @@ -163,18 +195,16 @@ DOCUMENT_THRESHOLD = 50 IDLE_TIMEOUT_MINUTES = 60 MIN_HOURS_BETWEEN_DREAMS = 8 ENABLED_TYPES = ["omni"] -PROVIDER = "anthropic" -MODEL = "claude-sonnet-4-20250514" -MAX_OUTPUT_TOKENS = 16384 -THINKING_BUDGET_TOKENS = 8192 MAX_TOOL_ITERATIONS = 20 HISTORY_TOKEN_LIMIT = 16384 -# BACKUP_PROVIDER = "google" -# BACKUP_MODEL = "gemini-2.5-flash" -# Specialist models (use same provider as main model) -DEDUCTION_MODEL = "claude-haiku-4-5" -INDUCTION_MODEL = "claude-haiku-4-5" +[dream.deduction_model_config] +transport = "openai" +model = "gpt-5.4-mini" + +[dream.induction_model_config] +transport = "openai" +model = "gpt-5.4-mini" # Surprisal-based sampling subsystem [dream.surprisal] @@ -224,6 +254,8 @@ TYPE = "pgvector" # Migration flag: set to true when migration from pgvector is complete MIGRATED = false NAMESPACE = "honcho" +# This should match embedding.vector_dimensions. pgvector and dual-write mode +# currently still require 1536 until a schema migration lands. DIMENSIONS = 1536 # TURBOPUFFER_API_KEY = "your-turbopuffer-api-key" # TURBOPUFFER_REGION = "us-east-1" diff --git a/docs/v3/contributing/configuration.mdx b/docs/v3/contributing/configuration.mdx index 5ae99cc98..02916ab64 100644 --- a/docs/v3/contributing/configuration.mdx +++ b/docs/v3/contributing/configuration.mdx @@ -26,13 +26,13 @@ cp config.toml.example config.toml All config values map to environment variables: -- `{SECTION}_{KEY}` for section settings (e.g., `DB_CONNECTION_URI` → `[db].CONNECTION_URI`) +- `{SECTION}_{KEY}` for top-level section settings (e.g., `DB_CONNECTION_URI` → `[db].CONNECTION_URI`) - `{KEY}` for app-level settings (e.g., `LOG_LEVEL` → `[app].LOG_LEVEL`) -- `{SECTION}__{NESTED}__{KEY}` for deeply nested settings (double underscore, e.g., `DIALECTIC_LEVELS__minimal__PROVIDER`) +- Use `__` inside `{KEY}` for nested settings (e.g., `DIALECTIC_LEVELS__minimal__MODEL_CONFIG__TRANSPORT`, `DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL`) ## LLM Configuration -The [Self-Hosting Guide](./self-hosting#llm-setup) covers the basic setup: one OpenAI-compatible endpoint, one model for all features. This section covers recommended model tiers, using multiple providers, and per-feature tuning. +The [Self-Hosting Guide](./self-hosting#llm-setup) covers the basic setup: either the built-in OpenAI defaults or one OpenAI-compatible endpoint/model for all features. This section covers recommended model tiers, using multiple providers, and per-feature tuning. All Honcho agents (deriver, dialectic, dream) require tool calling. Your models must support the OpenAI tool calling format. 
@@ -52,104 +52,175 @@ You can mix providers freely — for example, use Gemini for the deriver and Cla
 
 ### Provider Types
 
-| Provider value | What it connects to | Key env var |
+| Transport value | What it connects to | API key env var |
 |---|---|---|
-| `custom` | Any OpenAI-compatible endpoint (OpenRouter, Together, Fireworks, LiteLLM, Ollama) | `LLM_OPENAI_COMPATIBLE_API_KEY` + `LLM_OPENAI_COMPATIBLE_BASE_URL` |
-| `vllm` | vLLM self-hosted models | `LLM_VLLM_API_KEY` + `LLM_VLLM_BASE_URL` |
-| `google` | Google Gemini (direct) | `LLM_GEMINI_API_KEY` |
+| `openai` | OpenAI or any OpenAI-compatible endpoint (OpenRouter, Together, Fireworks, LiteLLM, vLLM, Ollama) | `LLM_OPENAI_API_KEY` |
 | `anthropic` | Anthropic Claude (direct) | `LLM_ANTHROPIC_API_KEY` |
-| `openai` | OpenAI (direct) | `LLM_OPENAI_API_KEY` |
-| `groq` | Groq (direct) | `LLM_GROQ_API_KEY` |
+| `gemini` | Google Gemini (direct) | `LLM_GEMINI_API_KEY` |
+
+For OpenAI-compatible proxies (OpenRouter, vLLM, Ollama, etc.), use `transport = "openai"` and set `MODEL_CONFIG__OVERRIDES__BASE_URL` on each feature to point at your endpoint.
 
 ### Tiered Model Setup
 
 Once you're past initial setup, you can assign different models per feature for better cost/quality tradeoffs. This example uses OpenRouter with light/medium/heavy tiers:
 
 ```bash
-LLM_OPENAI_COMPATIBLE_BASE_URL=https://openrouter.ai/api/v1
-LLM_OPENAI_COMPATIBLE_API_KEY=sk-or-v1-...
+LLM_OPENAI_API_KEY=sk-or-v1-...
+
+# Route each feature through OpenRouter by setting OVERRIDES__BASE_URL on its
+# MODEL_CONFIG (shown once for the deriver below; repeat for every feature)
 
 # Light tier — high throughput, cheap
-DERIVER_PROVIDER=custom
-DERIVER_MODEL=google/gemini-2.5-flash-lite
-SUMMARY_PROVIDER=custom
-SUMMARY_MODEL=google/gemini-2.5-flash
-DIALECTIC_LEVELS__minimal__PROVIDER=custom
-DIALECTIC_LEVELS__minimal__MODEL=google/gemini-2.5-flash-lite
-DIALECTIC_LEVELS__low__PROVIDER=custom
-DIALECTIC_LEVELS__low__MODEL=google/gemini-2.5-flash-lite
+DERIVER_MODEL_CONFIG__TRANSPORT=openai
+DERIVER_MODEL_CONFIG__MODEL=google/gemini-2.5-flash-lite
+DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL=https://openrouter.ai/api/v1
+SUMMARY_MODEL_CONFIG__TRANSPORT=openai
+SUMMARY_MODEL_CONFIG__MODEL=google/gemini-2.5-flash
+DIALECTIC_LEVELS__minimal__MODEL_CONFIG__TRANSPORT=openai
+DIALECTIC_LEVELS__minimal__MODEL_CONFIG__MODEL=google/gemini-2.5-flash-lite
+DIALECTIC_LEVELS__low__MODEL_CONFIG__TRANSPORT=openai
+DIALECTIC_LEVELS__low__MODEL_CONFIG__MODEL=google/gemini-2.5-flash-lite
 
 # Medium tier — better reasoning
-DIALECTIC_LEVELS__medium__PROVIDER=custom
-DIALECTIC_LEVELS__medium__MODEL=anthropic/claude-haiku-4-5
-DIALECTIC_LEVELS__high__PROVIDER=custom
-DIALECTIC_LEVELS__high__MODEL=anthropic/claude-haiku-4-5
-DIALECTIC_LEVELS__max__PROVIDER=custom
-DIALECTIC_LEVELS__max__MODEL=anthropic/claude-haiku-4-5
+DIALECTIC_LEVELS__medium__MODEL_CONFIG__TRANSPORT=openai
+DIALECTIC_LEVELS__medium__MODEL_CONFIG__MODEL=anthropic/claude-haiku-4-5
+DIALECTIC_LEVELS__high__MODEL_CONFIG__TRANSPORT=openai
+DIALECTIC_LEVELS__high__MODEL_CONFIG__MODEL=anthropic/claude-haiku-4-5
+DIALECTIC_LEVELS__max__MODEL_CONFIG__TRANSPORT=openai
+DIALECTIC_LEVELS__max__MODEL_CONFIG__MODEL=anthropic/claude-haiku-4-5
 
 # Heavy tier — best quality for complex tasks
-DREAM_PROVIDER=custom
-DREAM_MODEL=anthropic/claude-sonnet-4-20250514
-DREAM_DEDUCTION_MODEL=anthropic/claude-haiku-4-5
-DREAM_INDUCTION_MODEL=anthropic/claude-haiku-4-5
+DREAM_DEDUCTION_MODEL_CONFIG__TRANSPORT=openai
+DREAM_DEDUCTION_MODEL_CONFIG__MODEL=anthropic/claude-haiku-4-5
+DREAM_INDUCTION_MODEL_CONFIG__TRANSPORT=openai +DREAM_INDUCTION_MODEL_CONFIG__MODEL=anthropic/claude-haiku-4-5 ``` ### Direct Vendor Keys -Instead of an OpenAI-compatible proxy, you can use vendor APIs directly. Leave `PROVIDER` overrides unset and the code defaults route per feature: +Instead of an OpenAI-compatible proxy, you can use vendor APIs directly. Each transport picks up its own `LLM_{TRANSPORT}_API_KEY`. + +If you keep the built-in defaults, only `LLM_OPENAI_API_KEY` is required: + +```bash +LLM_OPENAI_API_KEY=... + +# Built-in model defaults +# - deriver: openai / gpt-5.4-mini +# - dialectic (all levels): openai / gpt-5.4-mini +# - summary: openai / gpt-5.4-mini +# - dream specialists: openai / gpt-5.4-mini +# - embeddings: openai / text-embedding-3-small +``` + +To use Gemini or Anthropic directly, override the features you want to move: ```bash -LLM_GEMINI_API_KEY=... # deriver, summary, dialectic minimal/low -LLM_ANTHROPIC_API_KEY=... # dialectic medium/high/max, dream -LLM_OPENAI_API_KEY=... # embeddings +LLM_GEMINI_API_KEY=... +DERIVER_MODEL_CONFIG__TRANSPORT=gemini +DERIVER_MODEL_CONFIG__MODEL=gemini-2.5-flash + +LLM_ANTHROPIC_API_KEY=... +DREAM_DEDUCTION_MODEL_CONFIG__TRANSPORT=anthropic +DREAM_DEDUCTION_MODEL_CONFIG__MODEL=claude-haiku-4-5 ``` ### Self-Hosted (vLLM / Ollama) +Use `transport = "openai"` and set `MODEL_CONFIG__OVERRIDES__BASE_URL` on each feature: + ```bash # vLLM -LLM_VLLM_BASE_URL=http://localhost:8000/v1 -LLM_VLLM_API_KEY=not-needed -DERIVER_PROVIDER=vllm -DERIVER_MODEL=your-model-name - -# Ollama (uses custom provider) -LLM_OPENAI_COMPATIBLE_BASE_URL=http://localhost:11434/v1 -LLM_OPENAI_COMPATIBLE_API_KEY=ollama -DERIVER_PROVIDER=custom -DERIVER_MODEL=llama3.3:70b +LLM_OPENAI_API_KEY=not-needed +DERIVER_MODEL_CONFIG__TRANSPORT=openai +DERIVER_MODEL_CONFIG__MODEL=your-model-name +DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL=http://localhost:8000/v1 + +# Ollama +LLM_OPENAI_API_KEY=ollama +DERIVER_MODEL_CONFIG__TRANSPORT=openai +DERIVER_MODEL_CONFIG__MODEL=llama3.3:70b +DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL=http://localhost:11434/v1 ``` -Set `PROVIDER` and `MODEL` for each feature the same way. +Set `MODEL_CONFIG__TRANSPORT`, `MODEL_CONFIG__MODEL`, and `MODEL_CONFIG__OVERRIDES__BASE_URL` for each feature the same way. + +The same overrides are available in `config.toml`: + +```toml +[deriver.model_config] +transport = "openai" +model = "my-local-model" + +[deriver.model_config.overrides] +base_url = "http://localhost:8000/v1" +api_key_env = "DERIVER_LOCAL_API_KEY" +``` ### Thinking Budget -Default configs use `THINKING_BUDGET_TOKENS` tuned for Anthropic models. Non-Anthropic providers don't support extended thinking and will error or silently fail. The [Self-Hosting Guide](./self-hosting#llm-setup) sets these to `0` by default. If you switch to Anthropic models, you can re-enable them: +Built-in defaults do not set `MODEL_CONFIG__THINKING_BUDGET_TOKENS` or `MODEL_CONFIG__THINKING_EFFORT`. Add one only when your chosen model supports it. 
+ +Use `MODEL_CONFIG__THINKING_EFFORT` for OpenAI reasoning models: ```bash -# Anthropic models — enable thinking -DERIVER_THINKING_BUDGET_TOKENS=1024 -SUMMARY_THINKING_BUDGET_TOKENS=512 -DREAM_THINKING_BUDGET_TOKENS=8192 -DIALECTIC_LEVELS__medium__THINKING_BUDGET_TOKENS=1024 -DIALECTIC_LEVELS__high__THINKING_BUDGET_TOKENS=1024 -DIALECTIC_LEVELS__max__THINKING_BUDGET_TOKENS=2048 -# minimal and low stay at 0 +DERIVER_MODEL_CONFIG__THINKING_EFFORT=minimal +DIALECTIC_LEVELS__max__MODEL_CONFIG__THINKING_EFFORT=medium ``` +Use `MODEL_CONFIG__THINKING_BUDGET_TOKENS` for Anthropic and Gemini models. Set it to `0` or omit it for providers that don't support extended thinking: + +```bash +SUMMARY_MODEL_CONFIG__THINKING_BUDGET_TOKENS=1024 +DREAM_DEDUCTION_MODEL_CONFIG__THINKING_BUDGET_TOKENS=1024 +``` + +### Provider-Specific Parameters + +Each model config supports an `overrides.provider_params` dict for passing arbitrary parameters to the underlying provider SDK. Use this for vendor-specific features that aren't part of the standard config: + +```toml +[deriver.model_config.overrides.provider_params] +# These are passed directly to the provider SDK +verbosity = "low" +``` + +### Changing Transport + +When changing a feature's `transport`, always specify `model` explicitly. Partial overrides that change transport without model will keep the previous model name, which may not be valid for the new provider. + ### General LLM Settings ```bash LLM_DEFAULT_MAX_TOKENS=2500 -# Embedding provider (used when EMBED_MESSAGES=true) -LLM_EMBEDDING_PROVIDER=openai # Options: openai, gemini, openrouter - # Tool output limits (to prevent token explosion) LLM_MAX_TOOL_OUTPUT_CHARS=10000 # ~2500 tokens at 4 chars/token LLM_MAX_MESSAGE_CONTENT_CHARS=2000 # Max chars per message in tool results ``` +### Embedding Configuration + +Embeddings use their own nested model config, separate from the main text-generation LLM settings. + +```bash +# Embedding vector settings +EMBEDDING_VECTOR_DIMENSIONS=1536 +EMBEDDING_MAX_INPUT_TOKENS=8192 +EMBEDDING_MAX_TOKENS_PER_REQUEST=300000 + +# Embedding transport/model selection +EMBEDDING_MODEL_CONFIG__TRANSPORT=openai # openai, gemini +EMBEDDING_MODEL_CONFIG__MODEL=text-embedding-3-small + +# Optional endpoint overrides +EMBEDDING_MODEL_CONFIG__OVERRIDES__BASE_URL=http://localhost:8000/v1 +EMBEDDING_MODEL_CONFIG__OVERRIDES__API_KEY_ENV=EMBEDDING_CUSTOM_API_KEY +``` + +Current constraint: +- `EMBEDDING_VECTOR_DIMENSIONS` can be changed for fully migrated external vector stores, but pgvector and dual-write mode still require `1536` until the schema migration lands. + ### Feature-Specific Model Configuration Each feature can use a different provider and model. Below are all the tuning knobs. 
@@ -173,45 +244,51 @@ Each reasoning level has its own provider, model, and settings: ```toml # config.toml example [dialectic.levels.minimal] -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -THINKING_BUDGET_TOKENS = 0 MAX_TOOL_ITERATIONS = 1 MAX_OUTPUT_TOKENS = 250 TOOL_CHOICE = "any" +[dialectic.levels.minimal.model_config] +transport = "openai" +model = "gpt-5.4-mini" + [dialectic.levels.low] -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -THINKING_BUDGET_TOKENS = 0 MAX_TOOL_ITERATIONS = 5 TOOL_CHOICE = "any" +[dialectic.levels.low.model_config] +transport = "openai" +model = "gpt-5.4-mini" + [dialectic.levels.medium] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 1024 MAX_TOOL_ITERATIONS = 2 +[dialectic.levels.medium.model_config] +transport = "openai" +model = "gpt-5.4-mini" + [dialectic.levels.high] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 1024 MAX_TOOL_ITERATIONS = 4 +[dialectic.levels.high.model_config] +transport = "openai" +model = "gpt-5.4-mini" + [dialectic.levels.max] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 2048 MAX_TOOL_ITERATIONS = 10 + +[dialectic.levels.max.model_config] +transport = "openai" +model = "gpt-5.4-mini" ``` Environment variables for nested levels use double underscores: ```bash -DIALECTIC_LEVELS__minimal__PROVIDER=google -DIALECTIC_LEVELS__minimal__MODEL=gemini-2.5-flash-lite -DIALECTIC_LEVELS__minimal__THINKING_BUDGET_TOKENS=0 +DIALECTIC_LEVELS__minimal__MODEL_CONFIG__TRANSPORT=openai +DIALECTIC_LEVELS__minimal__MODEL_CONFIG__MODEL=gpt-5.4-mini DIALECTIC_LEVELS__minimal__MAX_TOOL_ITERATIONS=1 +DIALECTIC_LEVELS__minimal__MAX_OUTPUT_TOKENS=250 +DIALECTIC_LEVELS__minimal__TOOL_CHOICE=any ``` **Deriver (Theory of Mind):** @@ -222,12 +299,16 @@ The Deriver extracts facts from messages and builds theory-of-mind representatio DERIVER_ENABLED=true # LLM settings -DERIVER_PROVIDER=google -DERIVER_MODEL=gemini-2.5-flash-lite -DERIVER_MAX_OUTPUT_TOKENS=4096 -DERIVER_THINKING_BUDGET_TOKENS=1024 +DERIVER_MODEL_CONFIG__TRANSPORT=openai +DERIVER_MODEL_CONFIG__MODEL=gpt-5.4-mini DERIVER_MAX_INPUT_TOKENS=23000 -DERIVER_TEMPERATURE= # Optional override (unset by default) +# DERIVER_MODEL_CONFIG__THINKING_EFFORT=minimal +# DERIVER_MODEL_CONFIG__THINKING_BUDGET_TOKENS=1024 +# DERIVER_MODEL_CONFIG__TEMPERATURE=0.7 # Optional temperature override + +# Backup model (optional) +# DERIVER_MODEL_CONFIG__FALLBACK__MODEL=claude-haiku-4-5 +# DERIVER_MODEL_CONFIG__FALLBACK__TRANSPORT=anthropic # Worker settings DERIVER_WORKERS=1 # Increase for higher throughput @@ -256,11 +337,12 @@ Session summaries provide compressed context for long conversations — short su ```bash SUMMARY_ENABLED=true -SUMMARY_PROVIDER=google -SUMMARY_MODEL=gemini-2.5-flash +SUMMARY_MODEL_CONFIG__TRANSPORT=openai +SUMMARY_MODEL_CONFIG__MODEL=gpt-5.4-mini SUMMARY_MAX_TOKENS_SHORT=1000 SUMMARY_MAX_TOKENS_LONG=4000 -SUMMARY_THINKING_BUDGET_TOKENS=512 +# SUMMARY_MODEL_CONFIG__THINKING_EFFORT=minimal +# SUMMARY_MODEL_CONFIG__THINKING_BUDGET_TOKENS=1024 SUMMARY_MESSAGES_PER_SHORT_SUMMARY=20 SUMMARY_MESSAGES_PER_LONG_SUMMARY=60 ``` @@ -275,18 +357,14 @@ DREAM_DOCUMENT_THRESHOLD=50 DREAM_IDLE_TIMEOUT_MINUTES=60 DREAM_MIN_HOURS_BETWEEN_DREAMS=8 DREAM_ENABLED_TYPES=["omni"] - -# LLM settings -DREAM_PROVIDER=anthropic -DREAM_MODEL=claude-sonnet-4-20250514 -DREAM_MAX_OUTPUT_TOKENS=16384 -DREAM_THINKING_BUDGET_TOKENS=8192 DREAM_MAX_TOOL_ITERATIONS=20 DREAM_HISTORY_TOKEN_LIMIT=16384 -# Specialist models (use same 
provider as main model) -DREAM_DEDUCTION_MODEL=claude-haiku-4-5 -DREAM_INDUCTION_MODEL=claude-haiku-4-5 +# Specialist model configs (each is independent) +DREAM_DEDUCTION_MODEL_CONFIG__TRANSPORT=openai +DREAM_DEDUCTION_MODEL_CONFIG__MODEL=gpt-5.4-mini +DREAM_INDUCTION_MODEL_CONFIG__TRANSPORT=openai +DREAM_INDUCTION_MODEL_CONFIG__MODEL=gpt-5.4-mini ``` **Surprisal-Based Sampling (Advanced):** @@ -315,8 +393,8 @@ GET_CONTEXT_MAX_TOKENS=100000 MAX_MESSAGE_SIZE=25000 MAX_FILE_SIZE=5242880 # 5MB EMBED_MESSAGES=true -MAX_EMBEDDING_TOKENS=8192 -MAX_EMBEDDING_TOKENS_PER_REQUEST=300000 +EMBEDDING_MAX_INPUT_TOKENS=8192 +EMBEDDING_MAX_TOKENS_PER_REQUEST=300000 NAMESPACE=honcho ``` @@ -452,8 +530,10 @@ DEFAULT_TTL_SECONDS = 300 [deriver] ENABLED = true WORKERS = 1 -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" + +[deriver.model_config] +transport = "openai" +model = "gpt-5.4-mini" [peer_card] ENABLED = true @@ -462,44 +542,62 @@ ENABLED = true MAX_OUTPUT_TOKENS = 8192 [dialectic.levels.minimal] -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -THINKING_BUDGET_TOKENS = 0 MAX_TOOL_ITERATIONS = 1 +MAX_OUTPUT_TOKENS = 250 +TOOL_CHOICE = "any" + +[dialectic.levels.minimal.model_config] +transport = "openai" +model = "gpt-5.4-mini" [dialectic.levels.low] -PROVIDER = "google" -MODEL = "gemini-2.5-flash-lite" -THINKING_BUDGET_TOKENS = 0 MAX_TOOL_ITERATIONS = 5 +TOOL_CHOICE = "any" + +[dialectic.levels.low.model_config] +transport = "openai" +model = "gpt-5.4-mini" [dialectic.levels.medium] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 1024 MAX_TOOL_ITERATIONS = 2 +[dialectic.levels.medium.model_config] +transport = "openai" +model = "gpt-5.4-mini" + [dialectic.levels.high] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 1024 MAX_TOOL_ITERATIONS = 4 +[dialectic.levels.high.model_config] +transport = "openai" +model = "gpt-5.4-mini" + [dialectic.levels.max] -PROVIDER = "anthropic" -MODEL = "claude-haiku-4-5" -THINKING_BUDGET_TOKENS = 2048 MAX_TOOL_ITERATIONS = 10 +[dialectic.levels.max.model_config] +transport = "openai" +model = "gpt-5.4-mini" + [summary] ENABLED = true -PROVIDER = "google" -MODEL = "gemini-2.5-flash" +MAX_TOKENS_SHORT = 1000 +MAX_TOKENS_LONG = 4000 + +[summary.model_config] +transport = "openai" +model = "gpt-5.4-mini" [dream] ENABLED = true -PROVIDER = "anthropic" -MODEL = "claude-sonnet-4-20250514" + +[dream.deduction_model_config] +transport = "openai" +model = "gpt-5.4-mini" + +[dream.induction_model_config] +transport = "openai" +model = "gpt-5.4-mini" [webhook] MAX_WORKSPACE_LIMIT = 10 @@ -536,6 +634,6 @@ uv run alembic revision --autogenerate -m "Description" # Create new migration 4. **Deriver not processing** — Check logs. Increase `DERIVER_WORKERS` for throughput. Verify database and LLM connectivity. -5. **Dialectic level issues** — All five levels must be configured. For Anthropic, `THINKING_BUDGET_TOKENS` must be >= 1024. For non-Anthropic providers, set to `0`. `MAX_OUTPUT_TOKENS` must exceed `THINKING_BUDGET_TOKENS`. +5. **Dialectic level issues** — Unset level fields inherit from the built-in defaults. For Anthropic, `THINKING_BUDGET_TOKENS` must be >= 1024 when enabled. For providers without budgeted thinking, omit it or set it to `0`. `MAX_OUTPUT_TOKENS` must exceed `THINKING_BUDGET_TOKENS`. 6. **Vector store issues** — For Turbopuffer, set the API key. Check `VECTOR_STORE_DIMENSIONS` matches your embedding model. 
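To make item 6 concrete, a minimal dimension check (hypothetical helper, not from the codebase; verify the model dimensions against your provider's docs):

```python
# Common default output dimensions for the embedding models named in this
# patch. text-embedding-3-small is 1536; gemini-embedding-001 defaults to
# 3072 (it also supports truncated sizes).
EMBEDDING_DIMS = {
    "text-embedding-3-small": 1536,
    "gemini-embedding-001": 3072,
}

def check_vector_store_dims(store_dims: int, embedding_model: str) -> None:
    expected = EMBEDDING_DIMS.get(embedding_model)
    if expected is not None and store_dims != expected:
        raise ValueError(
            f"VECTOR_STORE_DIMENSIONS={store_dims} does not match "
            f"{embedding_model} (expected {expected})"
        )
```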
diff --git a/docs/v3/contributing/self-hosting.mdx b/docs/v3/contributing/self-hosting.mdx index fc298bd80..975f9d769 100644 --- a/docs/v3/contributing/self-hosting.mdx +++ b/docs/v3/contributing/self-hosting.mdx @@ -36,30 +36,24 @@ You'll need a PostgreSQL database with the pgvector extension. Choose one: Honcho uses LLMs for memory extraction, summarization, dialectic chat, and dreaming. The server will **fail to start** without a provider configured. -You need one API key and one model. Any OpenAI-compatible endpoint works — OpenRouter, Together, Fireworks, Ollama, vLLM, or a direct vendor API. Models must support tool calling (function calling). +If you keep the built-in defaults, you only need one API key: all text-generation features default to `openai / gpt-5.4-mini`, and embeddings default to `openai / text-embedding-3-small`. Any OpenAI-compatible endpoint works too — OpenRouter, Together, Fireworks, Ollama, vLLM, or LiteLLM. Models must support tool calling (function calling). -The `.env.template` has provider and model lines ready for each feature. After copying it to `.env`, you need to set three things: +After copying `.env.template` to `.env`, the default setup is: ```bash -# 1. Your endpoint and API key (already uncommented in the template) -LLM_OPENAI_COMPATIBLE_BASE_URL=https://openrouter.ai/api/v1 -LLM_OPENAI_COMPATIBLE_API_KEY=sk-or-v1-... - -# 2. Replace "your-model-here" everywhere with your model -# (these are spread across the Deriver, Dialectic, Summary, and Dream sections) -DERIVER_MODEL=google/gemini-2.5-flash # e.g. google/gemini-2.5-flash -SUMMARY_MODEL=google/gemini-2.5-flash -DREAM_MODEL=google/gemini-2.5-flash -DIALECTIC_LEVELS__minimal__MODEL=google/gemini-2.5-flash -# ... same for low, medium, high, max - -# 3. Everything else is already configured: -# - PROVIDER=custom for all features (routes through your endpoint) -# - THINKING_BUDGET_TOKENS=0 (correct for non-Anthropic models) -# - LLM_EMBEDDING_PROVIDER=openrouter (uses same endpoint for embeddings) +# Required for the built-in defaults +LLM_OPENAI_API_KEY=sk-... ``` -Use find-and-replace to swap all `your-model-here` with your chosen model in one step. +If you want a different model or an OpenAI-compatible proxy, uncomment and edit the relevant `*_MODEL_CONFIG__TRANSPORT`, `*_MODEL_CONFIG__MODEL`, and `*_MODEL_CONFIG__OVERRIDES__BASE_URL` lines in the Deriver, Dialectic, Summary, and Dream sections. For example: + +```bash +LLM_OPENAI_API_KEY=sk-or-v1-... + +DERIVER_MODEL_CONFIG__TRANSPORT=openai +DERIVER_MODEL_CONFIG__MODEL=google/gemini-2.5-flash +DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL=https://openrouter.ai/api/v1 +``` For recommended model tiers per feature, using multiple providers, or direct vendor API keys, see the [Configuration Guide](./configuration#llm-configuration). diff --git a/docs/v3/contributing/troubleshooting.mdx b/docs/v3/contributing/troubleshooting.mdx index f041e2db4..bb71a475d 100644 --- a/docs/v3/contributing/troubleshooting.mdx +++ b/docs/v3/contributing/troubleshooting.mdx @@ -115,12 +115,12 @@ Messages are stored but no observations, summaries, or representations are being ### OpenRouter / custom provider not working -If you set `PROVIDER=custom` but calls fail: +If calls to an OpenAI-compatible proxy fail: -1. **Verify the endpoint and key are set:** +1. **Verify the endpoint and key are set.** Use `transport = "openai"` with a base URL override: ```bash - LLM_OPENAI_COMPATIBLE_BASE_URL=https://openrouter.ai/api/v1 - LLM_OPENAI_COMPATIBLE_API_KEY=sk-or-v1-... 
+ LLM_OPENAI_API_KEY=sk-or-v1-... + DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL=https://openrouter.ai/api/v1 ``` 2. **Check model names match the provider's format.** OpenRouter uses `vendor/model` format (e.g., `anthropic/claude-haiku-4-5`), not the raw model ID. @@ -139,29 +139,30 @@ If you set `PROVIDER=custom` but calls fail: 2. **In Docker**, `localhost` inside a container doesn't reach the host. Use `host.docker.internal` (macOS/Windows) or the host's network IP: ```bash - LLM_VLLM_BASE_URL=http://host.docker.internal:8000/v1 + DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL=http://host.docker.internal:8000/v1 ``` 3. **Structured output failures** — vLLM's structured output support is limited to certain response formats. If you see JSON parsing errors, check the deriver/dream logs for the raw response. ### Thinking budget errors with non-Anthropic providers -If you see errors like `thinking budget not supported`, `invalid parameter`, or silent failures where agents produce no output, your `THINKING_BUDGET_TOKENS` is likely set to a value > 0 with a provider that doesn't support Anthropic-style extended thinking. +If you see errors like `thinking budget not supported`, `invalid parameter`, or silent failures where agents produce no output, one of your per-component `*_MODEL_CONFIG__THINKING_BUDGET_TOKENS` overrides is likely set to a value > 0 with a provider that doesn't support Anthropic-style extended thinking. The built-in defaults do not set thinking budgets, so this only applies if you added those overrides yourself. -**Fix:** Set `THINKING_BUDGET_TOKENS=0` for every component when using non-Anthropic providers: +**Fix:** Set `*_MODEL_CONFIG__THINKING_BUDGET_TOKENS=0` for every component when using models that don't support thinking: ```bash -DERIVER_THINKING_BUDGET_TOKENS=0 -SUMMARY_THINKING_BUDGET_TOKENS=0 -DREAM_THINKING_BUDGET_TOKENS=0 -DIALECTIC_LEVELS__minimal__THINKING_BUDGET_TOKENS=0 -DIALECTIC_LEVELS__low__THINKING_BUDGET_TOKENS=0 -DIALECTIC_LEVELS__medium__THINKING_BUDGET_TOKENS=0 -DIALECTIC_LEVELS__high__THINKING_BUDGET_TOKENS=0 -DIALECTIC_LEVELS__max__THINKING_BUDGET_TOKENS=0 +DERIVER_MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 +SUMMARY_MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 +DREAM_DEDUCTION_MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 +DREAM_INDUCTION_MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 +DIALECTIC_LEVELS__minimal__MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 +DIALECTIC_LEVELS__low__MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 +DIALECTIC_LEVELS__medium__MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 +DIALECTIC_LEVELS__high__MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 +DIALECTIC_LEVELS__max__MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 ``` -This applies to OpenRouter (with non-Anthropic models), vLLM, Ollama, Groq, Google, and OpenAI providers. Only Anthropic models support the thinking budget parameter. +For OpenAI reasoning models, use `*_MODEL_CONFIG__THINKING_EFFORT` instead of `*_MODEL_CONFIG__THINKING_BUDGET_TOKENS`. ## Database Issues diff --git a/docs/v3/guides/integrations/paperclip.mdx b/docs/v3/guides/integrations/paperclip.mdx index de5792467..3b2aa1568 100644 --- a/docs/v3/guides/integrations/paperclip.mdx +++ b/docs/v3/guides/integrations/paperclip.mdx @@ -57,7 +57,7 @@ The current plugin gives agent peers explicit observation settings: - `observe_me` defaults to `true` - `observe_others` defaults to `true` -In practice, that means agent peers can both be observed by Honcho and form representations of other peers they interact with. 
+In practice, that means agent peers can both be observed by Honcho and form representations of other peers they interact with. ## How It Works diff --git a/pyproject.toml b/pyproject.toml index 7b0fca890..228ca719b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,6 @@ readme = "README.md" requires-python = ">=3.10" dependencies = [ "fastapi[standard]>=0.131.0", - "groq>=0.31.0", "python-dotenv>=1.0.0", "sqlalchemy>=2.0.30", "fastapi-pagination>=0.14.2", @@ -95,6 +94,12 @@ asyncio_default_fixture_loop_scope = "session" addopts = "--strict-markers -n auto --ignore=tests/alembic" testpaths = ["tests"] pythonpath = ["src"] +markers = [ + "live_llm: calls live LLM provider APIs and requires --live-llm", + "requires_anthropic: requires LLM_ANTHROPIC_API_KEY", + "requires_openai: requires LLM_OPENAI_API_KEY", + "requires_gemini: requires LLM_GEMINI_API_KEY", +] filterwarnings = [ "ignore:Call to deprecated close\\. \\(Use aclose\\(\\) instead\\).*:DeprecationWarning", "ignore:websockets\\.legacy is deprecated; see .* for upgrade instructions:DeprecationWarning", diff --git a/src/config.py b/src/config.py index f01c451b1..cae0c5dee 100644 --- a/src/config.py +++ b/src/config.py @@ -1,10 +1,11 @@ import logging +import os from pathlib import Path -from typing import Annotated, Any, ClassVar, Literal, Protocol +from typing import Annotated, Any, ClassVar, Literal, cast import tomllib from dotenv import load_dotenv -from pydantic import BaseModel, Field, field_validator, model_validator +from pydantic import AliasChoices, BaseModel, Field, field_validator, model_validator from pydantic.fields import FieldInfo from pydantic_settings import ( BaseSettings, @@ -14,17 +15,27 @@ SettingsConfigDict, ) -from src.utils.types import SupportedProviders - # Load .env file for local development. # Make sure this is called before AppSettings is instantiated if you rely on .env for AppSettings construction. -load_dotenv(override=True) +if not os.getenv("PYTHON_DOTENV_DISABLED"): + load_dotenv(override=True) logger = logging.getLogger(__name__) +ModelTransport = Literal["anthropic", "openai", "gemini"] +EmbeddingTransport = Literal["openai", "gemini"] + + +def _default_embedding_model_for_transport(transport: EmbeddingTransport) -> str: + if transport == "gemini": + return "gemini-embedding-001" + return "text-embedding-3-small" + def load_toml_config(config_path: str = "config.toml") -> dict[str, Any]: """Load configuration from TOML file if it exists.""" + if config_path == "config.toml" and os.getenv("HONCHO_CONFIG_TOML_DISABLED"): + return {} config_file = Path(config_path) if config_file.exists(): try: @@ -40,13 +51,463 @@ def load_toml_config(config_path: str = "config.toml") -> dict[str, Any]: TOML_CONFIG = load_toml_config() -class LLMComponentSettings(Protocol): - """Protocol for settings classes that use LLM providers with backup support.""" +ThinkingEffortLevel = Literal[ + "none", "minimal", "low", "medium", "high", "xhigh", "max" +] + + +class ModelOverrideSettings(BaseModel): + """Advanced module-level transport overrides.""" + + api_key: str | None = None + api_key_env: str | None = None + base_url: str | None = None + + provider_params: dict[str, Any] = Field(default_factory=dict) + + +class PromptCachePolicy(BaseModel): + """Per-call prompt-caching configuration. + + Lives in config.py (not src/llm/caching.py) so ModelConfig can reference + it as a field without a circular import. src/llm/caching.py re-exports + this class for existing import paths. 
+ """ + + mode: Literal["none", "prefix", "gemini_cached_content"] = "none" + ttl_seconds: int | None = None + key_version: str = "v1" + + +def _normalize_model_transport(data: Any) -> Any: + """Normalize 'provider/model' shorthand into separate transport + model fields.""" + if not isinstance(data, dict): + return data + raw_data = cast(dict[Any, Any], data) + update: dict[str, Any] = {str(key): value for key, value in raw_data.items()} + model_value = update.get("model") + transport_value = update.get("transport") + if isinstance(model_value, str) and "/" in model_value and transport_value is None: + prefix, bare_model = model_value.split("/", 1) + if prefix in {"anthropic", "openai", "gemini"}: + update["transport"] = prefix + update["model"] = bare_model + return update + + +def _validate_thinking_constraints( + transport: ModelTransport, thinking_budget_tokens: int | None +) -> None: + """Enforce transport-specific thinking_budget_tokens rules. + + Anthropic requires a minimum of 1024 tokens when thinking is enabled. + Gemini/OpenAI accept any non-negative value (including 0 to disable). + """ + if ( + transport == "anthropic" + and thinking_budget_tokens is not None + and 0 < thinking_budget_tokens < 1024 + ): + raise ValueError("thinking_budget_tokens must be >= 1024 for Anthropic models") + + +class FallbackModelSettings(BaseModel): + """Independent fallback model configuration. No inheritance from primary.""" + + model: str + transport: ModelTransport + + temperature: float | None = None + top_p: float | None = None + top_k: int | None = None + frequency_penalty: float | None = None + presence_penalty: float | None = None + seed: int | None = None + + thinking_effort: ThinkingEffortLevel | None = Field( + default=None, + validation_alias=AliasChoices("thinking_effort", "reasoning_effort"), + ) + thinking_budget_tokens: int | None = None + + max_output_tokens: int | None = None + stop_sequences: list[str] | None = None + + cache_policy: PromptCachePolicy | None = None + + overrides: ModelOverrideSettings = Field(default_factory=ModelOverrideSettings) + + @model_validator(mode="before") + @classmethod + def _normalize_legacy_model_format(cls, data: Any) -> Any: + return _normalize_model_transport(data) + + @property + def reasoning_effort(self) -> ThinkingEffortLevel | None: + return self.thinking_effort + + @model_validator(mode="after") + def _validate_runtime_shape(self) -> "FallbackModelSettings": + _validate_thinking_constraints(self.transport, self.thinking_budget_tokens) + return self + + +class ConfiguredModelSettings(BaseModel): + """Operator-configurable persisted model settings.""" + + model: str + transport: ModelTransport + + fallback: FallbackModelSettings | None = None + + temperature: float | None = None + top_p: float | None = None + top_k: int | None = None + frequency_penalty: float | None = None + presence_penalty: float | None = None + seed: int | None = None + + thinking_effort: ThinkingEffortLevel | None = Field( + default=None, + validation_alias=AliasChoices("thinking_effort", "reasoning_effort"), + ) + thinking_budget_tokens: int | None = None + + max_output_tokens: int | None = None + stop_sequences: list[str] | None = None + + cache_policy: PromptCachePolicy | None = None + + overrides: ModelOverrideSettings = Field(default_factory=ModelOverrideSettings) + + @model_validator(mode="before") + @classmethod + def _normalize_legacy_model_format(cls, data: Any) -> Any: + return _normalize_model_transport(data) + + @property + def reasoning_effort(self) -> 
ThinkingEffortLevel | None: + """Backward-compatible alias for the generic thinking effort field.""" + return self.thinking_effort + + @model_validator(mode="after") + def _validate_runtime_shape(self) -> "ConfiguredModelSettings": + _validate_thinking_constraints(self.transport, self.thinking_budget_tokens) + return self + + +class ResolvedFallbackConfig(BaseModel): + """Runtime-resolved fallback config with credentials already resolved.""" + + model: str + transport: ModelTransport + + api_key: str | None = None + base_url: str | None = None + + temperature: float | None = None + top_p: float | None = None + top_k: int | None = None + frequency_penalty: float | None = None + presence_penalty: float | None = None + seed: int | None = None + + thinking_effort: ThinkingEffortLevel | None = Field( + default=None, + validation_alias=AliasChoices("thinking_effort", "reasoning_effort"), + ) + thinking_budget_tokens: int | None = None + provider_params: dict[str, Any] = Field(default_factory=dict) + + max_output_tokens: int | None = None + stop_sequences: list[str] | None = None + + cache_policy: PromptCachePolicy | None = None + + @property + def reasoning_effort(self) -> ThinkingEffortLevel | None: + return self.thinking_effort + + +class ModelConfig(BaseModel): + """Reusable model configuration for any non-embedding LLM caller.""" + + model: str + transport: ModelTransport + + fallback: ResolvedFallbackConfig | None = None + + api_key: str | None = None + base_url: str | None = None + + temperature: float | None = None + top_p: float | None = None + top_k: int | None = None + frequency_penalty: float | None = None + presence_penalty: float | None = None + seed: int | None = None + + thinking_effort: ThinkingEffortLevel | None = Field( + default=None, + validation_alias=AliasChoices("thinking_effort", "reasoning_effort"), + ) + thinking_budget_tokens: int | None = None + provider_params: dict[str, Any] = Field(default_factory=dict) + + max_output_tokens: int | None = None + stop_sequences: list[str] | None = None + + cache_policy: PromptCachePolicy | None = None + + @model_validator(mode="before") + @classmethod + def _normalize_legacy_model_format(cls, data: Any) -> Any: + return _normalize_model_transport(data) + + @property + def reasoning_effort(self) -> ThinkingEffortLevel | None: + """Backward-compatible alias for the generic thinking effort field.""" + return self.thinking_effort + + @model_validator(mode="after") + def _validate_thinking_constraints_on_self(self) -> "ModelConfig": + _validate_thinking_constraints(self.transport, self.thinking_budget_tokens) + return self + + def for_model( + self, + model_override: str, + *, + transport_override: ModelTransport | None = None, + ) -> "ModelConfig": + return self.model_copy( + update={ + "model": model_override, + "transport": transport_override or self.transport, + } + ) + + +class ConfiguredEmbeddingModelSettings(BaseModel): + """Operator-configurable persisted embedding settings.""" + + model: str = "text-embedding-3-small" + transport: EmbeddingTransport = "openai" + overrides: ModelOverrideSettings = Field(default_factory=ModelOverrideSettings) + + @model_validator(mode="before") + @classmethod + def _normalize_legacy_model_format(cls, data: Any) -> Any: + if not isinstance(data, dict): + return data + + raw_data = cast(dict[Any, Any], data) + update: dict[str, Any] = {str(key): value for key, value in raw_data.items()} + model_value = update.get("model") + transport_value = update.get("transport") + if ( + isinstance(model_value, 
str) + and "/" in model_value + and transport_value is None + ): + prefix, bare_model = model_value.split("/", 1) + if prefix in {"openai", "gemini"}: + update["transport"] = prefix + update["model"] = bare_model + return update + + @model_validator(mode="after") + def _default_model_for_transport(self) -> "ConfiguredEmbeddingModelSettings": + if "model" not in self.model_fields_set: + self.model = _default_embedding_model_for_transport(self.transport) + return self + + +class EmbeddingModelConfig(BaseModel): + """Runtime embedding configuration with resolved credentials.""" + + model: str = "text-embedding-3-small" + transport: EmbeddingTransport = "openai" + api_key: str | None = None + base_url: str | None = None + + @model_validator(mode="before") + @classmethod + def _normalize_legacy_model_format(cls, data: Any) -> Any: + if not isinstance(data, dict): + return data + + raw_data = cast(dict[Any, Any], data) + update: dict[str, Any] = {str(key): value for key, value in raw_data.items()} + model_value = update.get("model") + transport_value = update.get("transport") + if ( + isinstance(model_value, str) + and "/" in model_value + and transport_value is None + ): + prefix, bare_model = model_value.split("/", 1) + if prefix in {"openai", "gemini"}: + update["transport"] = prefix + update["model"] = bare_model + return update + + @model_validator(mode="after") + def _default_model_for_transport(self) -> "EmbeddingModelConfig": + if "model" not in self.model_fields_set: + self.model = _default_embedding_model_for_transport(self.transport) + return self + + +def _resolve_secret(value: str | None, env_name: str | None) -> str | None: + if value is not None: + return value + if env_name is None: + return None + return os.getenv(env_name) + + +def _resolve_fallback_config( + fallback: FallbackModelSettings, +) -> ResolvedFallbackConfig: + """Resolve a FallbackModelSettings into a runtime ResolvedFallbackConfig.""" + return ResolvedFallbackConfig( + model=fallback.model, + transport=fallback.transport, + api_key=_resolve_secret( + fallback.overrides.api_key, + fallback.overrides.api_key_env, + ), + base_url=fallback.overrides.base_url, + temperature=fallback.temperature, + top_p=fallback.top_p, + top_k=fallback.top_k, + frequency_penalty=fallback.frequency_penalty, + presence_penalty=fallback.presence_penalty, + seed=fallback.seed, + thinking_effort=fallback.thinking_effort, + thinking_budget_tokens=fallback.thinking_budget_tokens, + provider_params=fallback.overrides.provider_params, + max_output_tokens=fallback.max_output_tokens, + stop_sequences=fallback.stop_sequences, + cache_policy=fallback.cache_policy, + ) + + +def resolve_model_config(configured: ConfiguredModelSettings) -> ModelConfig: + """Resolve persisted model settings into the runtime ModelConfig.""" + + resolved_fallback = ( + _resolve_fallback_config(configured.fallback) + if configured.fallback is not None + else None + ) + + return ModelConfig( + model=configured.model, + transport=configured.transport, + fallback=resolved_fallback, + api_key=_resolve_secret( + configured.overrides.api_key, + configured.overrides.api_key_env, + ), + base_url=configured.overrides.base_url, + temperature=configured.temperature, + top_p=configured.top_p, + top_k=configured.top_k, + frequency_penalty=configured.frequency_penalty, + presence_penalty=configured.presence_penalty, + seed=configured.seed, + thinking_effort=configured.thinking_effort, + thinking_budget_tokens=configured.thinking_budget_tokens, + 
provider_params=configured.overrides.provider_params, + max_output_tokens=configured.max_output_tokens, + stop_sequences=configured.stop_sequences, + cache_policy=configured.cache_policy, + ) + - PROVIDER: SupportedProviders - MODEL: str - BACKUP_PROVIDER: SupportedProviders | None - BACKUP_MODEL: str | None +def _default_embedding_api_key(transport: EmbeddingTransport) -> str | None: + """Fall back to the global LLM API key for the matching transport.""" + if transport == "openai": + return settings.LLM.OPENAI_API_KEY + if transport == "gemini": + return settings.LLM.GEMINI_API_KEY + + +def resolve_embedding_model_config( + configured: ConfiguredEmbeddingModelSettings, +) -> EmbeddingModelConfig: + """Resolve persisted embedding settings into the runtime config.""" + + api_key = _resolve_secret( + configured.overrides.api_key, + configured.overrides.api_key_env, + ) + if api_key is None: + api_key = _default_embedding_api_key(configured.transport) + + return EmbeddingModelConfig( + model=configured.model, + transport=configured.transport, + api_key=api_key, + base_url=configured.overrides.base_url, + ) + + +_TRANSPORT_SPECIFIC_THINKING_KEYS: frozenset[str] = frozenset( + {"thinking_budget_tokens", "thinking_effort"} +) + + +def _fill_defaults_for_nested_field( + data: dict[str, Any], + field_name: str, + default_factory: Any, +) -> dict[str, Any]: + """Fill missing keys in a partial nested dict from the field's defaults. + + When Pydantic's env_nested_delimiter splits an env var like + ``DERIVER_MODEL_CONFIG__THINKING_BUDGET_TOKENS=2048`` it produces + ``{"MODEL_CONFIG": {"THINKING_BUDGET_TOKENS": 2048}}``. Without merging + that partial dict would fail validation because required keys like + ``model`` and ``transport`` are missing. This helper fills them from + the field's ``default_factory`` so partial overrides work. + + If the env override switches ``transport`` to a value that differs from + the default's, transport-specific thinking params + (``thinking_budget_tokens``, ``thinking_effort``) are dropped from the + default before merging. This prevents e.g. a Gemini default's + ``thinking_budget_tokens=1024`` from leaking into an OpenAI override, + which would then be rejected by the OpenAI backend (OpenAI uses + ``reasoning.effort``, not a token budget). Explicit thinking params in + the env override are preserved. 
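+
+    Illustration (hypothetical values): merging a default of
+    ``{"transport": "anthropic", "model": "m", "thinking_budget_tokens": 1024}``
+    with an override of ``{"transport": "openai", "model": "gpt-5.4-mini"}``
+    yields an OpenAI config with no ``thinking_budget_tokens`` carried over,
+    while an override that leaves ``transport`` unchanged inherits it as-is.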
+ """ + raw: Any = data.get(field_name) or data.get(field_name.lower()) + if not isinstance(raw, dict): + return data + + default_obj = default_factory() + if isinstance(default_obj, BaseModel): + default_dict: dict[str, Any] = default_obj.model_dump(by_alias=True) + else: + default_dict = dict(default_obj) + + raw_dict = cast(dict[str, Any], raw) + raw_lower = {k.lower(): v for k, v in raw_dict.items()} + default_lower = {k.lower(): v for k, v in default_dict.items()} + override_transport = raw_lower.get("transport") + default_transport = default_lower.get("transport") + if override_transport is not None and override_transport != default_transport: + for k in list(default_dict.keys()): + if k.lower() in _TRANSPORT_SPECIFIC_THINKING_KEYS: + del default_dict[k] + + merged: dict[str, Any] = {**default_dict, **raw_dict} + # Preserve the key casing used in data + key = field_name if field_name in data else field_name.lower() + data[key] = merged + return data class TomlConfigSettingsSource(PydanticBaseSettingsSource): @@ -61,6 +522,7 @@ def __init__(self, settings_cls: type[BaseSettings]) -> None: "SENTRY": "sentry", "CACHE": "cache", "LLM": "llm", + "EMBEDDING": "embedding", "DERIVER": "deriver", "PEER_CARD": "peer_card", "DIALECTIC": "dialectic", @@ -132,26 +594,6 @@ def settings_customise_sources( # pyright: ignore ) -class BackupLLMSettingsMixin: - """Mixin class for settings that support backup LLM provider configuration. - - Provides backup provider and model fields along with validation to ensure - both fields are set together or both are None. - """ - - BACKUP_PROVIDER: SupportedProviders | None = None - BACKUP_MODEL: str | None = None - - @model_validator(mode="after") - def _validate_backup_configuration(self): - """Ensure both backup fields are set together or both are None.""" - if (self.BACKUP_PROVIDER is None) != (self.BACKUP_MODEL is None): - raise ValueError( - "BACKUP_PROVIDER and BACKUP_MODEL must both be set or both be None" - ) - return self - - class DBSettings(HonchoSettings): model_config = SettingsConfigDict(env_prefix="DB_", extra="ignore") # pyright: ignore @@ -204,16 +646,7 @@ class LLMSettings(HonchoSettings): # API Keys for LLM providers ANTHROPIC_API_KEY: str | None = None OPENAI_API_KEY: str | None = None - OPENAI_COMPATIBLE_API_KEY: str | None = None GEMINI_API_KEY: str | None = None - GROQ_API_KEY: str | None = None - OPENAI_COMPATIBLE_BASE_URL: str | None = None - - # Separate vLLM endpoint (for local models) - VLLM_API_KEY: str | None = None - VLLM_BASE_URL: str | None = None - - EMBEDDING_PROVIDER: Literal["openai", "gemini", "openrouter"] = "openai" # General LLM settings DEFAULT_MAX_TOKENS: Annotated[int, Field(default=1000, gt=0, le=100_000)] = 2500 @@ -232,8 +665,41 @@ class LLMSettings(HonchoSettings): ) -class DeriverSettings(BackupLLMSettingsMixin, HonchoSettings): - model_config = SettingsConfigDict(env_prefix="DERIVER_", extra="ignore") # pyright: ignore +class EmbeddingSettings(HonchoSettings): + model_config = SettingsConfigDict( # pyright: ignore + env_prefix="EMBEDDING_", env_nested_delimiter="__", extra="ignore" + ) + + @staticmethod + def _MODEL_CONFIG_DEFAULT() -> ConfiguredEmbeddingModelSettings: + return ConfiguredEmbeddingModelSettings( + transport="openai", + model="text-embedding-3-small", + ) + + MODEL_CONFIG: ConfiguredEmbeddingModelSettings = Field( + default_factory=_MODEL_CONFIG_DEFAULT + ) + VECTOR_DIMENSIONS: Annotated[int, Field(default=1536, gt=0)] = 1536 + MAX_INPUT_TOKENS: Annotated[int, Field(default=8192, gt=0)] = 8192 + 
MAX_TOKENS_PER_REQUEST: Annotated[int, Field(default=300_000, gt=0)] = 300_000 + + @model_validator(mode="before") + @classmethod + def _merge_model_config_defaults(cls, data: Any) -> Any: + if isinstance(data, dict): + _fill_defaults_for_nested_field( + cast(dict[str, Any], data), + "MODEL_CONFIG", + cls._MODEL_CONFIG_DEFAULT, + ) + return data # pyright: ignore[reportUnknownVariableType] + + +class DeriverSettings(HonchoSettings): + model_config = SettingsConfigDict( # pyright: ignore + env_prefix="DERIVER_", env_nested_delimiter="__", extra="ignore" + ) ENABLED: bool = True @@ -248,16 +714,21 @@ class DeriverSettings(BackupLLMSettingsMixin, HonchoSettings): int, Field(default=30 * 24 * 3600, gt=0) ] = 30 * 24 * 3600 # 30 days default - PROVIDER: SupportedProviders = "google" - MODEL: str = "gemini-2.5-flash-lite" - TEMPERATURE: float | None = None + @staticmethod + def _MODEL_CONFIG_DEFAULT() -> ConfiguredModelSettings: + # Minimal default: transport + model only. Any other knobs would merge + # into operator-supplied env / config.toml overrides via + # _fill_defaults_for_nested_field and clobber intent. + return ConfiguredModelSettings( + transport="openai", + model="gpt-5.4-mini", + ) + + MODEL_CONFIG: ConfiguredModelSettings = Field(default_factory=_MODEL_CONFIG_DEFAULT) # Whether to deduplicate documents when creating them DEDUPLICATE: bool = True - MAX_OUTPUT_TOKENS: Annotated[int, Field(default=4096, gt=0, le=100_000)] = 4096 - THINKING_BUDGET_TOKENS: Annotated[int, Field(default=1024, gt=0, le=5000)] = 1024 - LOG_OBSERVATIONS: bool = False MAX_INPUT_TOKENS: Annotated[int, Field(default=23000, gt=0, le=23000)] = 23000 @@ -276,6 +747,17 @@ class DeriverSettings(BackupLLMSettingsMixin, HonchoSettings): # When enabled, bypasses the batch token threshold and processes work immediately FLUSH_ENABLED: bool = False + @model_validator(mode="before") + @classmethod + def _merge_model_config_defaults(cls, data: Any) -> Any: + if isinstance(data, dict): + _fill_defaults_for_nested_field( + cast(dict[str, Any], data), + "MODEL_CONFIG", + cls._MODEL_CONFIG_DEFAULT, + ) + return data # pyright: ignore[reportUnknownVariableType] + @model_validator(mode="after") def validate_batch_tokens_vs_context_limit(self): if self.REPRESENTATION_BATCH_MAX_TOKENS > self.MAX_INPUT_TOKENS: @@ -307,14 +789,9 @@ class DialecticLevelSettings(BaseModel): model_config = SettingsConfigDict(populate_by_name=True) # pyright: ignore - PROVIDER: Annotated[SupportedProviders, Field(validation_alias="provider")] - MODEL: Annotated[str, Field(validation_alias="model")] - BACKUP_PROVIDER: Annotated[ - SupportedProviders | None, Field(validation_alias="backup_provider") - ] = None - BACKUP_MODEL: Annotated[str | None, Field(validation_alias="backup_model")] = None - THINKING_BUDGET_TOKENS: Annotated[ - int, Field(ge=0, le=100_000, validation_alias="thinking_budget_tokens") + MODEL_CONFIG: Annotated[ + ConfiguredModelSettings, + Field(validation_alias="model_config"), ] MAX_TOOL_ITERATIONS: Annotated[ int, Field(ge=0, le=50, validation_alias="max_tool_iterations") @@ -326,72 +803,69 @@ class DialecticLevelSettings(BaseModel): None # None/auto lets model decide, "any"/"required" forces tool use ) - @model_validator(mode="after") - def _validate_backup_configuration(self) -> "DialecticLevelSettings": - """Ensure both backup fields are set together or both are None.""" - if (self.BACKUP_PROVIDER is None) != (self.BACKUP_MODEL is None): - raise ValueError( - "BACKUP_PROVIDER and BACKUP_MODEL must both be set or both be None" - ) - 
return self
-
     @model_validator(mode="after")
     def _validate_anthropic_thinking_budget(self) -> "DialecticLevelSettings":
         """Ensure Anthropic thinking budget is >= 1024 when enabled."""
         if (
-            self.PROVIDER == "anthropic"
-            and self.THINKING_BUDGET_TOKENS > 0
-            and self.THINKING_BUDGET_TOKENS < 1024
+            self.MODEL_CONFIG.transport == "anthropic"
+            and self.MODEL_CONFIG.thinking_budget_tokens is not None
+            and self.MODEL_CONFIG.thinking_budget_tokens > 0
+            and self.MODEL_CONFIG.thinking_budget_tokens < 1024
         ):
             raise ValueError(
-                f"THINKING_BUDGET_TOKENS must be >= 1024 for Anthropic provider when enabled (got {self.THINKING_BUDGET_TOKENS})"
+                "MODEL_CONFIG.thinking_budget_tokens must be >= 1024 for "
+                + "Anthropic models when enabled "
+                + f"(got {self.MODEL_CONFIG.thinking_budget_tokens})"
             )
         return self
 
 
+def _default_dialectic_levels() -> dict[ReasoningLevel, DialecticLevelSettings]:
+    # Minimal defaults per level: transport + model only. Non-MODEL_CONFIG
+    # level tuning (MAX_TOOL_ITERATIONS, MAX_OUTPUT_TOKENS, TOOL_CHOICE)
+    # stays here because it's the per-level behavior, not a model knob —
+    # operators still override any of it via
+    # DIALECTIC_LEVELS__<level>__MODEL_CONFIG__* without conflict.
+    def _default_model_config() -> ConfiguredModelSettings:
+        return ConfiguredModelSettings(
+            transport="openai",
+            model="gpt-5.4-mini",
+        )
+
+    return {
+        "minimal": DialecticLevelSettings(
+            MODEL_CONFIG=_default_model_config(),
+            MAX_TOOL_ITERATIONS=1,
+            MAX_OUTPUT_TOKENS=250,
+            TOOL_CHOICE="any",
+        ),
+        "low": DialecticLevelSettings(
+            MODEL_CONFIG=_default_model_config(),
+            MAX_TOOL_ITERATIONS=5,
+            TOOL_CHOICE="any",
+        ),
+        "medium": DialecticLevelSettings(
+            MODEL_CONFIG=_default_model_config(),
+            MAX_TOOL_ITERATIONS=2,
+        ),
+        "high": DialecticLevelSettings(
+            MODEL_CONFIG=_default_model_config(),
+            MAX_TOOL_ITERATIONS=4,
+        ),
+        "max": DialecticLevelSettings(
+            MODEL_CONFIG=_default_model_config(),
+            MAX_TOOL_ITERATIONS=10,
+        ),
+    }
+
+
 class DialecticSettings(HonchoSettings):
     model_config = SettingsConfigDict(  # pyright: ignore
         env_prefix="DIALECTIC_", env_nested_delimiter="__", extra="ignore"
     )
 
-    # Per-level settings for provider, model, thinking budget, and tool iterations
-    # TODO: Fill in appropriate values for each reasoning level
     LEVELS: dict[ReasoningLevel, DialecticLevelSettings] = Field(
-        default_factory=lambda: {
-            "minimal": DialecticLevelSettings(
-                PROVIDER="google",
-                MODEL="gemini-2.5-flash-lite",
-                THINKING_BUDGET_TOKENS=0,
-                MAX_TOOL_ITERATIONS=1,
-                MAX_OUTPUT_TOKENS=250,
-                TOOL_CHOICE="any",
-            ),
-            "low": DialecticLevelSettings(
-                PROVIDER="google",
-                MODEL="gemini-2.5-flash-lite",
-                THINKING_BUDGET_TOKENS=0,
-                MAX_TOOL_ITERATIONS=5,
-                TOOL_CHOICE="any",
-            ),
-            "medium": DialecticLevelSettings(
-                PROVIDER="anthropic",
-                MODEL="claude-haiku-4-5",
-                THINKING_BUDGET_TOKENS=1024,
-                MAX_TOOL_ITERATIONS=2,
-            ),
-            "high": DialecticLevelSettings(
-                PROVIDER="anthropic",
-                MODEL="claude-haiku-4-5",
-                THINKING_BUDGET_TOKENS=1024,
-                MAX_TOOL_ITERATIONS=4,
-            ),
-            "max": DialecticLevelSettings(
-                PROVIDER="anthropic",
-                MODEL="claude-haiku-4-5",
-                THINKING_BUDGET_TOKENS=2048,
-                MAX_TOOL_ITERATIONS=10,
-            ),
-        }
+        default_factory=_default_dialectic_levels
     )
 
     MAX_OUTPUT_TOKENS: Annotated[int, Field(default=8192, gt=0, le=100_000)] = 8192
@@ -406,13 +880,68 @@ class DialecticSettings(HonchoSettings):
         int, Field(default=4_096, ge=0, le=16_384)
     ] = 4_096
 
+    @model_validator(mode="before")
+    @classmethod
+    def _merge_level_defaults(cls, data: Any) -> Any:
+        """Merge partial level overrides with 
built-in defaults.""" + if not isinstance(data, dict): + return data + typed_data = cast(dict[str, Any], data) + levels_raw: dict[str, Any] | None = typed_data.get("LEVELS") or typed_data.get( + "levels" + ) + if not isinstance(levels_raw, dict): + return data # pyright: ignore[reportUnknownVariableType] + defaults = _default_dialectic_levels() + for level_name_key, level_override_val in levels_raw.items(): + level_name = str(level_name_key) + if not isinstance(level_override_val, dict): + continue + level_override = cast(dict[str, Any], level_override_val) + if level_name in defaults: + base: dict[str, Any] = defaults[level_name].model_dump(by_alias=True) + # Recursively merge nested MODEL_CONFIG / model_config too. + # model_dump() always produces the Python field name + # ("MODEL_CONFIG"), but TOML overrides arrive as lowercase + # ("model_config"). Check both casings in the override and + # resolve the base value from whichever casing is present. + for mc_key in ("MODEL_CONFIG", "model_config"): + if mc_key in level_override and isinstance( + level_override[mc_key], dict + ): + base_mc: dict[str, Any] = dict( + base.get("MODEL_CONFIG") or base.get("model_config") or {} + ) + override_mc = cast(dict[str, Any], level_override[mc_key]) + override_lower = {k.lower(): v for k, v in override_mc.items()} + base_lower = {k.lower(): v for k, v in base_mc.items()} + override_transport = override_lower.get("transport") + base_transport = base_lower.get("transport") + if ( + override_transport is not None + and override_transport != base_transport + ): + for k in list(base_mc.keys()): + if k.lower() in _TRANSPORT_SPECIFIC_THINKING_KEYS: + del base_mc[k] + level_override[mc_key] = {**base_mc, **override_mc} + levels_raw[level_name] = {**base, **level_override} + return data # pyright: ignore[reportUnknownVariableType] + @model_validator(mode="after") def _validate_token_budgets(self) -> "DialecticSettings": """Ensure the output token limit exceeds all thinking budgets.""" for level, level_settings in self.LEVELS.items(): - if self.MAX_OUTPUT_TOKENS <= level_settings.THINKING_BUDGET_TOKENS: + thinking_budget = level_settings.MODEL_CONFIG.thinking_budget_tokens or 0 + effective_max = ( + level_settings.MAX_OUTPUT_TOKENS + if level_settings.MAX_OUTPUT_TOKENS is not None + else self.MAX_OUTPUT_TOKENS + ) + if thinking_budget > 0 and thinking_budget >= effective_max: raise ValueError( - f"MAX_OUTPUT_TOKENS must be greater than THINKING_BUDGET_TOKENS for level '{level}'" + "MAX_OUTPUT_TOKENS must be greater than MODEL_CONFIG." + + f"thinking_budget_tokens for level '{level}'" ) return self @@ -425,21 +954,40 @@ def _validate_all_levels_present(self) -> "DialecticSettings": return self -class SummarySettings(BackupLLMSettingsMixin, HonchoSettings): - model_config = SettingsConfigDict(env_prefix="SUMMARY_", extra="ignore") # pyright: ignore +class SummarySettings(HonchoSettings): + model_config = SettingsConfigDict( # pyright: ignore + env_prefix="SUMMARY_", env_nested_delimiter="__", extra="ignore" + ) ENABLED: bool = True MESSAGES_PER_SHORT_SUMMARY: Annotated[int, Field(default=20, gt=0, le=100)] = 20 MESSAGES_PER_LONG_SUMMARY: Annotated[int, Field(default=60, gt=0, le=500)] = 60 - PROVIDER: SupportedProviders = "google" - MODEL: str = "gemini-2.5-flash" + @staticmethod + def _MODEL_CONFIG_DEFAULT() -> ConfiguredModelSettings: + # Minimal default; extra knobs would merge into env/TOML overrides. 
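+        # A partial override such as
+        # SUMMARY_MODEL_CONFIG__THINKING_BUDGET_TOKENS=0 merges against this
+        # default via _merge_model_config_defaults below.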
+ return ConfiguredModelSettings( + transport="openai", + model="gpt-5.4-mini", + ) + + MODEL_CONFIG: ConfiguredModelSettings = Field(default_factory=_MODEL_CONFIG_DEFAULT) + + @model_validator(mode="before") + @classmethod + def _merge_model_config_defaults(cls, data: Any) -> Any: + if isinstance(data, dict): + _fill_defaults_for_nested_field( + cast(dict[str, Any], data), + "MODEL_CONFIG", + cls._MODEL_CONFIG_DEFAULT, + ) + return data # pyright: ignore[reportUnknownVariableType] + MAX_TOKENS_SHORT: Annotated[int, Field(default=1000, gt=0, le=10_000)] = 1000 MAX_TOKENS_LONG: Annotated[int, Field(default=4000, gt=0, le=20_000)] = 4000 - THINKING_BUDGET_TOKENS: Annotated[int, Field(default=512, gt=0, le=2000)] = 512 - class WebhookSettings(HonchoSettings): model_config = SettingsConfigDict(env_prefix="WEBHOOK_", extra="ignore") # pyright: ignore @@ -528,7 +1076,7 @@ class SurprisalSettings(BaseModel): INCLUDE_LEVELS: list[str] = ["explicit", "deductive"] -class DreamSettings(BackupLLMSettingsMixin, HonchoSettings): +class DreamSettings(HonchoSettings): model_config = SettingsConfigDict( # pyright: ignore env_prefix="DREAM_", env_nested_delimiter="__", extra="ignore" ) @@ -539,11 +1087,6 @@ class DreamSettings(BackupLLMSettingsMixin, HonchoSettings): MIN_HOURS_BETWEEN_DREAMS: Annotated[int, Field(default=8, gt=0, le=72)] = 8 ENABLED_TYPES: list[str] = ["omni"] - PROVIDER: SupportedProviders = "anthropic" - MODEL: str = "claude-sonnet-4-20250514" - MAX_OUTPUT_TOKENS: Annotated[int, Field(default=16_384, gt=0, le=64_000)] = 16_384 - THINKING_BUDGET_TOKENS: Annotated[int, Field(default=8192, gt=0, le=32_000)] = 8192 - # Agent iteration limit - increased for extended reasoning workflow MAX_TOOL_ITERATIONS: Annotated[int, Field(default=20, gt=0, le=50)] = 20 @@ -552,23 +1095,66 @@ class DreamSettings(BackupLLMSettingsMixin, HonchoSettings): 16_384 ) - ## NOTE: specialist models use the same provider as the main model + @staticmethod + def _DEDUCTION_MODEL_CONFIG_DEFAULT() -> ConfiguredModelSettings: + # Minimal default; extra knobs would merge into env/TOML overrides. + return ConfiguredModelSettings( + transport="openai", + model="gpt-5.4-mini", + ) + + DEDUCTION_MODEL_CONFIG: ConfiguredModelSettings = Field( + default_factory=_DEDUCTION_MODEL_CONFIG_DEFAULT + ) + + @staticmethod + def _INDUCTION_MODEL_CONFIG_DEFAULT() -> ConfiguredModelSettings: + # Minimal default; extra knobs would merge into env/TOML overrides. 
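+        # e.g. DREAM_INDUCTION_MODEL_CONFIG__MAX_OUTPUT_TOKENS overrides one
+        # field; the rest fill in from this default.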
+ return ConfiguredModelSettings( + transport="openai", + model="gpt-5.4-mini", + ) - # Deduction Specialist: handles logical inference - DEDUCTION_MODEL: str = "claude-haiku-4-5" - # Induction Specialist: identifies patterns across observations - INDUCTION_MODEL: str = "claude-haiku-4-5" + INDUCTION_MODEL_CONFIG: ConfiguredModelSettings = Field( + default_factory=_INDUCTION_MODEL_CONFIG_DEFAULT + ) # Surprisal-based sampling subsystem SURPRISAL: SurprisalSettings = Field(default_factory=SurprisalSettings) - @model_validator(mode="after") - def _validate_token_budgets(self) -> "DreamSettings": - """Ensure the output token limit exceeds the thinking budget.""" - if self.MAX_OUTPUT_TOKENS <= self.THINKING_BUDGET_TOKENS: - raise ValueError( - "MAX_OUTPUT_TOKENS must be greater than THINKING_BUDGET_TOKENS" + @model_validator(mode="before") + @classmethod + def _merge_model_config_defaults(cls, data: Any) -> Any: + if isinstance(data, dict): + typed_data = cast(dict[str, Any], data) + _fill_defaults_for_nested_field( + typed_data, + "DEDUCTION_MODEL_CONFIG", + cls._DEDUCTION_MODEL_CONFIG_DEFAULT, + ) + _fill_defaults_for_nested_field( + typed_data, + "INDUCTION_MODEL_CONFIG", + cls._INDUCTION_MODEL_CONFIG_DEFAULT, ) + return data # pyright: ignore[reportUnknownVariableType] + + @model_validator(mode="after") + def _validate_specialist_token_budgets(self) -> "DreamSettings": + """Ensure thinking_budget_tokens < max_output_tokens for each specialist.""" + for name, cfg in ( + ("DEDUCTION_MODEL_CONFIG", self.DEDUCTION_MODEL_CONFIG), + ("INDUCTION_MODEL_CONFIG", self.INDUCTION_MODEL_CONFIG), + ): + if ( + cfg.max_output_tokens is not None + and cfg.thinking_budget_tokens is not None + and cfg.max_output_tokens <= cfg.thinking_budget_tokens + ): + raise ValueError( + f"dream.{name}.max_output_tokens must be greater than " + + f"dream.{name}.thinking_budget_tokens" + ) return self @@ -633,10 +1219,6 @@ class AppSettings(HonchoSettings): MAX_MESSAGE_SIZE: Annotated[int, Field(default=25_000, gt=0)] = 25_000 EMBED_MESSAGES: bool = True - MAX_EMBEDDING_TOKENS: Annotated[int, Field(default=8192, gt=0)] = 8192 - MAX_EMBEDDING_TOKENS_PER_REQUEST: Annotated[int, Field(default=300_000, gt=0)] = ( - 300_000 - ) LANGFUSE_HOST: str | None = None LANGFUSE_PUBLIC_KEY: str | None = None @@ -651,6 +1233,7 @@ class AppSettings(HonchoSettings): AUTH: AuthSettings = Field(default_factory=AuthSettings) SENTRY: SentrySettings = Field(default_factory=SentrySettings) LLM: LLMSettings = Field(default_factory=LLMSettings) + EMBEDDING: EmbeddingSettings = Field(default_factory=EmbeddingSettings) DERIVER: DeriverSettings = Field(default_factory=DeriverSettings) DIALECTIC: DialecticSettings = Field(default_factory=DialecticSettings) PEER_CARD: PeerCardSettings = Field(default_factory=PeerCardSettings) @@ -676,11 +1259,25 @@ def propagate_namespace(self) -> "AppSettings": self.CACHE.NAMESPACE = self.NAMESPACE if "NAMESPACE" not in self.VECTOR_STORE.model_fields_set: self.VECTOR_STORE.NAMESPACE = self.NAMESPACE + if "DIMENSIONS" not in self.VECTOR_STORE.model_fields_set: + self.VECTOR_STORE.DIMENSIONS = self.EMBEDDING.VECTOR_DIMENSIONS + elif self.VECTOR_STORE.DIMENSIONS != self.EMBEDDING.VECTOR_DIMENSIONS: + raise ValueError( + "VECTOR_STORE.DIMENSIONS must match EMBEDDING.VECTOR_DIMENSIONS" + ) if "NAMESPACE" not in self.TELEMETRY.model_fields_set: self.TELEMETRY.NAMESPACE = self.NAMESPACE if "NAMESPACE" not in self.METRICS.model_fields_set: self.METRICS.NAMESPACE = self.NAMESPACE + if self.EMBEDDING.VECTOR_DIMENSIONS != 1536 
and ( + self.VECTOR_STORE.TYPE == "pgvector" or not self.VECTOR_STORE.MIGRATED + ): + raise ValueError( + "EMBEDDING.VECTOR_DIMENSIONS must remain 1536 while pgvector is " + + "active or vector-store migration is incomplete" + ) + return self diff --git a/src/crud/document.py b/src/crud/document.py index 688ed2f01..7de9dfd6e 100644 --- a/src/crud/document.py +++ b/src/crud/document.py @@ -348,7 +348,8 @@ async def query_documents( embedding = await embedding_client.embed(query) except ValueError as e: raise ValidationException( - f"Query exceeds maximum token limit of {settings.MAX_EMBEDDING_TOKENS}." + "Query exceeds maximum token limit of " + + f"{settings.EMBEDDING.MAX_INPUT_TOKENS}." ) from e if _uses_pgvector(): diff --git a/src/crud/representation.py b/src/crud/representation.py index 616a17d14..de97d0d34 100644 --- a/src/crud/representation.py +++ b/src/crud/representation.py @@ -80,7 +80,8 @@ async def save_representation( embeddings = await embedding_client.simple_batch_embed(observation_texts) except ValueError as e: raise exceptions.ValidationException( - f"Observation content exceeds maximum token limit of {settings.MAX_EMBEDDING_TOKENS}." + "Observation content exceeds maximum token limit of " + + f"{settings.EMBEDDING.MAX_INPUT_TOKENS}." ) from e batch_embed_duration = (time.perf_counter() - batch_embed_start) * 1000 diff --git a/src/deriver/__main__.py b/src/deriver/__main__.py index e79346781..c3d498b27 100644 --- a/src/deriver/__main__.py +++ b/src/deriver/__main__.py @@ -50,7 +50,6 @@ def setup_logging(): logging.getLogger("httpcore").setLevel(logging.WARNING) logging.getLogger("httpx").setLevel(logging.WARNING) logging.getLogger("openai._base_client").setLevel(logging.WARNING) - logging.getLogger("groq._base_client").setLevel(logging.WARNING) async def run_deriver(): diff --git a/src/deriver/deriver.py b/src/deriver/deriver.py index b8735e3c2..1fcc5ad24 100644 --- a/src/deriver/deriver.py +++ b/src/deriver/deriver.py @@ -2,9 +2,10 @@ import time from src import crud -from src.config import settings +from src.config import ConfiguredModelSettings, settings from src.crud.representation import RepresentationManager from src.dependencies import tracked_db +from src.llm import honcho_llm_call from src.models import Message from src.schemas import ResolvedConfiguration from src.telemetry import prometheus_metrics @@ -16,7 +17,6 @@ TokenTypes, ) from src.telemetry.sentry import with_sentry_transaction -from src.utils.clients import honcho_llm_call from src.utils.config_helpers import get_configuration from src.utils.formatting import format_new_turn_with_timestamp from src.utils.representation import PromptRepresentation, Representation @@ -27,6 +27,10 @@ logger = logging.getLogger(__name__) +def _get_deriver_model_config() -> ConfiguredModelSettings: + return settings.DERIVER.MODEL_CONFIG + + @with_sentry_transaction("minimal_deriver_batch", op="deriver") async def process_representation_tasks_batch( messages: list[Message], @@ -119,22 +123,24 @@ async def process_representation_tasks_batch( ) # validation on settings means max_tokens will always be > 0 - max_tokens = settings.DERIVER.MAX_OUTPUT_TOKENS or settings.LLM.DEFAULT_MAX_TOKENS + base_model_config = _get_deriver_model_config() + max_tokens = base_model_config.max_output_tokens or settings.LLM.DEFAULT_MAX_TOKENS + model_config = base_model_config.model_copy( + update={ + "stop_sequences": [" \n", "\n\n\n\n"], + } + ) # Single LLM call llm_start = time.perf_counter() response = await honcho_llm_call( - 
llm_settings=settings.DERIVER, + model_config=model_config, prompt=prompt, max_tokens=max_tokens, track_name="Minimal Deriver", response_model=PromptRepresentation, json_mode=True, - temperature=settings.DERIVER.TEMPERATURE, - stop_seqs=[" \n", "\n\n\n\n"], - thinking_budget_tokens=settings.DERIVER.THINKING_BUDGET_TOKENS, max_input_tokens=settings.DERIVER.MAX_INPUT_TOKENS, - reasoning_effort="minimal", enable_retry=True, retry_attempts=3, trace_name="minimal_deriver", diff --git a/src/dialectic/core.py b/src/dialectic/core.py index f2dee4266..f8f3b841c 100644 --- a/src/dialectic/core.py +++ b/src/dialectic/core.py @@ -12,10 +12,15 @@ from typing import Any, cast from src import crud -from src.config import ReasoningLevel, settings +from src.config import ConfiguredModelSettings, ReasoningLevel, settings from src.dependencies import tracked_db from src.dialectic import prompts from src.embedding_client import embedding_client +from src.llm import ( + HonchoLLMCallResponse, + StreamingResponseWithMetadata, + honcho_llm_call, +) from src.telemetry import prometheus_metrics from src.telemetry.events import DialecticCompletedEvent, emit from src.telemetry.logging import ( @@ -30,16 +35,17 @@ create_tool_executor, search_memory, ) -from src.utils.clients import ( - HonchoLLMCallResponse, - StreamingResponseWithMetadata, - honcho_llm_call, -) from src.utils.formatting import format_new_turn_with_timestamp logger = logging.getLogger(__name__) +def _get_dialectic_level_model_config( + reasoning_level: ReasoningLevel, +) -> ConfiguredModelSettings: + return settings.DIALECTIC.LEVELS[reasoning_level].MODEL_CONFIG + + class DialecticAgent: """ An agentic dialectic that iteratively gathers context to answer queries. @@ -405,7 +411,7 @@ async def answer(self, query: str) -> str: ) response: HonchoLLMCallResponse[str] = await honcho_llm_call( - llm_settings=level_settings, + model_config=_get_dialectic_level_model_config(self.reasoning_level), prompt="", # Ignored since we pass messages max_tokens=max_tokens, tools=tools, @@ -414,7 +420,6 @@ async def answer(self, query: str) -> str: max_tool_iterations=level_settings.MAX_TOOL_ITERATIONS, messages=self.messages, track_name="Dialectic Agent", - thinking_budget_tokens=level_settings.THINKING_BUDGET_TOKENS, max_input_tokens=settings.DIALECTIC.MAX_INPUT_TOKENS, trace_name="dialectic_chat", ) @@ -471,7 +476,7 @@ async def answer_stream(self, query: str) -> AsyncIterator[str]: response = cast( StreamingResponseWithMetadata, await honcho_llm_call( - llm_settings=level_settings, + model_config=_get_dialectic_level_model_config(self.reasoning_level), prompt="", # Ignored since we pass messages max_tokens=max_tokens, stream=True, @@ -482,7 +487,6 @@ async def answer_stream(self, query: str) -> AsyncIterator[str]: max_tool_iterations=level_settings.MAX_TOOL_ITERATIONS, messages=self.messages, track_name="Dialectic Agent Stream", - thinking_budget_tokens=level_settings.THINKING_BUDGET_TOKENS, max_input_tokens=settings.DIALECTIC.MAX_INPUT_TOKENS, trace_name="dialectic_chat", ), diff --git a/src/dreamer/specialists.py b/src/dreamer/specialists.py index 608db3b0e..c7277586b 100644 --- a/src/dreamer/specialists.py +++ b/src/dreamer/specialists.py @@ -19,8 +19,10 @@ from typing import Any from src import crud, schemas -from src.config import settings +from src.config import ConfiguredModelSettings, settings from src.dependencies import tracked_db +from src.exceptions import ValidationException +from src.llm import HonchoLLMCallResponse, honcho_llm_call from src.schemas 
import ResolvedConfiguration from src.telemetry import prometheus_metrics from src.telemetry.events import DreamSpecialistEvent, emit @@ -31,11 +33,22 @@ INDUCTION_SPECIALIST_TOOLS, create_tool_executor, ) -from src.utils.clients import HonchoLLMCallResponse, honcho_llm_call logger = logging.getLogger(__name__) +def _require_specialist_model_config( + model_config: ConfiguredModelSettings | None, + *, + specialist_name: str, +) -> ConfiguredModelSettings: + if model_config is None: + raise ValidationException( + f"{specialist_name} MODEL_CONFIG must be resolved before use" + ) + return model_config + + @dataclass class SpecialistResult: """Result of a specialist run for telemetry and aggregation.""" @@ -70,8 +83,8 @@ def get_tools(self, *, peer_card_enabled: bool = True) -> list[dict[str, Any]]: ... @abstractmethod - def get_model(self) -> str: - """Get the model to use for this specialist.""" + def get_model_config(self) -> ConfiguredModelSettings: + """Get the configured model to use for this specialist.""" ... def get_max_tokens(self) -> int: @@ -196,9 +209,18 @@ async def run( parent_category="dream", ) - # Get model with potential override - model = self.get_model() - llm_settings = settings.DREAM.model_copy(update={"MODEL": model}) + model_config = self.get_model_config() + + # Respect operator-configured max_output_tokens on the specialist's + # ModelConfig (e.g. DREAM_DEDUCTION_MODEL_CONFIG__MAX_OUTPUT_TOKENS). + # Only fall back to the specialist's hardcoded default when the + # config leaves max_output_tokens unset or non-positive. + configured_max = model_config.max_output_tokens + effective_max_tokens = ( + configured_max + if configured_max and configured_max > 0 + else self.get_max_tokens() + ) # Track iterations via callback iteration_count = 0 @@ -209,9 +231,9 @@ def iteration_callback(data: Any) -> None: # Run the agent loop response: HonchoLLMCallResponse[str] = await honcho_llm_call( - llm_settings=llm_settings, + model_config=model_config, prompt="", # Ignored since we pass messages - max_tokens=self.get_max_tokens(), + max_tokens=effective_max_tokens, tools=self.get_tools(peer_card_enabled=peer_card_enabled), tool_choice=None, tool_executor=tool_executor, @@ -305,8 +327,11 @@ def get_tools(self, *, peer_card_enabled: bool = True) -> list[dict[str, Any]]: if t["name"] not in PEER_CARD_TOOL_NAMES ] - def get_model(self) -> str: - return settings.DREAM.DEDUCTION_MODEL + def get_model_config(self) -> ConfiguredModelSettings: + return _require_specialist_model_config( + settings.DREAM.DEDUCTION_MODEL_CONFIG, + specialist_name="DREAM DEDUCTION", + ) def get_max_tokens(self) -> int: return 8192 @@ -377,11 +402,12 @@ def build_system_prompt( ## CREATING OBSERVATIONS +Use `create_observations_deductive`. + ```json {{ "observations": [{{ "content": "The logical conclusion", - "level": "deductive", // or "contradiction" "source_ids": ["id1", "id2"], "premises": ["premise 1 text", "premise 2 text"] }}] @@ -393,8 +419,9 @@ def build_system_prompt( 1. Don't explain your reasoning - just call tools 2. Create observations based on what you ACTUALLY FIND, not what you expect 3. Always include source_ids linking to the observations you're synthesizing -4. Delete outdated observations - don't leave duplicates -5. Quality over quantity - fewer good deductions beat many weak ones""" +4. Empty or missing source_ids will be rejected +5. Delete outdated observations - don't leave duplicates +6. 
Quality over quantity - fewer good deductions beat many weak ones""" def build_user_prompt( self, @@ -448,8 +475,11 @@ def get_tools(self, *, peer_card_enabled: bool = True) -> list[dict[str, Any]]: if t["name"] not in PEER_CARD_TOOL_NAMES ] - def get_model(self) -> str: - return settings.DREAM.INDUCTION_MODEL + def get_model_config(self) -> ConfiguredModelSettings: + return _require_specialist_model_config( + settings.DREAM.INDUCTION_MODEL_CONFIG, + specialist_name="DREAM INDUCTION", + ) def get_max_tokens(self) -> int: return 8192 @@ -514,11 +544,12 @@ def build_system_prompt( ## CREATING OBSERVATIONS +Use `create_observations_inductive`. + ```json {{ "observations": [{{ "content": "The pattern or generalization", - "level": "inductive", "source_ids": ["id1", "id2", "id3"], "sources": ["evidence 1", "evidence 2"], "pattern_type": "tendency", // preference|behavior|personality|tendency|correlation @@ -533,7 +564,8 @@ def build_system_prompt( 2. Don't just restate a single fact as a pattern 3. Confidence based on evidence count: 2=low, 3-4=medium, 5+=high 4. Look for HOW things change over time, not just static facts -5. Include source_ids - always link back to evidence""" +5. Include source_ids - always link back to evidence +6. Empty or missing source_ids will be rejected""" def build_user_prompt( self, diff --git a/src/embedding_client.py b/src/embedding_client.py index 9798dc02b..e163cc66c 100644 --- a/src/embedding_client.py +++ b/src/embedding_client.py @@ -6,9 +6,10 @@ import tiktoken from google import genai +from google.genai import types as genai_types from openai import AsyncOpenAI -from .config import settings +from .config import EmbeddingModelConfig, resolve_embedding_model_config, settings logger = logging.getLogger(__name__) @@ -26,49 +27,58 @@ class _EmbeddingClient: Embedding client supporting OpenAI and Gemini with chunking and batching support. 
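+
+    Construction takes an EmbeddingModelConfig plus explicit dimension and
+    token limits; the EmbeddingClient wrapper resolves those from settings.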
""" - def __init__(self, api_key: str | None = None, provider: str | None = None): - self.provider: str = provider or settings.LLM.EMBEDDING_PROVIDER - - if self.provider == "gemini": - if api_key is None: - api_key = settings.LLM.GEMINI_API_KEY - if not api_key: + def __init__( + self, + config: EmbeddingModelConfig, + *, + vector_dimensions: int, + max_input_tokens: int, + max_tokens_per_request: int, + ): + self.transport: str = config.transport + self.model: str = config.model + self.vector_dimensions: int = vector_dimensions + + if self.transport == "gemini": + if not config.api_key: raise ValueError("Gemini API key is required") - self.client: genai.Client | AsyncOpenAI = genai.Client(api_key=api_key) - self.model: str = "gemini-embedding-001" + http_options = ( + genai_types.HttpOptions(base_url=config.base_url) + if config.base_url + else None + ) + self.client: genai.Client | AsyncOpenAI = genai.Client( + api_key=config.api_key, + http_options=http_options, + ) # Gemini has a 2048 token limit - self.max_embedding_tokens: int = min(settings.MAX_EMBEDDING_TOKENS, 2048) + self.max_embedding_tokens: int = min(max_input_tokens, 2048) # Gemini batch size is not documented, using conservative estimate self.max_batch_size: int = 100 - elif self.provider == "openrouter": - if api_key is None: - api_key = settings.LLM.OPENAI_COMPATIBLE_API_KEY - if not api_key: - raise ValueError( - "OpenRouter API key (LLM_OPENAI_COMPATIBLE_API_KEY) is required" - ) - base_url = ( - settings.LLM.OPENAI_COMPATIBLE_BASE_URL - or "https://openrouter.ai/api/v1" - ) - self.client = AsyncOpenAI(api_key=api_key, base_url=base_url) - self.model = "openai/text-embedding-3-small" - self.max_embedding_tokens = settings.MAX_EMBEDDING_TOKENS - self.max_batch_size = 2048 # Same as OpenAI else: # openai - if api_key is None: - api_key = settings.LLM.OPENAI_API_KEY - if not api_key: + if not config.api_key: raise ValueError("OpenAI API key is required") - self.client = AsyncOpenAI(api_key=api_key) - self.model = "text-embedding-3-small" - self.max_embedding_tokens = settings.MAX_EMBEDDING_TOKENS + self.client = AsyncOpenAI( + api_key=config.api_key, + base_url=config.base_url, + ) + self.max_embedding_tokens = max_input_tokens self.max_batch_size = 2048 # OpenAI batch limit self.encoding: tiktoken.Encoding = tiktoken.get_encoding("o200k_base") - self.max_embedding_tokens_per_request: int = ( - settings.MAX_EMBEDDING_TOKENS_PER_REQUEST - ) + self.max_embedding_tokens_per_request: int = max_tokens_per_request + + @property + def provider(self) -> str: + return self.transport + + def _validate_embedding_dimensions(self, embedding: list[float]) -> list[float]: + if len(embedding) != self.vector_dimensions: + raise ValueError( + f"Embedding dimension mismatch for {self.transport}:{self.model}. " + + f"Expected {self.vector_dimensions}, got {len(embedding)}." 
+ ) + return embedding async def embed(self, query: str) -> list[float]: token_count = len(self.encoding.encode(query)) @@ -82,16 +92,16 @@ async def embed(self, query: str) -> list[float]: response = await self.client.aio.models.embed_content( model=self.model, contents=query, - config={"output_dimensionality": 1536}, + config={"output_dimensionality": self.vector_dimensions}, ) if not response.embeddings or not response.embeddings[0].values: raise ValueError("No embedding returned from Gemini API") - return response.embeddings[0].values + return self._validate_embedding_dimensions(response.embeddings[0].values) else: # openai response = await self.client.embeddings.create( model=self.model, input=query ) - return response.data[0].embedding + return self._validate_embedding_dimensions(response.data[0].embedding) async def simple_batch_embed(self, texts: list[str]) -> list[list[float]]: """ @@ -116,18 +126,25 @@ async def simple_batch_embed(self, texts: list[str]) -> list[list[float]]: response = await self.client.aio.models.embed_content( model=self.model, contents=batch, # pyright: ignore[reportArgumentType] - config={"output_dimensionality": 1536}, + config={"output_dimensionality": self.vector_dimensions}, ) if response.embeddings: for emb in response.embeddings: if emb.values: - embeddings.append(emb.values) + embeddings.append( + self._validate_embedding_dimensions(emb.values) + ) else: # openai response = await self.client.embeddings.create( input=batch, model=self.model, ) - embeddings.extend([data.embedding for data in response.data]) + embeddings.extend( + [ + self._validate_embedding_dimensions(data.embedding) + for data in response.data + ] + ) except Exception as e: # Check if it's a token limit error and re-raise as ValueError for consistency if "token" in str(e).lower(): @@ -252,7 +269,7 @@ async def _process_batch( response = await self.client.aio.models.embed_content( model=self.model, contents=[item.text for item in batch], - config={"output_dimensionality": 1536}, + config={"output_dimensionality": self.vector_dimensions}, ) if response.embeddings: for item, embedding in zip( @@ -260,15 +277,19 @@ async def _process_batch( ): if embedding.values: result[item.text_id][item.chunk_index] = ( - embedding.values + self._validate_embedding_dimensions( + embedding.values + ) ) - else: # openai / openrouter + else: # openai response = await self.client.embeddings.create( model=self.model, input=[item.text for item in batch] ) for item, embedding_data in zip(batch, response.data, strict=True): result[item.text_id][item.chunk_index] = ( - embedding_data.embedding + self._validate_embedding_dimensions( + embedding_data.embedding + ) ) return dict(result) @@ -358,6 +379,7 @@ class EmbeddingClient: """ _instance: "_EmbeddingClient | None" = None + _instance_signature: tuple[object, ...] | None = None _lock: threading.Lock = threading.Lock() _wrapper_instance: "EmbeddingClient | None" = None @@ -374,26 +396,41 @@ def _get_client(self) -> _EmbeddingClient: Uses double-checked locking for thread-safe lazy initialization. 
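+
+        The cached instance is rebuilt whenever the resolved settings
+        signature (transport, model, credentials, or limits) changes.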
""" - if self._instance is None: + signature = self._get_settings_signature() + if self._instance is None or self._instance_signature != signature: with self._lock: - if self._instance is None: - provider = settings.LLM.EMBEDDING_PROVIDER - if provider == "gemini": - api_key = settings.LLM.GEMINI_API_KEY - elif provider == "openrouter": - api_key = settings.LLM.OPENAI_COMPATIBLE_API_KEY - else: - api_key = settings.LLM.OPENAI_API_KEY - + if self._instance is None or self._instance_signature != signature: + runtime_config = self._resolve_runtime_config() self._instance = _EmbeddingClient( - api_key=api_key, provider=provider + runtime_config, + vector_dimensions=settings.EMBEDDING.VECTOR_DIMENSIONS, + max_input_tokens=settings.EMBEDDING.MAX_INPUT_TOKENS, + max_tokens_per_request=settings.EMBEDDING.MAX_TOKENS_PER_REQUEST, ) + self._instance_signature = signature logger.debug( - f"Initialized embedding client with provider: {provider}" + "Initialized embedding client with transport: %s model: %s", + runtime_config.transport, + runtime_config.model, ) return self._instance + def _resolve_runtime_config(self) -> EmbeddingModelConfig: + return resolve_embedding_model_config(settings.EMBEDDING.MODEL_CONFIG) + + def _get_settings_signature(self) -> tuple[object, ...]: + runtime_config = self._resolve_runtime_config() + return ( + runtime_config.transport, + runtime_config.model, + runtime_config.api_key, + runtime_config.base_url, + settings.EMBEDDING.VECTOR_DIMENSIONS, + settings.EMBEDDING.MAX_INPUT_TOKENS, + settings.EMBEDDING.MAX_TOKENS_PER_REQUEST, + ) + async def embed(self, query: str) -> list[float]: """Embed a single query string.""" return await self._get_client().embed(query) @@ -418,11 +455,21 @@ def model(self) -> str: """Get the model name.""" return self._get_client().model + @property + def transport(self) -> str: + """Get the transport name.""" + return self._get_client().transport + @property def max_embedding_tokens(self) -> int: """Get the maximum embedding tokens.""" return self._get_client().max_embedding_tokens + @property + def vector_dimensions(self) -> int: + """Get the configured embedding dimensions.""" + return self._get_client().vector_dimensions + @property def encoding(self) -> tiktoken.Encoding: """Get the tiktoken encoding.""" diff --git a/src/llm/__init__.py b/src/llm/__init__.py new file mode 100644 index 000000000..ae47bc533 --- /dev/null +++ b/src/llm/__init__.py @@ -0,0 +1,66 @@ +"""Honcho LLM orchestration package — stable public surface. + +Application code should import from `src.llm` (or specific submodules like +`src.llm.api` / `src.llm.types`). The old `src/utils/clients.py` entrypoint +is gone; everything lives here now. 
+""" + +from __future__ import annotations + +from .api import honcho_llm_call +from .backend import CompletionResult, ProviderBackend, StreamChunk, ToolCallResult +from .credentials import default_transport_api_key, resolve_credentials +from .executor import honcho_llm_call_inner +from .registry import ( + CLIENTS, + backend_for_provider, + client_for_model_config, + get_anthropic_client, + get_anthropic_override_client, + get_backend, + get_gemini_client, + get_gemini_override_client, + get_openai_client, + get_openai_override_client, + history_adapter_for_provider, +) +from .types import ( + HonchoLLMCallResponse, + HonchoLLMCallStreamChunk, + IterationCallback, + IterationData, + ProviderClient, + ReasoningEffortType, + StreamingResponseWithMetadata, + VerbosityType, +) + +__all__ = [ + "CLIENTS", + "CompletionResult", + "HonchoLLMCallResponse", + "HonchoLLMCallStreamChunk", + "IterationCallback", + "IterationData", + "ProviderBackend", + "ProviderClient", + "ReasoningEffortType", + "StreamChunk", + "StreamingResponseWithMetadata", + "ToolCallResult", + "VerbosityType", + "backend_for_provider", + "client_for_model_config", + "default_transport_api_key", + "get_anthropic_client", + "get_anthropic_override_client", + "get_backend", + "get_gemini_client", + "get_gemini_override_client", + "get_openai_client", + "get_openai_override_client", + "history_adapter_for_provider", + "honcho_llm_call", + "honcho_llm_call_inner", + "resolve_credentials", +] diff --git a/src/llm/api.py b/src/llm/api.py new file mode 100644 index 000000000..4639cad46 --- /dev/null +++ b/src/llm/api.py @@ -0,0 +1,359 @@ +"""Public LLM entrypoint: `honcho_llm_call`. + +Orchestrates: +- Runtime config resolution from ConfiguredModelSettings → ModelConfig. +- Per-attempt planning (primary vs fallback selection). +- Retry with exponential backoff via tenacity. +- Tool-loop delegation when tools are supplied. +- Single-call delegation to the executor otherwise. +- Reasoning-trace telemetry emission. 
+""" + +from __future__ import annotations + +import logging +from collections.abc import AsyncIterator, Callable +from typing import Any, Literal, TypeVar, cast, overload + +from pydantic import BaseModel +from sentry_sdk.ai.monitoring import ai_track +from tenacity import retry, stop_after_attempt, wait_exponential + +from src.config import ConfiguredModelSettings, ModelConfig +from src.exceptions import ValidationException +from src.telemetry.logging import conditional_observe +from src.telemetry.reasoning_traces import log_reasoning_trace + +from .executor import honcho_llm_call_inner +from .runtime import ( + AttemptPlan, + current_attempt, + effective_temperature, + plan_attempt, + resolve_runtime_model_config, +) +from .tool_loop import execute_tool_loop +from .types import ( + HonchoLLMCallResponse, + HonchoLLMCallStreamChunk, + IterationCallback, + ReasoningEffortType, + StreamingResponseWithMetadata, +) + +logger = logging.getLogger(__name__) + +M = TypeVar("M", bound=BaseModel) + + +@overload +async def honcho_llm_call( + *, + model_config: ModelConfig | ConfiguredModelSettings, + prompt: str, + max_tokens: int, + track_name: str | None = None, + response_model: type[M], + json_mode: bool = False, + temperature: float | None = None, + stop_seqs: list[str] | None = None, + reasoning_effort: ReasoningEffortType = None, + verbosity: Literal["low", "medium", "high"] | None = None, + thinking_budget_tokens: int | None = None, + enable_retry: bool = True, + retry_attempts: int = 3, + stream: Literal[False] = False, + stream_final_only: bool = False, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + tool_executor: Callable[[str, dict[str, Any]], Any] | None = None, + max_tool_iterations: int = 10, + messages: list[dict[str, Any]] | None = None, + max_input_tokens: int | None = None, + trace_name: str | None = None, + iteration_callback: IterationCallback | None = None, +) -> HonchoLLMCallResponse[M]: ... + + +@overload +async def honcho_llm_call( + *, + model_config: ModelConfig | ConfiguredModelSettings, + prompt: str, + max_tokens: int, + track_name: str | None = None, + response_model: None = None, + json_mode: bool = False, + temperature: float | None = None, + stop_seqs: list[str] | None = None, + reasoning_effort: ReasoningEffortType = None, + verbosity: Literal["low", "medium", "high"] | None = None, + thinking_budget_tokens: int | None = None, + enable_retry: bool = True, + retry_attempts: int = 3, + stream: Literal[False] = False, + stream_final_only: bool = False, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + tool_executor: Callable[[str, dict[str, Any]], Any] | None = None, + max_tool_iterations: int = 10, + messages: list[dict[str, Any]] | None = None, + max_input_tokens: int | None = None, + trace_name: str | None = None, + iteration_callback: IterationCallback | None = None, +) -> HonchoLLMCallResponse[str]: ... 
+ + +@overload +async def honcho_llm_call( + *, + model_config: ModelConfig | ConfiguredModelSettings, + prompt: str, + max_tokens: int, + track_name: str | None = None, + response_model: type[BaseModel] | None = None, + json_mode: bool = False, + temperature: float | None = None, + stop_seqs: list[str] | None = None, + reasoning_effort: ReasoningEffortType = None, + verbosity: Literal["low", "medium", "high"] | None = None, + thinking_budget_tokens: int | None = None, + enable_retry: bool = True, + retry_attempts: int = 3, + stream: Literal[True] = ..., + stream_final_only: bool = False, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + tool_executor: Callable[[str, dict[str, Any]], Any] | None = None, + max_tool_iterations: int = 10, + messages: list[dict[str, Any]] | None = None, + max_input_tokens: int | None = None, + trace_name: str | None = None, + iteration_callback: IterationCallback | None = None, +) -> AsyncIterator[HonchoLLMCallStreamChunk] | StreamingResponseWithMetadata: ... + + +@conditional_observe(name="LLM Call") +async def honcho_llm_call( + *, + model_config: ModelConfig | ConfiguredModelSettings, + prompt: str, + max_tokens: int, + track_name: str | None = None, + response_model: type[BaseModel] | None = None, + json_mode: bool = False, + temperature: float | None = None, + stop_seqs: list[str] | None = None, + reasoning_effort: ReasoningEffortType = None, + verbosity: Literal["low", "medium", "high"] | None = None, + thinking_budget_tokens: int | None = None, + enable_retry: bool = True, + retry_attempts: int = 3, + stream: bool = False, + stream_final_only: bool = False, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + tool_executor: Callable[[str, dict[str, Any]], Any] | None = None, + max_tool_iterations: int = 10, + messages: list[dict[str, Any]] | None = None, + max_input_tokens: int | None = None, + trace_name: str | None = None, + iteration_callback: IterationCallback | None = None, +) -> ( + HonchoLLMCallResponse[Any] + | AsyncIterator[HonchoLLMCallStreamChunk] + | StreamingResponseWithMetadata +): + """Make an LLM call with retry, optional backup failover, and optional tool loop. + + Backup provider/model (if configured on the primary ModelConfig's + `fallback`) is used on the final retry attempt, which is 3 by default. + + Raises: + ValidationException: If streaming and tool calling are combined + without `stream_final_only=True`. + """ + runtime_model_config = resolve_runtime_model_config(model_config) + + # Caller kwargs left at None are resolved downstream by + # effective_config_for_call against whichever ModelConfig wins the + # attempt (primary or fallback). Defaulting here from + # runtime_model_config would clobber a fallback config's own + # temperature/thinking params on the final retry, so we deliberately + # keep the locals as the caller supplied them. + + if stream and tools and not stream_final_only: + raise ValidationException( + "Streaming is not supported with tool calling. " + + "Set stream=False when using tools, or use stream_final_only=True " + + "to stream only the final response after tool calls." + ) + + # tenacity uses 1-indexed attempts. 
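+    # Attempts 1..retry_attempts-1 run against the primary config; plan_attempt
+    # only selects the configured fallback (if any) on the final attempt.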
+ current_attempt.set(1) + + def _get_attempt_plan() -> AttemptPlan: + return plan_attempt( + runtime_model_config=runtime_model_config, + attempt=current_attempt.get(), + retry_attempts=retry_attempts, + call_thinking_budget_tokens=thinking_budget_tokens, + call_reasoning_effort=reasoning_effort, + ) + + async def _call_with_provider_selection() -> ( + HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk] + ): + """Select provider/model based on current attempt, then call once. + + This closure is what tenacity wraps, so selection re-runs per attempt + (and the fallback kicks in on the final attempt automatically). + """ + plan = _get_attempt_plan() + + if stream: + return await honcho_llm_call_inner( + plan.provider, + plan.model, + prompt, + max_tokens, + response_model, + json_mode, + effective_temperature(temperature), + stop_seqs, + plan.reasoning_effort, + verbosity, + plan.thinking_budget_tokens, + stream=True, + client_override=plan.client, + tools=tools, + tool_choice=tool_choice, + selected_config=plan.selected_config, + ) + return await honcho_llm_call_inner( + plan.provider, + plan.model, + prompt, + max_tokens, + response_model, + json_mode, + effective_temperature(temperature), + stop_seqs, + plan.reasoning_effort, + verbosity, + plan.thinking_budget_tokens, + stream=False, + client_override=plan.client, + tools=tools, + tool_choice=tool_choice, + selected_config=plan.selected_config, + ) + + decorated = _call_with_provider_selection + + if track_name: + decorated = ai_track(track_name)(decorated) + + def before_retry_callback(retry_state: Any) -> None: + """Update attempt counter before each retry + log transient failures. + + tenacity's before_sleep fires AFTER an attempt fails, BEFORE sleeping, + so we increment to the next attempt number here. + """ + next_attempt = retry_state.attempt_number + 1 + current_attempt.set(next_attempt) + exc = retry_state.outcome.exception() if retry_state.outcome else None + if exc: + logger.warning( + f"Error on attempt {retry_state.attempt_number}/{retry_attempts} with " + + f"{runtime_model_config.transport}/{runtime_model_config.model}: {exc}" + ) + logger.info(f"Will retry with attempt {next_attempt}/{retry_attempts}") + + if enable_retry: + decorated = retry( + stop=stop_after_attempt(retry_attempts), + wait=wait_exponential(multiplier=1, min=4, max=10), + before_sleep=before_retry_callback, + )(decorated) + + def _trace_thinking_budget() -> int | None: + # Trace log should reflect what got applied, so fall back to the + # runtime config's value when the caller left the kwarg unset. + return ( + thinking_budget_tokens + if thinking_budget_tokens is not None + else runtime_model_config.thinking_budget_tokens + ) + + def _trace_reasoning_effort() -> ReasoningEffortType: + if reasoning_effort is not None: + return reasoning_effort + config_effort = runtime_model_config.thinking_effort + return cast(ReasoningEffortType, config_effort) if config_effort else None + + def _trace_stop_seqs() -> list[str] | None: + return ( + stop_seqs if stop_seqs is not None else runtime_model_config.stop_sequences + ) + + # Tool-less path: call once and return. 
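+    # Note: streaming results pass through untraced; log_reasoning_trace below
+    # only handles fully materialized HonchoLLMCallResponse objects.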
+ if not tools or not tool_executor: + result: ( + HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk] + ) = await decorated() + if trace_name and isinstance(result, HonchoLLMCallResponse): + log_reasoning_trace( + task_type=trace_name, + model_config=runtime_model_config, + prompt=prompt, + response=result, + max_tokens=max_tokens, + thinking_budget_tokens=_trace_thinking_budget(), + reasoning_effort=_trace_reasoning_effort(), + json_mode=json_mode, + stop_seqs=_trace_stop_seqs(), + messages=messages, + ) + return result + + # execute_tool_loop raises ValidationException on out-of-range + # max_tool_iterations; fail-fast is cheaper than silent clamping here. + result = await execute_tool_loop( + prompt=prompt, + max_tokens=max_tokens, + messages=messages, + tools=tools, + tool_choice=tool_choice, + tool_executor=tool_executor, + max_tool_iterations=max_tool_iterations, + response_model=response_model, + json_mode=json_mode, + temperature=temperature, + stop_seqs=stop_seqs, + verbosity=verbosity, + enable_retry=enable_retry, + retry_attempts=retry_attempts, + max_input_tokens=max_input_tokens, + get_attempt_plan=_get_attempt_plan, + before_retry_callback=before_retry_callback, + stream_final=stream_final_only, + iteration_callback=iteration_callback, + ) + if trace_name and isinstance(result, HonchoLLMCallResponse): + log_reasoning_trace( + task_type=trace_name, + model_config=runtime_model_config, + prompt=prompt, + response=result, + max_tokens=max_tokens, + thinking_budget_tokens=_trace_thinking_budget(), + reasoning_effort=_trace_reasoning_effort(), + json_mode=json_mode, + stop_seqs=_trace_stop_seqs(), + messages=messages, + ) + return result + + +__all__ = ["honcho_llm_call"] diff --git a/src/llm/backend.py b/src/llm/backend.py new file mode 100644 index 000000000..5645998cb --- /dev/null +++ b/src/llm/backend.py @@ -0,0 +1,88 @@ +from __future__ import annotations + +from collections.abc import AsyncIterator +from dataclasses import dataclass, field +from typing import Any, Protocol, runtime_checkable + +from pydantic import BaseModel + + +@dataclass(slots=True) +class ToolCallResult: + """Normalized tool call from any provider.""" + + id: str + name: str + input: dict[str, Any] + thought_signature: str | None = None + + +@dataclass(slots=True) +class CompletionResult: + """Normalized completion result returned by provider backends.""" + + content: Any = "" + input_tokens: int = 0 + output_tokens: int = 0 + cache_creation_input_tokens: int = 0 + cache_read_input_tokens: int = 0 + finish_reason: str = "stop" + tool_calls: list[ToolCallResult] = field(default_factory=list) + thinking_content: str | None = None + thinking_blocks: list[dict[str, Any]] = field(default_factory=list) + reasoning_details: list[dict[str, Any]] = field(default_factory=list) + raw_response: Any = None + + +@dataclass(slots=True) +class StreamChunk: + """A single chunk in a streaming response.""" + + content: str = "" + is_done: bool = False + finish_reason: str | None = None + output_tokens: int | None = None + + +@runtime_checkable +class ProviderBackend(Protocol): + """Transport-agnostic interface for LLM providers. + + Credentials are baked into the underlying SDK client at backend construction + time (see src/llm/registry.py), so these method signatures deliberately do + not accept api_key / api_base. 
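+
+    Illustrative use from an async context (the client handle and model name
+    are placeholders):
+
+        backend = AnthropicBackend(anthropic_client)
+        result = await backend.complete(
+            model="claude-sonnet-4",
+            messages=[{"role": "user", "content": "hi"}],
+            max_tokens=64,
+        )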
+ """ + + async def complete( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None = None, + stop: list[str] | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + thinking_budget_tokens: int | None = None, + thinking_effort: str | None = None, + max_output_tokens: int | None = None, + extra_params: dict[str, Any] | None = None, + ) -> CompletionResult: ... + + def stream( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None = None, + stop: list[str] | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + thinking_budget_tokens: int | None = None, + thinking_effort: str | None = None, + max_output_tokens: int | None = None, + extra_params: dict[str, Any] | None = None, + ) -> AsyncIterator[StreamChunk]: ... diff --git a/src/llm/backends/__init__.py b/src/llm/backends/__init__.py new file mode 100644 index 000000000..dfba81eca --- /dev/null +++ b/src/llm/backends/__init__.py @@ -0,0 +1,9 @@ +from .anthropic import AnthropicBackend +from .gemini import GeminiBackend +from .openai import OpenAIBackend + +__all__ = [ + "AnthropicBackend", + "GeminiBackend", + "OpenAIBackend", +] diff --git a/src/llm/backends/anthropic.py b/src/llm/backends/anthropic.py new file mode 100644 index 000000000..cdf775bef --- /dev/null +++ b/src/llm/backends/anthropic.py @@ -0,0 +1,347 @@ +from __future__ import annotations + +import copy +import json +from collections.abc import AsyncIterator +from typing import Any + +from anthropic.types import TextBlock, ThinkingBlock, ToolUseBlock +from pydantic import BaseModel, ValidationError + +from src.llm.backend import CompletionResult, StreamChunk, ToolCallResult +from src.llm.structured_output import repair_response_model_json + + +class AnthropicBackend: + """Provider backend wrapping the native Anthropic SDK.""" + + def __init__(self, client: Any) -> None: + self._client: Any = client + + async def complete( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None = None, + stop: list[str] | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + thinking_budget_tokens: int | None = None, + thinking_effort: str | None = None, + max_output_tokens: int | None = None, + extra_params: dict[str, Any] | None = None, + ) -> CompletionResult: + del max_output_tokens + if thinking_effort is not None: + raise ValueError( + "Anthropic backend does not support thinking_effort; use thinking_budget_tokens instead" + ) + + request_messages, system_messages = self._extract_system(messages) + params: dict[str, Any] = { + "model": model, + "max_tokens": max_tokens, + "messages": request_messages, + } + + if temperature is not None: + params["temperature"] = temperature + if stop: + params["stop_sequences"] = stop + if system_messages: + params["system"] = [ + { + "type": "text", + "text": "\n\n".join(system_messages), + "cache_control": {"type": "ephemeral"}, + } + ] + if tools: + params["tools"] = tools + converted_tool_choice = self._convert_tool_choice(tool_choice) + if converted_tool_choice is not None: + params["tool_choice"] = converted_tool_choice + if 
thinking_budget_tokens: + params["thinking"] = { + "type": "enabled", + "budget_tokens": thinking_budget_tokens, + } + if extra_params: + for key in ("top_p", "top_k"): + if key in extra_params: + params[key] = extra_params[key] + + use_json_prefill = ( + bool(response_format or self._json_mode(extra_params)) + and not thinking_budget_tokens + and self._supports_assistant_prefill(model) + ) + if use_json_prefill and params["messages"]: + if response_format and isinstance(response_format, type): + schema_json = json.dumps(response_format.model_json_schema(), indent=2) + self._append_text_to_last_message( + params["messages"], + f"\n\nRespond with valid JSON matching this schema:\n{schema_json}", + ) + params["messages"].append({"role": "assistant", "content": "{"}) + elif ( + response_format and isinstance(response_format, type) and params["messages"] + ): + schema_json = json.dumps(response_format.model_json_schema(), indent=2) + self._append_text_to_last_message( + params["messages"], + f"\n\nRespond with valid JSON matching this schema:\n{schema_json}", + ) + + response = await self._client.messages.create(**params) + return self._normalize_response( + response=response, + response_format=response_format + if isinstance(response_format, type) + else None, + prefilled_json=use_json_prefill, + model_name=model, + ) + + async def stream( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None = None, + stop: list[str] | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + thinking_budget_tokens: int | None = None, + thinking_effort: str | None = None, + max_output_tokens: int | None = None, + extra_params: dict[str, Any] | None = None, + ) -> AsyncIterator[StreamChunk]: + is_json_mode = self._json_mode(extra_params) + del max_output_tokens + if thinking_effort is not None: + raise ValueError( + "Anthropic backend does not support thinking_effort; use thinking_budget_tokens instead" + ) + + request_messages, system_messages = self._extract_system(messages) + params: dict[str, Any] = { + "model": model, + "max_tokens": max_tokens, + "messages": request_messages, + } + if temperature is not None: + params["temperature"] = temperature + if stop: + params["stop_sequences"] = stop + if tools: + params["tools"] = tools + converted_tool_choice = self._convert_tool_choice(tool_choice) + if converted_tool_choice is not None: + params["tool_choice"] = converted_tool_choice + if system_messages: + params["system"] = [ + { + "type": "text", + "text": "\n\n".join(system_messages), + "cache_control": {"type": "ephemeral"}, + } + ] + if extra_params: + for key in ("top_p", "top_k"): + if key in extra_params: + params[key] = extra_params[key] + use_json_prefill = ( + bool(response_format or is_json_mode) + and not thinking_budget_tokens + and self._supports_assistant_prefill(model) + ) + if use_json_prefill and params["messages"]: + if response_format and isinstance(response_format, type): + schema_json = json.dumps(response_format.model_json_schema(), indent=2) + self._append_text_to_last_message( + params["messages"], + f"\n\nRespond with valid JSON matching this schema:\n{schema_json}", + ) + params["messages"].append({"role": "assistant", "content": "{"}) + elif ( + response_format and isinstance(response_format, type) and params["messages"] + ): + schema_json = json.dumps(response_format.model_json_schema(), indent=2) + 
self._append_text_to_last_message( + params["messages"], + f"\n\nRespond with valid JSON matching this schema:\n{schema_json}", + ) + if thinking_budget_tokens: + params["thinking"] = { + "type": "enabled", + "budget_tokens": thinking_budget_tokens, + } + + async with self._client.messages.stream(**params) as stream: + async for chunk in stream: + if ( + chunk.type == "content_block_delta" + and hasattr(chunk, "delta") + and hasattr(chunk.delta, "text") + ): + yield StreamChunk(content=getattr(chunk.delta, "text", "")) + + final_message = await stream.get_final_message() + output_tokens = ( + final_message.usage.output_tokens if final_message.usage else None + ) + yield StreamChunk( + is_done=True, + finish_reason=final_message.stop_reason, + output_tokens=output_tokens, + ) + + def _normalize_response( + self, + *, + response: Any, + response_format: type[BaseModel] | None, + prefilled_json: bool, + model_name: str, + ) -> CompletionResult: + text_blocks: list[str] = [] + thinking_text_blocks: list[str] = [] + thinking_full_blocks: list[dict[str, Any]] = [] + tool_calls: list[ToolCallResult] = [] + + for block in response.content: + if isinstance(block, TextBlock): + text_blocks.append(block.text) + elif isinstance(block, ThinkingBlock): + thinking_text_blocks.append(block.thinking) + thinking_full_blocks.append( + { + "type": "thinking", + "thinking": block.thinking, + "signature": block.signature, + } + ) + elif isinstance(block, ToolUseBlock): + tool_calls.append( + ToolCallResult( + id=block.id, + name=block.name, + input=dict(block.input), + ) + ) + + usage = response.usage + cache_creation_tokens = ( + getattr(usage, "cache_creation_input_tokens", 0) or 0 if usage else 0 + ) + cache_read_tokens = ( + getattr(usage, "cache_read_input_tokens", 0) or 0 if usage else 0 + ) + uncached_tokens = usage.input_tokens if usage else 0 + total_input_tokens = uncached_tokens + cache_creation_tokens + cache_read_tokens + + text_content = "\n".join(text_blocks) + thinking_content = ( + "\n".join(thinking_text_blocks) if thinking_text_blocks else None + ) + + content: Any = text_content + if response_format is not None: + raw_content = f"{{{text_content}" if prefilled_json else text_content + try: + if prefilled_json: + parsed_json = json.loads(raw_content) + content = response_format.model_validate(parsed_json) + else: + content = response_format.model_validate_json(raw_content) + except (json.JSONDecodeError, ValidationError, ValueError): + content = repair_response_model_json( + raw_content, + response_format, + model_name, + ) + + return CompletionResult( + content=content, + input_tokens=total_input_tokens, + output_tokens=usage.output_tokens if usage else 0, + cache_creation_input_tokens=cache_creation_tokens, + cache_read_input_tokens=cache_read_tokens, + finish_reason=response.stop_reason or "stop", + tool_calls=tool_calls, + thinking_content=thinking_content, + thinking_blocks=thinking_full_blocks, + raw_response=response, + ) + + @staticmethod + def _supports_assistant_prefill(model: str) -> bool: + # Claude 4-class models reject assistant-prefill and require the + # conversation to end with a user message. 
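+        # ("Prefill" = seeding {"role": "assistant", "content": "{"} so the
+        # model continues a JSON object; see complete()/stream() above.)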
+ return not model.startswith( + ( + "claude-opus-4", + "claude-sonnet-4", + "claude-haiku-4", + ) + ) + + @staticmethod + def _extract_system( + messages: list[dict[str, Any]], + ) -> tuple[list[dict[str, Any]], list[str]]: + system_messages: list[str] = [] + non_system_messages: list[dict[str, Any]] = [] + for message in messages: + if message.get("role") == "system" and isinstance( + message.get("content"), + str, + ): + system_messages.append(message["content"]) + else: + non_system_messages.append(copy.deepcopy(message)) + return non_system_messages, system_messages + + @staticmethod + def _convert_tool_choice( + tool_choice: str | dict[str, Any] | None, + ) -> dict[str, Any] | None: + if tool_choice is None: + return None + if isinstance(tool_choice, dict): + return tool_choice + if tool_choice == "auto": + return {"type": "auto"} + if tool_choice in {"any", "required"}: + return {"type": "any"} + if tool_choice == "none": + return {"type": "none"} + return {"type": "tool", "name": tool_choice} + + @staticmethod + def _append_text_to_last_message( + messages: list[dict[str, Any]], suffix: str + ) -> None: + """Append text to the last message, handling both string and list content.""" + last = messages[-1] + content = last.get("content") + if isinstance(content, str): + last["content"] = content + suffix + elif isinstance(content, list): + # Content block list — append to the last text block or add one + blocks: list[dict[str, Any]] = content # pyright: ignore[reportUnknownVariableType] + for block in reversed(blocks): + if block.get("type") == "text": + block["text"] = block["text"] + suffix + return + blocks.append({"type": "text", "text": suffix}) + + @staticmethod + def _json_mode(extra_params: dict[str, Any] | None) -> bool: + return bool(extra_params and extra_params.get("json_mode")) diff --git a/src/llm/backends/gemini.py b/src/llm/backends/gemini.py new file mode 100644 index 000000000..b14cefe46 --- /dev/null +++ b/src/llm/backends/gemini.py @@ -0,0 +1,577 @@ +from __future__ import annotations + +from collections.abc import AsyncIterator +from datetime import datetime, timedelta, timezone +from typing import Any, ClassVar, cast + +from pydantic import BaseModel + +from src.exceptions import LLMError, ValidationException +from src.llm.backend import CompletionResult, StreamChunk, ToolCallResult +from src.llm.caching import ( + GeminiCacheHandle, + PromptCachePolicy, + build_cache_key, + gemini_cache_store, +) +from src.llm.structured_output import repair_response_model_json + +GEMINI_BLOCKED_FINISH_REASONS = { + "SAFETY", + "RECITATION", + "PROHIBITED_CONTENT", + "BLOCKLIST", +} + + +class GeminiBackend: + """Provider backend wrapping the Google GenAI SDK.""" + + def __init__(self, client: Any) -> None: + self._client: Any = client + + async def complete( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None = None, + stop: list[str] | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + thinking_budget_tokens: int | None = None, + thinking_effort: str | None = None, + max_output_tokens: int | None = None, + extra_params: dict[str, Any] | None = None, + ) -> CompletionResult: + contents, system_instruction = self._convert_messages(messages) + config = self._build_config( + max_tokens=max_output_tokens or max_tokens, + temperature=temperature, + stop=stop, + tools=tools, + tool_choice=tool_choice, + 
response_format=response_format, + thinking_budget_tokens=thinking_budget_tokens, + thinking_effort=thinking_effort, + extra_params=extra_params, + ) + if system_instruction: + config["system_instruction"] = system_instruction + + cache_policy = ( + extra_params.get("cache_policy") + if extra_params and "cache_policy" in extra_params + else None + ) + if isinstance(cache_policy, PromptCachePolicy) and isinstance(contents, list): + # Cache the history prefix; only the last turn is sent as new input. + cacheable = contents[:-1] if contents else [] + await self._attach_cached_content( + model=model, + config=config, + cache_policy=cache_policy, + contents=cacheable, + tools=tools, + ) + if "cached_content" in config and contents: + contents = contents[-1:] + + if isinstance(contents, list) and not contents: + raise LLMError( + "No non-system messages to send to Gemini", + provider="gemini", + model=model, + ) + + response = await self._client.aio.models.generate_content( + model=model, + contents=contents, + config=config or None, + ) + return self._normalize_response( + response=response, + response_format=response_format + if isinstance(response_format, type) + else None, + model_name=model, + ) + + async def stream( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None = None, + stop: list[str] | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + thinking_budget_tokens: int | None = None, + thinking_effort: str | None = None, + max_output_tokens: int | None = None, + extra_params: dict[str, Any] | None = None, + ) -> AsyncIterator[StreamChunk]: + contents, system_instruction = self._convert_messages(messages) + config = self._build_config( + max_tokens=max_output_tokens or max_tokens, + temperature=temperature, + stop=stop, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + thinking_budget_tokens=thinking_budget_tokens, + thinking_effort=thinking_effort, + extra_params=extra_params, + ) + if system_instruction: + config["system_instruction"] = system_instruction + + cache_policy = ( + extra_params.get("cache_policy") + if extra_params and "cache_policy" in extra_params + else None + ) + if isinstance(cache_policy, PromptCachePolicy) and isinstance(contents, list): + # Cache the history prefix; only the last turn is sent as new input. 
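+            # e.g. contents [u1, a1, u2] → cache the [u1, a1] prefix and send
+            # only [u2] as live input.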
+ cacheable = contents[:-1] if contents else [] + await self._attach_cached_content( + model=model, + config=config, + cache_policy=cache_policy, + contents=cacheable, + tools=tools, + ) + if "cached_content" in config and contents: + contents = contents[-1:] + + if isinstance(contents, list) and not contents: + raise LLMError( + "No non-system messages to send to Gemini", + provider="gemini", + model=model, + ) + + stream = await self._client.aio.models.generate_content_stream( + model=model, + contents=contents, + config=config or None, + ) + + final_chunk = None + any_text = False + async for chunk in stream: + if chunk.text: + any_text = True + yield StreamChunk(content=chunk.text) + final_chunk = chunk + + finish_reason = "stop" + output_tokens: int | None = None + if ( + final_chunk + and getattr(final_chunk, "candidates", None) + and final_chunk.candidates[0].finish_reason + ): + finish_reason = final_chunk.candidates[0].finish_reason.name + if ( + final_chunk + and getattr(final_chunk, "usage_metadata", None) + and getattr(final_chunk.usage_metadata, "candidates_token_count", None) + ): + output_tokens = final_chunk.usage_metadata.candidates_token_count or None + + # Mirror complete()'s behavior on SAFETY / RECITATION / etc. — if + # Gemini blocked the response and produced no usable text, raise + # LLMError rather than silently yielding a terminal chunk carrying + # the blocked finish_reason. Downstream callers should get a clean + # exception and a chance to retry / fall back. + if not any_text and finish_reason in GEMINI_BLOCKED_FINISH_REASONS: + raise LLMError( + f"Gemini response blocked (finish_reason={finish_reason})", + provider="gemini", + model=model, + finish_reason=finish_reason, + ) + + yield StreamChunk( + is_done=True, + finish_reason=finish_reason, + output_tokens=output_tokens, + ) + + def _build_config( + self, + *, + max_tokens: int, + temperature: float | None, + stop: list[str] | None, + tools: list[dict[str, Any]] | None, + tool_choice: str | dict[str, Any] | None, + response_format: type[BaseModel] | dict[str, Any] | None, + thinking_budget_tokens: int | None, + thinking_effort: str | None, + extra_params: dict[str, Any] | None, + ) -> dict[str, Any]: + config: dict[str, Any] = { + "max_output_tokens": max_tokens, + } + if temperature is not None: + config["temperature"] = temperature + if stop: + config["stop_sequences"] = stop + if tools: + config["tools"] = self._convert_tools(tools) + if tool_choice: + config["tool_config"] = self._convert_tool_choice(tool_choice) + if response_format is not None: + config["response_mime_type"] = "application/json" + config["response_schema"] = response_format + elif extra_params and extra_params.get("json_mode") and not tools: + config["response_mime_type"] = "application/json" + thinking_config: dict[str, Any] = {} + if thinking_budget_tokens is not None: + thinking_config["thinking_budget"] = thinking_budget_tokens + if thinking_effort is not None: + thinking_config["thinking_level"] = thinking_effort + if len(thinking_config) > 1: + raise ValidationException( + "Gemini backend does not support sending both thinking_budget_tokens and thinking_effort in the same request" + ) + if thinking_config: + config["thinking_config"] = thinking_config + for key in ("top_p", "top_k", "frequency_penalty", "presence_penalty", "seed"): + if extra_params and key in extra_params: + config[key] = extra_params[key] + return config + + def _normalize_response( + self, + *, + response: Any, + response_format: type[BaseModel] | None, + 
model_name: str, + ) -> CompletionResult: + candidate = response.candidates[0] if response.candidates else None + finish_reason = ( + candidate.finish_reason.name + if candidate is not None and candidate.finish_reason + else "stop" + ) + + text_parts: list[str] = [] + tool_calls: list[ToolCallResult] = [] + candidate_parts = ( + cast(list[Any] | None, getattr(candidate.content, "parts", None)) + if candidate is not None and getattr(candidate, "content", None) + else None + ) + if isinstance(candidate_parts, list): + for part in candidate_parts: + part_text = getattr(part, "text", None) + if isinstance(part_text, str) and part_text: + text_parts.append(part_text) + function_call = getattr(part, "function_call", None) + if function_call is not None: + function_name = getattr(function_call, "name", None) + function_args = getattr(function_call, "args", None) + if not isinstance(function_name, str): + continue + tool_calls.append( + ToolCallResult( + id=f"call_{function_name}_{len(tool_calls)}", + name=function_name, + input=dict(cast(dict[str, Any], function_args)) + if function_args + else {}, + thought_signature=getattr(part, "thought_signature", None), + ) + ) + response_text = getattr(response, "text", None) + if not text_parts and isinstance(response_text, str) and response_text: + text_parts.append(response_text) + response_function_calls = cast( + list[Any] | None, + getattr(response, "function_calls", None), + ) + if not tool_calls and isinstance(response_function_calls, list): + for function_call in response_function_calls: + function_name = getattr(function_call, "name", None) + function_args = getattr(function_call, "args", None) + if not isinstance(function_name, str): + continue + tool_calls.append( + ToolCallResult( + id=f"call_{function_name}_{len(tool_calls)}", + name=function_name, + input=dict(cast(dict[str, Any], function_args)) + if function_args + else {}, + ) + ) + + content: Any = "\n".join(text_parts) if text_parts else "" + if response_format is not None: + parsed_response = getattr(response, "parsed", None) + if isinstance(parsed_response, response_format): + content = parsed_response + elif isinstance(parsed_response, dict): + content = response_format.model_validate(parsed_response) + elif isinstance(parsed_response, str): + content = response_format.model_validate_json(parsed_response) + else: + if finish_reason in GEMINI_BLOCKED_FINISH_REASONS: + raise LLMError( + f"Gemini response blocked (finish_reason={finish_reason})", + provider="gemini", + model=model_name, + finish_reason=finish_reason, + ) + raw_text = "".join(text_parts) + content = repair_response_model_json( + raw_text, + response_format, + model_name, + ) + elif ( + not content + and not tool_calls + and finish_reason in GEMINI_BLOCKED_FINISH_REASONS + ): + raise LLMError( + f"Gemini response blocked (finish_reason={finish_reason})", + provider="gemini", + model=model_name, + finish_reason=finish_reason, + ) + + usage = response.usage_metadata + cache_read_input_tokens = 0 + if usage is not None: + cached_tokens = getattr(usage, "cached_content_token_count", 0) + if isinstance(cached_tokens, int): + cache_read_input_tokens = cached_tokens + return CompletionResult( + content=content, + input_tokens=usage.prompt_token_count if usage else 0, + output_tokens=usage.candidates_token_count if usage else 0, + cache_read_input_tokens=cache_read_input_tokens, + finish_reason=finish_reason, + tool_calls=tool_calls, + raw_response=response, + ) + + async def _attach_cached_content( + self, + *, + model: str, + 
config: dict[str, Any], + cache_policy: PromptCachePolicy, + contents: list[dict[str, Any]], + tools: list[dict[str, Any]] | None, + ) -> None: + if cache_policy.mode != "gemini_cached_content": + return + # Worth caching if there are history messages, system instruction, or tools + has_cacheable = bool( + contents or config.get("system_instruction") or config.get("tools") + ) + if not has_cacheable: + return + + cache_key = build_cache_key( + config=self._cache_model_config(model), + cache_policy=cache_policy, + cacheable_messages=contents, + tools=tools, + system_instruction=config.get("system_instruction"), + tool_config=config.get("tool_config"), + ) + cached_handle = gemini_cache_store.get(cache_key) + if cached_handle is None: + ttl_seconds = cache_policy.ttl_seconds or 300 + cache_config: dict[str, Any] = { + "system_instruction": config.get("system_instruction"), + "tools": config.get("tools"), + "tool_config": config.get("tool_config"), + "ttl": f"{ttl_seconds}s", + } + if contents: + cache_config["contents"] = contents + cached_content = await self._client.aio.caches.create( + model=model, + config=cache_config, + ) + expires_at = getattr(cached_content, "expire_time", None) + if expires_at is None: + expires_at = datetime.now(timezone.utc) + timedelta(seconds=ttl_seconds) + cached_handle = gemini_cache_store.set( + GeminiCacheHandle( + key=cache_key, + cached_content_name=cached_content.name, + expires_at=expires_at, + ) + ) + # Once a cached-content handle is attached, Gemini rejects repeating + # system/tool configuration on the generate call. + config.pop("system_instruction", None) + config.pop("tools", None) + config.pop("tool_config", None) + config["cached_content"] = cached_handle.cached_content_name + + @staticmethod + def _cache_model_config(model: str): + from src.config import ModelConfig + + return ModelConfig(transport="gemini", model=model) + + @staticmethod + def _convert_messages( + messages: list[dict[str, Any]], + ) -> tuple[list[dict[str, Any]] | str, str | None]: + system_messages: list[str] = [] + contents: list[dict[str, Any]] = [] + + for message in messages: + role = message.get("role", "user") + if role == "system": + if isinstance(message.get("content"), str): + system_messages.append(message["content"]) + continue + + if role == "assistant": + role = "model" + + if isinstance(message.get("parts"), list): + message_copy = message.copy() + message_copy["role"] = role + contents.append(message_copy) + continue + + if isinstance(message.get("content"), str): + contents.append({"role": role, "parts": [{"text": message["content"]}]}) + continue + + if isinstance(message.get("content"), list): + parts: list[dict[str, Any]] = [] + for block in message["content"]: + block_type = block.get("type") + if block_type == "text": + parts.append({"text": block["text"]}) + else: + # Silently dropping non-"text" blocks would mask real + # input-shape bugs — e.g., an Anthropic-shaped + # tool_use/tool_result payload accidentally routed to + # the Gemini backend without going through the + # history adapter. Fail fast so the caller knows. 
+ raise ValidationException( + "Gemini backend cannot translate content block " + + f"of type {block_type!r}; translate to " + + "Gemini-native 'parts' via the history adapter " + + "before passing to the backend" + ) + if parts: + contents.append({"role": role, "parts": parts}) + + system_instruction = "\n\n".join(system_messages) if system_messages else None + return contents, system_instruction + + @staticmethod + def _convert_tools(tools: list[dict[str, Any]]) -> list[dict[str, Any]]: + if tools and "function_declarations" in tools[0]: + return tools + return [ + { + "function_declarations": [ + { + "name": tool["name"], + "description": tool["description"], + "parameters": GeminiBackend._sanitize_schema( + tool["input_schema"] + ), + } + for tool in tools + ] + } + ] + + # JSON-Schema keywords Gemini's function_declarations validator accepts. + # See https://ai.google.dev/api/caching#Schema. Anything outside this set + # (e.g. additionalProperties, allOf, if/then/else, $ref, anyOf, oneOf, + # patternProperties) triggers an INVALID_ARGUMENT 400 at call time, so we + # strip on the way out. Other backends keep the richer schema. + _GEMINI_ALLOWED_SCHEMA_KEYS: ClassVar[frozenset[str]] = frozenset( + { + "type", + "format", + "description", + "nullable", + "enum", + "properties", + "required", + "items", + "minItems", + "maxItems", + "minimum", + "maximum", + "title", + } + ) + + @staticmethod + def _sanitize_schema(schema: Any) -> Any: + """Recursively strip JSON-Schema keywords Gemini rejects. + + ``properties`` holds user-supplied field names → sub-schemas, so we + recurse into its values but preserve its keys. ``required`` and + ``enum`` are lists of literals (field names / allowed values) and are + passed through verbatim. Everything else is a scalar schema keyword. 
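+
+        Illustrative:
+
+            {"type": "object", "additionalProperties": False,
+             "properties": {"q": {"type": "string", "pattern": "^a"}}}
+
+        sanitizes to:
+
+            {"type": "object", "properties": {"q": {"type": "string"}}}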
+ """ + if not isinstance(schema, dict): + return schema + schema_dict = cast(dict[str, Any], schema) + cleaned: dict[str, Any] = {} + for key, value in schema_dict.items(): + if key not in GeminiBackend._GEMINI_ALLOWED_SCHEMA_KEYS: + continue + if key == "properties" and isinstance(value, dict): + cleaned["properties"] = { + prop_name: GeminiBackend._sanitize_schema(prop_schema) + for prop_name, prop_schema in cast(dict[str, Any], value).items() + } + elif key == "items": + cleaned["items"] = GeminiBackend._sanitize_schema(value) + elif key == "required" and isinstance(value, list): + cleaned["required"] = list(cast(list[Any], value)) + elif key == "enum" and isinstance(value, list): + cleaned["enum"] = list(cast(list[Any], value)) + else: + cleaned[key] = value + return cleaned + + @staticmethod + def _convert_tool_choice( + tool_choice: str | dict[str, Any], + ) -> dict[str, Any]: + if isinstance(tool_choice, dict) and "name" in tool_choice: + return { + "function_calling_config": { + "mode": "ANY", + "allowed_function_names": [tool_choice["name"]], + } + } + if tool_choice == "auto": + return {"function_calling_config": {"mode": "AUTO"}} + if tool_choice in {"any", "required"}: + return {"function_calling_config": {"mode": "ANY"}} + if tool_choice == "none": + return {"function_calling_config": {"mode": "NONE"}} + return { + "function_calling_config": { + "mode": "ANY", + "allowed_function_names": [tool_choice], + } + } diff --git a/src/llm/backends/openai.py b/src/llm/backends/openai.py new file mode 100644 index 000000000..1e01e78a2 --- /dev/null +++ b/src/llm/backends/openai.py @@ -0,0 +1,427 @@ +from __future__ import annotations + +import json +import logging +from collections.abc import AsyncIterator +from typing import Any, cast + +from openai import BadRequestError, LengthFinishReasonError +from pydantic import BaseModel, ValidationError + +from src.exceptions import ValidationException +from src.llm.backend import CompletionResult, StreamChunk, ToolCallResult +from src.llm.structured_output import ( + repair_response_model_json, + validate_structured_output, +) + +logger = logging.getLogger(__name__) + + +def _uses_max_completion_tokens(model: str) -> bool: + """OpenAI reasoning models (gpt-5 family + o-series) require + ``max_completion_tokens`` instead of the classic ``max_tokens`` parameter. + + Matches: gpt-5, gpt-5-anything, gpt-5.anything (incl. gpt-5.4, gpt-5.4-mini), + o1*, o3*, o4*. Anything else (gpt-4.x, gpt-4o, chat models on proxies) + stays on ``max_tokens``. 
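+
+    Illustrative:
+
+        >>> _uses_max_completion_tokens("gpt-5.4-mini")
+        True
+        >>> _uses_max_completion_tokens("o3-mini")
+        True
+        >>> _uses_max_completion_tokens("gpt-4o")
+        False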
+ """ + m = model.lower() + if m == "gpt-5" or m.startswith("gpt-5-") or m.startswith("gpt-5."): + return True + for prefix in ("o1", "o3", "o4"): + if m == prefix or m.startswith(prefix + "-"): + return True + return False + + +def extract_openai_reasoning_content(response: Any) -> str | None: + try: + message = response.choices[0].message + if hasattr(message, "reasoning_details") and message.reasoning_details: + reasoning_parts: list[str] = [] + for detail in message.reasoning_details: + detail_content = getattr(detail, "content", None) + if isinstance(detail_content, str) and detail_content: + reasoning_parts.append(detail_content) + elif isinstance(detail, dict): + detail_dict = cast(dict[str, Any], detail) + dict_content = detail_dict.get("content") + if isinstance(dict_content, str) and dict_content: + reasoning_parts.append(dict_content) + if reasoning_parts: + return "\n".join(reasoning_parts) + if hasattr(message, "reasoning_content") and message.reasoning_content: + return message.reasoning_content + except (AttributeError, IndexError, TypeError): + return None + return None + + +def extract_openai_reasoning_details(response: Any) -> list[dict[str, Any]]: + try: + message = response.choices[0].message + if hasattr(message, "reasoning_details") and message.reasoning_details: + details: list[dict[str, Any]] = [] + for detail in message.reasoning_details: + if hasattr(detail, "model_dump"): + dumped = detail.model_dump() + if isinstance(dumped, dict): + details.append(cast(dict[str, Any], dumped)) + elif isinstance(detail, dict): + details.append(cast(dict[str, Any], detail)) + else: + detail_content = getattr(detail, "content", None) + if isinstance(detail_content, str) and detail_content: + details.append({"content": detail_content}) + return details + except (AttributeError, IndexError, TypeError): + return [] + return [] + + +def extract_openai_cache_tokens(usage: Any) -> tuple[int, int]: + if not usage: + return 0, 0 + + cache_read = 0 + if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details: + details = usage.prompt_tokens_details + if hasattr(details, "cached_tokens") and details.cached_tokens: + cache_read = details.cached_tokens + + if cache_read == 0: + if hasattr(usage, "cache_read_input_tokens") and usage.cache_read_input_tokens: + cache_read = usage.cache_read_input_tokens + elif hasattr(usage, "cached_tokens") and usage.cached_tokens: + cache_read = usage.cached_tokens + + cache_creation = 0 + if ( + hasattr(usage, "cache_creation_input_tokens") + and usage.cache_creation_input_tokens + ): + cache_creation = usage.cache_creation_input_tokens + + return cache_creation, cache_read + + +class OpenAIBackend: + """Provider backend wrapping AsyncOpenAI.""" + + def __init__(self, client: Any) -> None: + self._client: Any = client + + async def complete( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None = None, + stop: list[str] | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + thinking_budget_tokens: int | None = None, + thinking_effort: str | None = None, + max_output_tokens: int | None = None, + extra_params: dict[str, Any] | None = None, + ) -> CompletionResult: + if thinking_budget_tokens is not None: + raise ValidationException( + "OpenAI backend does not support thinking_budget_tokens; use thinking_effort instead" + ) + + params = self._build_params( + model=model, 
+ messages=messages, + max_tokens=max_output_tokens or max_tokens, + temperature=temperature, + stop=stop, + tools=tools, + tool_choice=tool_choice, + thinking_effort=thinking_effort, + extra_params=extra_params, + ) + + if isinstance(response_format, type): + params["response_format"] = response_format + try: + response = await self._client.chat.completions.parse(**params) + except LengthFinishReasonError as exc: + truncated = exc.completion + raw_content = truncated.choices[0].message.content or "" + content = repair_response_model_json( + raw_content, + response_format, + model, + ) + return self._normalize_response( + truncated, + content_override=content, + ) + except (BadRequestError, json.JSONDecodeError, ValidationError): + fallback_response = await self._create_structured_response( + params=params, + response_format=response_format, + ) + content = self._parse_or_repair_structured_content( + fallback_response, + response_format, + model, + ) + return self._normalize_response( + fallback_response, + content_override=content, + ) + parsed = response.choices[0].message.parsed + raw_content = response.choices[0].message.content or "" + if parsed is None and raw_content: + content = repair_response_model_json( + raw_content, + response_format, + model, + ) + return self._normalize_response(response, content_override=content) + if parsed is None: + refusal = getattr(response.choices[0].message, "refusal", None) + if refusal: + return self._normalize_response( + response, + content_override=refusal, + ) + raise ValidationException("No parsed content in structured response") + return self._normalize_response( + response, + content_override=validate_structured_output(parsed, response_format), + ) + if response_format is not None: + params["response_format"] = response_format + + if extra_params and extra_params.get("json_mode"): + params["response_format"] = {"type": "json_object"} + + response = await self._client.chat.completions.create(**params) + return self._normalize_response(response) + + async def stream( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None = None, + stop: list[str] | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + thinking_budget_tokens: int | None = None, + thinking_effort: str | None = None, + max_output_tokens: int | None = None, + extra_params: dict[str, Any] | None = None, + ) -> AsyncIterator[StreamChunk]: + if thinking_budget_tokens is not None: + raise ValidationException( + "OpenAI backend does not support thinking_budget_tokens; use thinking_effort instead" + ) + + params = self._build_params( + model=model, + messages=messages, + max_tokens=max_output_tokens or max_tokens, + temperature=temperature, + stop=stop, + tools=tools, + tool_choice=tool_choice, + thinking_effort=thinking_effort, + extra_params=extra_params, + ) + params["stream"] = True + params["stream_options"] = {"include_usage": True} + if isinstance(response_format, type): + # parse() supports BaseModel types but streaming create() does not — + # convert to a json_schema dict so the streaming path works. 
+ params["response_format"] = { + "type": "json_schema", + "json_schema": { + "name": response_format.__name__, + "schema": response_format.model_json_schema(), + }, + } + elif response_format is not None: + params["response_format"] = response_format + elif extra_params and extra_params.get("json_mode"): + params["response_format"] = {"type": "json_object"} + + response_stream = await self._client.chat.completions.create(**params) + finish_reason: str | None = None + usage_chunk_received = False + async for chunk in response_stream: + if chunk.choices and chunk.choices[0].delta.content: + yield StreamChunk(content=chunk.choices[0].delta.content) + if chunk.choices and chunk.choices[0].finish_reason: + finish_reason = chunk.choices[0].finish_reason + if hasattr(chunk, "usage") and chunk.usage: + yield StreamChunk( + is_done=True, + finish_reason=finish_reason, + output_tokens=chunk.usage.completion_tokens, + ) + usage_chunk_received = True + + if not usage_chunk_received and finish_reason: + yield StreamChunk(is_done=True, finish_reason=finish_reason) + + def _build_params( + self, + *, + model: str, + messages: list[dict[str, Any]], + max_tokens: int, + temperature: float | None, + stop: list[str] | None, + tools: list[dict[str, Any]] | None, + tool_choice: str | dict[str, Any] | None, + thinking_effort: str | None, + extra_params: dict[str, Any] | None, + ) -> dict[str, Any]: + params: dict[str, Any] = { + "model": model, + "messages": messages, + } + + if _uses_max_completion_tokens(model): + params["max_completion_tokens"] = max_tokens + if extra_params and extra_params.get("verbosity"): + params["verbosity"] = extra_params["verbosity"] + else: + params["max_tokens"] = max_tokens + + if temperature is not None: + params["temperature"] = temperature + + if thinking_effort: + params["reasoning_effort"] = thinking_effort + + if stop: + params["stop"] = stop + if tools: + params["tools"] = self._convert_tools(tools) + if tool_choice is not None: + params["tool_choice"] = tool_choice + if extra_params: + for key in ( + "top_p", + "frequency_penalty", + "presence_penalty", + "seed", + ): + if key in extra_params: + params[key] = extra_params[key] + return params + + def _normalize_response( + self, + response: Any, + *, + content_override: Any | None = None, + ) -> CompletionResult: + usage = response.usage + finish_reason = response.choices[0].finish_reason + tool_calls: list[ToolCallResult] = [] + message = response.choices[0].message + if getattr(message, "tool_calls", None): + for tool_call in message.tool_calls: + tool_input: dict[str, Any] = {} + if tool_call.function.arguments: + try: + tool_input = json.loads(tool_call.function.arguments) + except (json.JSONDecodeError, TypeError) as exc: + # Don't log the raw arguments payload — LLM-generated + # tool calls can mirror user PII from the prompt into + # their arguments, and this runs at WARN level. 
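+                            # Leave tool_input as {} so the malformed call is
+                            # still surfaced in tool_calls rather than dropped.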
+ logger.warning( + "Malformed tool arguments for %s (id=%s): %s", + tool_call.function.name, + tool_call.id, + exc.__class__.__name__, + ) + tool_calls.append( + ToolCallResult( + id=tool_call.id, + name=tool_call.function.name, + input=tool_input, + ) + ) + + cache_creation, cache_read = extract_openai_cache_tokens(usage) + return CompletionResult( + content=content_override + if content_override is not None + else (message.content or ""), + input_tokens=usage.prompt_tokens if usage else 0, + output_tokens=usage.completion_tokens if usage else 0, + cache_creation_input_tokens=cache_creation, + cache_read_input_tokens=cache_read, + finish_reason=finish_reason or "stop", + tool_calls=tool_calls, + thinking_content=extract_openai_reasoning_content(response), + reasoning_details=extract_openai_reasoning_details(response), + raw_response=response, + ) + + async def _create_structured_response( + self, + *, + params: dict[str, Any], + response_format: type[BaseModel], + ) -> Any: + structured_params = dict(params) + structured_params["response_format"] = { + "type": "json_schema", + "json_schema": { + "name": response_format.__name__, + "schema": response_format.model_json_schema(), + }, + } + return await self._client.chat.completions.create(**structured_params) + + @staticmethod + def _parse_or_repair_structured_content( + response: Any, + response_format: type[BaseModel], + model: str, + ) -> BaseModel | str: + raw_content = response.choices[0].message.content or "" + if raw_content: + return repair_response_model_json(raw_content, response_format, model) + refusal = getattr(response.choices[0].message, "refusal", None) + if refusal: + return refusal + raise ValidationException( + "No raw content available for structured output repair" + ) + + @staticmethod + def _convert_tools(tools: list[dict[str, Any]]) -> list[dict[str, Any]]: + if not tools or tools[0].get("type") == "function": + return tools + # Tool schemas in src/utils/agent_tools.py use optional fields with + # defaults and don't declare additionalProperties: false. OpenAI's + # strict function-calling mode forbids both, so we intentionally + # don't set strict: True. Standard function calling on GPT-4.x / + # GPT-5 remains reliable, and this stays compatible with + # OpenAI-compatible proxies (OpenRouter, Together, vLLM, Ollama) + # whose strict-mode support is inconsistent. 
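+        # Shape change (illustrative): {"name": "f", "description": "...",
+        # "input_schema": {...}} → {"type": "function", "function": {...}}.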
+ return [ + { + "type": "function", + "function": { + "name": tool["name"], + "description": tool["description"], + "parameters": tool["input_schema"], + }, + } + for tool in tools + ] diff --git a/src/llm/caching.py b/src/llm/caching.py new file mode 100644 index 000000000..75020dceb --- /dev/null +++ b/src/llm/caching.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import hashlib +import json +from collections import OrderedDict +from datetime import datetime, timezone +from threading import Lock +from typing import Any + +from pydantic import BaseModel + +from src.config import ModelConfig, PromptCachePolicy + +__all__ = [ + "GeminiCacheHandle", + "InMemoryGeminiCacheStore", + "PromptCachePolicy", + "build_cache_key", + "gemini_cache_store", +] + + +class GeminiCacheHandle(BaseModel): + key: str + cached_content_name: str + expires_at: datetime + + +def build_cache_key( + *, + config: ModelConfig, + cache_policy: PromptCachePolicy, + cacheable_messages: list[dict[str, Any]], + tools: list[dict[str, Any]] | None, + system_instruction: str | None = None, + tool_config: dict[str, Any] | None = None, +) -> str: + """Deterministic key over the cacheable shape of a request. + + ``system_instruction`` and ``tool_config`` must be part of the key + because the provider's cached-content handle captures them at creation + time — two requests that differ only by system prompt or tool + constraints would otherwise hit the same cached handle and silently get + the wrong system prompt / tool policy. + """ + payload = { + "transport": config.transport, + "model": config.model, + "cache_policy": cache_policy.model_dump(mode="json"), + "messages": cacheable_messages, + "tools": tools, + "system_instruction": system_instruction, + "tool_config": tool_config, + } + encoded = json.dumps(payload, sort_keys=True, separators=(",", ":")) + digest = hashlib.sha256(encoded.encode("utf-8")).hexdigest() + return f"llm-cache:{cache_policy.key_version}:{digest}" + + +class InMemoryGeminiCacheStore: + """Best-effort local cache for Gemini cached-content handles. + + Uses LRU eviction with a max entry limit to prevent unbounded growth. + """ + + MAX_ENTRIES: int = 1024 + + def __init__(self) -> None: + self._handles: OrderedDict[str, GeminiCacheHandle] = OrderedDict() + self._lock: Lock = Lock() + + def get(self, key: str) -> GeminiCacheHandle | None: + with self._lock: + handle = self._handles.get(key) + if handle is None: + return None + if handle.expires_at <= datetime.now(timezone.utc): + self._handles.pop(key, None) + return None + self._handles.move_to_end(key) + return handle + + def set(self, handle: GeminiCacheHandle) -> GeminiCacheHandle: + with self._lock: + now = datetime.now(timezone.utc) + expired = [k for k, h in self._handles.items() if h.expires_at <= now] + for k in expired: + self._handles.pop(k, None) + if handle.key in self._handles: + self._handles.move_to_end(handle.key) + self._handles[handle.key] = handle + while len(self._handles) > self.MAX_ENTRIES: + self._handles.popitem(last=False) + return handle + + +gemini_cache_store = InMemoryGeminiCacheStore() diff --git a/src/llm/conversation.py b/src/llm/conversation.py new file mode 100644 index 000000000..4697b819e --- /dev/null +++ b/src/llm/conversation.py @@ -0,0 +1,185 @@ +"""Conversation-shaping helpers: token counting + tool-aware truncation. + +Moved out of src/utils/clients.py as part of the migration into src/llm/. +These are pure helpers with no orchestration dependencies. 
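+
+Illustrative use (``history`` stands in for the caller's message list):
+
+    fitted = truncate_messages_to_fit(history, max_tokens=8_000)
+    # Best-effort: the result may still exceed the limit when the system
+    # prompt alone, or a single tool_use/tool_result unit, is too large.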
+""" + +from __future__ import annotations + +import json +import logging +from typing import Any, cast + +from src.utils.tokens import estimate_tokens + +logger = logging.getLogger(__name__) + + +def count_message_tokens(messages: list[dict[str, Any]]) -> int: + """Count tokens in a list of messages using tiktoken.""" + total = 0 + for msg in messages: + content = msg.get("content", "") + if isinstance(content, str): + total += estimate_tokens(content) + elif isinstance(content, list): + # Anthropic-style content blocks + total += estimate_tokens(json.dumps(content)) + if "parts" in msg: + try: + total += estimate_tokens(json.dumps(msg["parts"])) + except TypeError: + # Non-JSON-serializable content (e.g. bytes) — estimate from repr. + total += estimate_tokens(str(msg["parts"])) + return total + + +def _is_tool_use_message(msg: dict[str, Any]) -> bool: + """Check if a message contains tool calls (any format). + + Recognizes: + - Anthropic: ``content`` is a list containing a ``{"type": "tool_use"}`` block. + - Gemini: ``parts`` is a list containing a ``{"function_call": …}`` entry. + - OpenAI: assistant message with a non-empty ``tool_calls`` field. + """ + content = msg.get("content") + if isinstance(content, list): + for block in cast(list[dict[str, Any]], content): + if block.get("type") == "tool_use": + return True + parts = msg.get("parts") + if isinstance(parts, list): + for part in cast(list[dict[str, Any]], parts): + if "function_call" in part: + return True + return bool(msg.get("tool_calls")) + + +def _is_tool_result_message(msg: dict[str, Any]) -> bool: + """Check if a message contains tool results (any format). + + Recognizes: + - Anthropic: ``content`` is a list containing a ``{"type": "tool_result"}`` block. + - Gemini: ``parts`` is a list containing a ``{"function_response": …}`` entry. + - OpenAI: message with ``role == "tool"``. + """ + content = msg.get("content") + if isinstance(content, list): + for block in cast(list[dict[str, Any]], content): + if block.get("type") == "tool_result": + return True + parts = msg.get("parts") + if isinstance(parts, list): + for part in cast(list[dict[str, Any]], parts): + if "function_response" in part: + return True + return msg.get("role") == "tool" + + +def _group_into_units( + messages: list[dict[str, Any]], +) -> list[list[dict[str, Any]]]: + """Group messages into logical conversation units. + + A unit is either: + - A tool_use message + ALL consecutive tool_result messages that follow + - A single non-tool message + + Keeps tool_use / tool_result pairs together so truncation never breaks + them apart. + """ + units: list[list[dict[str, Any]]] = [] + i = 0 + + while i < len(messages): + msg = messages[i] + + if _is_tool_use_message(msg): + j = i + 1 + while j < len(messages) and _is_tool_result_message(messages[j]): + j += 1 + unit = messages[i:j] + if len(unit) > 1: + units.append(unit) + i = j + else: + # Orphaned tool_use with no results — skip it. + logger.debug(f"Skipping orphaned tool_use at index {i}") + i += 1 + elif _is_tool_result_message(msg): + # Orphaned tool_result — skip it. + logger.debug(f"Skipping orphaned tool_result at index {i}") + i += 1 + else: + units.append([msg]) + i += 1 + + return units + + +def truncate_messages_to_fit( + messages: list[dict[str, Any]], + max_tokens: int, + preserve_system: bool = True, +) -> list[dict[str, Any]]: + """Truncate messages to fit within a token limit while maintaining valid structure. + + Strategy: + 1. 
Group messages into units (tool_use + results together, or single messages) + 2. Remove oldest units first to preserve recent context + 3. Units stay intact so tool_use/tool_result pairs are never broken + """ + current_tokens = count_message_tokens(messages) + if current_tokens <= max_tokens: + return messages + + logger.info(f"Truncating: {current_tokens} tokens exceeds {max_tokens} limit") + + system_messages: list[dict[str, Any]] = [] + conversation: list[dict[str, Any]] = [] + + for msg in messages: + if msg.get("role") == "system" and preserve_system: + system_messages.append(msg) + else: + conversation.append(msg) + + system_tokens = count_message_tokens(system_messages) + available_tokens = max_tokens - system_tokens + + if available_tokens <= 0: + logger.warning("System message exceeds max_input_tokens") + return messages + + units = _group_into_units(conversation) + + if not units: + logger.warning("No valid conversation units") + return system_messages + + # Drop oldest units until conversation fits, but keep at least one unit so + # we never erase the entire non-system conversation. + while len(units) > 1: + flat_messages = [m for unit in units for m in unit] + if count_message_tokens(flat_messages) <= available_tokens: + break + removed_unit = units.pop(0) + logger.debug( + "Dropping conversation unit with " + + f"{len(removed_unit)} messages " + + f"(~{count_message_tokens(removed_unit)} tokens)" + ) + + result = system_messages + [m for unit in units for m in unit] + result_tokens = count_message_tokens(result) + logger.info( + f"Truncation complete: {current_tokens} → {result_tokens} tokens " + + f"({len(messages)} → {len(result)} messages)" + ) + return result + + +__all__ = [ + "count_message_tokens", + "truncate_messages_to_fit", +] diff --git a/src/llm/credentials.py b/src/llm/credentials.py new file mode 100644 index 000000000..9b41e77d9 --- /dev/null +++ b/src/llm/credentials.py @@ -0,0 +1,25 @@ +from __future__ import annotations + +from src.config import ModelConfig, settings +from src.exceptions import ValidationException + + +def resolve_credentials(config: ModelConfig) -> dict[str, str | None]: + """Resolve credentials for the effective model transport.""" + + default_api_key = default_transport_api_key(config.transport) + return { + "api_key": config.api_key or default_api_key, + "api_base": config.base_url, + } + + +def default_transport_api_key(transport: str) -> str | None: + """Fall back to the global LLM API key for the matching transport.""" + if transport == "anthropic": + return settings.LLM.ANTHROPIC_API_KEY + if transport == "openai": + return settings.LLM.OPENAI_API_KEY + if transport == "gemini": + return settings.LLM.GEMINI_API_KEY + raise ValidationException(f"Unknown transport: {transport}") diff --git a/src/llm/executor.py b/src/llm/executor.py new file mode 100644 index 000000000..d96008af0 --- /dev/null +++ b/src/llm/executor.py @@ -0,0 +1,226 @@ +"""Single-call executor: the inner LLM-call path without tool-loop orchestration. + +`honcho_llm_call_inner` handles one backend call (complete or stream), building +the effective ModelConfig and delegating to request_builder. Result / stream +chunk types are bridged to the public Honcho* shapes here. 
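+
+A minimal tool-less sketch (the provider and model strings are illustrative,
+not endorsements of specific defaults)::
+
+    response = await honcho_llm_call_inner(
+        "anthropic",
+        "claude-sonnet-latest",
+        "Summarize the session in one line.",
+        512,
+    )
+    text, used = response.content, response.output_tokens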
+ +Used by: +- src/llm/api.py (the public entrypoint, for both tool-less and tool-enabled paths) +- src/llm/tool_loop.py (each iteration of the tool loop calls this) +""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Any, Literal, TypeVar, overload + +from pydantic import BaseModel + +from src.config import ModelConfig, ModelTransport + +from .backend import CompletionResult as BackendCompletionResult +from .backend import StreamChunk as BackendStreamChunk +from .backend import ToolCallResult +from .registry import CLIENTS, backend_for_provider +from .request_builder import execute_completion, execute_stream +from .runtime import effective_config_for_call +from .types import ( + HonchoLLMCallResponse, + HonchoLLMCallStreamChunk, + ProviderClient, + ReasoningEffortType, +) + +M = TypeVar("M", bound=BaseModel) + + +def _tool_call_result_to_dict(tool_call: ToolCallResult) -> dict[str, Any]: + result = { + "id": tool_call.id, + "name": tool_call.name, + "input": tool_call.input, + } + if tool_call.thought_signature is not None: + result["thought_signature"] = tool_call.thought_signature + return result + + +def completion_result_to_response( + result: BackendCompletionResult, +) -> HonchoLLMCallResponse[Any]: + return HonchoLLMCallResponse( + content=result.content, + input_tokens=result.input_tokens, + output_tokens=result.output_tokens, + cache_creation_input_tokens=result.cache_creation_input_tokens, + cache_read_input_tokens=result.cache_read_input_tokens, + finish_reasons=[result.finish_reason] if result.finish_reason else [], + tool_calls_made=[_tool_call_result_to_dict(tc) for tc in result.tool_calls], + thinking_content=result.thinking_content, + thinking_blocks=result.thinking_blocks, + reasoning_details=result.reasoning_details, + ) + + +def stream_chunk_to_response_chunk( + chunk: BackendStreamChunk, +) -> HonchoLLMCallStreamChunk: + return HonchoLLMCallStreamChunk( + content=chunk.content, + is_done=chunk.is_done, + finish_reasons=[chunk.finish_reason] if chunk.finish_reason else [], + output_tokens=chunk.output_tokens, + ) + + +@overload +async def honcho_llm_call_inner( + provider: ModelTransport, + model: str, + prompt: str, + max_tokens: int, + response_model: type[M], + json_mode: bool = False, + temperature: float | None = None, + stop_seqs: list[str] | None = None, + reasoning_effort: ReasoningEffortType = None, + verbosity: Literal["low", "medium", "high"] | None = None, + thinking_budget_tokens: int | None = None, + stream: Literal[False] = False, + client_override: ProviderClient | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + messages: list[dict[str, Any]] | None = None, + selected_config: ModelConfig | None = None, +) -> HonchoLLMCallResponse[M]: ... 
+ + +@overload +async def honcho_llm_call_inner( + provider: ModelTransport, + model: str, + prompt: str, + max_tokens: int, + response_model: None = None, + json_mode: bool = False, + temperature: float | None = None, + stop_seqs: list[str] | None = None, + reasoning_effort: ReasoningEffortType = None, + verbosity: Literal["low", "medium", "high"] | None = None, + thinking_budget_tokens: int | None = None, + stream: Literal[False] = False, + client_override: ProviderClient | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + messages: list[dict[str, Any]] | None = None, + selected_config: ModelConfig | None = None, +) -> HonchoLLMCallResponse[str]: ... + + +@overload +async def honcho_llm_call_inner( + provider: ModelTransport, + model: str, + prompt: str, + max_tokens: int, + response_model: type[BaseModel] | None = None, + json_mode: bool = False, + temperature: float | None = None, + stop_seqs: list[str] | None = None, + reasoning_effort: ReasoningEffortType = None, + verbosity: Literal["low", "medium", "high"] | None = None, + thinking_budget_tokens: int | None = None, + stream: Literal[True] = ..., + client_override: ProviderClient | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + messages: list[dict[str, Any]] | None = None, + selected_config: ModelConfig | None = None, +) -> AsyncIterator[HonchoLLMCallStreamChunk]: ... + + +async def honcho_llm_call_inner( + provider: ModelTransport, + model: str, + prompt: str, + max_tokens: int, + response_model: type[BaseModel] | None = None, + json_mode: bool = False, + temperature: float | None = None, + stop_seqs: list[str] | None = None, + reasoning_effort: ReasoningEffortType = None, + verbosity: Literal["low", "medium", "high"] | None = None, + thinking_budget_tokens: int | None = None, + stream: bool = False, + client_override: ProviderClient | None = None, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + messages: list[dict[str, Any]] | None = None, + selected_config: ModelConfig | None = None, +) -> HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk]: + """One backend call. No retry, no fallback, no tool loop. + + The outer src/llm/api.py `honcho_llm_call` handles retry + fallback + + tool orchestration on top of this. + """ + client = client_override or CLIENTS.get(provider) + if client is None: + raise ValueError(f"Missing client for {provider}") + + if messages is None: + messages = [{"role": "user", "content": prompt}] + + backend = backend_for_provider(provider, client) + + effective_config = effective_config_for_call( + selected_config=selected_config, + provider=provider, + model=model, + temperature=temperature, + stop_seqs=stop_seqs, + thinking_budget_tokens=thinking_budget_tokens, + reasoning_effort=reasoning_effort, + ) + # json_mode + verbosity are per-call transport toggles, not ModelConfig + # knobs — they pass through extra_params. execute_completion merges + # build_config_extra_params(effective_config) on top for top_p/seed/etc. 
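+    # (Merge order in request_builder: config-derived params are unpacked
+    # first, then these per-call extras, so per-call keys win on collision.)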
+ call_extras: dict[str, Any] = {"json_mode": json_mode, "verbosity": verbosity} + + if stream: + + async def _stream() -> AsyncIterator[HonchoLLMCallStreamChunk]: + stream_iter = await execute_stream( + backend, + effective_config, + messages=messages, + max_tokens=max_tokens, + tools=tools, + tool_choice=tool_choice, + response_format=response_model, + cache_policy=effective_config.cache_policy, + extra_params=call_extras, + ) + async for chunk in stream_iter: + yield stream_chunk_to_response_chunk(chunk) + + return _stream() + + result = await execute_completion( + backend, + effective_config, + messages=messages, + max_tokens=max_tokens, + tools=tools, + tool_choice=tool_choice, + response_format=response_model, + cache_policy=effective_config.cache_policy, + extra_params=call_extras, + ) + return completion_result_to_response(result) + + +__all__ = [ + "completion_result_to_response", + "honcho_llm_call_inner", + "stream_chunk_to_response_chunk", +] diff --git a/src/llm/history_adapters.py b/src/llm/history_adapters.py new file mode 100644 index 000000000..02d2ea057 --- /dev/null +++ b/src/llm/history_adapters.py @@ -0,0 +1,137 @@ +from __future__ import annotations + +import json +from typing import Any, Protocol + +from .backend import CompletionResult + + +class HistoryAdapter(Protocol): + def format_assistant_tool_message( + self, + result: CompletionResult, + ) -> dict[str, Any]: ... + + def format_tool_results( + self, + tool_results: list[dict[str, Any]], + ) -> list[dict[str, Any]]: ... + + +class AnthropicHistoryAdapter: + def format_assistant_tool_message( + self, + result: CompletionResult, + ) -> dict[str, Any]: + content_blocks: list[dict[str, Any]] = [] + if result.thinking_blocks: + content_blocks.extend(result.thinking_blocks) + if isinstance(result.content, str) and result.content: + content_blocks.append({"type": "text", "text": result.content}) + for tool_call in result.tool_calls: + content_blocks.append( + { + "type": "tool_use", + "id": tool_call.id, + "name": tool_call.name, + "input": tool_call.input, + } + ) + return {"role": "assistant", "content": content_blocks} + + def format_tool_results( + self, + tool_results: list[dict[str, Any]], + ) -> list[dict[str, Any]]: + return [ + { + "role": "user", + "content": [ + { + "type": "tool_result", + "tool_use_id": tr["tool_id"], + "content": str(tr["result"]), + "is_error": tr.get("is_error", False), + } + for tr in tool_results + ], + } + ] + + +class GeminiHistoryAdapter: + def format_assistant_tool_message( + self, + result: CompletionResult, + ) -> dict[str, Any]: + parts: list[dict[str, Any]] = [] + if isinstance(result.content, str) and result.content: + parts.append({"text": result.content}) + for tool_call in result.tool_calls: + part: dict[str, Any] = { + "function_call": { + "name": tool_call.name, + "args": tool_call.input, + } + } + if tool_call.thought_signature is not None: + part["thought_signature"] = tool_call.thought_signature + parts.append(part) + return {"role": "model", "parts": parts} + + def format_tool_results( + self, + tool_results: list[dict[str, Any]], + ) -> list[dict[str, Any]]: + return [ + { + "role": "user", + "parts": [ + { + "function_response": { + "name": tr["tool_name"], + "response": {"result": str(tr["result"])}, + } + } + for tr in tool_results + ], + } + ] + + +class OpenAIHistoryAdapter: + def format_assistant_tool_message( + self, + result: CompletionResult, + ) -> dict[str, Any]: + message: dict[str, Any] = { + "role": "assistant", + "content": result.content if 
isinstance(result.content, str) else None, + "tool_calls": [ + { + "id": tool_call.id, + "type": "function", + "function": { + "name": tool_call.name, + "arguments": json.dumps(tool_call.input), + }, + } + for tool_call in result.tool_calls + ], + } + if result.reasoning_details: + message["reasoning_details"] = result.reasoning_details + return message + + def format_tool_results( + self, + tool_results: list[dict[str, Any]], + ) -> list[dict[str, Any]]: + return [ + { + "role": "tool", + "tool_call_id": tr["tool_id"], + "content": str(tr["result"]), + } + for tr in tool_results + ] diff --git a/src/llm/registry.py b/src/llm/registry.py new file mode 100644 index 000000000..73cf60c8b --- /dev/null +++ b/src/llm/registry.py @@ -0,0 +1,185 @@ +"""Single owner of provider runtime objects: clients, backends, history adapters. + +Consolidates wiring that previously lived in both `src/llm/__init__.py` and +`src/utils/clients.py`. Everything that touches provider SDKs at runtime +(default client construction, override client caching, backend selection, +history adapter selection) lives here now. +""" + +from __future__ import annotations + +from functools import lru_cache +from typing import assert_never + +from anthropic import AsyncAnthropic +from google import genai +from google.genai import types as genai_types +from openai import AsyncOpenAI + +from src.config import ModelConfig, ModelTransport, settings +from src.exceptions import ValidationException + +from .backend import ProviderBackend +from .backends.anthropic import AnthropicBackend +from .backends.gemini import GeminiBackend +from .backends.openai import OpenAIBackend +from .credentials import default_transport_api_key +from .history_adapters import ( + AnthropicHistoryAdapter, + GeminiHistoryAdapter, + HistoryAdapter, + OpenAIHistoryAdapter, +) +from .types import ProviderClient + + +@lru_cache(maxsize=1) +def get_anthropic_client() -> AsyncAnthropic: + """Default Anthropic client built from settings.LLM.ANTHROPIC_API_KEY.""" + return AsyncAnthropic( + api_key=settings.LLM.ANTHROPIC_API_KEY, + timeout=600.0, + ) + + +@lru_cache(maxsize=1) +def get_openai_client() -> AsyncOpenAI: + """Default OpenAI client built from settings.LLM.OPENAI_API_KEY.""" + return AsyncOpenAI( + api_key=settings.LLM.OPENAI_API_KEY, + ) + + +@lru_cache(maxsize=1) +def get_gemini_client() -> genai.Client: + """Default Gemini client built from settings.LLM.GEMINI_API_KEY.""" + return genai.Client(api_key=settings.LLM.GEMINI_API_KEY) + + +# Bounded cache — in practice the (base_url, api_key) key space is small +# and process-scoped, but maxsize=128 keeps worst-case memory predictable. +@lru_cache(maxsize=128) +def get_openai_override_client( + base_url: str | None, api_key: str | None +) -> AsyncOpenAI: + """OpenAI client for a specific (base_url, api_key) pair. Cached by key.""" + return AsyncOpenAI(api_key=api_key, base_url=base_url) + + +@lru_cache(maxsize=128) +def get_anthropic_override_client( + base_url: str | None, + api_key: str | None, +) -> AsyncAnthropic: + """Anthropic client for a specific (base_url, api_key) pair. Cached by key.""" + return AsyncAnthropic(api_key=api_key, base_url=base_url, timeout=600.0) + + +@lru_cache(maxsize=128) +def get_gemini_override_client( + base_url: str | None, api_key: str | None +) -> genai.Client: + """Gemini client for a specific (base_url, api_key) pair. 
Cached by key.""" + http_options = genai_types.HttpOptions(base_url=base_url) if base_url else None + return genai.Client(api_key=api_key, http_options=http_options) + + +# Module-level default-client registry, populated at import time. Tests patch +# this dict via `patch.dict(CLIENTS, {...})` to inject mock provider clients. +CLIENTS: dict[ModelTransport, ProviderClient] = {} + +if settings.LLM.ANTHROPIC_API_KEY: + CLIENTS["anthropic"] = AsyncAnthropic( + api_key=settings.LLM.ANTHROPIC_API_KEY, + timeout=600.0, + ) + +if settings.LLM.OPENAI_API_KEY: + CLIENTS["openai"] = AsyncOpenAI( + api_key=settings.LLM.OPENAI_API_KEY, + ) + +if settings.LLM.GEMINI_API_KEY: + CLIENTS["gemini"] = genai.client.Client( + api_key=settings.LLM.GEMINI_API_KEY, + ) + + +def client_for_model_config( + provider: ModelTransport, + model_config: ModelConfig, +) -> ProviderClient: + """Resolve the provider client for a ModelConfig. + + Fast path: no overrides → reuse the module-level default client from + CLIENTS (the test-mockable seam). Otherwise route through the cached + override factories. + """ + if model_config.api_key is None and model_config.base_url is None: + existing_client = CLIENTS.get(provider) + if existing_client is not None: + return existing_client + + api_key = model_config.api_key or default_transport_api_key(provider) + base_url = model_config.base_url + if not api_key: + raise ValidationException(f"Missing API key for {provider} model config") + + if provider == "anthropic": + return get_anthropic_override_client(base_url, api_key) + if provider == "openai": + return get_openai_override_client(base_url, api_key) + if provider == "gemini": + return get_gemini_override_client(base_url, api_key) + assert_never(provider) + + +def backend_for_provider( + provider: ModelTransport, + client: ProviderClient, +) -> ProviderBackend: + """Wrap a raw provider SDK client in the matching ProviderBackend adapter.""" + if provider == "anthropic": + return AnthropicBackend(client) + if provider == "openai": + return OpenAIBackend(client) + if provider == "gemini": + return GeminiBackend(client) + assert_never(provider) + + +def history_adapter_for_provider(provider: ModelTransport) -> HistoryAdapter: + """Provider-appropriate HistoryAdapter for assistant/tool message formatting.""" + if provider == "anthropic": + return AnthropicHistoryAdapter() + if provider == "gemini": + return GeminiHistoryAdapter() + return OpenAIHistoryAdapter() + + +def get_backend(config: ModelConfig) -> ProviderBackend: + """High-level one-shot backend factory: ModelConfig → ProviderBackend. + + Delegates client resolution to ``client_for_model_config``, which owns + the CLIENTS fast-path and the missing-API-key validation. Both the + production path (via ``honcho_llm_call_inner``) and the live-test path + (via this function) now construct clients through the same helper, so + validation behavior stays consistent. 
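+
+    Example (assumes credentials are configured; model string hypothetical)::
+
+        backend = get_backend(ModelConfig(model="gpt-4.1-mini", transport="openai"))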
+ """ + client = client_for_model_config(config.transport, config) + return backend_for_provider(config.transport, client) + + +__all__ = [ + "CLIENTS", + "backend_for_provider", + "client_for_model_config", + "get_anthropic_client", + "get_anthropic_override_client", + "get_backend", + "get_gemini_client", + "get_gemini_override_client", + "get_openai_client", + "get_openai_override_client", + "history_adapter_for_provider", +] diff --git a/src/llm/request_builder.py b/src/llm/request_builder.py new file mode 100644 index 000000000..d6be5a22e --- /dev/null +++ b/src/llm/request_builder.py @@ -0,0 +1,119 @@ +"""Low-level request assembly: flatten a ModelConfig into backend calls. + +Does NOT own: retry, fallback, tool loop, provider selection. Those live in +src/llm/api.py, src/llm/tool_loop.py, src/llm/runtime.py. +""" + +from __future__ import annotations + +from collections.abc import AsyncIterator +from typing import Any + +from pydantic import BaseModel + +from src.config import ModelConfig, PromptCachePolicy + +from .backend import CompletionResult, ProviderBackend, StreamChunk + + +def build_config_extra_params(config: ModelConfig) -> dict[str, Any]: + """Flatten ModelConfig's optional knobs and provider_params into extra_params. + + Backends read per-call tuning parameters (top_p, top_k, frequency_penalty, + presence_penalty, seed) and the free-form provider_params passthrough out + of ``extra_params``. Single source of truth for that translation. + """ + extra_params: dict[str, Any] = {} + if config.top_p is not None: + extra_params["top_p"] = config.top_p + if config.top_k is not None: + extra_params["top_k"] = config.top_k + if config.frequency_penalty is not None: + extra_params["frequency_penalty"] = config.frequency_penalty + if config.presence_penalty is not None: + extra_params["presence_penalty"] = config.presence_penalty + if config.seed is not None: + extra_params["seed"] = config.seed + + if config.provider_params: + extra_params.update(config.provider_params) + + return extra_params + + +async def execute_completion( + backend: ProviderBackend, + config: ModelConfig, + *, + messages: list[dict[str, Any]], + max_tokens: int, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + stop: list[str] | None = None, + cache_policy: PromptCachePolicy | None = None, + extra_params: dict[str, Any] | None = None, +) -> CompletionResult: + # Preserve 0 as an explicit "disable thinking" value (used by Gemini); + # only convert to None when the field is truly unset. 
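+    # Note: `or` below treats an unset (None) or zero max_output_tokens as
+    # "fall back to the per-call max_tokens argument".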
+ effective_max_tokens = config.max_output_tokens or max_tokens + + merged_extra_params = { + **build_config_extra_params(config), + **(extra_params or {}), + } + if cache_policy is not None: + merged_extra_params["cache_policy"] = cache_policy + + return await backend.complete( + model=config.model, + messages=messages, + max_tokens=effective_max_tokens, + temperature=config.temperature, + stop=stop if stop is not None else config.stop_sequences, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + thinking_budget_tokens=config.thinking_budget_tokens, + thinking_effort=config.thinking_effort, + max_output_tokens=effective_max_tokens, + extra_params=merged_extra_params, + ) + + +async def execute_stream( + backend: ProviderBackend, + config: ModelConfig, + *, + messages: list[dict[str, Any]], + max_tokens: int, + tools: list[dict[str, Any]] | None = None, + tool_choice: str | dict[str, Any] | None = None, + response_format: type[BaseModel] | dict[str, Any] | None = None, + stop: list[str] | None = None, + cache_policy: PromptCachePolicy | None = None, + extra_params: dict[str, Any] | None = None, +) -> AsyncIterator[StreamChunk]: + effective_max_tokens = config.max_output_tokens or max_tokens + + merged_extra_params = { + **build_config_extra_params(config), + **(extra_params or {}), + } + if cache_policy is not None: + merged_extra_params["cache_policy"] = cache_policy + + return backend.stream( + model=config.model, + messages=messages, + max_tokens=effective_max_tokens, + temperature=config.temperature, + stop=stop if stop is not None else config.stop_sequences, + tools=tools, + tool_choice=tool_choice, + response_format=response_format, + thinking_budget_tokens=config.thinking_budget_tokens, + thinking_effort=config.thinking_effort, + max_output_tokens=effective_max_tokens, + extra_params=merged_extra_params, + ) diff --git a/src/llm/runtime.py b/src/llm/runtime.py new file mode 100644 index 000000000..27bc56d92 --- /dev/null +++ b/src/llm/runtime.py @@ -0,0 +1,207 @@ +"""Runtime config planning and retry/fallback selection. + +Owns: +- Resolution of ConfiguredModelSettings → ModelConfig. +- Per-attempt planning (AttemptPlan) including primary/fallback selection and + reasoning-effort/thinking-budget resolution. +- Per-call effective config construction (applying caller kwarg overrides onto + the selected ModelConfig). +- Retry attempt tracking via a ContextVar, plus the temperature-bump heuristic. +""" + +from __future__ import annotations + +import logging +from contextvars import ContextVar +from dataclasses import dataclass +from typing import Any + +from src.config import ( + ConfiguredModelSettings, + ModelConfig, + ModelTransport, + resolve_model_config, +) + +from .registry import backend_for_provider, client_for_model_config +from .types import ProviderClient, ReasoningEffortType + +logger = logging.getLogger(__name__) + +# ContextVar tracking the current retry attempt for provider switching. +current_attempt: ContextVar[int] = ContextVar("current_attempt", default=0) + + +@dataclass(frozen=True) +class AttemptPlan: + """Per-attempt plan produced by `plan_attempt`. + + Replaces the old loose tuple-of-six (`ProviderSelection`) with a single + dataclass. Carries everything the executor / tool loop needs to make one + backend call without re-resolving configuration mid-call. 
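+
+    Illustrative flow (``config`` and the retry budget are hypothetical)::
+
+        plan = plan_attempt(
+            runtime_model_config=config,
+            attempt=current_attempt.get(),
+            retry_attempts=3,
+            call_thinking_budget_tokens=None,
+            call_reasoning_effort=None,
+        )
+        backend = resolve_backend_for_plan(plan)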
+ """ + + provider: ModelTransport + model: str + client: ProviderClient + thinking_budget_tokens: int | None + reasoning_effort: ReasoningEffortType + selected_config: ModelConfig + + +def resolve_runtime_model_config( + model_config: ModelConfig | ConfiguredModelSettings, +) -> ModelConfig: + """Return a runtime ModelConfig, resolving settings-shape inputs if needed.""" + if isinstance(model_config, ModelConfig): + return model_config + return resolve_model_config(model_config) + + +def select_model_config_for_attempt( + model_config: ModelConfig, + *, + attempt: int, + retry_attempts: int, +) -> ModelConfig: + """Pick the effective config for this attempt. + + Primary config on all attempts except the last, which swaps to the + resolved fallback (if any). + """ + if attempt != retry_attempts or model_config.fallback is None: + return model_config + + fb = model_config.fallback + return ModelConfig( + model=fb.model, + transport=fb.transport, + fallback=None, + api_key=fb.api_key, + base_url=fb.base_url, + temperature=fb.temperature, + top_p=fb.top_p, + top_k=fb.top_k, + frequency_penalty=fb.frequency_penalty, + presence_penalty=fb.presence_penalty, + seed=fb.seed, + thinking_effort=fb.thinking_effort, + thinking_budget_tokens=fb.thinking_budget_tokens, + provider_params=fb.provider_params, + max_output_tokens=fb.max_output_tokens, + stop_sequences=fb.stop_sequences, + cache_policy=fb.cache_policy, + ) + + +def plan_attempt( + *, + runtime_model_config: ModelConfig, + attempt: int, + retry_attempts: int, + call_thinking_budget_tokens: int | None, + call_reasoning_effort: ReasoningEffortType, +) -> AttemptPlan: + """Build the AttemptPlan for `attempt`. + + Reasoning params are drawn from the caller when we're still on the + primary config, and from the fallback config otherwise, so cross-transport + fallbacks use provider-appropriate params. + """ + selected = select_model_config_for_attempt( + runtime_model_config, + attempt=attempt, + retry_attempts=retry_attempts, + ) + provider = selected.transport + client = client_for_model_config(provider, selected) + + is_primary = selected is runtime_model_config + attempt_thinking_budget = ( + call_thinking_budget_tokens if is_primary else selected.thinking_budget_tokens + ) + attempt_reasoning_effort: ReasoningEffortType = ( + call_reasoning_effort if is_primary else selected.thinking_effort + ) + + if attempt == retry_attempts and runtime_model_config.fallback is not None: + logger.warning( + f"Final retry attempt {attempt}/{retry_attempts}: switching from " + + f"{runtime_model_config.transport}/{runtime_model_config.model} to " + + f"backup {provider}/{selected.model}" + ) + + return AttemptPlan( + provider=provider, + model=selected.model, + client=client, + thinking_budget_tokens=attempt_thinking_budget, + reasoning_effort=attempt_reasoning_effort, + selected_config=selected, + ) + + +def effective_config_for_call( + *, + selected_config: ModelConfig | None, + provider: ModelTransport, + model: str, + temperature: float | None, + stop_seqs: list[str] | None, + thinking_budget_tokens: int | None, + reasoning_effort: ReasoningEffortType, +) -> ModelConfig: + """Build the ModelConfig passed to the executor / request_builder. + + Per-call kwargs (temperature, stop_seqs, thinking_*) win when set; otherwise + the selected_config's values are used. When selected_config is None + (test-only callers passing provider+model directly) a minimal ModelConfig + is synthesized. 
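+
+    For example (values hypothetical): passing ``temperature=0.0`` over a
+    config whose temperature is 0.7 yields an effective 0.0, while passing
+    ``None`` keeps the config's 0.7.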
+ + max_output_tokens is forced to None so the per-call max_tokens kwarg is + authoritative — matching historical honcho_llm_call_inner behavior. + """ + if selected_config is None: + return ModelConfig( + model=model, + transport=provider, + temperature=temperature, + stop_sequences=stop_seqs, + thinking_budget_tokens=thinking_budget_tokens, + thinking_effort=reasoning_effort, + ) + updates: dict[str, Any] = {"max_output_tokens": None} + if temperature is not None: + updates["temperature"] = temperature + if stop_seqs is not None: + updates["stop_sequences"] = stop_seqs + if thinking_budget_tokens is not None: + updates["thinking_budget_tokens"] = thinking_budget_tokens + if reasoning_effort is not None: + updates["thinking_effort"] = reasoning_effort + return selected_config.model_copy(update=updates) + + +def effective_temperature(temperature: float | None) -> float | None: + """Bump temperature from 0.0 → 0.2 on retry attempts for variety.""" + if temperature == 0.0 and current_attempt.get() > 1: + logger.debug("Bumping temperature from 0.0 to 0.2 on retry") + return 0.2 + return temperature + + +def resolve_backend_for_plan(plan: AttemptPlan) -> Any: + """Convenience helper: plan → ready-to-call ProviderBackend.""" + return backend_for_provider(plan.provider, plan.client) + + +__all__ = [ + "AttemptPlan", + "current_attempt", + "effective_config_for_call", + "effective_temperature", + "plan_attempt", + "resolve_backend_for_plan", + "resolve_runtime_model_config", + "select_model_config_for_attempt", +] diff --git a/src/llm/structured_output.py b/src/llm/structured_output.py new file mode 100644 index 000000000..76c0690ad --- /dev/null +++ b/src/llm/structured_output.py @@ -0,0 +1,132 @@ +from __future__ import annotations + +import json +from collections.abc import Awaitable, Callable +from typing import Literal + +from pydantic import BaseModel, ValidationError + +from src.utils.json_parser import validate_and_repair_json +from src.utils.representation import PromptRepresentation + +from .backend import CompletionResult + +StructuredOutputFailurePolicy = Literal[ + "raise", + "repair_then_raise", + "repair_then_empty", +] + + +class StructuredOutputError(ValueError): + """Raised when structured output cannot be validated or repaired.""" + + +def repair_response_model_json( + raw_content: str, + response_model: type[BaseModel], + _model: str, +) -> BaseModel: + """Repair truncated or malformed JSON and validate against the response model.""" + + try: + final = validate_and_repair_json(raw_content) + repaired_data = json.loads(final) + + if ( + response_model is PromptRepresentation + and "deductive" in repaired_data + and isinstance(repaired_data["deductive"], list) + ): + for item in repaired_data["deductive"]: + if isinstance(item, dict): + if "conclusion" not in item and "premises" in item: + if item["premises"]: + item["conclusion"] = ( + f"[Incomplete reasoning from premises: {item['premises'][0][:100]}...]" + ) + else: + item["conclusion"] = ( + "[Incomplete reasoning - conclusion missing]" + ) + if "premises" not in item: + item["premises"] = [] + + final = json.dumps(repaired_data) + except (json.JSONDecodeError, KeyError, TypeError, ValueError): + final = "" + + try: + return response_model.model_validate_json(final) + except ValidationError: + if response_model is PromptRepresentation: + return PromptRepresentation(explicit=[]) + raise + + +def validate_structured_output( + content: object, + response_model: type[BaseModel], +) -> BaseModel: + if isinstance(content, 
response_model): + return content + if isinstance(content, str): + return response_model.model_validate_json(content) + if isinstance(content, dict): + return response_model.model_validate(content) + raise StructuredOutputError( + f"Unsupported structured output payload: {type(content).__name__}" + ) + + +def attempt_structured_output_repair( + content: object, + response_model: type[BaseModel], + model: str, +) -> BaseModel | None: + if not isinstance(content, str): + return None + try: + return repair_response_model_json(content, response_model, model) + except (StructuredOutputError, ValidationError): + return None + + +def empty_structured_output(response_model: type[BaseModel]) -> BaseModel: + if response_model is PromptRepresentation: + return PromptRepresentation(explicit=[]) + return response_model.model_validate({}) + + +async def execute_structured_output_call( + executor: Callable[[], Awaitable[CompletionResult]], + *, + response_model: type[BaseModel], + model_name: str, + failure_policy: StructuredOutputFailurePolicy = "repair_then_raise", +) -> CompletionResult: + result = await executor() + + try: + result.content = validate_structured_output(result.content, response_model) + return result + except (StructuredOutputError, ValidationError): + if failure_policy == "raise": + raise + + repaired = attempt_structured_output_repair( + result.content, + response_model, + model_name, + ) + if repaired is not None: + result.content = repaired + return result + + if failure_policy == "repair_then_empty": + result.content = empty_structured_output(response_model) + return result + + raise StructuredOutputError( + f"Failed to produce valid structured output for {model_name}" + ) diff --git a/src/llm/tool_loop.py b/src/llm/tool_loop.py new file mode 100644 index 000000000..2db87e9a6 --- /dev/null +++ b/src/llm/tool_loop.py @@ -0,0 +1,491 @@ +"""Agentic/tool orchestration — the multi-iteration tool execution loop. + +`execute_tool_loop` owns: +- initial tool-enabled call +- tool execution +- conversation augmentation with assistant messages + tool results +- max-iteration handling and synthesis call +- stream-final-only mode +- empty-response retry (one retry nudge when the model returns empty content) +""" + +from __future__ import annotations + +import logging +from collections.abc import AsyncIterator, Callable +from typing import Any + +from pydantic import BaseModel +from tenacity import retry, stop_after_attempt, wait_exponential + +from src.config import ModelTransport +from src.exceptions import ValidationException +from src.utils.types import set_current_iteration + +from .executor import honcho_llm_call_inner +from .registry import history_adapter_for_provider +from .runtime import ( + AttemptPlan, + current_attempt, + effective_temperature, +) +from .types import ( + HonchoLLMCallResponse, + HonchoLLMCallStreamChunk, + IterationCallback, + IterationData, + StreamingResponseWithMetadata, + VerbosityType, +) + +logger = logging.getLogger(__name__) + +# Bounds for max_tool_iterations to prevent runaway loops. 
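+# execute_tool_loop raises ValidationException for values outside this range.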
+MIN_TOOL_ITERATIONS = 1 +MAX_TOOL_ITERATIONS = 100 + + +def format_assistant_tool_message( + provider: ModelTransport, + content: Any, + tool_calls: list[dict[str, Any]], + thinking_blocks: list[dict[str, Any]] | None = None, + reasoning_details: list[dict[str, Any]] | None = None, +) -> dict[str, Any]: + """Format an assistant message with tool calls in provider-native shape.""" + from .backend import CompletionResult as BackendCompletionResult + from .backend import ToolCallResult + + adapter = history_adapter_for_provider(provider) + result = BackendCompletionResult( + content=content, + tool_calls=[ + ToolCallResult( + id=tool_call["id"], + name=tool_call["name"], + input=tool_call["input"], + thought_signature=tool_call.get("thought_signature"), + ) + for tool_call in tool_calls + ], + thinking_blocks=thinking_blocks or [], + reasoning_details=reasoning_details or [], + ) + return adapter.format_assistant_tool_message(result) + + +def append_tool_results( + provider: ModelTransport, + tool_results: list[dict[str, Any]], + conversation_messages: list[dict[str, Any]], +) -> None: + """Append tool results to `conversation_messages` in provider-native shape.""" + adapter = history_adapter_for_provider(provider) + conversation_messages.extend(adapter.format_tool_results(tool_results)) + + +async def stream_final_response( + *, + winning_plan: AttemptPlan, + prompt: str, + max_tokens: int, + conversation_messages: list[dict[str, Any]], + response_model: type[BaseModel] | None, + json_mode: bool, + temperature: float | None, + stop_seqs: list[str] | None, + verbosity: VerbosityType, + enable_retry: bool, + retry_attempts: int, + before_retry_callback: Callable[[Any], None], +) -> AsyncIterator[HonchoLLMCallStreamChunk]: + """Stream the final response after tool execution is complete. + + Uses the AttemptPlan captured at the moment streaming began (typically + the plan whose inner LLM call just succeeded) and pins it across any + retries of the stream setup. Re-running provider selection here would + bleed the outer current_attempt ContextVar into streaming retries, + potentially rolling the selection back to primary after the tool loop + had already settled on fallback. Tenacity retries re-issue the same + streaming call against the same pinned model for transient errors. 
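+
+    Illustrative consumption (``emit`` is a hypothetical sink; kwargs elided)::
+
+        async for chunk in stream_final_response(...):
+            emit(chunk.content)
+            if chunk.is_done:
+                break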
+ """ + + async def _setup_stream() -> AsyncIterator[HonchoLLMCallStreamChunk]: + return await honcho_llm_call_inner( + winning_plan.provider, + winning_plan.model, + prompt, + max_tokens, + response_model, + json_mode, + effective_temperature(temperature), + stop_seqs, + winning_plan.reasoning_effort, + verbosity, + winning_plan.thinking_budget_tokens, + stream=True, + client_override=winning_plan.client, + tools=None, + tool_choice=None, + messages=conversation_messages, + selected_config=winning_plan.selected_config, + ) + + if enable_retry: + wrapped = retry( + stop=stop_after_attempt(retry_attempts), + wait=wait_exponential(multiplier=1, min=4, max=10), + before_sleep=before_retry_callback, + )(_setup_stream) + stream = await wrapped() + else: + stream = await _setup_stream() + + async for chunk in stream: + yield chunk + + +async def execute_tool_loop( + *, + prompt: str, + max_tokens: int, + messages: list[dict[str, Any]] | None, + tools: list[dict[str, Any]], + tool_choice: str | dict[str, Any] | None, + tool_executor: Callable[[str, dict[str, Any]], Any], + max_tool_iterations: int, + response_model: type[BaseModel] | None, + json_mode: bool, + temperature: float | None, + stop_seqs: list[str] | None, + verbosity: VerbosityType, + enable_retry: bool, + retry_attempts: int, + max_input_tokens: int | None, + get_attempt_plan: Callable[[], AttemptPlan], + before_retry_callback: Callable[[Any], None], + stream_final: bool = False, + iteration_callback: IterationCallback | None = None, +) -> HonchoLLMCallResponse[Any] | StreamingResponseWithMetadata: + """Run the iterative tool calling loop for agentic LLM interactions. + + Loop per iteration: + 1. Make an LLM call with tools available + 2. Execute any tool calls the LLM requests + 3. Append tool results to the conversation + 4. Repeat until the LLM stops calling tools or max iterations reached + + Returns: + Final HonchoLLMCallResponse with accumulated token counts and tool call + history, or a StreamingResponseWithMetadata if stream_final=True. + """ + from .conversation import truncate_messages_to_fit + + if not MIN_TOOL_ITERATIONS <= max_tool_iterations <= MAX_TOOL_ITERATIONS: + raise ValidationException( + "max_tool_iterations must be in " + + f"[{MIN_TOOL_ITERATIONS}, {MAX_TOOL_ITERATIONS}]; " + + f"got {max_tool_iterations}" + ) + + conversation_messages: list[dict[str, Any]] = ( + messages.copy() if messages else [{"role": "user", "content": prompt}] + ) + + iteration = 0 + all_tool_calls: list[dict[str, Any]] = [] + total_input_tokens = 0 + total_output_tokens = 0 + total_cache_creation_tokens = 0 + total_cache_read_tokens = 0 + empty_response_retries = 0 + # Track effective tool_choice — switches from "required"/"any" to "auto" after iter 1. + effective_tool_choice = tool_choice + + while iteration < max_tool_iterations: + # Reset attempt counter so each iteration starts with the primary provider. 
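+        # (current_attempt is the retry-tracking ContextVar from src/llm/runtime.)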
+ current_attempt.set(1) + logger.debug(f"Tool execution iteration {iteration + 1}/{max_tool_iterations}") + + if max_input_tokens is not None: + conversation_messages = truncate_messages_to_fit( + conversation_messages, max_input_tokens + ) + + async def _call_with_messages( + effective_tool_choice: str | dict[str, Any] | None = effective_tool_choice, + conversation_messages: list[dict[str, Any]] = conversation_messages, + ) -> HonchoLLMCallResponse[Any]: + plan = get_attempt_plan() + return await honcho_llm_call_inner( + plan.provider, + plan.model, + prompt, # ignored when messages is passed + max_tokens, + response_model, + json_mode, + effective_temperature(temperature), + stop_seqs, + plan.reasoning_effort, + verbosity, + plan.thinking_budget_tokens, + stream=False, + client_override=plan.client, + tools=tools, + tool_choice=effective_tool_choice, + messages=conversation_messages, + selected_config=plan.selected_config, + ) + + if enable_retry: + call_func = retry( + stop=stop_after_attempt(retry_attempts), + wait=wait_exponential(multiplier=1, min=4, max=10), + before_sleep=before_retry_callback, + )(_call_with_messages) + else: + call_func = _call_with_messages + + response = await call_func() + + total_input_tokens += response.input_tokens + total_output_tokens += response.output_tokens + total_cache_creation_tokens += response.cache_creation_input_tokens + total_cache_read_tokens += response.cache_read_input_tokens + + if not response.tool_calls_made: + logger.debug("No tool calls in response, finishing") + + if ( + isinstance(response.content, str) + and not response.content.strip() + and empty_response_retries < 1 + and iteration < max_tool_iterations - 1 + ): + empty_response_retries += 1 + conversation_messages.append( + { + "role": "user", + "content": ( + "Your last response was empty. Provide a concise answer " + "to the original query using the available context." + ), + } + ) + iteration += 1 + continue + + if stream_final: + # Snapshot the plan that just succeeded — streaming retries + # pin to this exact client/model so we don't bounce back to + # primary after the tool loop settled on fallback. 
+ winning_plan = get_attempt_plan() + stream = stream_final_response( + winning_plan=winning_plan, + prompt=prompt, + max_tokens=max_tokens, + conversation_messages=conversation_messages, + response_model=response_model, + json_mode=json_mode, + temperature=temperature, + stop_seqs=stop_seqs, + verbosity=verbosity, + enable_retry=enable_retry, + retry_attempts=retry_attempts, + before_retry_callback=before_retry_callback, + ) + return StreamingResponseWithMetadata( + stream=stream, + tool_calls_made=all_tool_calls, + input_tokens=total_input_tokens, + output_tokens=total_output_tokens, + cache_creation_input_tokens=total_cache_creation_tokens, + cache_read_input_tokens=total_cache_read_tokens, + thinking_content=response.thinking_content, + iterations=iteration + 1, + ) + + response.tool_calls_made = all_tool_calls + response.input_tokens = total_input_tokens + response.output_tokens = total_output_tokens + response.cache_creation_input_tokens = total_cache_creation_tokens + response.cache_read_input_tokens = total_cache_read_tokens + response.iterations = iteration + 1 + return response + + current_provider = get_attempt_plan().provider + + assistant_message = format_assistant_tool_message( + current_provider, + response.content, + response.tool_calls_made, + response.thinking_blocks, + response.reasoning_details, + ) + conversation_messages.append(assistant_message) + + # Telemetry context — 1-indexed iteration. + set_current_iteration(iteration + 1) + + tool_results: list[dict[str, Any]] = [] + for tool_call in response.tool_calls_made: + tool_name = tool_call["name"] + tool_input = tool_call["input"] + tool_id = tool_call.get("id", "") + + logger.debug(f"Executing tool: {tool_name}") + + try: + tool_result = await tool_executor(tool_name, tool_input) + tool_results.append( + { + "tool_id": tool_id, + "tool_name": tool_name, + "result": tool_result, + } + ) + all_tool_calls.append( + { + "tool_name": tool_name, + "tool_input": tool_input, + "tool_result": tool_result, + } + ) + except Exception as e: + logger.error(f"Tool execution failed for {tool_name}: {e}") + tool_results.append( + { + "tool_id": tool_id, + "tool_name": tool_name, + "result": f"Error: {str(e)}", + "is_error": True, + } + ) + + append_tool_results(current_provider, tool_results, conversation_messages) + + if iteration_callback is not None: + try: + iteration_data = IterationData( + iteration=iteration + 1, + tool_calls=[tc["name"] for tc in response.tool_calls_made], + input_tokens=response.input_tokens, + output_tokens=response.output_tokens, + cache_read_tokens=response.cache_read_input_tokens or 0, + cache_creation_tokens=response.cache_creation_input_tokens or 0, + ) + iteration_callback(iteration_data) + except Exception: + logger.warning("iteration_callback failed", exc_info=True) + + # After first iteration, switch "required"/"any" → "auto" so the model can stop. + if iteration == 0 and effective_tool_choice in ("required", "any"): + effective_tool_choice = "auto" + logger.debug( + "Switched tool_choice from 'required'/'any' to 'auto' after first iteration" + ) + + iteration += 1 + + logger.warning( + f"Tool execution loop reached max iterations ({max_tool_iterations})" + ) + + synthesis_prompt = ( + "You have reached the maximum number of tool calls. " + "Based on all the information you have gathered, provide your final response now. " + "Do not attempt to call any more tools." 
+ ) + conversation_messages.append({"role": "user", "content": synthesis_prompt}) + + # Truncate again — the per-iteration truncate ran before the last tool + # call, so appending synthesis_prompt could nudge us back over the cap. + if max_input_tokens is not None: + conversation_messages = truncate_messages_to_fit( + conversation_messages, max_input_tokens + ) + + if stream_final: + # Snapshot the plan the loop settled on — streaming retries pin to + # this exact client/model rather than re-running provider selection. + winning_plan = get_attempt_plan() + stream = stream_final_response( + winning_plan=winning_plan, + prompt=prompt, + max_tokens=max_tokens, + conversation_messages=conversation_messages, + response_model=response_model, + json_mode=json_mode, + temperature=temperature, + stop_seqs=stop_seqs, + verbosity=verbosity, + enable_retry=enable_retry, + retry_attempts=retry_attempts, + before_retry_callback=before_retry_callback, + ) + return StreamingResponseWithMetadata( + stream=stream, + tool_calls_made=all_tool_calls, + input_tokens=total_input_tokens, + output_tokens=total_output_tokens, + cache_creation_input_tokens=total_cache_creation_tokens, + cache_read_input_tokens=total_cache_read_tokens, + thinking_content=None, + iterations=iteration + 1, + ) + + current_attempt.set(1) + + async def _final_call() -> HonchoLLMCallResponse[Any]: + plan = get_attempt_plan() + return await honcho_llm_call_inner( + plan.provider, + plan.model, + prompt, + max_tokens, + response_model, + json_mode, + effective_temperature(temperature), + stop_seqs, + plan.reasoning_effort, + verbosity, + plan.thinking_budget_tokens, + stream=False, + client_override=plan.client, + tools=None, + tool_choice=None, + messages=conversation_messages, + selected_config=plan.selected_config, + ) + + if enable_retry: + final_call_func = retry( + stop=stop_after_attempt(retry_attempts), + wait=wait_exponential(multiplier=1, min=4, max=10), + before_sleep=before_retry_callback, + )(_final_call) + else: + final_call_func = _final_call + + final_response = await final_call_func() + final_response.tool_calls_made = all_tool_calls + final_response.iterations = iteration + 1 + final_response.input_tokens = total_input_tokens + final_response.input_tokens + final_response.output_tokens = total_output_tokens + final_response.output_tokens + final_response.cache_creation_input_tokens = ( + total_cache_creation_tokens + final_response.cache_creation_input_tokens + ) + final_response.cache_read_input_tokens = ( + total_cache_read_tokens + final_response.cache_read_input_tokens + ) + return final_response + + +__all__ = [ + "MAX_TOOL_ITERATIONS", + "MIN_TOOL_ITERATIONS", + "append_tool_results", + "execute_tool_loop", + "format_assistant_tool_message", + "stream_final_response", +] diff --git a/src/llm/types.py b/src/llm/types.py new file mode 100644 index 000000000..7af5372d1 --- /dev/null +++ b/src/llm/types.py @@ -0,0 +1,138 @@ +"""Public response/stream/iteration types for the LLM API. + +These used to live in src/utils/clients.py and have been moved here as part +of the migration toward src/llm/ owning all non-embedding LLM orchestration. +""" + +from __future__ import annotations + +from collections.abc import AsyncIterator, Callable +from dataclasses import dataclass +from typing import Any, Generic, Literal, TypeVar + +from anthropic import AsyncAnthropic +from google import genai +from openai import AsyncOpenAI +from pydantic import BaseModel, Field + +T = TypeVar("T") + +# OpenAI GPT-5 specific reasoning levels. 
+ReasoningEffortType = ( + Literal["none", "minimal", "low", "medium", "high", "xhigh", "max"] | None +) +VerbosityType = Literal["low", "medium", "high"] | None + +# Raw SDK client union used by the provider-selection layer. +ProviderClient = AsyncAnthropic | AsyncOpenAI | genai.Client + + +@dataclass +class IterationData: + """Data passed to iteration callbacks after each tool execution loop iteration.""" + + iteration: int + """1-indexed iteration number.""" + tool_calls: list[str] + """List of tool names called in this iteration.""" + input_tokens: int + """Input tokens used in this iteration's LLM call.""" + output_tokens: int + """Output tokens generated in this iteration's LLM call.""" + cache_read_tokens: int = 0 + """Tokens read from cache in this iteration.""" + cache_creation_tokens: int = 0 + """Tokens written to cache in this iteration.""" + + +IterationCallback = Callable[[IterationData], None] + + +class HonchoLLMCallResponse(BaseModel, Generic[T]): + """Response object for LLM calls. + + Note: + Uncached input tokens = input_tokens - cache_read_input_tokens + + cache_creation_input_tokens + (cache_creation costs 25% more, cache_read costs 90% less) + """ + + content: T + input_tokens: int = 0 + output_tokens: int + cache_creation_input_tokens: int = 0 + cache_read_input_tokens: int = 0 + finish_reasons: list[str] + tool_calls_made: list[dict[str, Any]] = Field(default_factory=list) + iterations: int = 0 + """Number of LLM calls made in the tool execution loop.""" + thinking_content: str | None = None + # Full thinking blocks with signatures for multi-turn replay (Anthropic only). + thinking_blocks: list[dict[str, Any]] = Field(default_factory=list) + # OpenRouter reasoning_details for Gemini models — must be preserved across turns. + reasoning_details: list[dict[str, Any]] = Field(default_factory=list) + + +class HonchoLLMCallStreamChunk(BaseModel): + """A single chunk in a streaming LLM response.""" + + content: str + is_done: bool = False + finish_reasons: list[str] = Field(default_factory=list) + output_tokens: int | None = None + + +class StreamingResponseWithMetadata: + """Streaming response wrapper carrying metadata from a completed tool loop. + + Lets callers read tool_calls_made / token counts / thinking_content from + the tool-execution phase while still iterating the final streamed answer. 
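+
+    Illustrative usage (``render``/``log_usage`` are hypothetical sinks; other
+    ``execute_tool_loop`` arguments elided)::
+
+        result = await execute_tool_loop(..., stream_final=True)
+        if isinstance(result, StreamingResponseWithMetadata):
+            log_usage(result.input_tokens, result.output_tokens)
+            async for chunk in result:
+                render(chunk.content)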
+ """ + + _stream: AsyncIterator[HonchoLLMCallStreamChunk] + tool_calls_made: list[dict[str, Any]] + input_tokens: int + output_tokens: int + cache_creation_input_tokens: int + cache_read_input_tokens: int + thinking_content: str | None + iterations: int + + def __init__( + self, + stream: AsyncIterator[HonchoLLMCallStreamChunk], + tool_calls_made: list[dict[str, Any]], + input_tokens: int, + output_tokens: int, + cache_creation_input_tokens: int, + cache_read_input_tokens: int, + thinking_content: str | None = None, + iterations: int = 0, + ): + self._stream = stream + self.tool_calls_made = tool_calls_made + self.input_tokens = input_tokens + self.output_tokens = output_tokens + self.cache_creation_input_tokens = cache_creation_input_tokens + self.cache_read_input_tokens = cache_read_input_tokens + self.thinking_content = thinking_content + self.iterations = iterations + + def __aiter__(self) -> AsyncIterator[HonchoLLMCallStreamChunk]: + return self._stream.__aiter__() + + async def __anext__(self) -> HonchoLLMCallStreamChunk: + return await self._stream.__anext__() + + +__all__ = [ + "HonchoLLMCallResponse", + "HonchoLLMCallStreamChunk", + "IterationCallback", + "IterationData", + "ProviderClient", + "ReasoningEffortType", + "StreamingResponseWithMetadata", + "T", + "VerbosityType", +] diff --git a/src/schemas/api.py b/src/schemas/api.py index 307b12e03..8be194923 100644 --- a/src/schemas/api.py +++ b/src/schemas/api.py @@ -501,9 +501,10 @@ def validate_token_count(self) -> Self: tokens = encoding.encode(self.content) self._token_count = len(tokens) - if self._token_count > settings.MAX_EMBEDDING_TOKENS: + if self._token_count > settings.EMBEDDING.MAX_INPUT_TOKENS: raise ValueError( - f"Content exceeds maximum embedding token limit of {settings.MAX_EMBEDDING_TOKENS} " + "Content exceeds maximum embedding token limit of " + + f"{settings.EMBEDDING.MAX_INPUT_TOKENS} " + f"(got {self._token_count} tokens)" ) return self diff --git a/src/telemetry/reasoning_traces.py b/src/telemetry/reasoning_traces.py index b208d08bb..1f4f03e39 100644 --- a/src/telemetry/reasoning_traces.py +++ b/src/telemetry/reasoning_traces.py @@ -12,7 +12,11 @@ from pydantic import BaseModel -from src.config import LLMComponentSettings, settings +from src.config import ( + ConfiguredModelSettings, + ModelConfig, + settings, +) def get_reasoning_traces_file_path() -> Path | None: @@ -24,7 +28,7 @@ def get_reasoning_traces_file_path() -> Path | None: def log_reasoning_trace( task_type: str, - llm_settings: LLMComponentSettings, + model_config: ModelConfig | ConfiguredModelSettings, prompt: str, response: Any, *, @@ -40,7 +44,7 @@ def log_reasoning_trace( Args: task_type: Type of task (e.g., "minimal_deriver", "dialectic_chat") - llm_settings: LLM settings used for the call + model_config: Model configuration used for the call prompt: The full prompt text sent to the LLM (used if messages is None) response: HonchoLLMCallResponse object with the LLM response max_tokens: Max output tokens setting @@ -62,8 +66,8 @@ def log_reasoning_trace( trace_entry: dict[str, Any] = { "timestamp": time.time(), "task_type": task_type, - "provider": llm_settings.PROVIDER, - "model": llm_settings.MODEL, + "provider": model_config.transport, + "model": model_config.model, "settings": { "max_tokens": max_tokens, "thinking_budget_tokens": thinking_budget_tokens, diff --git a/src/utils/agent_tools.py b/src/utils/agent_tools.py index 211323976..36168f345 100644 --- a/src/utils/agent_tools.py +++ b/src/utils/agent_tools.py @@ -33,6 +33,205 @@ 
MAX_PEER_CARD_FACTS = 40 +def _base_observation_properties() -> dict[str, Any]: + return { + "content": { + "type": "string", + "description": "The observation content", + }, + "level": { + "type": "string", + "enum": [ + "explicit", + "deductive", + "inductive", + "contradiction", + ], + "description": ( + "Level: 'explicit' for direct facts, 'deductive' for logical " + + "necessities, 'inductive' for patterns, 'contradiction' for " + + "conflicting statements" + ), + }, + "source_ids": { + "type": "array", + "items": {"type": "string"}, + "description": ( + "Document IDs of source or premise observations. Required and " + + "must be non-empty for deductive, inductive, and contradiction " + + "observations." + ), + }, + "premises": { + "type": "array", + "items": {"type": "string"}, + "description": "(For deductive) Human-readable premise text for display", + }, + "sources": { + "type": "array", + "items": {"type": "string"}, + "description": "(For inductive/contradiction) Human-readable source text for display", + }, + "pattern_type": { + "type": "string", + "enum": [ + "preference", + "behavior", + "personality", + "tendency", + "correlation", + ], + "description": "(For inductive only) Type of pattern being identified", + }, + "confidence": { + "type": "string", + "enum": ["high", "medium", "low"], + "description": ( + "(For inductive only) Confidence level: 'high' for 5+ sources, " + + "'medium' for 3-4, 'low' for 2" + ), + }, + } + + +def _generic_observation_item_schema() -> dict[str, Any]: + return { + "type": "object", + "properties": _base_observation_properties(), + "required": ["content", "level"], + "additionalProperties": False, + "allOf": [ + { + "if": {"properties": {"level": {"const": "deductive"}}}, + "then": { + "required": ["source_ids", "premises"], + "properties": { + "source_ids": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + }, + "premises": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + }, + }, + }, + }, + { + "if": {"properties": {"level": {"const": "inductive"}}}, + "then": { + "required": [ + "source_ids", + "sources", + "pattern_type", + "confidence", + ], + "properties": { + "source_ids": { + "type": "array", + "items": {"type": "string"}, + "minItems": 2, + }, + "sources": { + "type": "array", + "items": {"type": "string"}, + "minItems": 2, + }, + }, + }, + }, + { + "if": {"properties": {"level": {"const": "contradiction"}}}, + "then": { + "required": ["source_ids", "sources"], + "properties": { + "source_ids": { + "type": "array", + "items": {"type": "string"}, + "minItems": 2, + }, + "sources": { + "type": "array", + "items": {"type": "string"}, + "minItems": 2, + }, + }, + }, + }, + ], + } + + +def _deductive_observation_item_schema() -> dict[str, Any]: + return { + "type": "object", + "properties": { + "content": { + "type": "string", + "description": "The deductive conclusion as a self-contained statement", + }, + "source_ids": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + "description": "Required non-empty list of source observation IDs supporting the deduction", + }, + "premises": { + "type": "array", + "items": {"type": "string"}, + "minItems": 1, + "description": "Required human-readable premise text matching the source observations", + }, + }, + "required": ["content", "source_ids", "premises"], + "additionalProperties": False, + } + + +def _inductive_observation_item_schema() -> dict[str, Any]: + return { + "type": "object", + "properties": { + "content": { + "type": 
"string", + "description": "The inductive pattern or generalization as a self-contained statement", + }, + "source_ids": { + "type": "array", + "items": {"type": "string"}, + "minItems": 2, + "description": "Required list of at least two source observation IDs supporting the pattern", + }, + "sources": { + "type": "array", + "items": {"type": "string"}, + "minItems": 2, + "description": "Required human-readable evidence text matching the source observations", + }, + "pattern_type": { + "type": "string", + "enum": [ + "preference", + "behavior", + "personality", + "tendency", + "correlation", + ], + "description": "Required pattern category", + }, + "confidence": { + "type": "string", + "enum": ["high", "medium", "low"], + "description": "Required confidence level based on evidence count", + }, + }, + "required": ["content", "source_ids", "sources", "pattern_type", "confidence"], + "additionalProperties": False, + } + + def _safe_int(value: Any, default: int) -> int: """Coerce a tool input value to int, returning default on failure. @@ -177,88 +376,44 @@ def _extract_pattern_snippet( TOOLS: dict[str, dict[str, Any]] = { "create_observations": { "name": "create_observations", - "description": "Create observations at any level: explicit (facts), deductive (logical necessities), inductive (patterns), or contradiction (conflicting statements). Use this to record facts, logical inferences, patterns, or note when the user has said contradictory things.", + "description": "Create observations at any level: explicit (facts), deductive (logical necessities), inductive (patterns), or contradiction (conflicting statements). For deductive, inductive, and contradiction observations, missing or empty source_ids are invalid and will be rejected.", "input_schema": { "type": "object", "properties": { "observations": { "type": "array", "description": "List of observations to create", - "items": { - "type": "object", - "properties": { - "content": { - "type": "string", - "description": "The observation content", - }, - "level": { - "type": "string", - "enum": [ - "explicit", - "deductive", - "inductive", - "contradiction", - ], - "description": "Level: 'explicit' for direct facts, 'deductive' for logical necessities, 'inductive' for patterns, 'contradiction' for conflicting statements", - }, - "source_ids": { - "type": "array", - "items": {"type": "string"}, - "description": "(For deductive/inductive/contradiction) Document IDs of source/premise observations - REQUIRED", - }, - "premises": { - "type": "array", - "items": {"type": "string"}, - "description": "(For deductive) Human-readable premise text for display", - }, - "sources": { - "type": "array", - "items": {"type": "string"}, - "description": "(For inductive/contradiction) Human-readable source text for display", - }, - "pattern_type": { - "type": "string", - "enum": [ - "preference", - "behavior", - "personality", - "tendency", - "correlation", - ], - "description": "(For inductive only) Type of pattern being identified", - }, - "confidence": { - "type": "string", - "enum": ["high", "medium", "low"], - "description": "(For inductive only) Confidence level: 'high' for 3+ sources, 'medium' for 2+, 'low' for tentative", - }, - }, - "required": ["content", "level"], - }, + "items": _generic_observation_item_schema(), }, }, "required": ["observations"], }, }, "create_observations_deductive": { - "name": "create_observations", - "description": "Create new deductive observations discovered while answering the query. 
Use this when you infer something new about the peer that isn't already captured in existing observations. Only use for novel deductions - not for restating existing facts.",
+        "name": "create_observations_deductive",
+        "description": "Create new deductive observations discovered while answering the query. Every observation must include non-empty source_ids and premise text. Use this only for novel deductions grounded in existing observations.",
         "input_schema": {
             "type": "object",
             "properties": {
                 "observations": {
                     "type": "array",
                     "description": "List of new deductive observations to create",
-                    "items": {
-                        "type": "object",
-                        "properties": {
-                            "content": {
-                                "type": "string",
-                                "description": "The observation content - should be a self-contained statement about the peer",
-                            },
-                        },
-                        "required": ["content"],
-                    },
+                    "items": _deductive_observation_item_schema(),
+                },
+            },
+            "required": ["observations"],
+        },
+    },
+    "create_observations_inductive": {
+        "name": "create_observations_inductive",
+        "description": "Create new inductive observations discovered while answering the query. Every observation must include source_ids, source text, pattern_type, and confidence. Use this only for patterns supported by multiple observations.",
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "observations": {
+                    "type": "array",
+                    "description": "List of new inductive observations to create",
+                    "items": _inductive_observation_item_schema(),
                 },
             },
             "required": ["observations"],
@@ -595,7 +750,7 @@ def _extract_pattern_snippet(
     TOOLS["search_memory"],
     TOOLS["search_messages"],
     # Action tools
-    TOOLS["create_observations"],
+    TOOLS["create_observations_deductive"],
     TOOLS["delete_observations"],
     TOOLS["update_peer_card"],
 ]
 
@@ -610,7 +765,7 @@ def _extract_pattern_snippet(
     TOOLS["search_memory"],
     TOOLS["search_messages"],
     # Action tools
-    TOOLS["create_observations"],
+    TOOLS["create_observations_inductive"],
     TOOLS["update_peer_card"],
 ]
 
@@ -1033,8 +1188,11 @@ class ToolContext:
     parent_category: str | None = None  # Parent category for CloudEvents
 
 
-async def _handle_create_observations(
-    ctx: ToolContext, tool_input: dict[str, Any]
+async def _handle_create_observations_impl(
+    ctx: ToolContext,
+    tool_input: dict[str, Any],
+    *,
+    forced_level: str | None = None,
 ) -> str:
-    """Handle create_observations tool."""
+    """Shared implementation for the create_observations tool family.
+
+    When forced_level is set, every observation is stamped with that level;
+    otherwise the level defaults to "explicit" when messages are in context
+    and "deductive" otherwise.
+    """
     raw_observations = tool_input.get("observations", [])
@@ -1045,7 +1203,10 @@ async def _handle_create_observations(
     # Set context-specific default level before Pydantic validation
     default_level = "explicit" if ctx.current_messages else "deductive"
     for obs in raw_observations:
-        obs.setdefault("level", default_level)
+        if forced_level is not None:
+            obs["level"] = forced_level
+        else:
+            obs.setdefault("level", default_level)
 
     # Validate observations individually so valid ones are still processed
     observations: list[schemas.ObservationInput] = []
 
@@ -1139,6 +1300,32 @@ async def _handle_create_observations(
     return response
 
 
+async def _handle_create_observations(
+    ctx: ToolContext, tool_input: dict[str, Any]
+) -> str:
+    """Handle create_observations: each observation carries its own level."""
+    return await _handle_create_observations_impl(ctx, tool_input)
+
+
+async def _handle_create_observations_deductive(
+    ctx: ToolContext, tool_input: dict[str, Any]
+) -> str:
+    """Handle create_observations_deductive: level is forced to 'deductive'."""
+    return await _handle_create_observations_impl(
+        ctx,
+        tool_input,
+        forced_level="deductive",
+    )
+
+
+async def _handle_create_observations_inductive(
+    ctx: ToolContext, tool_input: dict[str, Any]
+) -> str:
+    """Handle create_observations_inductive: level is forced to 'inductive'."""
+    return await _handle_create_observations_impl(
+        ctx,
+        tool_input,
+        
forced_level="inductive", + ) + + async def _handle_update_peer_card(ctx: ToolContext, tool_input: dict[str, Any]) -> str: """Handle update_peer_card tool.""" # Check if peer card creation is disabled via configuration @@ -1263,7 +1450,10 @@ async def _handle_search_memory(ctx: ToolContext, tool_input: dict[str, Any]) -> try: query_embedding = await embedding_client.embed(query) except ValueError: - return f"ERROR: Query exceeds maximum token limit of {settings.MAX_EMBEDDING_TOKENS}. Please use a shorter query." + return ( + "ERROR: Query exceeds maximum token limit of " + + f"{settings.EMBEDDING.MAX_INPUT_TOKENS}. Please use a shorter query." + ) documents = await crud.query_documents( db=None, @@ -1814,6 +2004,8 @@ async def _handle_get_reasoning_chain( # Tool handler dispatch table _TOOL_HANDLERS: dict[str, Callable[[ToolContext, dict[str, Any]], Any]] = { "create_observations": _handle_create_observations, + "create_observations_deductive": _handle_create_observations_deductive, + "create_observations_inductive": _handle_create_observations_inductive, "update_peer_card": _handle_update_peer_card, "get_recent_history": _handle_get_recent_history, "search_memory": _handle_search_memory, diff --git a/src/utils/clients.py b/src/utils/clients.py deleted file mode 100644 index 1c042bff5..000000000 --- a/src/utils/clients.py +++ /dev/null @@ -1,2575 +0,0 @@ -import json -import logging -from collections.abc import AsyncIterator, Callable -from contextvars import ContextVar -from dataclasses import dataclass -from typing import Any, Generic, Literal, TypeVar, cast, overload - -from anthropic import AsyncAnthropic -from anthropic.types import TextBlock, ThinkingBlock, ToolUseBlock -from anthropic.types.message import Message as AnthropicMessage -from anthropic.types.usage import Usage -from google import genai -from google.genai.types import ( - ContentListUnionDict, - GenerateContentConfigDict, - GenerateContentResponse, -) -from groq import AsyncGroq -from openai import AsyncOpenAI -from openai.types.chat import ChatCompletion, ChatCompletionChunk -from pydantic import BaseModel, Field, ValidationError -from sentry_sdk.ai.monitoring import ai_track -from tenacity import retry, stop_after_attempt, wait_exponential - -from src.config import LLMComponentSettings, settings -from src.exceptions import LLMError -from src.telemetry.logging import conditional_observe -from src.telemetry.reasoning_traces import log_reasoning_trace -from src.utils.json_parser import validate_and_repair_json -from src.utils.representation import PromptRepresentation -from src.utils.tokens import estimate_tokens -from src.utils.types import SupportedProviders, set_current_iteration - -logger = logging.getLogger(__name__) - -# Gemini finish reasons that indicate the response was blocked by safety or policy -# filters. When these occur, the response typically has no usable text content and -# retrying with a backup provider is appropriate. 
-GEMINI_BLOCKED_FINISH_REASONS = { - "SAFETY", - "RECITATION", - "PROHIBITED_CONTENT", - "BLOCKLIST", -} - - -@dataclass -class IterationData: - """Data passed to iteration callbacks after each tool execution loop iteration.""" - - iteration: int - """1-indexed iteration number.""" - tool_calls: list[str] - """List of tool names called in this iteration.""" - input_tokens: int - """Input tokens used in this iteration's LLM call.""" - output_tokens: int - """Output tokens generated in this iteration's LLM call.""" - cache_read_tokens: int = 0 - """Tokens read from cache in this iteration.""" - cache_creation_tokens: int = 0 - """Tokens written to cache in this iteration.""" - - -# Type alias for iteration callback -IterationCallback = Callable[[IterationData], None] - -T = TypeVar("T") - -# Type aliases for OpenAI GPT-5 specific parameters -ReasoningEffortType = Literal["low", "medium", "high", "minimal"] | None -VerbosityType = Literal["low", "medium", "high"] | None - - -def count_message_tokens(messages: list[dict[str, Any]]) -> int: - """Count tokens in a list of messages using tiktoken.""" - total = 0 - for msg in messages: - content = msg.get("content", "") - if isinstance(content, str): - total += estimate_tokens(content) - elif isinstance(content, list): - # Handle Anthropic-style content blocks - total += estimate_tokens(json.dumps(content)) - # Also count parts for Google format - if "parts" in msg: - try: - total += estimate_tokens(json.dumps(msg["parts"])) - except TypeError: - # Handle non-JSON-serializable content (e.g., bytes) by estimating based on string representation - total += estimate_tokens(str(msg["parts"])) - return total - - -def _is_tool_use_message(msg: dict[str, Any]) -> bool: - """Check if a message contains tool calls (any format).""" - # Anthropic format: content is a list with tool_use blocks - content = msg.get("content") - if isinstance(content, list): - for block in cast(list[dict[str, Any]], content): - if block.get("type") == "tool_use": - return True - - # OpenAI format: tool_calls field on assistant message - return bool(msg.get("tool_calls")) - - -def _is_tool_result_message(msg: dict[str, Any]) -> bool: - """Check if a message contains tool results (any format).""" - # Anthropic format: content is a list with tool_result blocks - content = msg.get("content") - if isinstance(content, list): - for block in cast(list[dict[str, Any]], content): - if block.get("type") == "tool_result": - return True - - # OpenAI format: role is "tool" - return msg.get("role") == "tool" - - -def _group_into_units(messages: list[dict[str, Any]]) -> list[list[dict[str, Any]]]: - """ - Group messages into logical conversation units. - - A unit is either: - - A tool_use message + ALL consecutive tool_result messages that follow - - A single non-tool message - - This ensures tool_use and tool_results stay together. 
- """ - units: list[list[dict[str, Any]]] = [] - i = 0 - - while i < len(messages): - msg = messages[i] - - if _is_tool_use_message(msg): - # Collect this tool_use and ALL following tool_results - j = i + 1 - while j < len(messages) and _is_tool_result_message(messages[j]): - j += 1 - - # Create unit with tool_use + all tool_results - unit = messages[i:j] - if len(unit) > 1: # Has at least one tool_result - units.append(unit) - i = j - else: - # Orphaned tool_use (no results) - skip it - logger.debug(f"Skipping orphaned tool_use at index {i}") - i += 1 - elif _is_tool_result_message(msg): - # Orphaned tool_result - skip it - logger.debug(f"Skipping orphaned tool_result at index {i}") - i += 1 - else: - # Regular message - its own unit - units.append([msg]) - i += 1 - - return units - - -def truncate_messages_to_fit( - messages: list[dict[str, Any]], - max_tokens: int, - preserve_system: bool = True, -) -> list[dict[str, Any]]: - """ - Truncate messages to fit within a token limit while maintaining valid structure. - - Strategy: - 1. Group messages into units (tool_use + results together, or single messages) - 2. Remove oldest units first to preserve recent context - 3. Units stay intact so tool_use/tool_result pairs are never broken - """ - current_tokens = count_message_tokens(messages) - if current_tokens <= max_tokens: - return messages - - logger.info(f"Truncating: {current_tokens} tokens exceeds {max_tokens} limit") - - # Separate system messages from conversation - system_messages: list[dict[str, Any]] = [] - conversation: list[dict[str, Any]] = [] - - for msg in messages: - if msg.get("role") == "system" and preserve_system: - system_messages.append(msg) - else: - conversation.append(msg) - - system_tokens = count_message_tokens(system_messages) - available_tokens = max_tokens - system_tokens - - if available_tokens <= 0: - logger.warning("System message exceeds max_input_tokens") - return messages - - # Group messages into units - units = _group_into_units(conversation) - - if not units: - logger.warning("No valid conversation units") - return system_messages - - # Remove oldest units until we fit - while len(units) > 1: # Keep at least one unit - # Calculate current token count - flat_messages = [msg for unit in units for msg in unit] - if count_message_tokens(flat_messages) <= available_tokens: - break - - # Remove the oldest unit - removed_unit = units.pop(0) - logger.debug( - f"Removed unit with {len(removed_unit)} messages " - + f"(~{count_message_tokens(removed_unit)} tokens)" - ) - - # Flatten remaining units - result_conversation = [msg for unit in units for msg in unit] - - result = system_messages + result_conversation - result_tokens = count_message_tokens(result) - logger.info( - f"Truncation complete: {len(messages)} -> {len(result)} messages, " - + f"{current_tokens} -> {result_tokens} tokens, " - + f"{len(units)} units kept" - ) - return result - - -M = TypeVar("M", bound=BaseModel) - -# Context variable to track retry attempts for provider switching -_current_attempt: ContextVar[int] = ContextVar("current_attempt", default=0) - - -def _get_effective_temperature(temperature: float | None) -> float | None: - """Adjust temperature on retries - bump 0.0 to 0.2 to get different results.""" - if temperature == 0.0 and _current_attempt.get() > 1: - logger.debug("Bumping temperature from 0.0 to 0.2 on retry") - return 0.2 - return temperature - - -CLIENTS: dict[ - SupportedProviders, - AsyncAnthropic | AsyncOpenAI | genai.Client | AsyncGroq, -] = {} - -if 
settings.LLM.ANTHROPIC_API_KEY: - anthropic = AsyncAnthropic( - api_key=settings.LLM.ANTHROPIC_API_KEY, - timeout=600.0, # 10 minutes timeout for long-running operations - ) - CLIENTS["anthropic"] = anthropic - -if settings.LLM.OPENAI_API_KEY: - openai_client = AsyncOpenAI( - api_key=settings.LLM.OPENAI_API_KEY, - ) - CLIENTS["openai"] = openai_client - -if settings.LLM.OPENAI_COMPATIBLE_API_KEY and settings.LLM.OPENAI_COMPATIBLE_BASE_URL: - CLIENTS["custom"] = AsyncOpenAI( - api_key=settings.LLM.OPENAI_COMPATIBLE_API_KEY, - base_url=settings.LLM.OPENAI_COMPATIBLE_BASE_URL, - ) - -# vLLM uses separate settings for local model serving -if settings.LLM.VLLM_API_KEY and settings.LLM.VLLM_BASE_URL: - CLIENTS["vllm"] = AsyncOpenAI( - api_key=settings.LLM.VLLM_API_KEY, - base_url=settings.LLM.VLLM_BASE_URL, - ) - -if settings.LLM.GEMINI_API_KEY: - google = genai.client.Client(api_key=settings.LLM.GEMINI_API_KEY) - CLIENTS["google"] = google - -if settings.LLM.GROQ_API_KEY: - groq = AsyncGroq(api_key=settings.LLM.GROQ_API_KEY) - CLIENTS["groq"] = groq - -SELECTED_PROVIDERS = [ - ("Summary", settings.SUMMARY.PROVIDER), - ("Deriver", settings.DERIVER.PROVIDER), -] - -# Add all dialectic level providers -for level, level_settings in settings.DIALECTIC.LEVELS.items(): - SELECTED_PROVIDERS.append((f"Dialectic ({level})", level_settings.PROVIDER)) - -for provider_name, provider_value in SELECTED_PROVIDERS: - if provider_value not in CLIENTS: - raise ValueError(f"Missing client for {provider_name}: {provider_value}") - -# Validate backup providers are initialized if configured -BACKUP_PROVIDERS: list[tuple[str, SupportedProviders | None]] = [ - ("Deriver", settings.DERIVER.BACKUP_PROVIDER), - ("Summary", settings.SUMMARY.BACKUP_PROVIDER), - ("Dream", settings.DREAM.BACKUP_PROVIDER), -] - -# Add all dialectic level backup providers -for level, level_settings in settings.DIALECTIC.LEVELS.items(): - BACKUP_PROVIDERS.append((f"Dialectic ({level})", level_settings.BACKUP_PROVIDER)) - -for component_name, backup_provider in BACKUP_PROVIDERS: - if backup_provider is not None and backup_provider not in CLIENTS: - raise ValueError( - f"Backup provider for {component_name} is set to {backup_provider}, " - + "but this provider is not initialized. Please set the required API key/URL environment " - + "variables or remove the backup configuration." - ) - - -def convert_tools_for_provider( - tools: list[dict[str, Any]], - provider: SupportedProviders, -) -> list[dict[str, Any]]: - """ - Convert tool definitions to provider-specific format. 
- - Args: - tools: List of tool definitions in Anthropic format (with input_schema) - provider: The target provider to convert tools for - - Returns: - List of tool definitions in the provider's native format - """ - if provider == "anthropic": - # Anthropic format: input_schema - return tools - elif provider in ("openai", "custom", "vllm"): - # OpenAI format: parameters instead of input_schema - # custom and vllm use AsyncOpenAI client so need OpenAI format - return [ - { - "type": "function", - "function": { - "name": tool["name"], - "description": tool["description"], - "parameters": tool["input_schema"], - }, - } - for tool in tools - ] - elif provider == "google": - # Google format: function_declarations wrapped in a tool object - return [ - { - "function_declarations": [ - { - "name": tool["name"], - "description": tool["description"], - "parameters": tool["input_schema"], - } - for tool in tools - ] - } - ] - else: - # For unsupported providers, return as-is (will likely error if tools are used) - logger.warning( - f"Tool calling not implemented for provider {provider}, returning tools as-is" - ) - return tools - - -def extract_openai_reasoning_content(response: Any) -> str | None: - """ - Extract reasoning/thinking content from an OpenAI ChatCompletion response. - - GPT-5 and o1 models include reasoning_details in the response message. - Custom OpenAI-compatible providers may also include this field. - - Args: - response: OpenAI ChatCompletion response object - - Returns: - Concatenated reasoning content string, or None if not present - """ - try: - message = response.choices[0].message - # Check for reasoning_details (GPT-5/o1 models) - if hasattr(message, "reasoning_details") and message.reasoning_details: - # reasoning_details is a list of reasoning steps - reasoning_parts: list[Any] = [] - for detail in message.reasoning_details: - if hasattr(detail, "content") and detail.content: - reasoning_parts.append(detail.content) - elif isinstance(detail, dict) and detail.get("content"): # pyright: ignore[reportUnknownMemberType] - reasoning_parts.append(detail["content"]) - if reasoning_parts: - return "\n".join(reasoning_parts) - # Check for reasoning_content (some custom providers) - if hasattr(message, "reasoning_content") and message.reasoning_content: - return message.reasoning_content - except (AttributeError, IndexError, TypeError): - pass - return None - - -def extract_openai_reasoning_details(response: Any) -> list[dict[str, Any]]: - """ - Extract reasoning_details array from an OpenAI/OpenRouter ChatCompletion response. - - OpenRouter returns reasoning blocks in reasoning_details that must be preserved - and passed back in subsequent requests for Gemini models with tool use. - - Args: - response: OpenAI ChatCompletion response object - - Returns: - List of reasoning detail objects, or empty list if not present - """ - try: - message = response.choices[0].message - # Check for reasoning_details (OpenRouter/Gemini) - if hasattr(message, "reasoning_details") and message.reasoning_details: - # Return the full array for preservation - return [ - detail.model_dump() if hasattr(detail, "model_dump") else dict(detail) - for detail in message.reasoning_details - ] - except (AttributeError, IndexError, TypeError): - pass - return [] - - -def extract_openai_cache_tokens(usage: Any) -> tuple[int, int]: - """ - Extract cache token counts from OpenAI-style usage objects. - - OpenAI reports cached tokens in usage.prompt_tokens_details.cached_tokens. 
- OpenRouter and some proxies may report in different locations. - - Args: - usage: OpenAI CompletionUsage object or similar - - Returns: - Tuple of (cache_creation_tokens, cache_read_tokens). - For OpenAI-style APIs, cache_creation is always 0 (automatic caching), - and cache_read is the cached_tokens count. - """ - if not usage: - return 0, 0 - - cache_read = 0 - - # OpenAI native: usage.prompt_tokens_details.cached_tokens - if hasattr(usage, "prompt_tokens_details") and usage.prompt_tokens_details: - details = usage.prompt_tokens_details - if hasattr(details, "cached_tokens") and details.cached_tokens: - cache_read = details.cached_tokens - - # OpenRouter style: usage.cache_read_input_tokens or usage.cached_tokens - if cache_read == 0: - if hasattr(usage, "cache_read_input_tokens") and usage.cache_read_input_tokens: - cache_read = usage.cache_read_input_tokens - elif hasattr(usage, "cached_tokens") and usage.cached_tokens: - cache_read = usage.cached_tokens - - # OpenRouter/Anthropic-proxy style: cache_creation_input_tokens - cache_creation = 0 - if ( - hasattr(usage, "cache_creation_input_tokens") - and usage.cache_creation_input_tokens - ): - cache_creation = usage.cache_creation_input_tokens - - return cache_creation, cache_read - - -class HonchoLLMCallResponse(BaseModel, Generic[T]): - """ - Response object for LLM calls. - - Args: - content: The response content. When a response_model is provided, this will be - the parsed object of that type. Otherwise, it will be a string. - input_tokens: Total number of input tokens (including cached). - output_tokens: Number of tokens generated in the response. - cache_creation_input_tokens: Number of tokens written to cache. - cache_read_input_tokens: Number of tokens read from cache. - finish_reasons: List of finish reasons for the response. - tool_calls_made: Optional list of all tool calls executed during the request. - - Note: - Uncached input tokens = input_tokens - cache_read_input_tokens + cache_creation_input_tokens - (cache_creation costs 25% more, cache_read costs 90% less) - """ - - content: T - input_tokens: int = 0 - output_tokens: int - cache_creation_input_tokens: int = 0 - cache_read_input_tokens: int = 0 - finish_reasons: list[str] - tool_calls_made: list[dict[str, Any]] = Field(default_factory=list) - iterations: int = 0 - """Number of LLM calls made in the tool execution loop (1 = single response, 2+ = tool use iterations plus final synthesis).""" - thinking_content: str | None = None - # Full thinking blocks with signatures for multi-turn conversation replay (Anthropic only) - thinking_blocks: list[dict[str, Any]] = Field(default_factory=list) - # OpenRouter reasoning_details for Gemini models - must be preserved across turns - reasoning_details: list[dict[str, Any]] = Field(default_factory=list) - - -class HonchoLLMCallStreamChunk(BaseModel): - """ - A single chunk in a streaming LLM response. - - Args: - content: The text content for this chunk. Empty for chunks that only contain metadata. - is_done: Whether this is the final chunk in the stream. - finish_reasons: List of finish reasons if the stream is complete. - output_tokens: Number of tokens generated in the response. Only set on the final chunk. - """ - - content: str - is_done: bool = False - finish_reasons: list[str] = Field(default_factory=list) - output_tokens: int | None = None - - -class StreamingResponseWithMetadata: - """ - Wrapper for streaming responses that includes metadata from the tool execution phase. 
- - This allows callers to access tool call counts, token usage, and thinking content - from the tool loop while still streaming the final response. - """ - - _stream: AsyncIterator[HonchoLLMCallStreamChunk] - tool_calls_made: list[dict[str, Any]] - input_tokens: int - output_tokens: int - cache_creation_input_tokens: int - cache_read_input_tokens: int - thinking_content: str | None - iterations: int - - def __init__( - self, - stream: AsyncIterator[HonchoLLMCallStreamChunk], - tool_calls_made: list[dict[str, Any]], - input_tokens: int, - output_tokens: int, - cache_creation_input_tokens: int, - cache_read_input_tokens: int, - thinking_content: str | None = None, - iterations: int = 0, - ): - self._stream = stream - self.tool_calls_made = tool_calls_made - self.input_tokens = input_tokens - self.output_tokens = output_tokens - self.cache_creation_input_tokens = cache_creation_input_tokens - self.cache_read_input_tokens = cache_read_input_tokens - self.thinking_content = thinking_content - self.iterations = iterations - - def __aiter__(self) -> AsyncIterator[HonchoLLMCallStreamChunk]: - return self._stream.__aiter__() - - async def __anext__(self) -> HonchoLLMCallStreamChunk: - return await self._stream.__anext__() - - -# Bounds for max_tool_iterations to prevent runaway loops -MIN_TOOL_ITERATIONS = 1 -MAX_TOOL_ITERATIONS = 100 - - -async def _stream_final_response( - llm_settings: "LLMComponentSettings", - prompt: str, - max_tokens: int, - conversation_messages: list[dict[str, Any]], - response_model: type[BaseModel] | None, - json_mode: bool, - temperature: float | None, - stop_seqs: list[str] | None, - reasoning_effort: ReasoningEffortType, - verbosity: VerbosityType, - thinking_budget_tokens: int | None, -) -> AsyncIterator[HonchoLLMCallStreamChunk]: - """ - Stream the final response after tool execution is complete. - - Makes a streaming LLM call with the accumulated conversation messages - (which include all tool call results) to generate the final answer. 
- - Args: - llm_settings: Settings for the LLM provider - prompt: Original prompt (used as fallback) - max_tokens: Maximum tokens to generate - conversation_messages: Full conversation history including tool results - response_model: Optional Pydantic model for structured output - json_mode: Whether to use JSON mode - temperature: Temperature for the LLM - stop_seqs: Stop sequences - reasoning_effort: OpenAI reasoning effort (GPT-5 only) - verbosity: OpenAI verbosity (GPT-5 only) - thinking_budget_tokens: Anthropic thinking budget - - Yields: - HonchoLLMCallStreamChunk objects containing the streaming response - """ - provider = llm_settings.PROVIDER - model = llm_settings.MODEL - - client = CLIENTS.get(provider) - if not client: - raise ValueError(f"Missing client for {provider}") - - # Make a streaming call without tools - stream_response = await honcho_llm_call_inner( - provider, - model, - prompt, - max_tokens, - response_model, - json_mode, - _get_effective_temperature(temperature), - stop_seqs, - reasoning_effort, - verbosity, - thinking_budget_tokens, - True, # stream=True - None, # No tools - None, # No tool_choice - conversation_messages, - ) - - # Yield chunks from the streaming response - async for chunk in stream_response: - yield chunk - - -async def _execute_tool_loop( - llm_settings: "LLMComponentSettings", - prompt: str, - max_tokens: int, - messages: list[dict[str, Any]] | None, - tools: list[dict[str, Any]], - tool_choice: str | dict[str, Any] | None, - tool_executor: Callable[[str, dict[str, Any]], Any], - max_tool_iterations: int, - response_model: type[BaseModel] | None, - json_mode: bool, - temperature: float | None, - stop_seqs: list[str] | None, - reasoning_effort: ReasoningEffortType, - verbosity: VerbosityType, - thinking_budget_tokens: int | None, - enable_retry: bool, - retry_attempts: int, - max_input_tokens: int | None, - get_provider_and_model: Callable[ - [], - tuple[SupportedProviders, str, int | None, ReasoningEffortType, VerbosityType], - ], - before_retry_callback: Callable[[Any], None], - stream_final: bool = False, - iteration_callback: IterationCallback | None = None, -) -> HonchoLLMCallResponse[Any] | StreamingResponseWithMetadata: - """ - Execute the tool calling loop for agentic LLM interactions. - - This function handles the iterative process of: - 1. Making an LLM call with tools available - 2. Executing any tool calls the LLM requests - 3. Feeding tool results back to the LLM - 4. 
Repeating until the LLM stops calling tools or max iterations reached - - Args: - llm_settings: Settings for the LLM provider - prompt: Initial prompt (used if messages is None) - max_tokens: Maximum tokens to generate per call - messages: Conversation history - tools: Tool definitions in Anthropic format - tool_choice: Tool selection strategy - tool_executor: Async function to execute tools - max_tool_iterations: Maximum iterations before forcing completion - response_model: Optional Pydantic model for structured output - json_mode: Whether to use JSON mode - temperature: Temperature for the LLM (default **none**, only some models support this) - stop_seqs: Stop sequences - reasoning_effort: OpenAI reasoning effort (GPT-5 only) - verbosity: OpenAI verbosity (GPT-5 only) - thinking_budget_tokens: Anthropic thinking budget - enable_retry: Whether to enable retry with exponential backoff - retry_attempts: Number of retry attempts - max_input_tokens: Maximum input tokens (for truncation) - get_provider_and_model: Function to get current provider/model based on attempt - before_retry_callback: Callback for retry events - stream_final: If True, stream the final response instead of returning it synchronously - iteration_callback: Optional callback invoked after each iteration with IterationData - - Returns: - Final HonchoLLMCallResponse with accumulated token counts and tool call history, - or an AsyncIterator of HonchoLLMCallStreamChunk if stream_final=True - """ - # Initialize conversation messages - conversation_messages: list[dict[str, Any]] = ( - messages.copy() if messages else [{"role": "user", "content": prompt}] - ) - - iteration = 0 - all_tool_calls: list[dict[str, Any]] = [] - total_input_tokens = 0 - total_output_tokens = 0 - total_cache_creation_tokens = 0 - total_cache_read_tokens = 0 - empty_response_retries = 0 - # Track effective tool_choice - switches from "required" to "auto" after first iteration - effective_tool_choice = tool_choice - - while iteration < max_tool_iterations: - # Reset attempt counter so each iteration starts with the primary provider - _current_attempt.set(1) - logger.debug(f"Tool execution iteration {iteration + 1}/{max_tool_iterations}") - - # Truncate BEFORE making the API call to avoid context length errors - if max_input_tokens is not None: - conversation_messages = truncate_messages_to_fit( - conversation_messages, max_input_tokens - ) - - # Create a wrapper that injects our messages - async def _call_with_messages( - effective_tool_choice: str | dict[str, Any] | None = effective_tool_choice, - conversation_messages: list[dict[str, Any]] = conversation_messages, - ) -> HonchoLLMCallResponse[Any]: - # Use shared provider selection helper - provider, model, thinking_budget, gpt5_reasoning_effort, gpt5_verbosity = ( - get_provider_and_model() - ) - - client = CLIENTS.get(provider) - if not client: - raise ValueError(f"Missing client for {provider}") - - converted_tools = ( - convert_tools_for_provider(tools, provider) if tools else None - ) - - return await honcho_llm_call_inner( - provider, - model, - prompt, # Will be ignored since we pass messages - max_tokens, - response_model, - json_mode, - _get_effective_temperature(temperature), - stop_seqs, - gpt5_reasoning_effort, - gpt5_verbosity, - thinking_budget, - False, - converted_tools, - effective_tool_choice, - conversation_messages, - ) - - # Apply retry if enabled - if enable_retry: - call_func = retry( - stop=stop_after_attempt(retry_attempts), - wait=wait_exponential(multiplier=1, min=4, max=10), - 
before_sleep=before_retry_callback, - )(_call_with_messages) - else: - call_func = _call_with_messages - - # Make the call - response = await call_func() - - # Accumulate tokens from this iteration - total_input_tokens += response.input_tokens - total_output_tokens += response.output_tokens - total_cache_creation_tokens += response.cache_creation_input_tokens - total_cache_read_tokens += response.cache_read_input_tokens - - # Check if there are tool calls - if not response.tool_calls_made: - # No tool calls, return final response - logger.debug("No tool calls in response, finishing") - - if ( - isinstance(response.content, str) - and not response.content.strip() - and empty_response_retries < 1 - and iteration < max_tool_iterations - 1 - ): - empty_response_retries += 1 - conversation_messages.append( - { - "role": "user", - "content": ( - "Your last response was empty. Provide a concise answer " - "to the original query using the available context." - ), - } - ) - iteration += 1 - continue - - if stream_final: - # Stream the final response with metadata from tool execution - stream = _stream_final_response( - llm_settings=llm_settings, - prompt=prompt, - max_tokens=max_tokens, - conversation_messages=conversation_messages, - response_model=response_model, - json_mode=json_mode, - temperature=temperature, - stop_seqs=stop_seqs, - reasoning_effort=reasoning_effort, - verbosity=verbosity, - thinking_budget_tokens=thinking_budget_tokens, - ) - return StreamingResponseWithMetadata( - stream=stream, - tool_calls_made=all_tool_calls, - input_tokens=total_input_tokens, - output_tokens=total_output_tokens, - cache_creation_input_tokens=total_cache_creation_tokens, - cache_read_input_tokens=total_cache_read_tokens, - thinking_content=response.thinking_content, - iterations=iteration + 1, - ) - - response.tool_calls_made = all_tool_calls - response.input_tokens = total_input_tokens - response.output_tokens = total_output_tokens - response.cache_creation_input_tokens = total_cache_creation_tokens - response.cache_read_input_tokens = total_cache_read_tokens - response.iterations = iteration + 1 - return response - - # Determine which provider we're using (reuse the helper) - current_provider, _, _, _, _ = get_provider_and_model() - - # Add assistant message with tool calls to conversation - assistant_message = _format_assistant_tool_message( - current_provider, - response.content, - response.tool_calls_made, - response.thinking_blocks, - response.reasoning_details, - ) - conversation_messages.append(assistant_message) - - # Set current iteration for telemetry context (1-indexed) - set_current_iteration(iteration + 1) - - # Execute tools and add results - tool_results: list[dict[str, Any]] = [] - for tool_call in response.tool_calls_made: - tool_name = tool_call["name"] - tool_input = tool_call["input"] - tool_id = tool_call.get("id", "") - - logger.debug(f"Executing tool: {tool_name}") - - try: - # Execute the tool - tool_result = await tool_executor(tool_name, tool_input) - - # Store for Anthropic format - tool_results.append( - { - "tool_id": tool_id, - "tool_name": tool_name, - "result": tool_result, - } - ) - - all_tool_calls.append( - { - "tool_name": tool_name, - "tool_input": tool_input, - "tool_result": tool_result, - } - ) - - except Exception as e: - logger.error(f"Tool execution failed for {tool_name}: {e}") - tool_results.append( - { - "tool_id": tool_id, - "tool_name": tool_name, - "result": f"Error: {str(e)}", - "is_error": True, - } - ) - - # Add tool result message in provider-specific 
format - _append_tool_results(current_provider, tool_results, conversation_messages) - - # Call iteration callback if provided - if iteration_callback is not None: - try: - iteration_data = IterationData( - iteration=iteration + 1, # 1-indexed - tool_calls=[tc["name"] for tc in response.tool_calls_made], - input_tokens=response.input_tokens, - output_tokens=response.output_tokens, - cache_read_tokens=response.cache_read_input_tokens or 0, - cache_creation_tokens=response.cache_creation_input_tokens or 0, - ) - iteration_callback(iteration_data) - except Exception: - logger.warning("iteration_callback failed", exc_info=True) - - # After first iteration, switch from "required" to "auto" to allow model to stop - if iteration == 0 and effective_tool_choice in ("required", "any"): - effective_tool_choice = "auto" - logger.debug( - "Switched tool_choice from 'required'/'any' to 'auto' after first iteration" - ) - - iteration += 1 - - # Max iterations reached - logger.warning( - f"Tool execution loop reached max iterations ({max_tool_iterations})" - ) - - # Add a synthesis prompt to help the model generate a response - # without tool calls - the conversation currently ends with tool results - # and the model may not know to produce text output - synthesis_prompt = ( - "You have reached the maximum number of tool calls. " - "Based on all the information you have gathered, provide your final response now. " - "Do not attempt to call any more tools." - ) - conversation_messages.append({"role": "user", "content": synthesis_prompt}) - - # If streaming the final response, use the streaming helper with metadata - if stream_final: - stream = _stream_final_response( - llm_settings=llm_settings, - prompt=prompt, - max_tokens=max_tokens, - conversation_messages=conversation_messages, - response_model=response_model, - json_mode=json_mode, - temperature=temperature, - stop_seqs=stop_seqs, - reasoning_effort=reasoning_effort, - verbosity=verbosity, - thinking_budget_tokens=thinking_budget_tokens, - ) - return StreamingResponseWithMetadata( - stream=stream, - tool_calls_made=all_tool_calls, - input_tokens=total_input_tokens, - output_tokens=total_output_tokens, - cache_creation_input_tokens=total_cache_creation_tokens, - cache_read_input_tokens=total_cache_read_tokens, - thinking_content=None, # No thinking content at max iterations - iterations=iteration + 1, # +1 for the synthesis call - ) - - # Make one final call to get a text response - _current_attempt.set(1) # Reset attempt counter - - async def _final_call() -> HonchoLLMCallResponse[Any]: - # Use shared provider selection helper for backup failover support - provider, model, thinking_budget, gpt5_reasoning_effort, gpt5_verbosity = ( - get_provider_and_model() - ) - - client = CLIENTS.get(provider) - if not client: - raise ValueError(f"Missing client for {provider}") - - # No tools for final call - return await honcho_llm_call_inner( - provider, - model, - prompt, - max_tokens, - response_model, - json_mode, - _get_effective_temperature(temperature), - stop_seqs, - gpt5_reasoning_effort, - gpt5_verbosity, - thinking_budget, - False, - None, # No tools - None, # No tool_choice - conversation_messages, - ) - - if enable_retry: - final_call_func = retry( - stop=stop_after_attempt(retry_attempts), - wait=wait_exponential(multiplier=1, min=4, max=10), - before_sleep=before_retry_callback, - )(_final_call) - else: - final_call_func = _final_call - - final_response = await final_call_func() - final_response.tool_calls_made = all_tool_calls - 
final_response.iterations = iteration + 1 # +1 for the synthesis call - # Include accumulated tokens from all iterations plus the final call - final_response.input_tokens = total_input_tokens + final_response.input_tokens - final_response.output_tokens = total_output_tokens + final_response.output_tokens - final_response.cache_creation_input_tokens = ( - total_cache_creation_tokens + final_response.cache_creation_input_tokens - ) - final_response.cache_read_input_tokens = ( - total_cache_read_tokens + final_response.cache_read_input_tokens - ) - return final_response - - -def _format_assistant_tool_message( - provider: SupportedProviders, - content: Any, - tool_calls: list[dict[str, Any]], - thinking_blocks: list[dict[str, Any]] | None = None, - reasoning_details: list[dict[str, Any]] | None = None, -) -> dict[str, Any]: - """ - Format an assistant message with tool calls for a specific provider. - - Args: - provider: The LLM provider - content: The text content from the response - tool_calls: List of tool call dicts with id, name, input keys - thinking_blocks: Full thinking blocks with signatures for multi-turn replay (Anthropic only) - reasoning_details: OpenRouter reasoning_details for Gemini models (must be preserved) - - Returns: - Provider-formatted assistant message dict - """ - if provider == "anthropic": - # Anthropic requires content to be a list of blocks including tool use blocks - content_blocks: list[dict[str, Any]] = [] - - # Add thinking blocks FIRST if present (required by Anthropic when extended thinking is enabled) - # These include signatures which are required for multi-turn conversation replay - if thinking_blocks: - content_blocks.extend(thinking_blocks) - - # Add text content if present - if isinstance(content, str) and content: - content_blocks.append({"type": "text", "text": content}) - - # Add tool use blocks - for tool_call in tool_calls: - content_blocks.append( - { - "type": "tool_use", - "id": tool_call["id"], - "name": tool_call["name"], - "input": tool_call["input"], - } - ) - - return { - "role": "assistant", - "content": content_blocks, - } - elif provider == "google": - # Google format: model role with function_call parts - parts: list[dict[str, Any]] = [] - - # Add text content if present - if isinstance(content, str) and content: - parts.append({"text": content}) - - # Add function call parts with thought_signature if present - for tool_call in tool_calls: - part_data: dict[str, Any] = { - "function_call": { - "name": tool_call["name"], - "args": tool_call["input"], - } - } - # Include thought_signature if present (required by Gemini) - if "thought_signature" in tool_call: - part_data["thought_signature"] = tool_call["thought_signature"] - parts.append(part_data) - - return { - "role": "model", - "parts": parts, - } - else: - # OpenAI format - must include tool_calls in the assistant message - openai_tool_calls: list[Any] = [] - for tool_call in tool_calls: - openai_tool_calls.append( - { - "id": tool_call["id"], - "type": "function", - "function": { - "name": tool_call["name"], - "arguments": json.dumps(tool_call["input"]), - }, - } - ) - msg: dict[str, Any] = { - "role": "assistant", - "content": content if isinstance(content, str) else None, - "tool_calls": openai_tool_calls, - } - # Include reasoning_details for OpenRouter/Gemini (required for multi-turn tool use) - if reasoning_details: - msg["reasoning_details"] = reasoning_details - return msg - - -def _append_tool_results( - provider: SupportedProviders, - tool_results: list[dict[str, Any]], - 
conversation_messages: list[dict[str, Any]], -) -> None: - """ - Append tool results to conversation messages in provider-specific format. - - Args: - provider: The LLM provider - tool_results: List of tool result dicts with tool_id, tool_name, result, is_error keys - conversation_messages: The conversation to append to (modified in place) - """ - if provider == "anthropic": - # Anthropic requires tool results in specific content blocks - result_blocks: list[dict[str, Any]] = [] - for tr in tool_results: - result_blocks.append( - { - "type": "tool_result", - "tool_use_id": tr["tool_id"], - "content": str(tr["result"]), - "is_error": tr.get("is_error", False), - } - ) - - conversation_messages.append( - { - "role": "user", - "content": result_blocks, - } - ) - elif provider == "google": - # Google format: user role with function_response parts - response_parts: list[dict[str, Any]] = [] - for tr in tool_results: - response_parts.append( - { - "function_response": { - "name": tr["tool_name"], - "response": {"result": str(tr["result"])}, - } - } - ) - - conversation_messages.append( - { - "role": "user", - "parts": response_parts, - } - ) - else: - # OpenAI format - add each tool result as a separate message with role="tool" - for tr in tool_results: - conversation_messages.append( - { - "role": "tool", - "tool_call_id": tr["tool_id"], - "content": str(tr["result"]), - } - ) - - -@overload -async def honcho_llm_call( - llm_settings: LLMComponentSettings, - prompt: str, - max_tokens: int, - track_name: str | None = None, - *, - response_model: type[M], - json_mode: bool = False, - temperature: float | None = None, - stop_seqs: list[str] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] - | None = None, # OpenAI only - verbosity: Literal["low", "medium", "high"] | None = None, # OpenAI only - thinking_budget_tokens: int | None = None, - enable_retry: bool = True, - retry_attempts: int = 3, - stream: Literal[False] = False, - stream_final_only: bool = False, - tools: list[dict[str, Any]] | None = None, - tool_choice: str | dict[str, Any] | None = None, - tool_executor: Callable[[str, dict[str, Any]], Any] | None = None, - max_tool_iterations: int = 10, - messages: list[dict[str, Any]] | None = None, - max_input_tokens: int | None = None, - trace_name: str | None = None, - iteration_callback: IterationCallback | None = None, -) -> HonchoLLMCallResponse[M]: ... - - -@overload -async def honcho_llm_call( - llm_settings: LLMComponentSettings, - prompt: str, - max_tokens: int, - track_name: str | None = None, - response_model: None = None, - json_mode: bool = False, - temperature: float | None = None, - stop_seqs: list[str] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] - | None = None, # OpenAI only - verbosity: Literal["low", "medium", "high"] | None = None, # OpenAI only - thinking_budget_tokens: int | None = None, - enable_retry: bool = True, - retry_attempts: int = 3, - stream: Literal[False] = False, - stream_final_only: bool = False, - tools: list[dict[str, Any]] | None = None, - tool_choice: str | dict[str, Any] | None = None, - tool_executor: Callable[[str, dict[str, Any]], Any] | None = None, - max_tool_iterations: int = 10, - messages: list[dict[str, Any]] | None = None, - max_input_tokens: int | None = None, - trace_name: str | None = None, - iteration_callback: IterationCallback | None = None, -) -> HonchoLLMCallResponse[str]: ... 
- - -@overload -async def honcho_llm_call( - llm_settings: LLMComponentSettings, - prompt: str, - max_tokens: int, - track_name: str | None = None, - response_model: type[BaseModel] | None = None, - json_mode: bool = False, - temperature: float | None = None, - stop_seqs: list[str] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] - | None = None, # OpenAI only - verbosity: Literal["low", "medium", "high"] | None = None, # OpenAI only - thinking_budget_tokens: int | None = None, - enable_retry: bool = True, - retry_attempts: int = 3, - stream: Literal[True] = ..., - stream_final_only: bool = False, - tools: list[dict[str, Any]] | None = None, - tool_choice: str | dict[str, Any] | None = None, - tool_executor: Callable[[str, dict[str, Any]], Any] | None = None, - max_tool_iterations: int = 10, - messages: list[dict[str, Any]] | None = None, - max_input_tokens: int | None = None, - trace_name: str | None = None, - iteration_callback: IterationCallback | None = None, -) -> AsyncIterator[HonchoLLMCallStreamChunk] | StreamingResponseWithMetadata: ... - - -@conditional_observe(name="LLM Call") -async def honcho_llm_call( - llm_settings: LLMComponentSettings, - prompt: str, - max_tokens: int, - track_name: str | None = None, - response_model: type[BaseModel] | None = None, - json_mode: bool = False, - temperature: float | None = None, - stop_seqs: list[str] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] - | None = None, # OpenAI only - verbosity: Literal["low", "medium", "high"] | None = None, # OpenAI only - thinking_budget_tokens: int | None = None, - enable_retry: bool = True, - retry_attempts: int = 3, - stream: bool = False, - stream_final_only: bool = False, - tools: list[dict[str, Any]] | None = None, - tool_choice: str | dict[str, Any] | None = None, - tool_executor: Callable[[str, dict[str, Any]], Any] | None = None, - max_tool_iterations: int = 10, - messages: list[dict[str, Any]] | None = None, - max_input_tokens: int | None = None, - trace_name: str | None = None, - iteration_callback: IterationCallback | None = None, -) -> ( - HonchoLLMCallResponse[Any] - | AsyncIterator[HonchoLLMCallStreamChunk] - | StreamingResponseWithMetadata -): - """ - Make an LLM call with automatic backup provider failover. Backup provider/model - is used on the final retry attempt, which is 3 by default. 
- - Args: - llm_settings: Settings object containing PROVIDER, MODEL, - BACKUP_PROVIDER, and BACKUP_MODEL - prompt: The prompt to send to the LLM (used if messages is None) - max_tokens: Maximum tokens to generate - track_name: Optional name for AI tracking - response_model: Optional Pydantic model for structured output - json_mode: Whether to use JSON mode - temperature: Temperature for the LLM (default **none**, only some models support this) - stop_seqs: Stop sequences - reasoning_effort: OpenAI reasoning effort (GPT-5 only) - verbosity: OpenAI verbosity (GPT-5 only) - thinking_budget_tokens: Anthropic thinking budget - enable_retry: Whether to enable retry with exponential backoff - retry_attempts: Number of retry attempts - stream: Whether to stream the response - stream_final_only: If True with tools, run tool loop non-streaming then stream final answer - tools: Tool definitions for tool calling (Anthropic/OpenAI format) - tool_choice: Tool selection strategy (auto/required/specific tool) - tool_executor: Async callable to execute tools, receives (tool_name, tool_input) - max_tool_iterations: Maximum number of tool execution loops - messages: Optional message list for multi-turn conversations (overrides prompt) - iteration_callback: Optional callback invoked after each tool iteration with IterationData - - Returns: - HonchoLLMCallResponse or AsyncIterator depending on stream parameter - - Raises: - ValueError: If provider is not configured - """ - # Validate that streaming and tools are not used together - # (unless stream_final_only is set, which streams only the final response after tool calls) - if stream and tools and not stream_final_only: - raise ValueError( - "Streaming is not supported with tool calling. Set stream=False when using tools, " - + "or use stream_final_only=True to stream only the final response after tool calls." - ) - - # Set attempt counter to 1 for first call (tenacity uses 1-indexed attempts) - _current_attempt.set(1) - - def _get_provider_and_model() -> ( - tuple[SupportedProviders, str, int | None, ReasoningEffortType, VerbosityType] - ): - """ - Get the provider and model to use based on current attempt. 
- - Returns: - Tuple of (provider, model, thinking_budget, reasoning_effort, verbosity) - """ - attempt = _current_attempt.get() - - provider: SupportedProviders - model: str - thinking_budget: int | None - gpt5_reasoning_effort: ReasoningEffortType - gpt5_verbosity: VerbosityType - - # Use backup on final retry attempt (when attempt == retry_attempts) - if ( - attempt == retry_attempts - and llm_settings.BACKUP_PROVIDER is not None - and llm_settings.BACKUP_MODEL is not None - and llm_settings.BACKUP_PROVIDER in CLIENTS - ): - provider = llm_settings.BACKUP_PROVIDER - model = llm_settings.BACKUP_MODEL - thinking_budget = thinking_budget_tokens - gpt5_reasoning_effort = reasoning_effort - gpt5_verbosity = verbosity - - # Filter out incompatible parameters when using backup - if provider != "anthropic" and thinking_budget: - logger.warning( - f"thinking_budget_tokens not supported by {provider}, ignoring" - ) - thinking_budget = None - - if "gpt-5" not in model and (gpt5_reasoning_effort or gpt5_verbosity): - logger.warning( - "reasoning_effort/verbosity only supported by GPT-5 models, ignoring" - ) - gpt5_reasoning_effort = None - gpt5_verbosity = None - - logger.warning( - f"Final retry attempt {attempt}/{retry_attempts}: switching from " - + f"{llm_settings.PROVIDER}/{llm_settings.MODEL} to " - + f"backup {provider}/{model}" - ) - else: - provider = llm_settings.PROVIDER - model = llm_settings.MODEL - thinking_budget = thinking_budget_tokens - gpt5_reasoning_effort = reasoning_effort - gpt5_verbosity = verbosity - - return provider, model, thinking_budget, gpt5_reasoning_effort, gpt5_verbosity - - async def _call_with_provider_selection() -> ( - HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk] - ): - """ - Inner function that selects provider/model based on current attempt. - This function is retried, so provider selection happens on each attempt. - """ - provider, model, thinking_budget, gpt5_reasoning_effort, gpt5_verbosity = ( - _get_provider_and_model() - ) - - # Validate client exists - client = CLIENTS.get(provider) - if not client: - raise ValueError(f"Missing client for {provider}") - - # Convert tools to provider-specific format if provided - converted_tools = convert_tools_for_provider(tools, provider) if tools else None - - if stream: - return await honcho_llm_call_inner( - provider, - model, - prompt, - max_tokens, - response_model, - json_mode, - _get_effective_temperature(temperature), - stop_seqs, - gpt5_reasoning_effort, - gpt5_verbosity, - thinking_budget, - True, # type: ignore[arg-type] - converted_tools, - tool_choice, - ) - else: - return await honcho_llm_call_inner( - provider, - model, - prompt, - max_tokens, - response_model, - json_mode, - _get_effective_temperature(temperature), - stop_seqs, - gpt5_reasoning_effort, - gpt5_verbosity, - thinking_budget, - False, # type: ignore[arg-type] - converted_tools, - tool_choice, - ) - - decorated = _call_with_provider_selection - - # apply tracking - if track_name: - decorated = ai_track(track_name)(decorated) - - # Define retry callback for updating attempt counter and logging - def before_retry_callback(retry_state: Any) -> None: - """Update attempt counter before each retry. - - Note: before_sleep is called AFTER an attempt fails and BEFORE sleeping, - so we need to increment to the next attempt number. 
- """ - next_attempt = retry_state.attempt_number + 1 - _current_attempt.set(next_attempt) - exc = retry_state.outcome.exception() if retry_state.outcome else None - if exc: - logger.warning( - f"Error on attempt {retry_state.attempt_number}/{retry_attempts} with " - + f"{llm_settings.PROVIDER}/{llm_settings.MODEL}: {exc}" - ) - logger.info(f"Will retry with attempt {next_attempt}/{retry_attempts}") - - # apply retry logic - retries on ANY exception - if enable_retry: - decorated = retry( - stop=stop_after_attempt(retry_attempts), - wait=wait_exponential(multiplier=1, min=4, max=10), - before_sleep=before_retry_callback, - )(decorated) - - # If no tools or no tool_executor, just call once and return - if not tools or not tool_executor: - result: ( - HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk] - ) = await decorated() - if trace_name and isinstance(result, HonchoLLMCallResponse): - log_reasoning_trace( - task_type=trace_name, - llm_settings=llm_settings, - prompt=prompt, - response=result, - max_tokens=max_tokens, - thinking_budget_tokens=thinking_budget_tokens, - reasoning_effort=reasoning_effort, - json_mode=json_mode, - stop_seqs=stop_seqs, - messages=messages, - ) - return result - - # Validate and clamp max_tool_iterations - clamped_iterations = max( - MIN_TOOL_ITERATIONS, min(max_tool_iterations, MAX_TOOL_ITERATIONS) - ) - if clamped_iterations != max_tool_iterations: - logger.warning( - f"max_tool_iterations {max_tool_iterations} clamped to {clamped_iterations} " - + f"(valid range: {MIN_TOOL_ITERATIONS}-{MAX_TOOL_ITERATIONS})" - ) - - # Delegate to the tool execution loop - result = await _execute_tool_loop( - llm_settings=llm_settings, - prompt=prompt, - max_tokens=max_tokens, - messages=messages, - tools=tools, - tool_choice=tool_choice, - tool_executor=tool_executor, - max_tool_iterations=clamped_iterations, - response_model=response_model, - json_mode=json_mode, - temperature=temperature, - stop_seqs=stop_seqs, - reasoning_effort=reasoning_effort, - verbosity=verbosity, - thinking_budget_tokens=thinking_budget_tokens, - enable_retry=enable_retry, - retry_attempts=retry_attempts, - max_input_tokens=max_input_tokens, - get_provider_and_model=_get_provider_and_model, - before_retry_callback=before_retry_callback, - stream_final=stream_final_only, - iteration_callback=iteration_callback, - ) - if trace_name and isinstance(result, HonchoLLMCallResponse): - log_reasoning_trace( - task_type=trace_name, - llm_settings=llm_settings, - prompt=prompt, - response=result, - max_tokens=max_tokens, - thinking_budget_tokens=thinking_budget_tokens, - reasoning_effort=reasoning_effort, - json_mode=json_mode, - stop_seqs=stop_seqs, - messages=messages, - ) - return result - - -@overload -async def honcho_llm_call_inner( - provider: SupportedProviders, - model: str, - prompt: str, - max_tokens: int, - response_model: type[M], - json_mode: bool = False, - temperature: float | None = None, - stop_seqs: list[str] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] - | None = None, # OpenAI only - verbosity: Literal["low", "medium", "high"] | None = None, # OpenAI only - thinking_budget_tokens: int | None = None, # Anthropic only - stream: Literal[False] = False, - tools: list[dict[str, Any]] | None = None, - tool_choice: str | dict[str, Any] | None = None, - messages: list[dict[str, Any]] | None = None, -) -> HonchoLLMCallResponse[M]: ... 
- - -@overload -async def honcho_llm_call_inner( - provider: SupportedProviders, - model: str, - prompt: str, - max_tokens: int, - response_model: None = None, - json_mode: bool = False, - temperature: float | None = None, - stop_seqs: list[str] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] - | None = None, # OpenAI only - verbosity: Literal["low", "medium", "high"] | None = None, # OpenAI only - thinking_budget_tokens: int | None = None, # Anthropic only - stream: Literal[False] = False, - tools: list[dict[str, Any]] | None = None, - tool_choice: str | dict[str, Any] | None = None, - messages: list[dict[str, Any]] | None = None, -) -> HonchoLLMCallResponse[str]: ... - - -@overload -async def honcho_llm_call_inner( - provider: SupportedProviders, - model: str, - prompt: str, - max_tokens: int, - response_model: type[BaseModel] | None = None, - json_mode: bool = False, - temperature: float | None = None, - stop_seqs: list[str] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] - | None = None, # OpenAI only - verbosity: Literal["low", "medium", "high"] | None = None, # OpenAI only - thinking_budget_tokens: int | None = None, # Anthropic only - stream: Literal[True] = ..., - tools: list[dict[str, Any]] | None = None, - tool_choice: str | dict[str, Any] | None = None, - messages: list[dict[str, Any]] | None = None, -) -> AsyncIterator[HonchoLLMCallStreamChunk]: ... - - -async def honcho_llm_call_inner( - provider: SupportedProviders, - model: str, - prompt: str, - max_tokens: int, - response_model: type[BaseModel] | None = None, - json_mode: bool = False, - temperature: float | None = None, - stop_seqs: list[str] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] - | None = None, # OpenAI only - verbosity: Literal["low", "medium", "high"] | None = None, # OpenAI only - thinking_budget_tokens: int | None = None, # Anthropic only - stream: bool = False, - tools: list[dict[str, Any]] | None = None, - tool_choice: str | dict[str, Any] | None = None, - messages: list[dict[str, Any]] | None = None, -) -> HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk]: - # has already been validated by honcho_llm_call - client = CLIENTS[provider] - - # Use messages if provided, otherwise convert prompt to message - if messages is None: - messages = [{"role": "user", "content": prompt}] - - params: dict[str, Any] = { - "model": model, - "max_tokens": max_tokens, - "messages": messages, - "stream": stream, - } - - if temperature is not None: - params["temperature"] = temperature - - if stream: - # Return async generator for streaming responses - return handle_streaming_response( - client, - params, - json_mode, - thinking_budget_tokens, - response_model, - reasoning_effort, - verbosity, - ) - - # Remove stream parameter for non-streaming calls as some providers don't accept it - params.pop("stream", None) - - system_messages: list[str] = [] - non_system_messages: list[dict[str, Any]] = [] - - match client: - case AsyncAnthropic(): - # Anthropic requires system messages to be passed as a top-level parameter - # Extract system messages and non-system messages - for msg in params["messages"]: - if msg.get("role") == "system": - system_messages.append(msg["content"]) - else: - non_system_messages.append(msg) - - anthropic_params: dict[str, Any] = { - "model": params["model"], - "max_tokens": params["max_tokens"], - "messages": non_system_messages, - } - - if temperature is not None: - 
anthropic_params["temperature"] = temperature - - # Add system parameter if there are system messages - # Use cache_control for prompt caching - if system_messages: - anthropic_params["system"] = [ - { - "type": "text", - "text": "\n\n".join(system_messages), - "cache_control": {"type": "ephemeral"}, - } - ] - - # Add tools if provided - if tools: - anthropic_params["tools"] = tools - if tool_choice: - # Convert tool_choice to Anthropic format - if isinstance(tool_choice, str): - if tool_choice == "auto": - anthropic_params["tool_choice"] = {"type": "auto"} - elif tool_choice in ("any", "required"): - anthropic_params["tool_choice"] = {"type": "any"} - elif tool_choice == "none": - # Don't set tool_choice, let Anthropic default - pass - else: - # Assume it's a tool name - anthropic_params["tool_choice"] = { - "type": "tool", - "name": tool_choice, - } - else: - # Already in dict format, use as-is - anthropic_params["tool_choice"] = tool_choice - - # For response models, we need to request JSON and parse manually - # Note: tools and response_model should not be used together - if response_model or json_mode: - # Add JSON schema instructions to the prompt if using response_model - if response_model: - schema_json = json.dumps( - response_model.model_json_schema(), indent=2 - ) - anthropic_params["messages"][-1]["content"] += ( - f"\n\nRespond with valid JSON matching this schema:\n{schema_json}" - ) - anthropic_params["messages"].append( - {"role": "assistant", "content": "{"} - ) - - if thinking_budget_tokens: - anthropic_params["thinking"] = { - "type": "enabled", - "budget_tokens": thinking_budget_tokens, - } - - anthropic_response: AnthropicMessage = cast( - AnthropicMessage, await client.messages.create(**anthropic_params) - ) - - # Extract text content, thinking blocks, and tool use blocks from content blocks - text_blocks: list[str] = [] - thinking_text_blocks: list[str] = [] - thinking_full_blocks: list[dict[str, Any]] = [] - tool_calls: list[dict[str, Any]] = [] - for block in anthropic_response.content: - if isinstance(block, TextBlock): - text_blocks.append(block.text) - elif isinstance(block, ThinkingBlock): - thinking_text_blocks.append(block.thinking) - # Store full block with signature for multi-turn replay - thinking_full_blocks.append( - { - "type": "thinking", - "thinking": block.thinking, - "signature": block.signature, - } - ) - elif isinstance(block, ToolUseBlock): - tool_calls.append( - { - "id": block.id, - "name": block.name, - "input": block.input, - } - ) - - # Safely extract usage and stop_reason - usage: Any | Usage = anthropic_response.usage - stop_reason = anthropic_response.stop_reason - - text_content = "\n".join(text_blocks) - thinking_content = ( - "\n".join(thinking_text_blocks) if thinking_text_blocks else None - ) - - # Extract cache token counts from Anthropic usage - # Anthropic's input_tokens = uncached tokens only - # Total = input_tokens + cache_read + cache_creation - cache_creation_tokens = ( - getattr(usage, "cache_creation_input_tokens", 0) or 0 if usage else 0 - ) - cache_read_tokens = ( - getattr(usage, "cache_read_input_tokens", 0) or 0 if usage else 0 - ) - uncached_tokens = usage.input_tokens if usage else 0 - # Calculate total input tokens for consistent reporting - total_input_tokens = ( - uncached_tokens + cache_read_tokens + cache_creation_tokens - ) - - # If using response_model, parse the JSON response - if response_model: - try: - # Add back the opening brace that we prefilled - json_content = "{" + text_content - parsed_json = 
json.loads(json_content) - parsed_content = response_model.model_validate(parsed_json) - - return HonchoLLMCallResponse( - content=parsed_content, - input_tokens=total_input_tokens, - output_tokens=usage.output_tokens if usage else 0, - cache_creation_input_tokens=cache_creation_tokens, - cache_read_input_tokens=cache_read_tokens, - finish_reasons=[stop_reason] if stop_reason else [], - tool_calls_made=tool_calls, - thinking_content=thinking_content, - thinking_blocks=thinking_full_blocks, - ) - except (json.JSONDecodeError, ValidationError, ValueError) as e: - raise ValueError( - f"Failed to parse Anthropic response as {response_model}: {e}. Raw content: {text_content}" - ) from e - - return HonchoLLMCallResponse( - content=text_content, - input_tokens=total_input_tokens, - output_tokens=usage.output_tokens if usage else 0, - cache_creation_input_tokens=cache_creation_tokens, - cache_read_input_tokens=cache_read_tokens, - finish_reasons=[stop_reason] if stop_reason else [], - tool_calls_made=tool_calls, - thinking_content=thinking_content, - thinking_blocks=thinking_full_blocks, - ) - - case AsyncOpenAI(): - # For custom providers (e.g., OpenRouter), add cache_control to system messages - # This enables prompt caching for Anthropic models proxied via OpenAI-compatible APIs - processed_messages: list[dict[str, Any]] = params["messages"] - if provider == "custom": - processed_messages = [] - for msg in params["messages"]: - if msg.get("role") == "system" and isinstance( - msg.get("content"), str - ): - # Convert system message to content block format with cache_control - processed_messages.append( - { - "role": "system", - "content": [ - { - "type": "text", - "text": msg["content"], - "cache_control": {"type": "ephemeral"}, - } - ], - } - ) - else: - processed_messages.append(msg) - - openai_params: dict[str, Any] = { - "model": params["model"], - "messages": processed_messages, - } - - if temperature is not None and "gpt-5" not in model: - openai_params["temperature"] = temperature - - if "gpt-5" in model: - openai_params["max_completion_tokens"] = params["max_tokens"] - if reasoning_effort: - openai_params["reasoning_effort"] = reasoning_effort - if verbosity: - openai_params["verbosity"] = verbosity - else: - openai_params["max_tokens"] = params["max_tokens"] - - # Add tools if provided (not compatible with response_model for most cases) - if tools and not response_model: - openai_params["tools"] = tools - if tool_choice: - openai_params["tool_choice"] = tool_choice - - if json_mode and provider != "vllm": - openai_params["response_format"] = {"type": "json_object"} - - # custom shim for vLLM response model formatting - # NOTE: this is all specific to the Representation model. - # Do not call with any other response model. 
- if provider == "vllm" and response_model: - if response_model is not PromptRepresentation: - raise NotImplementedError( - "vLLM structured output currently supports only PromptRepresentation" - ) - openai_params["response_format"] = { - "type": "json_schema", - "json_schema": { - "name": response_model.__name__, - "schema": response_model.model_json_schema(), - }, - } - if stop_seqs: - openai_params["stop"] = stop_seqs - vllm_response: ChatCompletion = cast( - ChatCompletion, - await client.chat.completions.create(**openai_params), - ) - - usage = vllm_response.usage - finish_reason = vllm_response.choices[0].finish_reason - - try: - test_rep = "" - if vllm_response.choices[0].message.content is not None: - test_rep = vllm_response.choices[0].message.content - - final = validate_and_repair_json(test_rep) - - # Schema-aware repair: ensure deductive observations have required fields - - repaired_data = json.loads(final) - - # Fix deductive observations that might be missing conclusion - if "deductive" in repaired_data and isinstance( - repaired_data["deductive"], list - ): - for i, item in enumerate(repaired_data["deductive"]): - if isinstance(item, dict): - # If conclusion is missing but premises exist, create a placeholder - if "conclusion" not in item and "premises" in item: - logger.warning( - f"Deductive observation {i} missing conclusion, adding placeholder" - ) - # Try to generate a conclusion from premises if possible - if item["premises"]: - item["conclusion"] = ( - f"[Incomplete reasoning from premises: {item['premises'][0][:100]}...]" - ) - else: - item["conclusion"] = ( - "[Incomplete reasoning - conclusion missing]" - ) - # If premises is missing, add empty list (it's optional with default) - if "premises" not in item: - item["premises"] = [] - - final = json.dumps(repaired_data) - except (json.JSONDecodeError, KeyError, TypeError) as e: - final = "" - logger.warning(f"Could not perform schema-aware repair: {e}") - # Continue with original final value if repair fails - - try: - response_obj = PromptRepresentation.model_validate_json(final) - except ValidationError as e: - logger.error(f"Validation error after repair: {e}") - logger.debug(f"Problematic JSON: {final}") - - # Fallback: return empty response rather than failing - logger.warning( - "Using fallback empty Representation due to validation error" - ) - response_obj = PromptRepresentation(explicit=[]) # , deductive=[]) - - cache_creation, cache_read = extract_openai_cache_tokens(usage) - return HonchoLLMCallResponse( - content=response_obj, - input_tokens=usage.prompt_tokens if usage else 0, - output_tokens=usage.completion_tokens if usage else 0, - cache_creation_input_tokens=cache_creation, - cache_read_input_tokens=cache_read, - finish_reasons=[finish_reason] if finish_reason else [], - tool_calls_made=[], - thinking_content=extract_openai_reasoning_content(vllm_response), - ) - elif response_model: - openai_params["response_format"] = response_model - response: ChatCompletion = await client.chat.completions.parse( # pyright: ignore - **openai_params - ) - # Extract the parsed object for structured output - parsed_content = response.choices[0].message.parsed - if parsed_content is None: - raise ValueError("No parsed content in structured response") - - usage = response.usage - finish_reason = response.choices[0].finish_reason - - # Validate that parsed content matches the response model - if not isinstance(parsed_content, response_model): - raise ValueError( - f"Parsed content does not match the response model: 
{parsed_content} != {response_model}" - ) - - # Extract tool calls if present (though unlikely with structured output) - parsed_tool_calls: list[dict[str, Any]] = [] - if ( - hasattr(response.choices[0].message, "tool_calls") - and response.choices[0].message.tool_calls - ): - for tool_call in response.choices[0].message.tool_calls: - parsed_tool_calls.append( - { - "id": tool_call.id, - "name": tool_call.function.name, - "input": json.loads(tool_call.function.arguments) - if tool_call.function.arguments - else {}, - } - ) - - cache_creation, cache_read = extract_openai_cache_tokens(usage) - return HonchoLLMCallResponse( - content=parsed_content, - input_tokens=usage.prompt_tokens if usage else 0, - output_tokens=usage.completion_tokens if usage else 0, - cache_creation_input_tokens=cache_creation, - cache_read_input_tokens=cache_read, - finish_reasons=[finish_reason] if finish_reason else [], - tool_calls_made=parsed_tool_calls, - thinking_content=extract_openai_reasoning_content(response), - ) - else: - response: ChatCompletion = await client.chat.completions.create( # pyright: ignore - **openai_params - ) - - usage = response.usage # pyright: ignore - finish_reason = response.choices[0].finish_reason # pyright: ignore - - # Extract tool calls if present - tool_calls_list: list[dict[str, Any]] = [] - if response.choices[0].message.tool_calls: # pyright: ignore - for tool_call in response.choices[0].message.tool_calls: # pyright: ignore - tool_calls_list.append( - { - "id": tool_call.id, # pyright: ignore - "name": tool_call.function.name, # pyright: ignore - "input": json.loads(tool_call.function.arguments) # pyright: ignore - if tool_call.function.arguments # pyright: ignore - else {}, - } - ) - - cache_creation, cache_read = extract_openai_cache_tokens(usage) - return HonchoLLMCallResponse( - content=response.choices[0].message.content or "", # pyright: ignore - input_tokens=usage.prompt_tokens if usage else 0, # pyright: ignore - output_tokens=usage.completion_tokens if usage else 0, # pyright: ignore - cache_creation_input_tokens=cache_creation, - cache_read_input_tokens=cache_read, - finish_reasons=[finish_reason] if finish_reason else [], - tool_calls_made=tool_calls_list, - thinking_content=extract_openai_reasoning_content(response), - reasoning_details=extract_openai_reasoning_details(response), - ) - - case genai.Client(): - # Build config for Gemini - gemini_config: dict[str, Any] = {} - - # Gemini uses max_output_tokens, not max_tokens. 
- gemini_config["max_output_tokens"] = params["max_tokens"] - - if temperature is not None: - gemini_config["temperature"] = temperature - - # Add tools if provided - if tools: - gemini_config["tools"] = tools - # Handle tool_choice - if tool_choice: - if tool_choice == "auto": - gemini_config["tool_config"] = { - "function_calling_config": {"mode": "AUTO"} - } - elif tool_choice == "any" or tool_choice == "required": - gemini_config["tool_config"] = { - "function_calling_config": {"mode": "ANY"} - } - elif tool_choice == "none": - gemini_config["tool_config"] = { - "function_calling_config": {"mode": "NONE"} - } - elif isinstance(tool_choice, dict) and "name" in tool_choice: - # Specific tool selection - gemini_config["tool_config"] = { - "function_calling_config": { - "mode": "ANY", - "allowed_function_names": [tool_choice["name"]], - } - } - - if response_model is None: - if json_mode and not tools: - gemini_config["response_mime_type"] = "application/json" - - # Use messages if provided, otherwise use prompt - if messages: - # Extract system messages for system_instruction parameter - # Gemini doesn't support system role in contents - it causes - # consecutive user messages which results in empty responses - for msg in messages: - if msg.get("role") == "system": - if isinstance(msg.get("content"), str): - system_messages.append(msg["content"]) - else: - non_system_messages.append(msg) - - # Add system instruction if present - if system_messages: - gemini_config["system_instruction"] = "\n\n".join( - system_messages - ) - - # Convert non-system messages to Google format - gemini_contents: list[dict[str, Any]] = [] - for msg in non_system_messages: - # Map roles to Google's expected values (user, model) - role = msg.get("role", "user") - if role == "assistant": - role = "model" - - # Handle different content formats - if isinstance(msg.get("content"), str): - # Simple string content - gemini_contents.append( - {"role": role, "parts": [{"text": msg["content"]}]} - ) - elif isinstance(msg.get("parts"), list): - # Already in Google format (from tool calling loop) - # But still need to ensure role is correct - msg_copy = msg.copy() - msg_copy["role"] = role - gemini_contents.append(msg_copy) - elif isinstance(msg.get("content"), list): - # Content is a list of parts (Anthropic format) - skip for now - # This shouldn't happen with Google provider in tool loop - continue - else: - # Empty or unknown format, skip - continue - contents: ContentListUnionDict = cast( - ContentListUnionDict, gemini_contents - ) - else: - contents = prompt - - gemini_response: GenerateContentResponse = ( - await client.aio.models.generate_content( - model=model, - contents=contents, - config=cast(GenerateContentConfigDict, gemini_config) # pyright: ignore[reportInvalidCast] - if gemini_config - else None, - ) - ) - - # Extract text content and function calls from response - text_parts: list[str] = [] - gemini_tool_calls: list[dict[str, Any]] = [] - - if gemini_response.candidates and gemini_response.candidates[0].content: - for part in gemini_response.candidates[0].content.parts or []: - if hasattr(part, "text") and part.text: - text_parts.append(part.text) - if hasattr(part, "function_call") and part.function_call: - fc = part.function_call - tool_call_data: dict[str, Any] = { - "id": f"call_{fc.name}_{len(gemini_tool_calls)}", - "name": fc.name, - "input": dict(fc.args) if fc.args else {}, - } - # Preserve thought_signature if present (required by Gemini) - if ( - hasattr(part, "thought_signature") - and 
part.thought_signature - ): - tool_call_data["thought_signature"] = ( - part.thought_signature - ) - gemini_tool_calls.append(tool_call_data) - - text_content = "\n".join(text_parts) if text_parts else "" - input_token_count = ( - gemini_response.usage_metadata.prompt_token_count or 0 - if gemini_response.usage_metadata - else 0 - ) - output_token_count = ( - gemini_response.usage_metadata.candidates_token_count or 0 - if gemini_response.usage_metadata - else 0 - ) - finish_reason = ( - gemini_response.candidates[0].finish_reason.name - if gemini_response.candidates - and gemini_response.candidates[0].finish_reason - else "stop" - ) - - # Raise on blocked responses so retry/backup-provider logic kicks in - if ( - not text_content - and not gemini_tool_calls - and finish_reason in GEMINI_BLOCKED_FINISH_REASONS - ): - raise LLMError( - f"Gemini response blocked (finish_reason={finish_reason})", - provider="google", - model=model, - finish_reason=finish_reason, - ) - - return HonchoLLMCallResponse( - content=text_content, - input_tokens=input_token_count, - output_tokens=output_token_count, - finish_reasons=[finish_reason], - tool_calls_made=gemini_tool_calls, - ) - - else: - gemini_config["response_mime_type"] = "application/json" - gemini_config["response_schema"] = response_model - - gemini_response = await client.aio.models.generate_content( - model=model, - contents=prompt, - config=cast(GenerateContentConfigDict, gemini_config), # pyright: ignore[reportInvalidCast] - ) - - input_token_count = ( - gemini_response.usage_metadata.prompt_token_count or 0 - if gemini_response.usage_metadata - else 0 - ) - output_token_count = ( - gemini_response.usage_metadata.candidates_token_count or 0 - if gemini_response.usage_metadata - else 0 - ) - finish_reason = ( - gemini_response.candidates[0].finish_reason.name - if gemini_response.candidates - and gemini_response.candidates[0].finish_reason - else "stop" - ) - - # Raise on blocked responses before checking parsed content - if ( - not gemini_response.parsed - and finish_reason in GEMINI_BLOCKED_FINISH_REASONS - ): - raise LLMError( - f"Gemini response blocked (finish_reason={finish_reason})", - provider="google", - model=model, - finish_reason=finish_reason, - ) - - # Validate that parsed content matches the response model - if not isinstance(gemini_response.parsed, response_model): - raise ValueError( - f"Parsed content does not match the response model: {gemini_response.parsed} != {response_model}" - ) - - return HonchoLLMCallResponse( - content=gemini_response.parsed, - input_tokens=input_token_count, - output_tokens=output_token_count, - finish_reasons=[finish_reason], - tool_calls_made=[], - ) - - case AsyncGroq(): - groq_params: dict[str, Any] = { - "model": params["model"], - "max_tokens": params["max_tokens"], - "messages": params["messages"], - } - - if temperature is not None: - groq_params["temperature"] = temperature - - if response_model: - groq_params["response_format"] = response_model - elif json_mode: - groq_params["response_format"] = {"type": "json_object"} - - # TODO: figure out why groq returns unknown type and fix it - response: ChatCompletion = await client.chat.completions.create( # pyright: ignore - **groq_params - ) - if response.choices[0].message.content is None: # pyright: ignore - raise ValueError("No content in response") - - # Safely extract usage and finish_reason - usage = response.usage # pyright: ignore - finish_reason = response.choices[0].finish_reason # pyright: ignore - - # Handle response model parsing for 
Groq - cache_creation, cache_read = extract_openai_cache_tokens(usage) - if response_model: - try: - json_content = json.loads(response.choices[0].message.content) # pyright: ignore - parsed_content = response_model.model_validate(json_content) - - return HonchoLLMCallResponse( - content=parsed_content, - input_tokens=usage.prompt_tokens if usage else 0, # pyright: ignore - output_tokens=usage.completion_tokens if usage else 0, # pyright: ignore - cache_creation_input_tokens=cache_creation, - cache_read_input_tokens=cache_read, - finish_reasons=[finish_reason] if finish_reason else [], - tool_calls_made=[], - ) - except (json.JSONDecodeError, ValidationError, ValueError) as e: - raise ValueError( - f"Failed to parse Groq response as {response_model}: {e}. Raw content: {response.choices[0].message.content}" # pyright: ignore - ) from e - else: - return HonchoLLMCallResponse( - content=response.choices[0].message.content, # pyright: ignore - input_tokens=usage.prompt_tokens if usage else 0, # pyright: ignore - output_tokens=usage.completion_tokens if usage else 0, # pyright: ignore - cache_creation_input_tokens=cache_creation, - cache_read_input_tokens=cache_read, - finish_reasons=[finish_reason] if finish_reason else [], - tool_calls_made=[], - ) - - -async def handle_streaming_response( - client: AsyncAnthropic | AsyncOpenAI | genai.Client | AsyncGroq, - params: dict[str, Any], - json_mode: bool, - thinking_budget_tokens: int | None, - response_model: type[BaseModel] | None = None, - reasoning_effort: Literal["low", "medium", "high", "minimal"] | None = None, - verbosity: Literal["low", "medium", "high"] | None = None, -) -> AsyncIterator[HonchoLLMCallStreamChunk]: - """ - Handle streaming responses for all supported providers. - - Args: - client: The LLM client instance - params: Request parameters including stream=True - json_mode: Whether to use JSON mode - thinking_budget_tokens: Anthropic thinking budget tokens - response_model: Pydantic model for structured output - reasoning_effort: OpenAI reasoning effort level (GPT-5 only) - verbosity: OpenAI verbosity level (GPT-5 only) - - Yields: - HonchoLLMCallStreamChunk: Individual chunks of the streaming response - """ - match client: - case AsyncAnthropic(): - # Anthropic requires system messages as a top-level parameter - messages = params["messages"] - system_content = "\n\n".join( - m["content"] for m in messages if m.get("role") == "system" - ) - anthropic_params: dict[str, Any] = { - "model": params["model"], - "max_tokens": params["max_tokens"], - "messages": [m for m in messages if m.get("role") != "system"], - } - if system_content: - anthropic_params["system"] = [ - { - "type": "text", - "text": system_content, - "cache_control": {"type": "ephemeral"}, - } - ] - - # For response models, we need to request JSON and parse manually - # Note: Streaming with response_model is not ideal but we'll accumulate and parse at the end - if response_model or json_mode: - # Add JSON schema instructions to the prompt if using response_model - if response_model: - schema_json = json.dumps( - response_model.model_json_schema(), indent=2 - ) - anthropic_params["messages"][-1]["content"] += ( - f"\n\nRespond with valid JSON matching this schema:\n{schema_json}" - ) - anthropic_params["messages"].append( - {"role": "assistant", "content": "{"} - ) - - if thinking_budget_tokens: - anthropic_params["thinking"] = { - "type": "enabled", - "budget_tokens": thinking_budget_tokens, - } - - async with client.messages.stream(**anthropic_params) as 
anthropic_stream: - async for chunk in anthropic_stream: - if ( - chunk.type == "content_block_delta" - and hasattr(chunk, "delta") - and hasattr(chunk.delta, "text") - ): - text_content = getattr(chunk.delta, "text", "") - yield HonchoLLMCallStreamChunk(content=text_content) - final_message = await anthropic_stream.get_final_message() - usage = final_message.usage - output_tokens = usage.output_tokens if usage else None - yield HonchoLLMCallStreamChunk( - content="", - is_done=True, - finish_reasons=[final_message.stop_reason] - if final_message.stop_reason - else [], - output_tokens=output_tokens, - ) - - case AsyncOpenAI(): - openai_params: dict[str, Any] = { - "model": params["model"], - "messages": params["messages"], - "stream": True, - "stream_options": {"include_usage": True}, - } - - model_name = params["model"] - if "gpt-5" in model_name: - openai_params["max_completion_tokens"] = params["max_tokens"] - if reasoning_effort: - openai_params["reasoning_effort"] = reasoning_effort - if verbosity: - openai_params["verbosity"] = verbosity - else: - openai_params["max_tokens"] = params["max_tokens"] - - if response_model: - openai_params["response_format"] = response_model - elif json_mode: - openai_params["response_format"] = {"type": "json_object"} - - openai_stream = await client.chat.completions.create(**openai_params) # pyright: ignore - finish_reason: str | None = None - usage_chunk_received = False - async for chunk in openai_stream: # pyright: ignore - chunk = cast(ChatCompletionChunk, chunk) - if chunk.choices and chunk.choices[0].delta.content: - content = chunk.choices[0].delta.content - yield HonchoLLMCallStreamChunk(content=content) - # Track finish_reason when it appears (before usage chunk) - if chunk.choices and chunk.choices[0].finish_reason: - finish_reason = chunk.choices[0].finish_reason - # Check for usage info in chunk (with include_usage, this is a separate chunk with empty choices) - if hasattr(chunk, "usage") and chunk.usage: - yield HonchoLLMCallStreamChunk( - content="", - is_done=True, - finish_reasons=[finish_reason] if finish_reason else [], - output_tokens=chunk.usage.completion_tokens, - ) - usage_chunk_received = True - - # If stream ended without usage chunk (interrupted), still yield final chunk - if not usage_chunk_received and finish_reason: - logger.warning("OpenAI stream ended without usage chunk (interrupted)") - yield HonchoLLMCallStreamChunk( - content="", - is_done=True, - finish_reasons=[finish_reason], - output_tokens=None, - ) - - case genai.Client(): - prompt_text = params["messages"][0]["content"] if params["messages"] else "" - stream_config: GenerateContentConfigDict = { - "max_output_tokens": cast(int, params["max_tokens"]), - } - - if response_model is not None: - stream_config["response_mime_type"] = "application/json" - stream_config["response_schema"] = response_model - response_stream = await client.aio.models.generate_content_stream( - model=params["model"], - contents=prompt_text, - config=stream_config, - ) - else: - if json_mode: - stream_config["response_mime_type"] = "application/json" - response_stream = await client.aio.models.generate_content_stream( - model=params["model"], - contents=prompt_text, - config=stream_config, - ) - - final_chunk = None - async for chunk in response_stream: - if chunk.text: - yield HonchoLLMCallStreamChunk(content=chunk.text) - final_chunk = chunk - - # NOTE: Blocked-response check is intentionally omitted for streaming. 
- # Exceptions mid-iteration in an async generator won't be caught by - # the tenacity retry wrapper in honcho_llm_call. - finish_reason = "stop" # Default fallback - gemini_output_tokens: int | None = None - if ( - final_chunk - and hasattr(final_chunk, "candidates") - and final_chunk.candidates - and hasattr(final_chunk.candidates[0], "finish_reason") - and final_chunk.candidates[0].finish_reason - ): - finish_reason = final_chunk.candidates[0].finish_reason.name - - # Extract output tokens from usage_metadata if available - if ( - final_chunk - and hasattr(final_chunk, "usage_metadata") - and final_chunk.usage_metadata - and hasattr(final_chunk.usage_metadata, "candidates_token_count") - ): - gemini_output_tokens = ( - final_chunk.usage_metadata.candidates_token_count or None - ) - - yield HonchoLLMCallStreamChunk( - content="", - is_done=True, - finish_reasons=[finish_reason], - output_tokens=gemini_output_tokens, - ) - - case AsyncGroq(): - groq_params: dict[str, Any] = { - "model": params["model"], - "max_tokens": params["max_tokens"], - "messages": params["messages"], - "stream": True, - } - - if response_model: - groq_params["response_format"] = response_model - elif json_mode: - groq_params["response_format"] = {"type": "json_object"} - - groq_stream = await client.chat.completions.create(**groq_params) # pyright: ignore - async for chunk in groq_stream: # pyright: ignore - chunk = cast(ChatCompletionChunk, chunk) - if chunk.choices and chunk.choices[0].delta.content: - yield HonchoLLMCallStreamChunk( - content=chunk.choices[0].delta.content - ) - if chunk.choices and chunk.choices[0].finish_reason: - yield HonchoLLMCallStreamChunk( - content="", - is_done=True, - finish_reasons=[chunk.choices[0].finish_reason], - ) diff --git a/src/utils/search.py b/src/utils/search.py index 67a0d355e..59c8f137a 100644 --- a/src/utils/search.py +++ b/src/utils/search.py @@ -382,7 +382,7 @@ async def search( query_embedding = await embedding_client.embed(query) except ValueError as e: raise ValidationException( - f"Query exceeds maximum token limit of {settings.MAX_EMBEDDING_TOKENS}." + f"Query exceeds maximum token limit of {settings.EMBEDDING.MAX_INPUT_TOKENS}." 
) from e if not _uses_pgvector_message_search(): diff --git a/src/utils/summarizer.py b/src/utils/summarizer.py index ca1965b9b..d964402c6 100644 --- a/src/utils/summarizer.py +++ b/src/utils/summarizer.py @@ -11,10 +11,11 @@ from src import schemas from src.cache.client import cache as cache_client -from src.config import settings +from src.config import ConfiguredModelSettings, settings from src.crud.session import session_cache_key from src.dependencies import tracked_db from src.exceptions import ResourceNotFoundException +from src.llm import HonchoLLMCallResponse, honcho_llm_call from src.models import Message from src.telemetry import prometheus_metrics from src.telemetry.events import AgentToolSummaryCreatedEvent, emit @@ -24,7 +25,6 @@ DeriverTaskTypes, TokenTypes, ) -from src.utils.clients import HonchoLLMCallResponse, honcho_llm_call from src.utils.formatting import utc_now_iso from src.utils.tokens import estimate_tokens, track_deriver_input_tokens @@ -78,6 +78,10 @@ def to_schema_summary(s: Summary) -> schemas.Summary: ] +def _get_summary_model_config() -> ConfiguredModelSettings: + return settings.SUMMARY.MODEL_CONFIG + + # Configuration constants for summaries MESSAGES_PER_SHORT_SUMMARY = settings.SUMMARY.MESSAGES_PER_SHORT_SUMMARY MESSAGES_PER_LONG_SUMMARY = settings.SUMMARY.MESSAGES_PER_LONG_SUMMARY @@ -212,7 +216,7 @@ async def create_short_summary( ) return await honcho_llm_call( - llm_settings=settings.SUMMARY, + model_config=_get_summary_model_config(), prompt=prompt, max_tokens=settings.SUMMARY.MAX_TOKENS_SHORT, ) @@ -237,7 +241,7 @@ async def create_long_summary( ) return await honcho_llm_call( - llm_settings=settings.SUMMARY, + model_config=_get_summary_model_config(), prompt=prompt, max_tokens=settings.SUMMARY.MAX_TOKENS_LONG, ) diff --git a/src/utils/types.py b/src/utils/types.py index 0654ed7aa..dd66dccf5 100644 --- a/src/utils/types.py +++ b/src/utils/types.py @@ -34,7 +34,6 @@ async def post_commit(self) -> None: await self.on_commit() -SupportedProviders = Literal["anthropic", "openai", "google", "groq", "custom", "vllm"] TaskType = Literal[ "webhook", "summary", "representation", "dream", "deletion", "reconciler" ] diff --git a/src/vector_store/lancedb.py b/src/vector_store/lancedb.py index 0853a98d8..77c3c6cd3 100644 --- a/src/vector_store/lancedb.py +++ b/src/vector_store/lancedb.py @@ -25,7 +25,7 @@ _VALID_IDENTIFIER_PATTERN = re.compile(r"^[a-zA-Z_][a-zA-Z0-9_]*$") # Schema for LanceDB tables -# id: string, vector: fixed_size_list of float32 (1536 dimensions for OpenAI embeddings) +# id: string, vector: fixed_size_list of float32 (dimension from embedding settings) # Additional metadata columns are added dynamically # pyright: reportUnknownMemberType=false, reportUnknownVariableType=false, reportUnknownParameterType=false @@ -93,7 +93,7 @@ async def _get_or_create_table( fields: list[pa.Field] = [ pa.field("id", pa.string()), pa.field( - "vector", pa.list_(pa.float32(), settings.VECTOR_STORE.DIMENSIONS) + "vector", pa.list_(pa.float32(), settings.EMBEDDING.VECTOR_DIMENSIONS) ), ] fields.extend(self._metadata_fields_for_namespace(namespace)) diff --git a/tests/__init__.py b/tests/__init__.py index e69de29bb..7468bd264 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -0,0 +1 @@ +# Test package marker for shared helper imports. 
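The summarizer hunk above is the call-site shape the rest of this series migrates to: callers resolve a `ConfiguredModelSettings` object (`settings.<FEATURE>.MODEL_CONFIG`) and pass it as `model_config=`, instead of handing `honcho_llm_call` the whole feature settings block via `llm_settings=`. A minimal sketch of that pattern, using only names visible in the hunk -- the wrapper accepts many more keyword arguments (tools, streaming, response models) that are elided here:

```python
# Sketch only -- names come from the summarizer diff above; everything
# else honcho_llm_call accepts (tools, response_model, streaming, ...)
# is deliberately omitted.
from src.config import ConfiguredModelSettings, settings
from src.llm import HonchoLLMCallResponse, honcho_llm_call


def _get_summary_model_config() -> ConfiguredModelSettings:
    # Resolve the nested model config once, rather than passing
    # llm_settings=settings.SUMMARY as the old call sites did.
    return settings.SUMMARY.MODEL_CONFIG


async def create_short_summary_sketch(prompt: str) -> HonchoLLMCallResponse[str]:
    return await honcho_llm_call(
        model_config=_get_summary_model_config(),
        prompt=prompt,
        max_tokens=settings.SUMMARY.MAX_TOKENS_SHORT,
    )
```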
diff --git a/tests/bench/harness.py b/tests/bench/harness.py index 6f725bf7d..90f32e44d 100755 --- a/tests/bench/harness.py +++ b/tests/bench/harness.py @@ -648,29 +648,69 @@ def print_honcho_config(self) -> None: # and will be inherited by this subprocess try: + from pydantic import BaseModel from src.config import settings - # Function to recursively print settings + SENSITIVE_TOKENS = ('password', 'secret', 'key', 'uri') + + def _mask(full_key, value): + if isinstance(full_key, str) and any(t in full_key.lower() for t in SENSITIVE_TOKENS): + return '*' * len(value) if value else 'None' + return value + + def _compact(model): + # Render a pydantic BaseModel as `field=value` pairs, skipping + # None / empty-dict fields and recursing into nested models. + parts = [] + for field_name in type(model).model_fields: + val = getattr(model, field_name) + if val is None: + continue + if isinstance(val, BaseModel): + inner = _compact(val) + if inner: + parts.append(f"{{field_name}}=({{inner}})") + continue + if isinstance(val, dict) and not val: + continue + parts.append(f"{{field_name}}={{val!r}}") + return " ".join(parts) + def print_settings(obj, prefix="", max_depth=3, current_depth=0): if current_depth >= max_depth: return - if hasattr(obj, '__dict__'): - for key, value in obj.__dict__.items(): - if not key.startswith('_'): - full_key = f"{{prefix}}.{{key}}" if prefix else key - # Handle nested settings objects - if hasattr(value, '__dict__') and not isinstance(value, (str, int, float, bool, type(None))): - print(f"\\n📋 {{full_key}}:") - print_settings(value, full_key, max_depth, current_depth + 1) - else: - # Mask sensitive information - if isinstance(full_key, str) and any(sensitive in full_key.lower() for sensitive in ['password', 'secret', 'key', 'uri']): - masked_value = '*' * len(value) if value else 'None' - else: - masked_value = value - print(f" {{key}}: {{masked_value}}") - - # Print all settings + if not hasattr(obj, '__dict__'): + return + for key, value in obj.__dict__.items(): + if key.startswith('_'): + continue + full_key = f"{{prefix}}.{{key}}" if prefix else key + + # dict-of-BaseModel → print each entry on its own line compactly + if ( + isinstance(value, dict) and value + and all(isinstance(v, BaseModel) for v in value.values()) + ): + print(f"\\n📋 {{full_key}}:") + for k, v in value.items(): + rendered = _compact(v) + print(f" {{k}}: {{rendered}}") + continue + + if isinstance(value, BaseModel): + print(f"\\n📋 {{full_key}}:") + rendered = _compact(value) + if rendered: + print(f" {{rendered}}") + continue + + if hasattr(value, '__dict__') and not isinstance(value, (str, int, float, bool, type(None))): + print(f"\\n📋 {{full_key}}:") + print_settings(value, full_key, max_depth, current_depth + 1) + continue + + print(f" {{key}}: {{_mask(full_key, value)}}") + print_settings(settings) except Exception as e: diff --git a/tests/conftest.py b/tests/conftest.py index 2ef7086b6..cd739b7c6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -72,8 +72,17 @@ def emit(self, record: logging.LogRecord): "tests/bench/", "tests/alembic/", "tests/unified/", + "tests/live_llm/", + # Pure llm unit tests should stay isolated from the broader app/runtime fixtures. + "tests/llm/", + # LLM transport tests mock providers directly and don't need database/runtime setup. 
+ "tests/utils/test_length_finish_reason.py", + "tests/utils/test_clients.py", ) +_LIVE_LLM_MARKER = "live_llm" +_LIVE_LLM_SKIP_REASON = "live LLM tests are disabled; pass --live-llm to run them" + def _requires_runtime_mocks(nodeid: str) -> bool: return not any( @@ -87,6 +96,28 @@ def _get_nodeid(request: pytest.FixtureRequest) -> str: return nodeid if isinstance(nodeid, str) else "" +def pytest_addoption(parser: pytest.Parser) -> None: + parser.addoption( + "--live-llm", + action="store_true", + default=False, + help="Run opt-in live LLM integration tests that call provider APIs.", + ) + + +def pytest_collection_modifyitems( + config: pytest.Config, + items: list[pytest.Item], +) -> None: + if config.getoption("--live-llm"): + return + + skip_live = pytest.mark.skip(reason=_LIVE_LLM_SKIP_REASON) + for item in items: + if _LIVE_LLM_MARKER in item.keywords: + item.add_marker(skip_live) + + def _get_test_db_url(worker_id: str) -> URL: """Get a worker-specific test database URL for pytest-xdist parallelism.""" @@ -412,9 +443,10 @@ def _content_to_embedding(content: str) -> list[float]: # Hash the content to get a deterministic seed content_hash = hashlib.sha256(content.encode()).digest() - # Use hash bytes to generate 1536 floats between -1 and 1 + vector_dimensions = settings.EMBEDDING.VECTOR_DIMENSIONS + # Use hash bytes to generate deterministic floats between -1 and 1 embedding: list[float] = [] - for i in range(1536): + for i in range(vector_dimensions): # Use different bytes from hash (cycling through) byte_val = content_hash[i % len(content_hash)] # Normalize to [-1, 1] range @@ -431,6 +463,9 @@ def mock_openai_embeddings(request: pytest.FixtureRequest): with ( patch("src.embedding_client.embedding_client.embed") as mock_embed, + patch( + "src.embedding_client.embedding_client.simple_batch_embed" + ) as mock_simple_batch_embed, patch("src.embedding_client.embedding_client.batch_embed") as mock_batch_embed, ): # Mock the embed method to return content-dependent embedding @@ -439,6 +474,11 @@ def embed_side_effect(content: str) -> list[float]: mock_embed.side_effect = embed_side_effect + async def mock_simple_batch_embed_func(texts: list[str]) -> list[list[float]]: + return [_content_to_embedding(text) for text in texts] + + mock_simple_batch_embed.side_effect = mock_simple_batch_embed_func + # Mock the batch_embed method to return content-dependent embeddings async def mock_batch_embed_func( id_resource_dict: dict[str, tuple[str, list[int]]], @@ -450,7 +490,11 @@ async def mock_batch_embed_func( mock_batch_embed.side_effect = mock_batch_embed_func - yield {"embed": mock_embed, "batch_embed": mock_batch_embed} + yield { + "embed": mock_embed, + "simple_batch_embed": mock_simple_batch_embed, + "batch_embed": mock_batch_embed, + } @pytest.fixture(autouse=True) @@ -670,10 +714,10 @@ def create_mock_response( # Patch the honcho_llm_call decorator to prevent actual LLM calls at module level original_decorator = None try: - import src.utils.clients + import src.llm - original_decorator = src.utils.clients.honcho_llm_call - src.utils.clients.honcho_llm_call = lambda *args, **kwargs: lambda func: func # pyright: ignore[reportUnknownLambdaType] + original_decorator = src.llm.honcho_llm_call + src.llm.honcho_llm_call = lambda *args, **kwargs: lambda func: func # pyright: ignore[reportUnknownLambdaType] except ImportError: pass @@ -707,21 +751,21 @@ def sync_wrapper(*func_args: Any, **func_kwargs: Any) -> Any: # pyright: ignore return mock_llm_decorator - with 
patch("src.utils.clients.honcho_llm_call", side_effect=decorator_factory): + with patch("src.llm.honcho_llm_call", side_effect=decorator_factory): yield decorator_factory # Restore the original decorator if original_decorator: try: - import src.utils.clients + import src.llm - src.utils.clients.honcho_llm_call = original_decorator + src.llm.honcho_llm_call = original_decorator except ImportError: pass @pytest.fixture(autouse=True) -def mock_tracked_db(db_engine: AsyncEngine, request: pytest.FixtureRequest): +def mock_tracked_db(request: pytest.FixtureRequest): """Mock tracked_db to create fresh sessions per call. Using a session factory instead of a shared session avoids asyncio lock @@ -733,6 +777,7 @@ def mock_tracked_db(db_engine: AsyncEngine, request: pytest.FixtureRequest): from contextlib import asynccontextmanager + db_engine = request.getfixturevalue("db_engine") session_factory = async_sessionmaker(bind=db_engine, expire_on_commit=False) @asynccontextmanager diff --git a/tests/deriver/test_deriver_processing.py b/tests/deriver/test_deriver_processing.py index 0cde8a68f..5822f4d54 100644 --- a/tests/deriver/test_deriver_processing.py +++ b/tests/deriver/test_deriver_processing.py @@ -1,10 +1,15 @@ import signal +from datetime import datetime, timezone from typing import Any +from unittest.mock import AsyncMock, Mock, patch import pytest from src import models -from src.utils.representation import Representation +from src.config import settings +from src.deriver.deriver import process_representation_tasks_batch +from src.llm import HonchoLLMCallResponse +from src.utils.representation import PromptRepresentation, Representation from src.utils.work_unit import construct_work_unit_key, parse_work_unit_key @@ -12,6 +17,59 @@ class TestDeriverProcessing: """Test suite for deriver processing using the conftest fixtures""" + async def test_process_representation_tasks_batch_uses_model_config(self): + message = Mock( + id=1, + public_id="msg_1", + session_name="session-1", + workspace_name="workspace-1", + peer_name="alice", + content="hello", + token_count=5, + created_at=datetime.now(timezone.utc), + ) + configuration = Mock() + configuration.reasoning.enabled = True + + mock_response = HonchoLLMCallResponse( + content=PromptRepresentation(explicit=[]), + input_tokens=10, + output_tokens=5, + finish_reasons=["STOP"], + ) + + with patch( + "src.deriver.deriver.honcho_llm_call", + new_callable=AsyncMock, + return_value=mock_response, + ) as mock_llm_call: + await process_representation_tasks_batch( + messages=[message], + message_level_configuration=configuration, + observers=["bob"], + observed="alice", + queue_item_message_ids=[1], + ) + + await_args = mock_llm_call.await_args + if await_args is None: + raise AssertionError("Expected deriver LLM call") + kwargs = await_args.kwargs + expected_config = settings.DERIVER.MODEL_CONFIG.model_copy( + update={ + "stop_sequences": [" \n", "\n\n\n\n"], + } + ) + assert "model_config" in kwargs + assert kwargs["model_config"].model == expected_config.model + assert kwargs["model_config"].thinking_effort == expected_config.thinking_effort + assert ( + kwargs["model_config"].thinking_budget_tokens + == expected_config.thinking_budget_tokens + ) + assert kwargs["model_config"].stop_sequences == expected_config.stop_sequences + assert "llm_settings" not in kwargs + async def test_work_unit_key_generation( self, sample_session_with_peers: tuple[models.Session, list[models.Peer]], diff --git a/tests/deriver/test_queue_processing.py 
b/tests/deriver/test_queue_processing.py index 9538c871a..540dfab52 100644 --- a/tests/deriver/test_queue_processing.py +++ b/tests/deriver/test_queue_processing.py @@ -1088,8 +1088,16 @@ async def test_forced_batching_waits_for_threshold( db_session: AsyncSession, sample_session_with_peers: tuple[models.Session, list[models.Peer]], create_queue_payload: Callable[..., Any], + monkeypatch: pytest.MonkeyPatch, ) -> None: - """Test that representation work units below token threshold are not claimed""" + """Test that representation work units below token threshold are not claimed. + + The token-threshold gate in QueueManager.get_and_claim_work_units is + skipped entirely when DERIVER_FLUSH_ENABLED is True, so this test + forces it False regardless of what the process env has set (benches + commonly enable flush mode for immediate processing). + """ + monkeypatch.setattr(settings.DERIVER, "FLUSH_ENABLED", False) session, peers = sample_session_with_peers peer = peers[0] diff --git a/tests/dialectic/test_model_config_usage.py b/tests/dialectic/test_model_config_usage.py new file mode 100644 index 000000000..03b6e9c9c --- /dev/null +++ b/tests/dialectic/test_model_config_usage.py @@ -0,0 +1,111 @@ +import time +from unittest.mock import AsyncMock, patch + +import pytest + +from src.config import settings +from src.dialectic.core import DialecticAgent +from src.llm import ( + HonchoLLMCallResponse, + HonchoLLMCallStreamChunk, + StreamingResponseWithMetadata, +) + + +async def _stream_chunks() -> StreamingResponseWithMetadata: + async def _stream(): + yield HonchoLLMCallStreamChunk(content="streamed") + yield HonchoLLMCallStreamChunk(content="", is_done=True) + + return StreamingResponseWithMetadata( + _stream(), + tool_calls_made=[], + input_tokens=10, + output_tokens=5, + cache_creation_input_tokens=0, + cache_read_input_tokens=0, + iterations=1, + ) + + +@pytest.mark.asyncio +async def test_dialectic_answer_uses_level_model_config() -> None: + agent = DialecticAgent( + workspace_name="workspace", + session_name="session", + observer="observer", + observed="observed", + reasoning_level="medium", + ) + + mock_response = HonchoLLMCallResponse( + content="answer", + input_tokens=10, + output_tokens=5, + finish_reasons=["stop"], + ) + + with ( + patch.object( + DialecticAgent, + "_prepare_query", + new=AsyncMock( + return_value=(AsyncMock(), "task", "run", time.perf_counter()) + ), + ), + patch.object(DialecticAgent, "_log_response_metrics"), + patch( + "src.dialectic.core.honcho_llm_call", + new=AsyncMock(return_value=mock_response), + ) as mock_llm_call, + ): + result = await agent.answer("What do you know?") + + await_args = mock_llm_call.await_args + if await_args is None: + raise AssertionError("Expected dialectic LLM call") + kwargs = await_args.kwargs + expected_config = settings.DIALECTIC.LEVELS["medium"].MODEL_CONFIG + + assert result == "answer" + assert kwargs["model_config"] == expected_config + assert "llm_settings" not in kwargs + assert "thinking_budget_tokens" not in kwargs + + +@pytest.mark.asyncio +async def test_dialectic_answer_stream_uses_level_model_config() -> None: + agent = DialecticAgent( + workspace_name="workspace", + session_name="session", + observer="observer", + observed="observed", + reasoning_level="medium", + ) + + with ( + patch.object( + DialecticAgent, + "_prepare_query", + new=AsyncMock( + return_value=(AsyncMock(), "task", "run", time.perf_counter()) + ), + ), + patch.object(DialecticAgent, "_log_response_metrics"), + patch( + 
"src.dialectic.core.honcho_llm_call", + new=AsyncMock(return_value=await _stream_chunks()), + ) as mock_llm_call, + ): + chunks = [chunk async for chunk in agent.answer_stream("What do you know?")] + + await_args = mock_llm_call.await_args + if await_args is None: + raise AssertionError("Expected dialectic streaming LLM call") + kwargs = await_args.kwargs + expected_config = settings.DIALECTIC.LEVELS["medium"].MODEL_CONFIG + + assert chunks == ["streamed"] + assert kwargs["model_config"] == expected_config + assert "llm_settings" not in kwargs + assert "thinking_budget_tokens" not in kwargs diff --git a/tests/dreamer/test_model_config_usage.py b/tests/dreamer/test_model_config_usage.py new file mode 100644 index 000000000..91d1d1418 --- /dev/null +++ b/tests/dreamer/test_model_config_usage.py @@ -0,0 +1,56 @@ +from unittest.mock import AsyncMock, patch + +import pytest + +from src.config import settings +from src.dreamer.specialists import DeductionSpecialist +from src.llm import HonchoLLMCallResponse + + +@pytest.mark.asyncio +async def test_deduction_specialist_uses_nested_model_config( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings.METRICS, "ENABLED", False) + specialist = DeductionSpecialist() + mock_response = HonchoLLMCallResponse( + content="done", + input_tokens=10, + output_tokens=5, + finish_reasons=["stop"], + ) + + with ( + patch( + "src.dreamer.specialists.crud.get_peer", + new=AsyncMock(), + ), + patch( + "src.dreamer.specialists.crud.get_peer_card", + new=AsyncMock(return_value=None), + ), + patch( + "src.dreamer.specialists.create_tool_executor", + new=AsyncMock(return_value=AsyncMock()), + ), + patch( + "src.dreamer.specialists.honcho_llm_call", + new=AsyncMock(return_value=mock_response), + ) as mock_llm_call, + ): + result = await specialist.run( + workspace_name="workspace", + observer="alice", + observed="alice", + session_name="session", + ) + + await_args = mock_llm_call.await_args + if await_args is None: + raise AssertionError("Expected dreamer LLM call") + kwargs = await_args.kwargs + expected_config = settings.DREAM.DEDUCTION_MODEL_CONFIG + + assert result.content == "done" + assert kwargs["model_config"] == expected_config + assert "llm_settings" not in kwargs diff --git a/tests/integration/test_enqueue.py b/tests/integration/test_enqueue.py index 09bc9edaa..0f3d50ade 100644 --- a/tests/integration/test_enqueue.py +++ b/tests/integration/test_enqueue.py @@ -641,68 +641,6 @@ async def test_observer_left_session_no_queue_items_generated( assert observer_who_stayed.name in observers assert sender_peer.name in observers - @pytest.mark.asyncio - async def test_sender_not_in_peer_configuration_uses_defaults( - self, - db_session: AsyncSession, - sample_data: tuple[Workspace, Peer], - ): - """Test get_effective_observe_me handles missing sender configuration gracefully""" - test_workspace, existing_peer = sample_data - - # Create observer peer - observer_peer = models.Peer( - workspace_name=test_workspace.name, name=str(generate_nanoid()) - ) - db_session.add(observer_peer) - - # Create session with only observer (sender not in peers_with_configuration) - test_session = ( - await crud.get_or_create_session( - db_session, - schemas.SessionCreate( - name=str(generate_nanoid()), - peers={ - observer_peer.name: schemas.SessionPeerConfig( - observe_others=True - ), - }, - ), - test_workspace.name, - ) - ).resource - await db_session.commit() - - # Create message from peer NOT in the session configuration - # This simulates the race condition 
where a peer left after sending - payload = await self.create_sample_payload( - db_session, - workspace_name=test_workspace.name, - session_name=test_session.name, - peer_name=existing_peer.name, - ) - - initial_count = await self.count_queue_items(db_session) - await enqueue(payload) - final_count = await self.count_queue_items(db_session) - - # With deduplication: 1 queue item per message with all observers - assert final_count - initial_count == 1 - - result = await db_session.execute( - select(QueueItem).where(QueueItem.session_id == test_session.id) - ) - queue_items = result.scalars().all() - - assert len(queue_items) == 1 - item = queue_items[0] - assert item.payload.get("task_type") == "representation" - assert item.payload.get("observed") == existing_peer.name - observers = item.payload.get("observers") - assert observers is not None - assert existing_peer.name in observers # self-observation (default) - assert observer_peer.name in observers # observer (observing others) - @pytest.mark.asyncio async def test_mixed_active_inactive_peers_complex_scenario( self, diff --git a/tests/integration/test_message_embeddings.py b/tests/integration/test_message_embeddings.py index ef0450496..091e8cf3e 100644 --- a/tests/integration/test_message_embeddings.py +++ b/tests/integration/test_message_embeddings.py @@ -478,7 +478,7 @@ async def test_message_chunking_creates_multiple_embeddings( monkeypatch.setattr("src.config.settings.EMBED_MESSAGES", True) # Mock a low token limit to force chunking - monkeypatch.setattr("src.config.settings.MAX_EMBEDDING_TOKENS", 10) + monkeypatch.setattr("src.config.settings.EMBEDDING.MAX_INPUT_TOKENS", 10) test_workspace, test_peer = sample_data diff --git a/tests/integration/test_token_metrics.py b/tests/integration/test_token_metrics.py index 7c24223a6..4e6c16529 100644 --- a/tests/integration/test_token_metrics.py +++ b/tests/integration/test_token_metrics.py @@ -19,6 +19,7 @@ from sqlalchemy.ext.asyncio import AsyncSession from src import crud, models, schemas +from src.llm import HonchoLLMCallResponse from src.models import Peer, Workspace from src.schemas import ( ResolvedConfiguration, @@ -31,7 +32,6 @@ deriver_tokens_processed_counter, dialectic_tokens_processed_counter, ) -from src.utils.clients import HonchoLLMCallResponse from src.utils.representation import ExplicitObservationBase, PromptRepresentation from src.utils.summarizer import ( SummaryType, diff --git a/tests/live_llm/README.md b/tests/live_llm/README.md new file mode 100644 index 000000000..cef108027 --- /dev/null +++ b/tests/live_llm/README.md @@ -0,0 +1,59 @@ +# Live LLM Tests + +These tests call real provider APIs and are disabled by default. + +Run them with: + +```bash +uv run pytest tests/live_llm -n 0 --live-llm --no-header -q +``` + +Required API key env vars: + +- `LLM_ANTHROPIC_API_KEY` +- `LLM_OPENAI_API_KEY` +- `LLM_GEMINI_API_KEY` + +Model-family env vars: + +- `LIVE_LLM_ANTHROPIC_45_PLUS_MODELS` +- `LIVE_LLM_OPENAI_GPT4_MODELS` +- `LIVE_LLM_OPENAI_GPT5_MODELS` +- `LIVE_LLM_OPENAI_OPENROUTER_NON_REASONING_MODELS` (OpenAI-transport → OpenRouter-served non-reasoning models) +- `LIVE_LLM_GEMINI_25_MODELS` +- `LIVE_LLM_GEMINI_30_MODELS` +- `LIVE_LLM_GEMINI_31_MODELS` + +Each model env var accepts a comma-separated list of bare model ids or provider-qualified ids. 
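+
+A provider-qualified id pins an entry to a single transport when the bare
+id alone would be ambiguous, e.g. `openai:gpt-4.1` alongside the plain
+`gpt-4.1` form. The `provider:` prefix shown here is illustrative --
+`model_matrix.py` defines the exact accepted syntax.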
+ +Examples: + +```bash +export LIVE_LLM_ANTHROPIC_45_PLUS_MODELS="claude-sonnet-4-5,claude-sonnet-4-6" +export LIVE_LLM_OPENAI_GPT4_MODELS="gpt-4.1" +export LIVE_LLM_OPENAI_GPT5_MODELS="gpt-5,gpt-5.4,gpt-5.4-mini" +export LIVE_LLM_OPENAI_OPENROUTER_NON_REASONING_MODELS="inception/mercury-2" +export LIVE_LLM_GEMINI_25_MODELS="gemini-2.5-flash,gemini-2.5-pro" +export LIVE_LLM_GEMINI_30_MODELS="gemini-3-flash-preview" +export LIVE_LLM_GEMINI_31_MODELS="gemini-3.1-pro-preview" +``` + +OpenRouter-routed models require additional env for the proxy endpoint: + +```bash +export OPENROUTER_API_KEY="sk-or-v1-..." +# Per-feature config example: +# DERIVER_MODEL_CONFIG__TRANSPORT=openai +# DERIVER_MODEL_CONFIG__MODEL=inception/mercury-2 +# DERIVER_MODEL_CONFIG__OVERRIDES__BASE_URL=https://openrouter.ai/api/v1 +# DERIVER_MODEL_CONFIG__OVERRIDES__API_KEY_ENV=OPENROUTER_API_KEY +``` + +Coverage by provider: + +- Anthropic: structured output path, prompt caching metrics, thinking blocks, multi-turn tool replay +- OpenAI GPT-4 class: structured outputs, prompt caching +- OpenAI GPT-5 class (incl. gpt-5.x point-releases): structured outputs, prompt caching, `reasoning_effort`, `max_completion_tokens` routing +- OpenAI transport → OpenRouter non-reasoning models (e.g. `inception/mercury-2`): non-chat / diffusion architectures must stay on `max_tokens`, no `reasoning_effort`, tool-calling parameter-schema compatibility is the canary for exotic OR-served providers +- Gemini 2.5/3.0 classes: structured outputs, cached-content reuse, thought signatures, multi-turn tool replay +- Gemini 3.1 class: thinking and tool replay coverage by default; structured-output/caching coverage should only be added once Google documents support for that path diff --git a/tests/live_llm/__init__.py b/tests/live_llm/__init__.py new file mode 100644 index 000000000..2c9355036 --- /dev/null +++ b/tests/live_llm/__init__.py @@ -0,0 +1 @@ +# Live LLM integration test package. diff --git a/tests/live_llm/conftest.py b/tests/live_llm/conftest.py new file mode 100644 index 000000000..d96463837 --- /dev/null +++ b/tests/live_llm/conftest.py @@ -0,0 +1,120 @@ +from __future__ import annotations + +from collections.abc import Iterator +from typing import Any + +import pytest +from pydantic import BaseModel + +from src.config import ModelConfig, settings +from src.llm import get_backend +from src.llm.caching import gemini_cache_store + +from .model_matrix import LiveModelSpec, selected_model_summary_lines + + +class StructuredLiveResponse(BaseModel): + provider: str + family: str + answer: str + + +def pytest_report_header(config: pytest.Config) -> list[str] | None: + if not config.getoption("--live-llm"): + return None + return ["live llm model matrix:"] + [ + f" {line}" for line in selected_model_summary_lines() + ] + + +@pytest.fixture(autouse=True) +def clear_live_gemini_cache_store() -> Iterator[None]: + # The live Gemini cache store is process-local and should not leak state between tests. 
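+    # Clear both before and after the yield: before guards against state
+    # left behind by an earlier test that died mid-run, after removes the
+    # handles this test created.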
+    gemini_cache_store._handles.clear()  # pyright: ignore[reportPrivateUsage]
+    yield
+    gemini_cache_store._handles.clear()  # pyright: ignore[reportPrivateUsage]
+
+
+def require_provider_key(model_spec: LiveModelSpec) -> None:
+    key_present = {
+        "anthropic": bool(settings.LLM.ANTHROPIC_API_KEY),
+        "openai": bool(settings.LLM.OPENAI_API_KEY),
+        "gemini": bool(settings.LLM.GEMINI_API_KEY),
+    }[model_spec.provider]
+    if not key_present:
+        pytest.skip(f"Missing API key for live provider {model_spec.provider}")
+
+
+def make_model_config(model_spec: LiveModelSpec, **overrides: Any) -> ModelConfig:
+    return ModelConfig(
+        model=model_spec.model,
+        transport=model_spec.provider,
+        **overrides,
+    )
+
+
+def make_backend(
+    model_spec: LiveModelSpec, **config_overrides: Any
+) -> tuple[Any, ModelConfig]:
+    config = make_model_config(model_spec, **config_overrides)
+    return get_backend(config), config
+
+
+def make_large_system_prompt(*, label: str) -> str:
+    repeated_prefix = " ".join([f"{label}-token-{index % 37}" for index in range(2400)])
+    return (
+        f"{label} system prompt. Reuse this prefix exactly for prompt-caching validation. "
+        f"{repeated_prefix}"
+    )
+
+
+def favorite_prime_tools() -> list[dict[str, Any]]:
+    return [
+        {
+            "name": "get_favorite_prime",
+            "description": "Return the favorite prime number for the current test run.",
+            "input_schema": {
+                "type": "object",
+                "properties": {
+                    "topic": {
+                        "type": "string",
+                        "description": "Why the caller wants the prime number.",
+                    }
+                },
+                "required": ["topic"],
+            },
+        }
+    ]
+
+
+def execute_local_tool(tool_name: str, tool_input: dict[str, Any]) -> str:
+    assert tool_name == "get_favorite_prime"
+    assert isinstance(tool_input, dict)
+    return "13"
+
+
+def wrap_async_method(
+    monkeypatch: pytest.MonkeyPatch,
+    target: Any,
+    attribute: str,
+) -> list[dict[str, Any]]:
+    original = getattr(target, attribute)
+    calls: list[dict[str, Any]] = []
+
+    async def wrapped(*args: Any, **kwargs: Any) -> Any:
+        calls.append({"args": args, "kwargs": kwargs})
+        return await original(*args, **kwargs)
+
+    monkeypatch.setattr(target, attribute, wrapped)
+    return calls
+
+
+def extract_openai_reasoning_tokens(raw_response: Any) -> int | None:
+    usage = getattr(raw_response, "usage", None)
+    if usage is None:
+        return None
+    details = getattr(usage, "completion_tokens_details", None)
+    if details is None:
+        return None
+    reasoning_tokens = getattr(details, "reasoning_tokens", None)
+    return int(reasoning_tokens) if reasoning_tokens is not None else None
diff --git a/tests/live_llm/model_matrix.py b/tests/live_llm/model_matrix.py
new file mode 100644
index 000000000..a9abf0f95
--- /dev/null
+++ b/tests/live_llm/model_matrix.py
@@ -0,0 +1,184 @@
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import Literal
+
+ProviderName = Literal["anthropic", "openai", "gemini"]
+FeatureName = Literal["thinking", "structured_output", "caching", "reasoning"]
+
+
+@dataclass(frozen=True)
+class LiveModelFamily:
+    provider: ProviderName
+    family: str
+    env_var: str
+    default_models: tuple[str, ...] = ()
+    supports_thinking: bool = False
+    supports_structured_output: bool = False
+    supports_caching: bool = False
+    supports_reasoning: bool = False
+    supports_tool_replay: bool = False
+    docs_url: str | None = None
+
+
+@dataclass(frozen=True)
+class LiveModelSpec:
+    provider: ProviderName
+    family: str
+    model: str
+    env_var: str
+    supports_thinking: bool
+    supports_structured_output: bool
+    supports_caching: bool
+    supports_reasoning: bool
+    supports_tool_replay: bool
+    docs_url: str | None = None
+
+    @property
+    def id(self) -> str:
+        return f"{self.provider}:{self.family}:{self.model}"
+
+
+MODEL_FAMILIES: tuple[LiveModelFamily, ...] = (
+    LiveModelFamily(
+        provider="anthropic",
+        family="claude_4_5_plus",
+        env_var="LIVE_LLM_ANTHROPIC_45_PLUS_MODELS",
+        supports_thinking=True,
+        supports_structured_output=True,
+        supports_caching=True,
+        supports_tool_replay=True,
+        docs_url="https://docs.anthropic.com/en/docs/about-claude/models/all-models",
+    ),
+    LiveModelFamily(
+        provider="openai",
+        family="gpt_4_class",
+        env_var="LIVE_LLM_OPENAI_GPT4_MODELS",
+        default_models=("gpt-4.1",),
+        supports_structured_output=True,
+        supports_caching=True,
+        docs_url="https://platform.openai.com/docs/models/gpt-4.1",
+    ),
+    LiveModelFamily(
+        provider="openai",
+        family="gpt_5_class",
+        env_var="LIVE_LLM_OPENAI_GPT5_MODELS",
+        default_models=("gpt-5", "gpt-5.4", "gpt-5.4-mini"),
+        supports_structured_output=True,
+        supports_caching=True,
+        supports_reasoning=True,
+        docs_url="https://platform.openai.com/docs/models/gpt-5",
+    ),
+    # OpenAI-compatible transport → OpenRouter-served non-reasoning models.
+    # Best canary for operators routing exotic providers through OpenRouter:
+    # if honcho works here, it works for most OR-served models. Currently
+    # anchored on Inception Labs' Mercury-2 diffusion model (non-chat
+    # architecture, must stay on max_tokens, no reasoning_effort).
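+    # The supports_* flags for this family deliberately stay False so only the
+    # lowest-common-denominator OpenAI-compatible request path gets exercised.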
+    LiveModelFamily(
+        provider="openai",
+        family="openrouter_non_reasoning",
+        env_var="LIVE_LLM_OPENAI_OPENROUTER_NON_REASONING_MODELS",
+        default_models=("inception/mercury-2",),
+        supports_structured_output=False,
+        supports_caching=False,
+        docs_url="https://openrouter.ai/models",
+    ),
+    LiveModelFamily(
+        provider="gemini",
+        family="gemini_2_5_class",
+        env_var="LIVE_LLM_GEMINI_25_MODELS",
+        default_models=("gemini-2.5-flash",),
+        supports_thinking=True,
+        supports_structured_output=True,
+        supports_caching=True,
+        supports_tool_replay=True,
+        docs_url="https://ai.google.dev/gemini-api/docs/models/gemini",
+    ),
+    LiveModelFamily(
+        provider="gemini",
+        family="gemini_3_0_class",
+        env_var="LIVE_LLM_GEMINI_30_MODELS",
+        supports_thinking=True,
+        supports_structured_output=True,
+        supports_caching=True,
+        supports_tool_replay=True,
+        docs_url="https://ai.google.dev/gemini-api/docs/models/gemini",
+    ),
+    LiveModelFamily(
+        provider="gemini",
+        family="gemini_3_1_class",
+        env_var="LIVE_LLM_GEMINI_31_MODELS",
+        supports_thinking=True,
+        supports_structured_output=False,
+        supports_caching=False,
+        supports_tool_replay=True,
+        docs_url="https://ai.google.dev/gemini-api/docs/models/gemini",
+    ),
+)
+
+
+def _parse_env_models(value: str | None) -> tuple[str, ...]:
+    if value is None:
+        return ()
+    models = [model.strip() for model in value.split(",")]
+    return tuple(model for model in models if model)
+
+
+def iter_live_model_specs() -> tuple[LiveModelSpec, ...]:
+    specs: list[LiveModelSpec] = []
+    for family in MODEL_FAMILIES:
+        configured_models = _parse_env_models(os.getenv(family.env_var))
+        models = configured_models or family.default_models
+        for model in models:
+            specs.append(
+                LiveModelSpec(
+                    provider=family.provider,
+                    family=family.family,
+                    model=model,
+                    env_var=family.env_var,
+                    supports_thinking=family.supports_thinking,
+                    supports_structured_output=family.supports_structured_output,
+                    supports_caching=family.supports_caching,
+                    supports_reasoning=family.supports_reasoning,
+                    supports_tool_replay=family.supports_tool_replay,
+                    docs_url=family.docs_url,
+                )
+            )
+    return tuple(specs)
+
+
+def get_live_model_specs(
+    *,
+    provider: ProviderName | None = None,
+    feature: FeatureName | None = None,
+) -> tuple[LiveModelSpec, ...]:
+    specs = iter_live_model_specs()
+    filtered: list[LiveModelSpec] = []
+
+    for spec in specs:
+        if provider is not None and spec.provider != provider:
+            continue
+        if feature == "thinking" and not spec.supports_thinking:
+            continue
+        if feature == "structured_output" and not spec.supports_structured_output:
+            continue
+        if feature == "caching" and not spec.supports_caching:
+            continue
+        if feature == "reasoning" and not spec.supports_reasoning:
+            continue
+        filtered.append(spec)
+
+    return tuple(filtered)
+
+
+def selected_model_summary_lines() -> list[str]:
+    lines: list[str] = []
+    for family in MODEL_FAMILIES:
+        configured_models = _parse_env_models(os.getenv(family.env_var))
+        models = configured_models or family.default_models
+        joined_models = ", ".join(models) if models else "(none configured)"
+        lines.append(
+            f"{family.env_var} [{family.provider}/{family.family}]: {joined_models}"
+        )
+    return lines
diff --git a/tests/live_llm/test_live_anthropic.py b/tests/live_llm/test_live_anthropic.py
new file mode 100644
index 000000000..e10d2b103
--- /dev/null
+++ b/tests/live_llm/test_live_anthropic.py
@@ -0,0 +1,154 @@
+from __future__ import annotations
+
+import pytest
+
+from src.llm.backend import CompletionResult
+from src.llm.history_adapters import AnthropicHistoryAdapter
+from src.llm.request_builder import execute_completion
+
+from .conftest import (
+    StructuredLiveResponse,
+    execute_local_tool,
+    favorite_prime_tools,
+    make_backend,
+    make_large_system_prompt,
+    require_provider_key,
+    wrap_async_method,
+)
+from .model_matrix import LiveModelSpec, get_live_model_specs
+
+pytestmark = [pytest.mark.live_llm, pytest.mark.requires_anthropic]
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_spec",
+    get_live_model_specs(provider="anthropic"),
+    ids=lambda spec: spec.id,
+)
+async def test_live_anthropic_structured_output_and_prefix_caching(
+    model_spec: LiveModelSpec,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    require_provider_key(model_spec)
+    backend, config = make_backend(model_spec)
+    create_calls = wrap_async_method(monkeypatch, backend._client.messages, "create")
+
+    messages = [
+        {
+            "role": "system",
+            "content": make_large_system_prompt(label=f"anthropic-{model_spec.family}"),
+        },
+        {
+            "role": "user",
+            "content": (
+                "Return valid JSON with provider='anthropic', "
+                f"family='{model_spec.family}', and answer='cache-ok'."
+            ),
+        },
+    ]
+
+    results: list[CompletionResult] = []
+    for _ in range(3):
+        results.append(
+            await execute_completion(
+                backend,
+                config,
+                messages=messages,
+                max_tokens=256,
+                response_format=StructuredLiveResponse,
+            )
+        )
+        if len(results) >= 2 and results[-1].cache_read_input_tokens > 0:
+            break
+
+    first = results[0]
+    later_results = results[1:]
+
+    assert isinstance(first.content, StructuredLiveResponse)
+    assert first.content.provider == "anthropic"
+    assert first.content.family == model_spec.family
+    assert later_results, "Anthropic caching validation requires at least two calls"
+    for result in later_results:
+        assert isinstance(result.content, StructuredLiveResponse)
+    assert any(
+        result.cache_read_input_tokens > 0 for result in later_results
+    ), "Anthropic prompt caching did not report a cache hit after repeated identical requests"
+
+    assert len(create_calls) == len(results)
+    for call in create_calls:
+        assert call["kwargs"]["system"][0]["cache_control"] == {"type": "ephemeral"}
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_spec",
+    get_live_model_specs(provider="anthropic"),
+    ids=lambda spec: spec.id,
+)
+async def test_live_anthropic_thinking_and_tool_replay(
+    model_spec: LiveModelSpec,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    require_provider_key(model_spec)
+    backend, config = make_backend(model_spec, thinking_budget_tokens=1024)
+    create_calls = wrap_async_method(monkeypatch, backend._client.messages, "create")
+    tools = favorite_prime_tools()
+    adapter = AnthropicHistoryAdapter()
+
+    initial_messages = [
+        {
+            "role": "user",
+            "content": (
+                "Before answering, call the get_favorite_prime tool exactly once. "
+                "After you receive the tool result, answer in one sentence that includes "
+                "the number and the word 'prime'."
+            ),
+        }
+    ]
+
+    first = await execute_completion(
+        backend,
+        config,
+        messages=initial_messages,
+        max_tokens=2048,
+        tools=tools,
+    )
+
+    assert create_calls[0]["kwargs"]["thinking"] == {
+        "type": "enabled",
+        "budget_tokens": 1024,
+    }
+    assert first.tool_calls, "Anthropic should issue a tool call in the first turn"
+    assert first.thinking_blocks, "Anthropic thinking blocks should be preserved"
+
+    tool_call = first.tool_calls[0]
+    tool_result = execute_local_tool(tool_call.name, tool_call.input)
+    replay_messages = initial_messages + [
+        adapter.format_assistant_tool_message(first),
+        *adapter.format_tool_results(
+            [
+                {
+                    "tool_id": tool_call.id,
+                    "tool_name": tool_call.name,
+                    "result": tool_result,
+                }
+            ]
+        ),
+    ]
+
+    second = await execute_completion(
+        backend,
+        config,
+        messages=replay_messages,
+        max_tokens=2048,
+        tools=tools,
+    )
+
+    assert create_calls[1]["kwargs"]["thinking"] == {
+        "type": "enabled",
+        "budget_tokens": 1024,
+    }
+    assert isinstance(second.content, str)
+    assert "13" in second.content
+    assert "prime" in second.content.lower()
diff --git a/tests/live_llm/test_live_gemini.py b/tests/live_llm/test_live_gemini.py
new file mode 100644
index 000000000..022b6cf04
--- /dev/null
+++ b/tests/live_llm/test_live_gemini.py
@@ -0,0 +1,173 @@
+from __future__ import annotations
+
+import pytest
+
+from src.llm.caching import PromptCachePolicy
+from src.llm.history_adapters import GeminiHistoryAdapter
+from src.llm.request_builder import execute_completion
+
+from .conftest import (
+    StructuredLiveResponse,
+    execute_local_tool,
+    favorite_prime_tools,
+    make_backend,
+    make_large_system_prompt,
+    require_provider_key,
+    wrap_async_method,
+)
+from .model_matrix import LiveModelSpec, get_live_model_specs
+
+pytestmark = [pytest.mark.live_llm, pytest.mark.requires_gemini]
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_spec",
+    get_live_model_specs(provider="gemini", feature="structured_output"),
+    ids=lambda spec: spec.id,
+)
+async def test_live_gemini_structured_output_and_explicit_cache_reuse(
+    model_spec: LiveModelSpec,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    require_provider_key(model_spec)
+    backend, config = make_backend(model_spec, temperature=0)
+    cache_create_calls = wrap_async_method(
+        monkeypatch,
+        backend._client.aio.caches,
+        "create",
+    )
+    generate_calls = wrap_async_method(
+        monkeypatch,
+        backend._client.aio.models,
+        "generate_content",
+    )
+    cache_policy = PromptCachePolicy(mode="gemini_cached_content", ttl_seconds=300)
+
+    messages = [
+        {
+            "role": "system",
+            "content": make_large_system_prompt(label=f"gemini-{model_spec.family}"),
+        },
+        {
+            "role": "user",
+            "content": (
+                "Return valid JSON with provider='gemini', "
+                f"family='{model_spec.family}', and answer='cache-ok'. "
+                "Return JSON only, with no prose or markdown."
+            ),
+        },
+    ]
+
+    first = await execute_completion(
+        backend,
+        config,
+        messages=messages,
+        max_tokens=512,
+        response_format=StructuredLiveResponse,
+        cache_policy=cache_policy,
+    )
+    second = await execute_completion(
+        backend,
+        config,
+        messages=messages,
+        max_tokens=512,
+        response_format=StructuredLiveResponse,
+        cache_policy=cache_policy,
+    )
+
+    assert isinstance(first.content, StructuredLiveResponse)
+    assert first.content.provider == "gemini"
+    assert first.content.family == model_spec.family
+    assert isinstance(second.content, StructuredLiveResponse)
+
+    assert len(cache_create_calls) == 1
+    assert len(generate_calls) == 2
+    first_cached_content = generate_calls[0]["kwargs"]["config"]["cached_content"]
+    second_cached_content = generate_calls[1]["kwargs"]["config"]["cached_content"]
+    assert first_cached_content == second_cached_content
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "model_spec",
+    get_live_model_specs(provider="gemini", feature="thinking"),
+    ids=lambda spec: spec.id,
+)
+async def test_live_gemini_thinking_and_tool_replay(
+    model_spec: LiveModelSpec,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    require_provider_key(model_spec)
+    backend, config = make_backend(
+        model_spec,
+        thinking_budget_tokens=512,
+        temperature=0,
+    )
+    generate_calls = wrap_async_method(
+        monkeypatch,
+        backend._client.aio.models,
+        "generate_content",
+    )
+    tools = favorite_prime_tools()
+    adapter = GeminiHistoryAdapter()
+
+    initial_messages = [
+        {
+            "role": "user",
+            "content": (
+                "Before answering, call the get_favorite_prime tool exactly once. "
+                "Do not answer with plain text on this turn. "
+                "After the tool result arrives, answer with the exact text "
+                "'13 is prime.'"
+            ),
+        }
+    ]
+
+    first = await execute_completion(
+        backend,
+        config,
+        messages=initial_messages,
+        max_tokens=512,
+        tools=tools,
+        tool_choice="required",
+    )
+
+    assert generate_calls[0]["kwargs"]["config"]["thinking_config"] == {
+        "thinking_budget": 512,
+    }
+    assert first.tool_calls, "Gemini should issue a tool call in the first turn"
+    assert any(
+        tool_call.thought_signature for tool_call in first.tool_calls
+    ), "Gemini tool replay should preserve thought signatures"
+
+    tool_call = first.tool_calls[0]
+    tool_result = execute_local_tool(tool_call.name, tool_call.input)
+    replay_messages = initial_messages + [
+        adapter.format_assistant_tool_message(first),
+        *adapter.format_tool_results(
+            [
+                {
+                    "tool_id": tool_call.id,
+                    "tool_name": tool_call.name,
+                    "result": tool_result,
+                }
+            ]
+        ),
+    ]
+
+    second = await execute_completion(
+        backend,
+        config,
+        messages=replay_messages,
+        max_tokens=512,
+        tools=tools,
+        tool_choice="none",
+    )
+
+    assert generate_calls[1]["kwargs"]["config"]["thinking_config"] == {
+        "thinking_budget": 512,
+    }
+    assert isinstance(second.content, str)
+    assert "13" in second.content
+    assert "prime" in second.content.lower()
diff --git a/tests/live_llm/test_live_openai.py b/tests/live_llm/test_live_openai.py
new file mode 100644
index 000000000..60d89161b
--- /dev/null
+++ b/tests/live_llm/test_live_openai.py
@@ -0,0 +1,136 @@
+from __future__ import annotations
+
+import pytest
+
+from src.llm.request_builder import execute_completion
+
+from .conftest import (
+    StructuredLiveResponse,
+    make_backend,
+    make_large_system_prompt,
+    require_provider_key,
+    wrap_async_method,
+)
+from .model_matrix import LiveModelSpec, get_live_model_specs
+
+pytestmark = [pytest.mark.live_llm, pytest.mark.requires_openai]
+
+_GPT4_SPECS = tuple(
+    spec
+    for spec in get_live_model_specs(provider="openai")
+    if spec.family == "gpt_4_class"
+)
+_GPT5_SPECS = tuple(
+    spec
+    for spec in get_live_model_specs(provider="openai")
+    if spec.family == "gpt_5_class"
+)
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_spec", _GPT4_SPECS, ids=lambda spec: spec.id)
+async def test_live_openai_gpt4_structured_output_and_prefix_caching(
+    model_spec: LiveModelSpec,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    require_provider_key(model_spec)
+    backend, config = make_backend(model_spec)
+    parse_calls = wrap_async_method(
+        monkeypatch,
+        backend._client.chat.completions,
+        "parse",
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": make_large_system_prompt(label=f"openai-{model_spec.family}"),
+        },
+        {
+            "role": "user",
+            "content": (
+                "Return valid JSON with provider='openai', "
+                f"family='{model_spec.family}', and answer='cache-ok'."
+            ),
+        },
+    ]
+
+    first = await execute_completion(
+        backend,
+        config,
+        messages=messages,
+        max_tokens=256,
+        response_format=StructuredLiveResponse,
+    )
+    second = await execute_completion(
+        backend,
+        config,
+        messages=messages,
+        max_tokens=256,
+        response_format=StructuredLiveResponse,
+    )
+
+    assert isinstance(first.content, StructuredLiveResponse)
+    assert first.content.provider == "openai"
+    assert first.content.family == model_spec.family
+    assert isinstance(second.content, StructuredLiveResponse)
+    assert second.cache_read_input_tokens > 0
+
+    assert parse_calls[0]["kwargs"]["response_format"] is StructuredLiveResponse
+    assert "max_tokens" in parse_calls[0]["kwargs"]
+    assert "max_completion_tokens" not in parse_calls[0]["kwargs"]
+
+
+@pytest.mark.asyncio
+@pytest.mark.parametrize("model_spec", _GPT5_SPECS, ids=lambda spec: spec.id)
+async def test_live_openai_gpt5_reasoning_structured_output_and_prefix_caching(
+    model_spec: LiveModelSpec,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    require_provider_key(model_spec)
+    backend, config = make_backend(model_spec, reasoning_effort="minimal")
+    parse_calls = wrap_async_method(
+        monkeypatch,
+        backend._client.chat.completions,
+        "parse",
+    )
+
+    messages = [
+        {
+            "role": "system",
+            "content": make_large_system_prompt(label=f"openai-{model_spec.family}"),
+        },
+        {
+            "role": "user",
+            "content": (
+                "Return valid JSON with provider='openai', "
+                f"family='{model_spec.family}', and answer='reasoning-ok'."
+            ),
+        },
+    ]
+
+    first = await execute_completion(
+        backend,
+        config,
+        messages=messages,
+        max_tokens=1024,
+        response_format=StructuredLiveResponse,
+    )
+    second = await execute_completion(
+        backend,
+        config,
+        messages=messages,
+        max_tokens=1024,
+        response_format=StructuredLiveResponse,
+    )
+
+    assert isinstance(first.content, StructuredLiveResponse)
+    assert first.content.provider == "openai"
+    assert first.content.family == model_spec.family
+    assert isinstance(second.content, StructuredLiveResponse)
+    assert second.cache_read_input_tokens > 0
+
+    assert parse_calls[0]["kwargs"]["response_format"] is StructuredLiveResponse
+    assert parse_calls[0]["kwargs"]["reasoning_effort"] == "minimal"
+    assert "max_completion_tokens" in parse_calls[0]["kwargs"]
+    assert "max_tokens" not in parse_calls[0]["kwargs"]
diff --git a/tests/llm/conftest.py b/tests/llm/conftest.py
new file mode 100644
index 000000000..fccccf2d1
--- /dev/null
+++ b/tests/llm/conftest.py
@@ -0,0 +1,30 @@
+from collections.abc import AsyncIterator, Iterator
+from typing import Any
+
+import pytest
+
+from src.llm.backend import CompletionResult, ProviderBackend, StreamChunk
+
+
+class FakeBackend(ProviderBackend):
+    """Simple backend for request-builder and orchestration tests."""
+
+    def __init__(self, responses: list[CompletionResult] | None = None) -> None:
+        self.calls: list[dict[str, Any]] = []
+        self._responses: Iterator[CompletionResult] = iter(
+            responses or [CompletionResult(content="ok")]
+        )
+
+    async def complete(self, **kwargs: Any) -> CompletionResult:
+        self.calls.append(kwargs)
+        return next(self._responses)
+
+    async def stream(self, **kwargs: Any) -> AsyncIterator[StreamChunk]:
+        self.calls.append(kwargs)
+        result = next(self._responses)
+        yield StreamChunk(content=result.content, is_done=True)
+
+
+@pytest.fixture
+def fake_backend() -> FakeBackend:
+    return FakeBackend()
diff --git a/tests/llm/test_agent_tool_schemas.py b/tests/llm/test_agent_tool_schemas.py
new file mode 100644
index 000000000..12c12c119
--- /dev/null
+++ b/tests/llm/test_agent_tool_schemas.py
@@ -0,0 +1,72 @@
+from typing import Any, cast
+
+from src.utils.agent_tools import (
+    DEDUCTION_SPECIALIST_TOOLS,
+    INDUCTION_SPECIALIST_TOOLS,
+    TOOLS,
+)
+
+
+def _observation_items_schema(tool_key: str) -> dict[str, Any]:
+    return cast(
+        dict[str, Any],
+        TOOLS[tool_key]["input_schema"]["properties"]["observations"]["items"],
+    )
+
+
+def test_generic_create_observations_schema_has_level_specific_requirements() -> None:
+    items = _observation_items_schema("create_observations")
+
+    assert items["additionalProperties"] is False
+
+    level_requirements = {
+        condition["if"]["properties"]["level"]["const"]: condition["then"]["required"]
+        for condition in cast(list[dict[str, Any]], items["allOf"])
+    }
+
+    assert level_requirements["deductive"] == ["source_ids", "premises"]
+    assert level_requirements["inductive"] == [
+        "source_ids",
+        "sources",
+        "pattern_type",
+        "confidence",
+    ]
+    assert level_requirements["contradiction"] == ["source_ids", "sources"]
+
+
+def test_deductive_specialist_tool_requires_evidence_fields() -> None:
+    items = _observation_items_schema("create_observations_deductive")
+
+    assert TOOLS["create_observations_deductive"]["name"] == (
+        "create_observations_deductive"
+    )
+    assert items["required"] == ["content", "source_ids", "premises"]
+    assert items["properties"]["source_ids"]["minItems"] == 1
+    assert items["properties"]["premises"]["minItems"] == 1
+
+
+def test_inductive_specialist_tool_requires_pattern_fields() -> None:
+    items = _observation_items_schema("create_observations_inductive")
+
+    assert TOOLS["create_observations_inductive"]["name"] == (
+        "create_observations_inductive"
+    )
+    assert items["required"] == [
+        "content",
+        "source_ids",
+        "sources",
+        "pattern_type",
+        "confidence",
+    ]
+    assert items["properties"]["source_ids"]["minItems"] == 2
+    assert items["properties"]["sources"]["minItems"] == 2
+
+
+def test_dreamer_specialists_use_level_specific_creation_tools() -> None:
+    deduction_tool_names = {tool["name"] for tool in DEDUCTION_SPECIALIST_TOOLS}
+    induction_tool_names = {tool["name"] for tool in INDUCTION_SPECIALIST_TOOLS}
+
+    assert "create_observations_deductive" in deduction_tool_names
+    assert "create_observations_inductive" in induction_tool_names
+    assert "create_observations" not in deduction_tool_names
+    assert "create_observations" not in induction_tool_names
diff --git a/tests/llm/test_backends/test_anthropic.py b/tests/llm/test_backends/test_anthropic.py
new file mode 100644
index 000000000..c8f2bbdf0
--- /dev/null
+++ b/tests/llm/test_backends/test_anthropic.py
@@ -0,0 +1,135 @@
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, Mock
+
+import pytest
+from anthropic.types import TextBlock, ThinkingBlock, ToolUseBlock
+from pydantic import BaseModel
+
+from src.llm.backends.anthropic import AnthropicBackend
+
+
+@pytest.mark.asyncio
+async def test_anthropic_backend_extracts_text_thinking_and_tool_calls() -> None:
+    client = Mock()
+    client.messages.create = AsyncMock(
+        return_value=SimpleNamespace(
+            content=[
+                ThinkingBlock(
+                    type="thinking",
+                    thinking="internal reasoning",
+                    signature="sig_123",
+                ),
+                TextBlock(type="text", text="Hello from Anthropic"),
+                ToolUseBlock(
+                    type="tool_use",
+                    id="tool_1",
+                    name="search",
+                    input={"query": "honcho"},
+                ),
+            ],
+            usage=SimpleNamespace(
+                input_tokens=10,
+                output_tokens=5,
+                cache_creation_input_tokens=3,
+                cache_read_input_tokens=2,
+            ),
+            stop_reason="tool_use",
+        )
+    )
+
+    backend = AnthropicBackend(client)
+    result = await backend.complete(
+        model="claude-haiku-4-5",
+        messages=[
+            {"role": "system", "content": "System prompt"},
+            {"role": "user", "content": "Hello"},
+        ],
+        max_tokens=100,
+        tools=[
+            {
+                "name": "search",
+                "description": "Search for information",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {"query": {"type": "string"}},
+                },
+            }
+        ],
+        thinking_budget_tokens=2048,
+        tool_choice="required",
+    )
+
+    assert result.content == "Hello from Anthropic"
+    assert result.thinking_content == "internal reasoning"
+    assert result.thinking_blocks == [
+        {
+            "type": "thinking",
+            "thinking": "internal reasoning",
+            "signature": "sig_123",
+        }
+    ]
+    assert result.tool_calls[0].name == "search"
+    assert result.input_tokens == 15
+    assert result.output_tokens == 5
+    assert result.finish_reason == "tool_use"
+
+    await_args = client.messages.create.await_args
+    if await_args is None:
+        raise AssertionError("Expected Anthropic client call")
+    call = await_args.kwargs
+    assert call["model"] == "claude-haiku-4-5"
+    assert call["system"][0]["text"] == "System prompt"
+    assert call["thinking"] == {"type": "enabled", "budget_tokens": 2048}
+    assert call["tool_choice"] == {"type": "any"}
+
+
+class StructuredResponse(BaseModel):
+    answer: str
+
+
+@pytest.mark.asyncio
+async def test_anthropic_backend_skips_assistant_prefill_for_claude_4_models() -> None:
+    client = Mock()
+    client.messages.create = AsyncMock(
+        return_value=SimpleNamespace(
+            content=[TextBlock(type="text", text='{"answer":"ok"}')],
+            usage=SimpleNamespace(
+                input_tokens=10,
+                output_tokens=5,
+                cache_creation_input_tokens=0,
+                cache_read_input_tokens=0,
+            ),
+            stop_reason="end_turn",
+        )
+    )
+
+    backend = AnthropicBackend(client)
+    result = await backend.complete(
+        model="claude-sonnet-4-5",
+        messages=[{"role": "user", "content": "Hello"}],
+        max_tokens=100,
+        response_format=StructuredResponse,
+    )
+
+    assert isinstance(result.content, StructuredResponse)
+    assert result.content.answer == "ok"
+    await_args = client.messages.create.await_args
+    if await_args is None:
+        raise AssertionError("Expected Anthropic client call")
+    call = await_args.kwargs
+    assert len(call["messages"]) == 1
+    assert call["messages"][0]["role"] == "user"
+    assert call["messages"][0]["content"].startswith("Hello\n\nRespond with valid JSON")
+
+
+@pytest.mark.asyncio
+async def test_anthropic_backend_rejects_thinking_effort() -> None:
+    backend = AnthropicBackend(Mock())
+
+    with pytest.raises(ValueError, match="does not support thinking_effort"):
+        await backend.complete(
+            model="claude-haiku-4-5",
+            messages=[{"role": "user", "content": "Hello"}],
+            max_tokens=100,
+            thinking_effort="high",
+        )
diff --git a/tests/llm/test_backends/test_gemini.py b/tests/llm/test_backends/test_gemini.py
new file mode 100644
index 000000000..b327c8e42
--- /dev/null
+++ b/tests/llm/test_backends/test_gemini.py
@@ -0,0 +1,391 @@
+from datetime import datetime, timedelta, timezone
+from types import SimpleNamespace
+from unittest.mock import AsyncMock, Mock
+
+import pytest
+from pydantic import BaseModel
+
+from src.exceptions import LLMError, ValidationException
+from src.llm.backends.gemini import GeminiBackend
+from src.llm.caching import PromptCachePolicy, gemini_cache_store
+
+
+@pytest.mark.asyncio
+async def test_gemini_backend_preserves_thought_signature() -> None:
+    client = Mock()
+    client.aio.models.generate_content = AsyncMock(
+        return_value=SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    finish_reason=SimpleNamespace(name="STOP"),
+                    content=SimpleNamespace(
+                        parts=[
+                            SimpleNamespace(text="Hello from Gemini"),
+                            SimpleNamespace(
+                                function_call=SimpleNamespace(
+                                    name="search",
+                                    args={"query": "honcho"},
+                                ),
+                                thought_signature="sig_gemini",
+                            ),
+                        ]
+                    ),
+                )
+            ],
+            usage_metadata=SimpleNamespace(
+                prompt_token_count=12,
+                candidates_token_count=6,
+            ),
+            parsed=None,
+        )
+    )
+
+    backend = GeminiBackend(client)
+    result = await backend.complete(
+        model="gemini-2.5-flash",
+        messages=[
+            {"role": "system", "content": "System prompt"},
+            {"role": "user", "content": "Hello"},
+        ],
+        max_tokens=100,
+        thinking_budget_tokens=256,
+    )
+
+    assert result.content == "Hello from Gemini"
+    assert result.tool_calls[0].name == "search"
+    assert result.tool_calls[0].thought_signature == "sig_gemini"
+
+    await_args = client.aio.models.generate_content.await_args
+    if await_args is None:
+        raise AssertionError("Expected Gemini generate_content call")
+    call = await_args.kwargs
+    assert call["model"] == "gemini-2.5-flash"
+    assert call["config"]["system_instruction"] == "System prompt"
+    assert call["config"]["thinking_config"] == {"thinking_budget": 256}
+
+
+@pytest.mark.asyncio
+async def test_gemini_backend_maps_thinking_effort_to_thinking_level() -> None:
+    client = Mock()
+    client.aio.models.generate_content = AsyncMock(
+        return_value=SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    finish_reason=SimpleNamespace(name="STOP"),
+                    content=SimpleNamespace(parts=[SimpleNamespace(text="ok")]),
+                )
+            ],
+            usage_metadata=SimpleNamespace(
+                prompt_token_count=12,
+                candidates_token_count=6,
+            ),
+            parsed=None,
+        )
+    )
+
+    backend = GeminiBackend(client)
+    await backend.complete(
+        model="gemini-3-pro-preview",
+        messages=[{"role": "user", "content": "Hello"}],
+        max_tokens=100,
+        thinking_effort="low",
+    )
+
+    await_args = client.aio.models.generate_content.await_args
+    if await_args is None:
+        raise AssertionError("Expected Gemini generate_content call")
+    call = await_args.kwargs
+    assert call["config"]["thinking_config"] == {"thinking_level": "low"}
+
+
+@pytest.mark.asyncio
+async def test_gemini_backend_rejects_budget_and_effort_together() -> None:
+    backend = GeminiBackend(Mock())
+
+    with pytest.raises(
+        ValidationException,
+        match="does not support sending both thinking_budget_tokens and thinking_effort",
+    ):
+        await backend.complete(
+            model="gemini-3-pro-preview",
+            messages=[{"role": "user", "content": "Hello"}],
+            max_tokens=100,
+            thinking_budget_tokens=256,
+            thinking_effort="low",
+        )
+
+
+@pytest.mark.asyncio
+async def test_gemini_backend_raises_on_blocked_response() -> None:
+    client = Mock()
+    client.aio.models.generate_content = AsyncMock(
+        return_value=SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    finish_reason=SimpleNamespace(name="SAFETY"),
+                    content=SimpleNamespace(parts=[]),
+                )
+            ],
+            usage_metadata=SimpleNamespace(
+                prompt_token_count=12,
+                candidates_token_count=0,
+            ),
+            parsed=None,
+        )
+    )
+
+    backend = GeminiBackend(client)
+
+    with pytest.raises(LLMError, match="Gemini response blocked"):
+        await backend.complete(
+            model="gemini-2.5-flash",
+            messages=[{"role": "user", "content": "Hello"}],
+            max_tokens=100,
+        )
+
+
+class StructuredResponse(BaseModel):
+    answer: str
+
+
+@pytest.mark.asyncio
+async def test_gemini_backend_validates_dict_parsed_payload() -> None:
+    client = Mock()
+    client.aio.models.generate_content = AsyncMock(
+        return_value=SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    finish_reason=SimpleNamespace(name="STOP"),
+                    content=SimpleNamespace(parts=[]),
+                )
+            ],
+            usage_metadata=SimpleNamespace(
+                prompt_token_count=12,
+                candidates_token_count=6,
+            ),
+            parsed={"answer": "ok"},
+            text=None,
+            function_calls=None,
+        )
+    )
+
+    backend = GeminiBackend(client)
+    result = await backend.complete(
+        model="gemini-2.5-flash",
+        messages=[{"role": "user", "content": "Hello"}],
+        max_tokens=100,
+        response_format=StructuredResponse,
+    )
+
+    assert isinstance(result.content, StructuredResponse)
+    assert result.content.answer == "ok"
+
+
+@pytest.mark.asyncio
+async def test_gemini_backend_falls_back_to_response_text_and_function_calls() -> None:
+    client = Mock()
+    client.aio.models.generate_content = AsyncMock(
+        return_value=SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    finish_reason=SimpleNamespace(name="STOP"),
+                    content=SimpleNamespace(parts=None),
+                )
+            ],
+            usage_metadata=SimpleNamespace(
+                prompt_token_count=12,
+                candidates_token_count=6,
+            ),
+            parsed=None,
+            text="13 is prime.",
+            function_calls=[
+                SimpleNamespace(name="get_favorite_prime", args={"topic": "test"})
+            ],
+        )
+    )
+
+    backend = GeminiBackend(client)
+    result = await backend.complete(
+        model="gemini-2.5-flash",
+        messages=[{"role": "user", "content": "Hello"}],
+        max_tokens=100,
+    )
+
+    assert result.content == "13 is prime."
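+    # The response-level function_calls accessor should still surface tool
+    # calls even though the candidate's content parts were missing.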
+    assert result.tool_calls[0].name == "get_favorite_prime"
+
+
+@pytest.mark.asyncio
+async def test_gemini_backend_ignores_mock_text_and_function_call_placeholders() -> (
+    None
+):
+    client = Mock()
+    client.aio.models.generate_content = AsyncMock(
+        return_value=Mock(
+            candidates=[
+                Mock(
+                    finish_reason=SimpleNamespace(name="STOP"),
+                    content=None,
+                )
+            ],
+            usage_metadata=SimpleNamespace(
+                prompt_token_count=12,
+                candidates_token_count=0,
+            ),
+            parsed=None,
+        )
+    )
+
+    backend = GeminiBackend(client)
+    result = await backend.complete(
+        model="gemini-2.5-flash",
+        messages=[{"role": "user", "content": "Hello"}],
+        max_tokens=100,
+    )
+
+    assert result.content == ""
+    assert result.tool_calls == []
+
+
+@pytest.mark.asyncio
+async def test_gemini_backend_strips_system_and_tools_when_using_cached_content() -> (
+    None
+):
+    gemini_cache_store._handles.clear()  # pyright: ignore[reportPrivateUsage]
+    client = Mock()
+    client.aio.caches.create = AsyncMock(
+        return_value=SimpleNamespace(
+            name="cachedContents/abc123",
+            expire_time=datetime.now(timezone.utc) + timedelta(minutes=5),
+        )
+    )
+    client.aio.models.generate_content = AsyncMock(
+        return_value=SimpleNamespace(
+            candidates=[
+                SimpleNamespace(
+                    finish_reason=SimpleNamespace(name="STOP"),
+                    content=SimpleNamespace(
+                        parts=[SimpleNamespace(text="cached result")]
+                    ),
+                )
+            ],
+            usage_metadata=SimpleNamespace(
+                prompt_token_count=12,
+                candidates_token_count=6,
+            ),
+            parsed=None,
+        )
+    )
+
+    backend = GeminiBackend(client)
+    result = await backend.complete(
+        model="gemini-2.5-flash",
+        messages=[
+            {"role": "system", "content": "System prompt"},
+            {"role": "user", "content": "Hello"},
+        ],
+        max_tokens=100,
+        tools=[
+            {
+                "name": "search",
+                "description": "Search for information",
+                "input_schema": {
+                    "type": "object",
+                    "properties": {"query": {"type": "string"}},
+                },
+            }
+        ],
+        tool_choice="required",
+        extra_params={
+            "cache_policy": PromptCachePolicy(
+                mode="gemini_cached_content",
+                ttl_seconds=300,
+            )
+        },
+    )
+
+    assert result.content == "cached result"
+    await_args = client.aio.models.generate_content.await_args
+    if await_args is None:
+        raise AssertionError("Expected Gemini generate_content call")
+    call = await_args.kwargs
+    assert call["config"]["cached_content"] == "cachedContents/abc123"
+    assert "system_instruction" not in call["config"]
+    assert "tools" not in call["config"]
+    assert "tool_config" not in call["config"]
+
+
+def test_gemini_sanitize_schema_strips_unsupported_keywords() -> None:
+    """Gemini's function-declarations validator accepts only a narrow allowlist
+    of JSON-Schema keywords; unsupported ones (additionalProperties, allOf,
+    if/then, $ref, anyOf, oneOf, patternProperties, ...) are rejected.
+    _sanitize_schema must strip them recursively so tool schemas authored for
+    OpenAI/Anthropic don't 400 here.
+ """ + raw = { + "type": "object", + "properties": { + "items": { + "type": "array", + "items": { + "type": "object", + "properties": { + "content": {"type": "string"}, + "level": {"type": "string", "enum": ["a", "b"]}, + }, + "required": ["content"], + "additionalProperties": False, + "allOf": [ + { + "if": {"properties": {"level": {"const": "a"}}}, + "then": {"required": ["aux"]}, + } + ], + }, + }, + }, + "required": ["items"], + "$defs": {"Foo": {"type": "string"}}, + } + cleaned = GeminiBackend._sanitize_schema(raw) # pyright: ignore[reportPrivateUsage] + + # Top-level + assert "additionalProperties" not in cleaned + assert "$defs" not in cleaned + assert cleaned["type"] == "object" + assert cleaned["required"] == ["items"] + + # Nested under items + item_schema = cleaned["properties"]["items"]["items"] + assert "additionalProperties" not in item_schema + assert "allOf" not in item_schema + assert item_schema["properties"]["level"]["enum"] == ["a", "b"] + + +def test_gemini_convert_tools_sanitizes_parameters_schema() -> None: + """End-to-end: feeding a Pydantic/OpenAI-style schema through _convert_tools + must produce a Gemini-safe function_declarations payload.""" + tools = [ + { + "name": "create_observations", + "description": "Create observations.", + "input_schema": { + "type": "object", + "properties": { + "observations": { + "type": "array", + "items": { + "type": "object", + "properties": {"content": {"type": "string"}}, + "additionalProperties": False, + }, + } + }, + "required": ["observations"], + "additionalProperties": False, + }, + } + ] + converted = GeminiBackend._convert_tools(tools) # pyright: ignore[reportPrivateUsage] + params = converted[0]["function_declarations"][0]["parameters"] + assert "additionalProperties" not in params + assert "additionalProperties" not in params["properties"]["observations"]["items"] diff --git a/tests/llm/test_backends/test_openai.py b/tests/llm/test_backends/test_openai.py new file mode 100644 index 000000000..81838202e --- /dev/null +++ b/tests/llm/test_backends/test_openai.py @@ -0,0 +1,276 @@ +from types import SimpleNamespace +from unittest.mock import AsyncMock, Mock + +import pytest + +from src.exceptions import ValidationException +from src.llm.backends.openai import OpenAIBackend + + +@pytest.mark.asyncio +async def test_openai_backend_uses_gpt5_params_and_extracts_reasoning() -> None: + client = Mock() + client.chat.completions.create = AsyncMock( + return_value=SimpleNamespace( + choices=[ + SimpleNamespace( + finish_reason="stop", + message=SimpleNamespace( + content="Hello from GPT-5", + tool_calls=[], + reasoning_details=[ + SimpleNamespace( + content="reasoning summary", + model_dump=lambda: { + "type": "reasoning", + "content": "reasoning summary", + }, + ) + ], + ), + ) + ], + usage=SimpleNamespace( + prompt_tokens=10, + completion_tokens=5, + prompt_tokens_details=SimpleNamespace(cached_tokens=4), + ), + ) + ) + + backend = OpenAIBackend(client) + result = await backend.complete( + model="gpt-5-mini", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + thinking_effort="high", + ) + + assert result.content == "Hello from GPT-5" + assert result.thinking_content == "reasoning summary" + assert result.reasoning_details == [ + {"type": "reasoning", "content": "reasoning summary"} + ] + assert result.cache_read_input_tokens == 4 + + await_args = client.chat.completions.create.await_args + if await_args is None: + raise AssertionError("Expected OpenAI create call") + call = await_args.kwargs + assert 
call["model"] == "gpt-5-mini" + assert call["max_completion_tokens"] == 100 + assert call["reasoning_effort"] == "high" + assert "max_tokens" not in call + + +@pytest.mark.asyncio +async def test_openai_backend_passes_thinking_effort_through_for_non_gpt5_models() -> ( + None +): + client = Mock() + client.chat.completions.create = AsyncMock( + return_value=SimpleNamespace( + choices=[ + SimpleNamespace( + finish_reason="stop", + message=SimpleNamespace( + content="Hello from GPT-4.1", + tool_calls=[], + reasoning_details=[], + ), + ) + ], + usage=SimpleNamespace( + prompt_tokens=10, + completion_tokens=5, + prompt_tokens_details=None, + ), + ) + ) + + backend = OpenAIBackend(client) + await backend.complete( + model="gpt-4.1", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + thinking_effort="low", + ) + + await_args = client.chat.completions.create.await_args + if await_args is None: + raise AssertionError("Expected OpenAI create call") + call = await_args.kwargs + assert call["model"] == "gpt-4.1" + assert call["max_tokens"] == 100 + assert call["reasoning_effort"] == "low" + + +@pytest.mark.asyncio +async def test_openai_backend_does_not_treat_proxy_models_with_gpt5_substring_as_gpt5() -> ( + None +): + """Regression: proxy/deployment names containing 'gpt-5' must use `max_tokens`. + + Flexible OpenAI-compatible configuration means operators commonly route through + proxies/Azure deployments with IDs like `azure-gpt-5-deployment` or + `my-gpt-5-proxy`. A naive substring check would incorrectly send + `max_completion_tokens` (a GPT-5-only parameter) to those endpoints. + """ + client = Mock() + client.chat.completions.create = AsyncMock( + return_value=SimpleNamespace( + choices=[ + SimpleNamespace( + finish_reason="stop", + message=SimpleNamespace( + content="ok", + tool_calls=[], + reasoning_details=[], + ), + ) + ], + usage=SimpleNamespace( + prompt_tokens=10, + completion_tokens=5, + prompt_tokens_details=None, + ), + ) + ) + + backend = OpenAIBackend(client) + await backend.complete( + model="my-gpt-5-proxy", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + ) + + await_args = client.chat.completions.create.await_args + if await_args is None: + raise AssertionError("Expected OpenAI create call") + call = await_args.kwargs + assert call["max_tokens"] == 100 + assert "max_completion_tokens" not in call + + +@pytest.mark.asyncio +async def test_openai_backend_rejects_thinking_budget_tokens() -> None: + backend = OpenAIBackend(Mock()) + + with pytest.raises( + ValidationException, match="does not support thinking_budget_tokens" + ): + await backend.complete( + model="gpt-5-mini", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + thinking_budget_tokens=256, + ) + + +@pytest.mark.asyncio +async def test_openai_backend_converts_anthropic_style_tools() -> None: + client = Mock() + client.chat.completions.create = AsyncMock( + return_value=SimpleNamespace( + choices=[ + SimpleNamespace( + finish_reason="stop", + message=SimpleNamespace( + content="Used tools", + tool_calls=[], + reasoning_details=[], + ), + ) + ], + usage=SimpleNamespace( + prompt_tokens=10, + completion_tokens=5, + prompt_tokens_details=None, + ), + ) + ) + + backend = OpenAIBackend(client) + await backend.complete( + model="gpt-4.1", + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + tools=[ + { + "name": "get_weather", + "description": "Lookup weather", + "input_schema": { + "type": "object", + "properties": {"city": {"type": "string"}}, + 
"required": ["city"], + }, + } + ], + tool_choice="required", + ) + + await_args = client.chat.completions.create.await_args + if await_args is None: + raise AssertionError("Expected OpenAI create call") + call = await_args.kwargs + assert call["tools"] == [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Lookup weather", + "parameters": { + "type": "object", + "properties": {"city": {"type": "string"}}, + "required": ["city"], + }, + }, + } + ] + assert call["tool_choice"] == "required" + + +@pytest.mark.parametrize( + "model", + [ + "gpt-5", + "gpt-5-turbo", + "gpt-5.4", + "gpt-5.4-mini", + "gpt-5.5-preview", + "o1", + "o1-mini", + "o3", + "o3-mini", + "o4-preview", + ], +) +def test_openai_reasoning_models_use_max_completion_tokens(model: str) -> None: + """Reasoning model families (gpt-5 incl. x.y versions, o1/o3/o4) must send + max_completion_tokens, not max_tokens — OpenAI rejects max_tokens for them + with 400 unsupported_parameter.""" + from src.llm.backends.openai import ( + _uses_max_completion_tokens, # pyright: ignore[reportPrivateUsage] + ) + + assert _uses_max_completion_tokens(model) is True + + +@pytest.mark.parametrize( + "model", + [ + "gpt-4.1", + "gpt-4o", + "gpt-4o-mini", + "gpt-3.5-turbo", + "some-proxy-model", + ], +) +def test_openai_classic_models_use_max_tokens(model: str) -> None: + """Non-reasoning OpenAI and OpenAI-compatible proxy models stay on + the classic max_tokens parameter.""" + from src.llm.backends.openai import ( + _uses_max_completion_tokens, # pyright: ignore[reportPrivateUsage] + ) + + assert _uses_max_completion_tokens(model) is False diff --git a/tests/llm/test_conversation.py b/tests/llm/test_conversation.py new file mode 100644 index 000000000..324803ce2 --- /dev/null +++ b/tests/llm/test_conversation.py @@ -0,0 +1,103 @@ +from typing import Any + +from src.llm.conversation import ( + _is_tool_result_message, # pyright: ignore[reportPrivateUsage] + _is_tool_use_message, # pyright: ignore[reportPrivateUsage] + truncate_messages_to_fit, +) + + +def test_truncate_messages_to_fit_keeps_last_unit_when_over_limit() -> None: + messages = [ + {"role": "user", "content": "x " * 2000}, + ] + + truncated = truncate_messages_to_fit(messages, max_tokens=1) + + assert truncated == messages + + +def test_truncate_messages_to_fit_preserves_tool_result_pair() -> None: + messages = [ + {"role": "user", "content": "old context " * 1000}, + { + "role": "assistant", + "content": None, + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": {"name": "lookup", "arguments": "{}"}, + } + ], + }, + {"role": "tool", "tool_call_id": "call_1", "content": "result"}, + ] + + truncated = truncate_messages_to_fit(messages, max_tokens=5) + + assert truncated == messages[1:] + + +def test_is_tool_use_message_detects_gemini_function_call_in_parts() -> None: + msg: dict[str, Any] = { + "role": "model", + "parts": [ + {"function_call": {"name": "search", "args": {"q": "honcho"}}}, + ], + } + assert _is_tool_use_message(msg) is True + + +def test_is_tool_result_message_detects_gemini_function_response_in_parts() -> None: + msg: dict[str, Any] = { + "role": "user", + "parts": [ + {"function_response": {"name": "search", "response": {"result": "ok"}}}, + ], + } + assert _is_tool_result_message(msg) is True + + +def test_is_tool_use_message_detects_anthropic_tool_use_block() -> None: + msg: dict[str, Any] = { + "role": "assistant", + "content": [ + {"type": "text", "text": "calling lookup"}, + {"type": "tool_use", "id": "t_1", 
"name": "lookup", "input": {}}, + ], + } + assert _is_tool_use_message(msg) is True + + +def test_truncate_messages_to_fit_preserves_gemini_tool_pair() -> None: + """A Gemini-shaped function_call / function_response pair must stay + grouped when older units get dropped. Regression: before adding the + parts-based detection, neither message would be recognized as a tool + unit, and truncation could split or drop them individually.""" + messages: list[dict[str, Any]] = [ + {"role": "user", "parts": [{"text": "old context " * 1000}]}, + { + "role": "model", + "parts": [ + {"function_call": {"name": "lookup", "args": {}}}, + ], + }, + { + "role": "user", + "parts": [ + { + "function_response": { + "name": "lookup", + "response": {"result": "found"}, + } + } + ], + }, + ] + + truncated = truncate_messages_to_fit(messages, max_tokens=20) + + # The oldest (bulk-text) message should be dropped; the function_call + + # function_response pair stays intact together. + assert truncated == messages[1:] diff --git a/tests/llm/test_credentials.py b/tests/llm/test_credentials.py new file mode 100644 index 000000000..fae9a8cf0 --- /dev/null +++ b/tests/llm/test_credentials.py @@ -0,0 +1,50 @@ +import pytest + +from src.config import ModelConfig, settings +from src.llm.credentials import resolve_credentials + + +def test_transport_credentials_use_global_settings( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings.LLM, "ANTHROPIC_API_KEY", "anthropic-test-key") + + credentials = resolve_credentials( + ModelConfig(model="claude-haiku-4-5", transport="anthropic") + ) + + assert credentials == {"api_key": "anthropic-test-key", "api_base": None} + + +def test_openai_transport_credentials_use_per_model_config() -> None: + credentials = resolve_credentials( + ModelConfig( + model="my-local-model", + transport="openai", + api_key="local-key", + base_url="http://localhost:8000/v1", + ) + ) + + assert credentials == { + "api_key": "local-key", + "api_base": "http://localhost:8000/v1", + } + + +def test_openai_transport_credentials_fall_back_to_global_defaults( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings.LLM, "OPENAI_API_KEY", "openai-test-key") + + credentials = resolve_credentials( + ModelConfig( + model="my-local-model", + transport="openai", + ) + ) + + assert credentials == { + "api_key": "openai-test-key", + "api_base": None, + } diff --git a/tests/llm/test_embedding_client.py b/tests/llm/test_embedding_client.py new file mode 100644 index 000000000..463b3dd6d --- /dev/null +++ b/tests/llm/test_embedding_client.py @@ -0,0 +1,139 @@ +from types import SimpleNamespace +from typing import Any + +import pytest + +from src.config import EmbeddingModelConfig +from src.embedding_client import _EmbeddingClient # pyright: ignore[reportPrivateUsage] + + +class FakeOpenAIEmbeddingsAPI: + def __init__(self, embedding: list[float]) -> None: + self.embedding: list[float] = embedding + self.calls: list[dict[str, Any]] = [] + + async def create(self, *, model: str, input: str | list[str]) -> SimpleNamespace: + self.calls.append({"model": model, "input": input}) + if isinstance(input, list): + data = [SimpleNamespace(embedding=self.embedding) for _ in input] + else: + data = [SimpleNamespace(embedding=self.embedding)] + return SimpleNamespace(data=data) + + +@pytest.mark.asyncio +async def test_openai_embedding_client_uses_configured_model_and_dimensions( + monkeypatch: pytest.MonkeyPatch, +) -> None: + fake_embeddings = FakeOpenAIEmbeddingsAPI([0.1] * 8) + + class 
FakeOpenAIClient: + def __init__(self, *, api_key: str | None, base_url: str | None) -> None: + self.api_key: str | None = api_key + self.base_url: str | None = base_url + self.embeddings: FakeOpenAIEmbeddingsAPI = fake_embeddings + + monkeypatch.setattr("src.embedding_client.AsyncOpenAI", FakeOpenAIClient) + + client = _EmbeddingClient( + EmbeddingModelConfig( + transport="openai", + model="text-embedding-3-small", + api_key="test-key", + base_url="http://localhost:8000/v1", + ), + vector_dimensions=8, + max_input_tokens=8192, + max_tokens_per_request=300_000, + ) + + embedding = await client.embed("hello world") + + assert embedding == [0.1] * 8 + assert fake_embeddings.calls == [ + {"model": "text-embedding-3-small", "input": "hello world"} + ] + + +@pytest.mark.asyncio +async def test_openai_embedding_client_rejects_dimension_mismatch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + fake_embeddings = FakeOpenAIEmbeddingsAPI([0.1] * 7) + + class FakeOpenAIClient: + def __init__(self, *, api_key: str | None, base_url: str | None) -> None: + self.embeddings: FakeOpenAIEmbeddingsAPI = fake_embeddings + + monkeypatch.setattr("src.embedding_client.AsyncOpenAI", FakeOpenAIClient) + + client = _EmbeddingClient( + EmbeddingModelConfig( + transport="openai", + model="text-embedding-3-small", + api_key="test-key", + ), + vector_dimensions=8, + max_input_tokens=8192, + max_tokens_per_request=300_000, + ) + + with pytest.raises(ValueError, match="Embedding dimension mismatch"): + await client.embed("hello world") + + +@pytest.mark.asyncio +async def test_gemini_embedding_client_uses_output_dimensionality( + monkeypatch: pytest.MonkeyPatch, +) -> None: + calls: list[dict[str, Any]] = [] + + class FakeGeminiModels: + async def embed_content( + self, + *, + model: str, + contents: str | list[str], + config: dict[str, Any], + ) -> SimpleNamespace: + calls.append( + { + "model": model, + "contents": contents, + "config": config, + } + ) + return SimpleNamespace( + embeddings=[SimpleNamespace(values=[0.2] * 12)], + ) + + class FakeGeminiClient: + def __init__(self, *, api_key: str | None, http_options: Any) -> None: + self.api_key: str | None = api_key + self.http_options: Any = http_options + self.aio: Any = SimpleNamespace(models=FakeGeminiModels()) + + monkeypatch.setattr("src.embedding_client.genai.Client", FakeGeminiClient) + + client = _EmbeddingClient( + EmbeddingModelConfig( + transport="gemini", + model="gemini-embedding-001", + api_key="gemini-key", + base_url="https://gemini-proxy.example/v1beta", + ), + vector_dimensions=12, + max_input_tokens=4096, + max_tokens_per_request=300_000, + ) + + embedding = await client.embed("hello world") + + assert embedding == [0.2] * 12 + assert calls == [ + { + "model": "gemini-embedding-001", + "contents": "hello world", + "config": {"output_dimensionality": 12}, + } + ] diff --git a/tests/llm/test_history_adapters.py b/tests/llm/test_history_adapters.py new file mode 100644 index 000000000..6881df6ad --- /dev/null +++ b/tests/llm/test_history_adapters.py @@ -0,0 +1,67 @@ +from src.llm.backend import CompletionResult, ToolCallResult +from src.llm.history_adapters import ( + AnthropicHistoryAdapter, + GeminiHistoryAdapter, + OpenAIHistoryAdapter, +) + + +def test_anthropic_history_adapter_preserves_thinking_blocks() -> None: + adapter = AnthropicHistoryAdapter() + result = CompletionResult( + content="Done", + thinking_blocks=[ + { + "type": "thinking", + "thinking": "private reasoning", + "signature": "sig_123", + } + ], + tool_calls=[ + 
ToolCallResult(id="tool_1", name="search", input={"query": "honcho"}) + ], + ) + + message = adapter.format_assistant_tool_message(result) + + assert message["role"] == "assistant" + assert message["content"][0]["type"] == "thinking" + assert message["content"][1] == {"type": "text", "text": "Done"} + assert message["content"][2]["type"] == "tool_use" + + +def test_gemini_history_adapter_preserves_thought_signature() -> None: + adapter = GeminiHistoryAdapter() + result = CompletionResult( + content="Calling a tool", + tool_calls=[ + ToolCallResult( + id="tool_1", + name="search", + input={"query": "honcho"}, + thought_signature="sig_abc", + ) + ], + ) + + message = adapter.format_assistant_tool_message(result) + + assert message["role"] == "model" + assert message["parts"][1]["thought_signature"] == "sig_abc" + + +def test_openai_history_adapter_preserves_reasoning_details() -> None: + adapter = OpenAIHistoryAdapter() + result = CompletionResult( + content="Calling a tool", + reasoning_details=[{"type": "reasoning", "content": "step 1"}], + tool_calls=[ + ToolCallResult(id="tool_1", name="search", input={"query": "honcho"}) + ], + ) + + message = adapter.format_assistant_tool_message(result) + + assert message["role"] == "assistant" + assert message["reasoning_details"] == [{"type": "reasoning", "content": "step 1"}] + assert message["tool_calls"][0]["function"]["name"] == "search" diff --git a/tests/llm/test_model_config.py b/tests/llm/test_model_config.py new file mode 100644 index 000000000..20d4aa7c8 --- /dev/null +++ b/tests/llm/test_model_config.py @@ -0,0 +1,510 @@ +import os +import re +from pathlib import Path +from typing import Any, cast + +import pytest + +from src.config import ( + AppSettings, + ConfiguredEmbeddingModelSettings, + ConfiguredModelSettings, + DialecticLevelSettings, + DreamSettings, + EmbeddingSettings, + ModelConfig, + ModelOverrideSettings, + SummarySettings, + VectorStoreSettings, + load_toml_config, + resolve_embedding_model_config, + resolve_model_config, +) + + +def test_fallback_config_is_independent() -> None: + """Fallback config has its own transport and reasoning params.""" + from src.config import ResolvedFallbackConfig + + config = ModelConfig( + model="claude-haiku-4-5", + transport="anthropic", + thinking_budget_tokens=1024, + fallback=ResolvedFallbackConfig( + model="gpt-4.1-mini", + transport="openai", + base_url="https://example.com/v1", + ), + ) + assert config.fallback is not None + assert config.fallback.transport == "openai" + assert config.fallback.thinking_budget_tokens is None + assert config.fallback.base_url == "https://example.com/v1" + + +def test_base_url_is_allowed_for_any_transport() -> None: + config = ModelConfig( + model="claude-haiku-4-5", + transport="anthropic", + base_url="https://anthropic-proxy.example/v1", + ) + + assert config.base_url == "https://anthropic-proxy.example/v1" + + +def test_anthropic_thinking_budget_has_minimum() -> None: + with pytest.raises(ValueError, match="thinking_budget_tokens must be >= 1024"): + ModelConfig( + model="claude-haiku-4-5", + transport="anthropic", + thinking_budget_tokens=512, + ) + + +def test_reasoning_effort_alias_populates_generic_thinking_effort() -> None: + config = ModelConfig.model_validate( + { + "model": "gpt-5", + "transport": "openai", + "reasoning_effort": "minimal", + } + ) + + assert config.thinking_effort == "minimal" + assert config.reasoning_effort == "minimal" + + +def test_for_model_overrides_model_and_transport() -> None: + config = ModelConfig( + 
model="claude-haiku-4-5", + transport="anthropic", + ) + + updated = config.for_model( + "gpt-5-mini", + transport_override="openai", + ) + + assert updated.model == "gpt-5-mini" + assert updated.transport == "openai" + assert config.transport == "anthropic" + + +def test_configured_model_settings_validate_like_runtime_model_config() -> None: + with pytest.raises(ValueError, match="thinking_budget_tokens must be >= 1024"): + ConfiguredModelSettings( + model="claude-haiku-4-5", + transport="anthropic", + thinking_budget_tokens=512, + ) + + +def test_summary_settings_accept_nested_model_config() -> None: + from src.config import FallbackModelSettings + + settings = SummarySettings( + MODEL_CONFIG=ConfiguredModelSettings( + model="claude-haiku-4-5", + transport="anthropic", + fallback=FallbackModelSettings( + model="gemini-2.5-pro", + transport="gemini", + ), + thinking_budget_tokens=1024, + ), + ) + + assert settings.MODEL_CONFIG.model == "claude-haiku-4-5" + assert settings.MODEL_CONFIG.transport == "anthropic" + assert settings.MODEL_CONFIG.fallback is not None + assert settings.MODEL_CONFIG.fallback.model == "gemini-2.5-pro" + assert settings.MODEL_CONFIG.fallback.transport == "gemini" + assert settings.MODEL_CONFIG.thinking_budget_tokens == 1024 + + +def test_resolve_model_config_reads_override_env_and_provider_params( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("SUMMARY_LOCAL_API_KEY", "test-key") + + configured = ConfiguredModelSettings( + model="my-local-model", + transport="openai", + overrides=ModelOverrideSettings( + api_key_env="SUMMARY_LOCAL_API_KEY", + base_url="http://localhost:8000/v1", + provider_params={"verbosity": "low"}, + ), + ) + + resolved = resolve_model_config(configured) + + assert resolved.api_key == "test-key" + assert resolved.base_url == "http://localhost:8000/v1" + assert resolved.provider_params == {"verbosity": "low"} + + +def test_resolve_embedding_model_config_reads_override_env( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setenv("EMBEDDING_LOCAL_API_KEY", "embed-key") + + configured = ConfiguredEmbeddingModelSettings( + transport="openai", + model="text-embedding-3-small", + overrides=ModelOverrideSettings( + api_key_env="EMBEDDING_LOCAL_API_KEY", + base_url="http://localhost:8000/v1", + ), + ) + + resolved = resolve_embedding_model_config(configured) + + assert resolved.api_key == "embed-key" + assert resolved.base_url == "http://localhost:8000/v1" + + +def test_dialectic_level_settings_accepts_nested_model_config() -> None: + from src.config import FallbackModelSettings + + settings = DialecticLevelSettings( + MODEL_CONFIG=ConfiguredModelSettings( + model="claude-haiku-4-5", + transport="anthropic", + fallback=FallbackModelSettings( + model="gemini-2.5-pro", + transport="gemini", + ), + thinking_budget_tokens=1024, + ), + MAX_TOOL_ITERATIONS=2, + ) + + resolved = resolve_model_config(settings.MODEL_CONFIG) + assert resolved.model == "claude-haiku-4-5" + assert resolved.transport == "anthropic" + assert resolved.fallback is not None + assert resolved.fallback.model == "gemini-2.5-pro" + assert resolved.fallback.transport == "gemini" + + +def test_dialectic_level_settings_require_nested_model_config() -> None: + with pytest.raises(ValueError, match="Field required"): + DialecticLevelSettings.model_validate({"MAX_TOOL_ITERATIONS": 2}) + + +def test_dialectic_level_settings_reject_legacy_flat_model_shape() -> None: + with pytest.raises(ValueError, match="Field required"): + DialecticLevelSettings.model_validate( + { + 
"MODEL": "claude-haiku-4-5", + "THINKING_BUDGET_TOKENS": 1024, + "MAX_TOOL_ITERATIONS": 2, + } + ) + + +def test_legacy_prefixed_model_strings_are_normalized() -> None: + config = ModelConfig.model_validate({"model": "gemini/gemini-2.5-flash"}) + configured = ConfiguredModelSettings.model_validate( + {"model": "anthropic/claude-haiku-4-5"} + ) + + assert config.transport == "gemini" + assert config.model == "gemini-2.5-flash" + assert configured.transport == "anthropic" + assert configured.model == "claude-haiku-4-5" + + +def test_dream_specialist_model_configs_are_independent() -> None: + """Specialist configs carry their own defaults and don't inherit from a parent.""" + + dream = DreamSettings( + DEDUCTION_MODEL_CONFIG=ConfiguredModelSettings( + model="claude-haiku-4-5", + transport="anthropic", + thinking_budget_tokens=2048, + ), + INDUCTION_MODEL_CONFIG=ConfiguredModelSettings( + model="claude-opus-4-1", + transport="anthropic", + max_output_tokens=8000, + ), + ) + + assert dream.DEDUCTION_MODEL_CONFIG.model == "claude-haiku-4-5" + assert dream.DEDUCTION_MODEL_CONFIG.thinking_budget_tokens == 2048 + assert dream.DEDUCTION_MODEL_CONFIG.max_output_tokens is None + + assert dream.INDUCTION_MODEL_CONFIG.model == "claude-opus-4-1" + assert dream.INDUCTION_MODEL_CONFIG.max_output_tokens == 8000 + assert dream.INDUCTION_MODEL_CONFIG.thinking_budget_tokens is None + + +def test_app_settings_propagate_embedding_dimensions_to_vector_store() -> None: + settings = AppSettings( + EMBEDDING=EmbeddingSettings(VECTOR_DIMENSIONS=2048), + VECTOR_STORE=VectorStoreSettings(TYPE="lancedb", MIGRATED=True), + ) + + assert settings.EMBEDDING.VECTOR_DIMENSIONS == 2048 + assert settings.VECTOR_STORE.DIMENSIONS == 2048 + + +def test_app_settings_require_matching_embedding_and_vector_store_dimensions() -> None: + with pytest.raises( + ValueError, + match=re.escape( + "VECTOR_STORE.DIMENSIONS must match EMBEDDING.VECTOR_DIMENSIONS" + ), + ): + AppSettings( + EMBEDDING=EmbeddingSettings(VECTOR_DIMENSIONS=2048), + VECTOR_STORE=VectorStoreSettings( + TYPE="lancedb", + MIGRATED=True, + DIMENSIONS=1536, + ), + ) + + +def test_app_settings_reject_non_1536_dimensions_while_pgvector_or_dual_write_active() -> ( + None +): + with pytest.raises( + ValueError, + match=re.escape("EMBEDDING.VECTOR_DIMENSIONS must remain 1536"), + ): + AppSettings( + EMBEDDING=EmbeddingSettings(VECTOR_DIMENSIONS=2048), + VECTOR_STORE=VectorStoreSettings(TYPE="pgvector", MIGRATED=True), + ) + + with pytest.raises( + ValueError, + match=re.escape("EMBEDDING.VECTOR_DIMENSIONS must remain 1536"), + ): + AppSettings( + EMBEDDING=EmbeddingSettings(VECTOR_DIMENSIONS=2048), + VECTOR_STORE=VectorStoreSettings(TYPE="lancedb", MIGRATED=False), + ) + + +def test_config_toml_example_uses_nested_model_config_sections() -> None: + config_path = Path(__file__).resolve().parents[2] / "config.toml.example" + config_data = load_toml_config(str(config_path)) + + deriver_config = ConfiguredModelSettings.model_validate( + config_data["deriver"]["model_config"] + ) + minimal_level = DialecticLevelSettings.model_validate( + config_data["dialectic"]["levels"]["minimal"] + ) + max_level = DialecticLevelSettings.model_validate( + config_data["dialectic"]["levels"]["max"] + ) + embedding_config = ConfiguredEmbeddingModelSettings.model_validate( + config_data["embedding"]["model_config"] + ) + summary_config = ConfiguredModelSettings.model_validate( + config_data["summary"]["model_config"] + ) + deduction_model_config = ConfiguredModelSettings.model_validate( + 
config_data["dream"]["deduction_model_config"] + ) + induction_model_config = ConfiguredModelSettings.model_validate( + config_data["dream"]["induction_model_config"] + ) + dream = DreamSettings.model_validate( + { + "DEDUCTION_MODEL_CONFIG": deduction_model_config, + "INDUCTION_MODEL_CONFIG": induction_model_config, + } + ) + + # config.toml.example ships the same minimal defaults the app uses: + # transport=openai, model=gpt-5.4-mini across every text-generation + # feature, with embeddings on openai/text-embedding-3-small. Asserting + # these keeps the example file and the in-code defaults in lockstep. + assert deriver_config.transport == "openai" + assert deriver_config.model == "gpt-5.4-mini" + assert deriver_config.thinking_budget_tokens is None + assert minimal_level.MODEL_CONFIG.model == "gpt-5.4-mini" + assert minimal_level.MODEL_CONFIG.transport == "openai" + assert max_level.MODEL_CONFIG.model == "gpt-5.4-mini" + assert max_level.MODEL_CONFIG.transport == "openai" + assert max_level.MODEL_CONFIG.thinking_budget_tokens is None + assert embedding_config.transport == "openai" + assert embedding_config.model == "text-embedding-3-small" + assert summary_config.model == "gpt-5.4-mini" + assert summary_config.transport == "openai" + assert dream.DEDUCTION_MODEL_CONFIG.model == "gpt-5.4-mini" + assert dream.INDUCTION_MODEL_CONFIG.model == "gpt-5.4-mini" + + +def test_env_template_uses_nested_model_config_keys() -> None: + env_template_path = Path(__file__).resolve().parents[2] / ".env.template" + env_template = env_template_path.read_text() + + assert "EMBEDDING_MODEL_CONFIG__MODEL" in env_template + assert "EMBEDDING_VECTOR_DIMENSIONS" in env_template + assert "DERIVER_MODEL_CONFIG__MODEL" in env_template + assert "DIALECTIC_LEVELS__minimal__MODEL_CONFIG__MODEL" in env_template + assert "SUMMARY_MODEL_CONFIG__MODEL" in env_template + assert "DREAM_DEDUCTION_MODEL_CONFIG__MODEL" in env_template + + assert "DERIVER_PROVIDER=" not in env_template + assert "SUMMARY_PROVIDER=" not in env_template + assert "DIALECTIC_LEVELS__minimal__PROVIDER=" not in env_template + assert "DREAM_PROVIDER=" not in env_template + assert "DREAM_DEDUCTION_MODEL=" not in env_template + + +def _clear_deriver_env(monkeypatch: pytest.MonkeyPatch) -> None: + """Strip any DERIVER_MODEL_CONFIG__* env that would interfere with + direct-construction tests.""" + for name in list(os.environ): + if name.startswith("DERIVER_MODEL_CONFIG__") or name == "DERIVER_MODEL_CONFIG": + monkeypatch.delenv(name, raising=False) + + +def test_partial_env_override_of_transport_drops_default_thinking_params( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """A partial env override of transport must not leak the default's thinking + params into a transport that rejects them. + + Regression: setting DERIVER_MODEL_CONFIG__TRANSPORT=openai + + DERIVER_MODEL_CONFIG__MODEL=gpt-4.1-mini (without clearing the default + thinking_budget_tokens=1024 carried over from the gemini default) used to + produce a merged ConfiguredModelSettings with thinking_budget_tokens=1024, + which the OpenAI backend then rejected at call time. + """ + from src.config import DeriverSettings + + _clear_deriver_env(monkeypatch) + # Exercise the @model_validator(mode="before") merge path with a raw dict + # — pyright can't see through the pre-validator that accepts dict input. 
+    settings = DeriverSettings(
+        MODEL_CONFIG={"transport": "openai", "model": "gpt-4.1-mini"},  # pyright: ignore[reportArgumentType]
+    )
+
+    assert settings.MODEL_CONFIG.transport == "openai"
+    assert settings.MODEL_CONFIG.model == "gpt-4.1-mini"
+    assert settings.MODEL_CONFIG.thinking_budget_tokens is None
+    assert settings.MODEL_CONFIG.thinking_effort is None
+
+
+def test_partial_env_override_same_transport_keeps_default_thinking_params(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """When env preserves the default transport, default thinking params still
+    apply — we only strip on actual transport change.
+
+    The app-level defaults are intentionally minimal (transport + model only)
+    to avoid clobbering operator config, so this test patches in a deliberately
+    rich default to exercise the merge-preservation behavior.
+    """
+    from src.config import ConfiguredModelSettings, DeriverSettings
+
+    _clear_deriver_env(monkeypatch)
+
+    def _rich_default() -> ConfiguredModelSettings:
+        return ConfiguredModelSettings(
+            transport="gemini",
+            model="gemini-2.5-flash-lite",
+            thinking_budget_tokens=1024,
+            max_output_tokens=4096,
+        )
+
+    monkeypatch.setattr(DeriverSettings, "_MODEL_CONFIG_DEFAULT", _rich_default)
+
+    settings = DeriverSettings(
+        MODEL_CONFIG={"model": "gemini-2.5-pro"},  # pyright: ignore[reportArgumentType]
+    )
+
+    assert settings.MODEL_CONFIG.transport == "gemini"
+    assert settings.MODEL_CONFIG.model == "gemini-2.5-pro"
+    assert settings.MODEL_CONFIG.thinking_budget_tokens == 1024
+    assert settings.MODEL_CONFIG.max_output_tokens == 4096
+
+
+def test_explicit_thinking_effort_survives_transport_override(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """User-set thinking params in the override are always preserved."""
+    from src.config import DeriverSettings
+
+    _clear_deriver_env(monkeypatch)
+    settings = DeriverSettings(
+        MODEL_CONFIG={  # pyright: ignore[reportArgumentType]
+            "transport": "openai",
+            "model": "gpt-5",
+            "thinking_effort": "high",
+        },
+    )
+
+    assert settings.MODEL_CONFIG.transport == "openai"
+    assert settings.MODEL_CONFIG.thinking_effort == "high"
+    assert settings.MODEL_CONFIG.thinking_budget_tokens is None
+
+
+def test_dialectic_level_transport_override_drops_default_thinking_params(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    """Same leak existed in DialecticSettings._merge_level_defaults.
+
+    Regression: when a level default has thinking_budget_tokens=0 under a
+    gemini transport and env flips the override to openai, the 0 used to leak
+    through and trip the OpenAI backend's thinking-param rejection.
+
+    The app-level defaults are intentionally minimal (transport + model only)
+    to avoid clobbering operator config, so this test patches in a rich
+    level default to exercise the strip-on-transport-change behavior.
+
+    Exercises the before-validator directly to avoid DialecticSettings'
+    "all 5 levels required" constraint.
+ """ + from src.config import ( + ConfiguredModelSettings, + DialecticLevelSettings, + DialecticSettings, + ) + + def _rich_levels() -> dict[str, DialecticLevelSettings]: + return { + "minimal": DialecticLevelSettings( + MODEL_CONFIG=ConfiguredModelSettings( + transport="gemini", + model="gemini-2.5-flash-lite", + thinking_budget_tokens=0, + ), + MAX_TOOL_ITERATIONS=1, + MAX_OUTPUT_TOKENS=250, + TOOL_CHOICE="any", + ), + } + + monkeypatch.setattr("src.config._default_dialectic_levels", _rich_levels) + + data: dict[str, object] = { + "LEVELS": { + "minimal": { + "MODEL_CONFIG": { + "transport": "openai", + "model": "gpt-4.1-mini", + } + } + } + } + # The @model_validator decorator wraps the classmethod in a descriptor proxy + # that pyright can't see as callable; at runtime pydantic routes it correctly. + merged = cast( + dict[str, Any], + DialecticSettings._merge_level_defaults(data), # pyright: ignore[reportPrivateUsage, reportCallIssue] + ) + levels = cast(dict[str, dict[str, Any]], merged["LEVELS"]) + minimal_mc = cast(dict[str, Any], levels["minimal"]["MODEL_CONFIG"]) + assert minimal_mc["transport"] == "openai" + assert minimal_mc["model"] == "gpt-4.1-mini" + assert "thinking_budget_tokens" not in minimal_mc + assert "thinking_effort" not in minimal_mc diff --git a/tests/llm/test_request_builder.py b/tests/llm/test_request_builder.py new file mode 100644 index 000000000..c8ed7dfd4 --- /dev/null +++ b/tests/llm/test_request_builder.py @@ -0,0 +1,97 @@ +from pydantic import BaseModel + +from src.config import ModelConfig +from src.llm.caching import PromptCachePolicy +from src.llm.request_builder import execute_completion +from tests.llm.conftest import FakeBackend + + +class SampleResponse(BaseModel): + answer: str + + +async def test_gemini_explicit_budget_passes_tokens_through_without_adjustment( + fake_backend: FakeBackend, +) -> None: + config = ModelConfig( + model="gemini-2.5-flash", + transport="gemini", + thinking_budget_tokens=256, + ) + + await execute_completion( + fake_backend, + config, + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + ) + + call = fake_backend.calls[0] + # No auto-adjustment — operators set explicit values + assert call["max_output_tokens"] == 100 + assert call["max_tokens"] == 100 + assert call["thinking_budget_tokens"] == 256 + + +async def test_thinking_params_are_passed_through_without_capability_dropping( + fake_backend: FakeBackend, +) -> None: + config = ModelConfig( + model="claude-haiku-4-5", + transport="anthropic", + thinking_effort="high", + thinking_budget_tokens=1024, + ) + + await execute_completion( + fake_backend, + config, + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + ) + + call = fake_backend.calls[0] + assert call["thinking_effort"] == "high" + assert call["thinking_budget_tokens"] == 1024 + + +async def test_cache_policy_is_passed_through_extra_params( + fake_backend: FakeBackend, +) -> None: + config = ModelConfig(model="gpt-4.1-mini", transport="openai") + cache_policy = PromptCachePolicy(mode="prefix", ttl_seconds=300) + + await execute_completion( + fake_backend, + config, + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + response_format=SampleResponse, + cache_policy=cache_policy, + ) + + call = fake_backend.calls[0] + assert call["response_format"] is SampleResponse + assert call["extra_params"]["cache_policy"] == cache_policy + + +async def test_provider_params_are_merged_into_extra_params( + fake_backend: FakeBackend, +) -> None: + config = ModelConfig( + 
model="gpt-4.1-mini", + transport="openai", + top_p=0.9, + provider_params={"custom_flag": True}, + ) + + await execute_completion( + fake_backend, + config, + messages=[{"role": "user", "content": "Hello"}], + max_tokens=100, + ) + + call = fake_backend.calls[0] + assert call["extra_params"]["top_p"] == 0.9 + assert call["extra_params"]["custom_flag"] is True diff --git a/tests/routes/test_peers.py b/tests/routes/test_peers.py index c8047f02d..9626ea10a 100644 --- a/tests/routes/test_peers.py +++ b/tests/routes/test_peers.py @@ -576,25 +576,6 @@ def test_get_peer_representation_with_all_parameters( assert isinstance(data["representation"], str) -def test_get_peer_representation_structure( - client: TestClient, sample_data: tuple[Workspace, Peer] -): - """Test that peer representation response has correct structure""" - test_workspace, test_peer = sample_data - - # Get representation and validate structure - response = client.post( - f"/v3/workspaces/{test_workspace.name}/peers/{test_peer.name}/representation", - json={}, - ) - assert response.status_code == 200 - data = response.json() - - # Validate response structure - assert "representation" in data - assert isinstance(data["representation"], str) - - def test_get_peer_representation_boundary_values( client: TestClient, sample_data: tuple[Workspace, Peer] ): diff --git a/tests/routes/test_queue_status.py b/tests/routes/test_queue_status.py index af25077b5..279ad913d 100644 --- a/tests/routes/test_queue_status.py +++ b/tests/routes/test_queue_status.py @@ -74,20 +74,6 @@ async def test_get_deriver_status_with_include_sender_true( assert response.status_code == 200 assert response.json()["total_work_units"] == 0 - async def test_get_deriver_status_with_include_sender_false( - self, - client: TestClient, - sample_data: tuple[models.Workspace, models.Peer], - ): - """Test getting deriver status with include_sender=False (default)""" - workspace, peer = sample_data - response = client.get( - f"/v3/workspaces/{workspace.name}/queue/status", - params={"observer_id": peer.name}, - ) - assert response.status_code == 200 - assert response.json()["total_work_units"] == 0 - async def test_get_deriver_status_no_parameters( self, client: TestClient, sample_data: tuple[models.Workspace, models.Peer] ): diff --git a/tests/routes/test_scoped_api.py b/tests/routes/test_scoped_api.py index c2d7d2e6a..a4d4677a6 100644 --- a/tests/routes/test_scoped_api.py +++ b/tests/routes/test_scoped_api.py @@ -20,45 +20,6 @@ def test_create_workspace_with_auth(auth_client: AuthClient): assert response.status_code in [200, 201] -def test_auth_response_time(auth_client: AuthClient): - name = str(generate_nanoid()) - - import time - - start_time = time.time() - - response = auth_client.post( - "/v3/workspaces", json={"name": name, "metadata": {"key": "value"}} - ) - - end_time = time.time() - response_time = end_time - start_time - print( - f"Server response time for client {auth_client.auth_type}: {response_time:.6f} seconds" - ) - - # Check expected behavior based on auth type - if auth_client.auth_type != "admin": - assert response.status_code == 401 - return - - assert response.status_code in [200, 201] - - -def test_get_or_create_workspace_with_auth(auth_client: AuthClient): - name = str(generate_nanoid()) - - response = auth_client.post( - "/v3/workspaces", json={"name": name, "metadata": {"key": "value"}} - ) - - if auth_client.auth_type != "admin": - assert response.status_code == 401 - return - - assert response.status_code in [200, 201] - - def 
 def test_get_workspace_with_auth(
     auth_client: AuthClient, sample_data: tuple[Workspace, Peer]
 ):
diff --git a/tests/utils/test_clients.py b/tests/utils/test_clients.py
index f76cb27c8..213a370cb 100644
--- a/tests/utils/test_clients.py
+++ b/tests/utils/test_clients.py
@@ -1,14 +1,12 @@
 """
-Comprehensive tests for src/utils/clients.py
+Comprehensive tests for the public src.llm orchestration surface.
 
 Tests cover:
-- All supported LLM providers (Anthropic, OpenAI, Google/Gemini, Groq)
+- All supported LLM providers (Anthropic, OpenAI, Google/Gemini)
 - Streaming and non-streaming responses
 - Response models (structured output)
 - Error handling and retries
 - Provider-specific features
-- Client initialization
-- Langfuse integration
 """
 
 from typing import Any
@@ -25,13 +23,12 @@
 from openai.types.completion_usage import CompletionUsage
 from pydantic import BaseModel, Field
 
-from src.config import settings
-from src.exceptions import LLMError
-from src.utils.clients import (
+from src.config import ConfiguredModelSettings, ModelConfig, ResolvedFallbackConfig
+from src.exceptions import LLMError, ValidationException
+from src.llm import (
     CLIENTS,
     HonchoLLMCallResponse,
     HonchoLLMCallStreamChunk,
-    handle_streaming_response,
     honcho_llm_call,
     honcho_llm_call_inner,
 )
@@ -185,45 +182,14 @@ async def test_anthropic_thinking_budget(self):
             model="claude-3-sonnet",
             prompt="Think about this",
             max_tokens=100,
-            thinking_budget_tokens=1000,
+            thinking_budget_tokens=1024,
         )
 
         # Verify thinking parameter was passed
         mock_client.messages.create.assert_called_once()
         call_args = mock_client.messages.create.call_args
         thinking_config = call_args.kwargs["thinking"]
-        assert thinking_config == {"type": "enabled", "budget_tokens": 1000}
-
-    async def test_anthropic_response_model_with_json_parsing(self):
-        """Test that Anthropic supports response models via JSON schema in prompt"""
-        from anthropic.types import TextBlock
-
-        # Create an actual Anthropic client mock that passes isinstance checks
-        mock_messages = AsyncMock()
-        mock_response = Mock()
-        # Create an actual TextBlock instance that will pass isinstance checks
-        text_block = TextBlock(type="text", text='"name": "Alice", "age": 30}')
-        mock_response.content = [text_block]
-        mock_response.usage = Mock(output_tokens=10)
-        mock_response.stop_reason = "end_turn"
-        mock_messages.create.return_value = mock_response
-
-        # Instead of mocking the CLIENTS dict, we mock the entire AsyncAnthropic class
-        # to return our configured mock when instantiated
-        with patch("src.utils.clients.AsyncAnthropic") as mock_anthropic_class:
-            mock_client_instance = Mock()
-            mock_client_instance.messages = mock_messages
-            mock_anthropic_class.return_value = mock_client_instance
-
-            # Also need to patch the CLIENTS dict with an instance that passes isinstance
-            # Since this is complex, let's verify the simpler behavior - that response_model
-            # is supported and the prompt is modified (no NotImplementedError)
-
-            # Note: Full integration testing of response_model parsing would require
-            # a more complex setup with actual Anthropic client mocking.
-            # This test verifies that the code path for response_model exists and
-            # modifies the prompt appropriately.
-            pass  # Test simplified - behavior is now supported
+        assert thinking_config == {"type": "enabled", "budget_tokens": 1024}
 
     async def test_anthropic_streaming(self):
         """Test Anthropic streaming response"""
@@ -253,16 +219,16 @@ async def test_anthropic_streaming(self):
         with patch.dict(CLIENTS, {"anthropic": mock_client}):
             chunks: list[HonchoLLMCallStreamChunk] = []
-            async for chunk in handle_streaming_response(
-                client=mock_client,
-                params={
-                    "model": "claude-3-sonnet",
-                    "max_tokens": 100,
-                    "messages": [{"role": "user", "content": "Hello"}],
-                },
-                json_mode=False,
-                thinking_budget_tokens=None,
-            ):
+            stream = await honcho_llm_call_inner(
+                provider="anthropic",
+                model="claude-3-sonnet",
+                prompt="Hello",
+                max_tokens=100,
+                stream=True,
+                client_override=mock_client,
+                messages=[{"role": "user", "content": "Hello"}],
"content": "Hello"}], + ) + async for chunk in stream: chunks.append(chunk) assert len(chunks) == 3 @@ -731,9 +697,9 @@ async def test_google_no_candidates_fallback(self): mock_aio.models.generate_content = AsyncMock(return_value=mock_response) mock_client.aio = mock_aio - with patch.dict(CLIENTS, {"google": mock_client}): + with patch.dict(CLIENTS, {"gemini": mock_client}): response = await honcho_llm_call_inner( - provider="google", + provider="gemini", model="gemini-1.5-pro", prompt="Hello", max_tokens=100, @@ -769,11 +735,11 @@ async def test_google_blocked_response_raises_error(self, finish_reason: str): mock_client.aio = mock_aio with ( - patch.dict(CLIENTS, {"google": mock_client}), + patch.dict(CLIENTS, {"gemini": mock_client}), pytest.raises(LLMError, match=f"finish_reason={finish_reason}"), ): await honcho_llm_call_inner( - provider="google", + provider="gemini", model="gemini-2.5-flash", prompt="Summarize this", max_tokens=1000, @@ -799,9 +765,9 @@ async def test_google_max_tokens_empty_does_not_raise(self): mock_aio.models.generate_content = AsyncMock(return_value=mock_response) mock_client.aio = mock_aio - with patch.dict(CLIENTS, {"google": mock_client}): + with patch.dict(CLIENTS, {"gemini": mock_client}): response = await honcho_llm_call_inner( - provider="google", + provider="gemini", model="gemini-2.5-flash", prompt="Hello", max_tokens=100, @@ -829,11 +795,11 @@ async def test_google_blocked_response_model_raises_error(self): mock_client.aio = mock_aio with ( - patch.dict(CLIENTS, {"google": mock_client}), + patch.dict(CLIENTS, {"gemini": mock_client}), pytest.raises(LLMError, match="finish_reason=SAFETY"), ): await honcho_llm_call_inner( - provider="google", + provider="gemini", model="gemini-2.5-flash", prompt="Generate a person", max_tokens=100, @@ -858,9 +824,9 @@ async def test_google_blocked_finish_reason_with_valid_parsed_does_not_raise(sel mock_aio.models.generate_content = AsyncMock(return_value=mock_response) mock_client.aio = mock_aio - with patch.dict(CLIENTS, {"google": mock_client}): + with patch.dict(CLIENTS, {"gemini": mock_client}): response = await honcho_llm_call_inner( - provider="google", + provider="gemini", model="gemini-2.5-flash", prompt="Generate a person", max_tokens=100, @@ -873,362 +839,481 @@ async def test_google_blocked_finish_reason_with_valid_parsed_does_not_raise(sel @pytest.mark.asyncio -class TestGroqClient: - """Tests for Groq client functionality""" +class TestMainLLMCallFunction: + """Tests for the main honcho_llm_call function""" - async def test_groq_basic_call(self): - """Test basic Groq API call""" - from groq import AsyncGroq + async def test_streaming_call(self): + """Test streaming LLM call""" - mock_client = AsyncMock(spec=AsyncGroq) - mock_response = ChatCompletion( - id="test-id", - object="chat.completion", - created=1234567890, - model="llama-3.1-70b", - choices=[ - Choice( - index=0, - message=ChatCompletionMessage( - role="assistant", content="Hello from Groq" - ), - finish_reason="stop", - ) - ], - usage=CompletionUsage( - prompt_tokens=10, completion_tokens=8, total_tokens=18 - ), - ) - mock_client.chat.completions.create = AsyncMock(return_value=mock_response) + mock_client = AsyncMock(spec=AsyncAnthropic) + mock_stream = AsyncMock() - with patch.dict(CLIENTS, {"groq": mock_client}): - response = await honcho_llm_call_inner( - provider="groq", model="llama-3.1-70b", prompt="Hello", max_tokens=100 + # Mock streaming chunks + mock_chunks = [ + Mock(type="content_block_delta", delta=Mock(text="Stream")), + 
Mock(type="content_block_delta", delta=Mock(text=" test")), + ] + mock_stream.__aenter__.return_value = mock_stream + mock_stream.__aiter__.return_value = iter(mock_chunks) + + # Mock final message with usage tokens + mock_usage = Mock(output_tokens=28) + mock_final_message = Mock(stop_reason="stop", usage=mock_usage) + mock_stream.get_final_message.return_value = mock_final_message + + mock_client.messages.stream.return_value = mock_stream + + with patch.dict(CLIENTS, {"anthropic": mock_client}): + chunks: list[HonchoLLMCallStreamChunk] = [] + async for chunk in await honcho_llm_call( + model_config=ConfiguredModelSettings( + model="claude-4-sonnet", + transport="anthropic", + ), + prompt="Hello", + max_tokens=100, + stream=True, + enable_retry=False, # Disable retry for simpler testing + ): + chunks.append(chunk) + + assert len(chunks) == 3 # 2 content + 1 final + assert chunks[0].content == "Stream" + assert chunks[1].content == " test" + assert chunks[2].is_done is True + + async def test_retry_disabled(self): + """Test that retry can be disabled""" + + mock_client = AsyncMock(spec=AsyncAnthropic) + mock_response = Mock() + mock_response.content = [TextBlock(text="No retry response", type="text")] + mock_response.usage = Usage(input_tokens=5, output_tokens=5) + mock_response.stop_reason = "stop" + mock_client.messages.create = AsyncMock(return_value=mock_response) + + with patch.dict(CLIENTS, {"anthropic": mock_client}): + response = await honcho_llm_call( + model_config=ConfiguredModelSettings( + model="claude-4-sonnet", + transport="anthropic", + ), + prompt="Hello", + max_tokens=100, + enable_retry=False, ) - assert isinstance(response, HonchoLLMCallResponse) - assert response.content == "Hello from Groq" - assert response.output_tokens == 8 - assert response.finish_reasons == ["stop"] + assert response.content == "No retry response" - async def test_groq_json_mode(self): - """Test Groq with JSON mode""" - from groq import AsyncGroq - mock_client = AsyncMock(spec=AsyncGroq) - mock_response = ChatCompletion( - id="test-id", - object="chat.completion", - created=1234567890, - model="llama-3.1-70b", - choices=[ - Choice( - index=0, - message=ChatCompletionMessage( - role="assistant", content='{"success": true}' - ), - finish_reason="stop", - ) - ], - usage=CompletionUsage( - prompt_tokens=10, completion_tokens=5, total_tokens=15 - ), - ) - mock_client.chat.completions.create = AsyncMock(return_value=mock_response) +class TestEdgeCases: + """Tests for edge cases and boundary conditions""" - with patch.dict(CLIENTS, {"groq": mock_client}): - _response = await honcho_llm_call_inner( - provider="groq", - model="llama-3.1-70b", - prompt="Generate JSON", + def test_stream_chunk_with_no_finish_reasons(self): + """Test stream chunk creation without finish reasons""" + chunk = HonchoLLMCallStreamChunk(content="test") + # Should use default_factory for empty list + assert chunk.finish_reasons == [] + # Modifying the list shouldn't affect other instances + chunk.finish_reasons.append("stop") + + new_chunk = HonchoLLMCallStreamChunk(content="test2") + assert new_chunk.finish_reasons == [] # Should still be empty + + +@pytest.mark.asyncio +class TestModelConfigCalls: + async def test_honcho_llm_call_accepts_model_config(self): + mock_client = AsyncMock(spec=AsyncAnthropic) + mock_response = Mock() + mock_response.content = [TextBlock(text="ModelConfig response", type="text")] + mock_response.usage = Usage(input_tokens=8, output_tokens=4) + mock_response.stop_reason = "stop" + 
+        mock_client.messages.create = AsyncMock(return_value=mock_response)
+
+        with patch.dict(CLIENTS, {"anthropic": mock_client}):
+            response = await honcho_llm_call(
+                model_config=ModelConfig(
+                    model="claude-haiku-4-5",
+                    transport="anthropic",
+                ),
+                prompt="Hello",
                 max_tokens=100,
-                json_mode=True,
+                enable_retry=False,
             )
 
-        # Verify JSON mode was set
-        mock_client.chat.completions.create.assert_called_once()
-        call_args = mock_client.chat.completions.create.call_args
-        assert call_args.kwargs["response_format"] == {"type": "json_object"}
+        assert response.content == "ModelConfig response"
+        await_args = mock_client.messages.create.await_args
+        if await_args is None:
+            raise AssertionError("Expected Anthropic create call")
+        call_args = await_args.kwargs
+        assert call_args["model"] == "claude-haiku-4-5"
+
+    async def test_honcho_llm_call_accepts_configured_model_settings(self):
+        mock_client = AsyncMock(spec=AsyncAnthropic)
+        mock_response = Mock()
+        mock_response.content = [
+            TextBlock(text="ConfiguredModelSettings response", type="text")
+        ]
+        mock_response.usage = Usage(input_tokens=8, output_tokens=4)
+        mock_response.stop_reason = "stop"
+        mock_client.messages.create = AsyncMock(return_value=mock_response)
+
+        with patch.dict(CLIENTS, {"anthropic": mock_client}):
+            response = await honcho_llm_call(
+                model_config=ConfiguredModelSettings(
+                    model="claude-haiku-4-5",
+                    transport="anthropic",
+                    thinking_budget_tokens=1024,
+                ),
+                prompt="Hello",
+                max_tokens=100,
+                enable_retry=False,
+            )
+
+        assert response.content == "ConfiguredModelSettings response"
+        await_args = mock_client.messages.create.await_args
+        if await_args is None:
+            raise AssertionError("Expected Anthropic create call")
+        call_args = await_args.kwargs
+        assert call_args["model"] == "claude-haiku-4-5"
+        assert call_args["thinking"] == {
+            "type": "enabled",
+            "budget_tokens": 1024,
+        }
+
 
-    async def test_groq_response_model(self):
-        """Test Groq with response model (structured output)"""
-        from groq import AsyncGroq
+@pytest.mark.asyncio
+class TestModelConfigExtraParamsPropagation:
+    """Regression tests — config knobs must reach the backend.
+
+    Prior to the fix, honcho_llm_call_inner built extra_params from only
+    {json_mode, verbosity}, silently dropping top_p/top_k/frequency_penalty/
+    presence_penalty/seed/provider_params off the ModelConfig. These tests
+    lock in that each backend now receives them.
+ """ + + async def test_openai_propagates_top_p_frequency_seed(self): + from openai import AsyncOpenAI - mock_client = AsyncMock(spec=AsyncGroq) - # Mock JSON response that matches SampleTestModel structure - json_content = '{"name": "Bob", "age": 30, "active": true}' + mock_client = AsyncMock(spec=AsyncOpenAI) mock_response = ChatCompletion( id="test-id", object="chat.completion", created=1234567890, - model="llama-3.1-70b", + model="gpt-4.1", choices=[ Choice( index=0, - message=ChatCompletionMessage( - role="assistant", content=json_content - ), + message=ChatCompletionMessage(role="assistant", content="ok"), finish_reason="stop", ) ], usage=CompletionUsage( - prompt_tokens=10, completion_tokens=12, total_tokens=22 + prompt_tokens=10, completion_tokens=5, total_tokens=15 ), ) mock_client.chat.completions.create = AsyncMock(return_value=mock_response) - with patch.dict(CLIENTS, {"groq": mock_client}): - response = await honcho_llm_call_inner( - provider="groq", - model="llama-3.1-70b", - prompt="Generate a person", + with patch.dict(CLIENTS, {"openai": mock_client}): + await honcho_llm_call( + model_config=ModelConfig( + model="gpt-4.1", + transport="openai", + top_p=0.92, + frequency_penalty=0.5, + presence_penalty=0.1, + seed=42, + ), + prompt="Hello", max_tokens=100, - response_model=SampleTestModel, + enable_retry=False, ) - # Verify the response contains the parsed model - assert isinstance(response.content, SampleTestModel) - assert response.content.name == "Bob" - assert response.content.age == 30 - assert response.content.active is True - assert response.output_tokens == 12 - assert response.finish_reasons == ["stop"] - - # Verify the response format was set to the model mock_client.chat.completions.create.assert_called_once() - call_args = mock_client.chat.completions.create.call_args - assert call_args.kwargs["response_format"] == SampleTestModel + kwargs = mock_client.chat.completions.create.call_args.kwargs + assert kwargs["top_p"] == 0.92 + assert kwargs["frequency_penalty"] == 0.5 + assert kwargs["presence_penalty"] == 0.1 + assert kwargs["seed"] == 42 + + async def test_anthropic_propagates_top_p_top_k(self): + mock_client = AsyncMock(spec=AsyncAnthropic) + mock_response = Mock() + mock_response.content = [TextBlock(text="ok", type="text")] + mock_response.usage = Usage(input_tokens=8, output_tokens=4) + mock_response.stop_reason = "stop" + mock_client.messages.create = AsyncMock(return_value=mock_response) - async def test_groq_no_content_error(self): - """Test Groq error handling when no content in response""" - from groq import AsyncGroq + with patch.dict(CLIENTS, {"anthropic": mock_client}): + await honcho_llm_call( + model_config=ModelConfig( + model="claude-haiku-4-5", + transport="anthropic", + top_p=0.85, + top_k=40, + ), + prompt="Hello", + max_tokens=100, + enable_retry=False, + ) + + await_args = mock_client.messages.create.await_args + if await_args is None: + raise AssertionError("Expected Anthropic create call") + kwargs = await_args.kwargs + assert kwargs["top_p"] == 0.85 + assert kwargs["top_k"] == 40 + + async def test_provider_params_passthrough(self): + """Operator-supplied provider_params must reach the backend's extra_params. + + Scope: verifies the ModelConfig.provider_params → backend.extra_params + boundary inside honcho_llm_call_inner. This is NOT a guarantee that + arbitrary keys reach the provider SDK — each backend's _build_params + forwards only an allowlist (top_p, top_k, frequency_penalty, seed, + etc.). 
+        etc.). We assert only that the sentinel key arrives in extra_params
+        at the backend boundary, which is the internal contract this test
+        exists to protect.
+        """
+        from openai import AsyncOpenAI
 
-        mock_client = AsyncMock(spec=AsyncGroq)
+        mock_client = AsyncMock(spec=AsyncOpenAI)
         mock_response = ChatCompletion(
             id="test-id",
             object="chat.completion",
             created=1234567890,
-            model="llama-3.1-70b",
+            model="gpt-4.1",
             choices=[
                 Choice(
                     index=0,
-                    message=ChatCompletionMessage(role="assistant", content=None),
+                    message=ChatCompletionMessage(role="assistant", content="ok"),
                     finish_reason="stop",
                 )
             ],
             usage=CompletionUsage(
-                prompt_tokens=10, completion_tokens=0, total_tokens=10
+                prompt_tokens=10, completion_tokens=5, total_tokens=15
             ),
         )
         mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
 
+        captured_extra: dict[str, Any] = {}
+
+        from src.llm.backends.openai import OpenAIBackend
+
+        original_complete = OpenAIBackend.complete
+
+        async def capture_extra(self: Any, **kwargs: Any) -> Any:
+            captured_extra.update(kwargs.get("extra_params") or {})
+            return await original_complete(self, **kwargs)
+
         with (
-            patch.dict(CLIENTS, {"groq": mock_client}),
-            pytest.raises(ValueError, match="No content in response"),
+            patch.dict(CLIENTS, {"openai": mock_client}),
+            patch.object(OpenAIBackend, "complete", capture_extra),
         ):
-            await honcho_llm_call_inner(
-                provider="groq",
-                model="llama-3.1-70b",
+            await honcho_llm_call(
+                model_config=ModelConfig(
+                    model="gpt-4.1",
+                    transport="openai",
+                    provider_params={"honcho_sentinel": "zap"},
+                ),
                 prompt="Hello",
                 max_tokens=100,
+                enable_retry=False,
             )
 
-    async def test_groq_streaming(self):
-        """Test Groq streaming response"""
-        from groq import AsyncGroq
+        assert captured_extra.get("honcho_sentinel") == "zap"
 
-        mock_client = AsyncMock(spec=AsyncGroq)
+    async def test_cache_policy_reaches_gemini_backend(self):
+        """PromptCachePolicy set on ModelConfig must reach the Gemini backend's
+        extra_params as a typed object (so gemini_cached_content reuse fires)."""
+        from google import genai
 
-        # Create mock streaming chunks
-        mock_chunks = [
-            ChatCompletionChunk(
-                id="test-id",
-                object="chat.completion.chunk",
-                created=1234567890,
-                model="llama-3.1-70b",
-                choices=[
-                    ChunkChoice(
-                        index=0, delta=ChoiceDelta(content="Hello"), finish_reason=None
-                    )
-                ],
-            ),
-            ChatCompletionChunk(
-                id="test-id",
-                object="chat.completion.chunk",
-                created=1234567890,
-                model="llama-3.1-70b",
-                choices=[
-                    ChunkChoice(
-                        index=0,
-                        delta=ChoiceDelta(content=" from Groq"),
-                        finish_reason=None,
-                    )
-                ],
-            ),
-            ChatCompletionChunk(
-                id="test-id",
-                object="chat.completion.chunk",
-                created=1234567890,
-                model="llama-3.1-70b",
-                choices=[
-                    ChunkChoice(
-                        index=0, delta=ChoiceDelta(content=None), finish_reason="stop"
-                    )
-                ],
-            ),
-        ]
+        from src.config import PromptCachePolicy
+        from src.llm.backends.gemini import GeminiBackend
 
-        # Create async iterator
-        async def async_chunk_iterator():
-            for chunk in mock_chunks:
-                yield chunk
+        mock_client = Mock(spec=genai.Client)
+        mock_client.__class__ = genai.Client  # pyright: ignore[reportAttributeAccessIssue]
 
-        # Mock the create method to return the async generator when awaited
-        mock_client.chat.completions.create = AsyncMock(
-            return_value=async_chunk_iterator()
-        )
+        import contextlib
 
-        with patch.dict(CLIENTS, {"groq": mock_client}):
-            chunks: list[HonchoLLMCallStreamChunk] = []
-            async for chunk in handle_streaming_response(
-                client=mock_client,
-                params={
-                    "model": "llama-3.1-70b",
-                    "max_tokens": 100,
-                    "messages":
[{"role": "user", "content": "Hello"}], - }, - json_mode=False, - thinking_budget_tokens=None, - ): - chunks.append(chunk) + captured_extra: dict[str, Any] = {} - assert len(chunks) == 3 - assert chunks[0].content == "Hello" - assert chunks[1].content == " from Groq" - assert chunks[2].content == "" - assert chunks[2].is_done is True - assert chunks[2].finish_reasons == ["stop"] + async def capture_extra(_self: Any, **kwargs: Any) -> Any: + captured_extra.update(kwargs.get("extra_params") or {}) + return None + policy = PromptCachePolicy(mode="gemini_cached_content", ttl_seconds=300) -@pytest.mark.asyncio -class TestMainLLMCallFunction: - """Tests for the main honcho_llm_call function""" + with ( + patch.dict(CLIENTS, {"gemini": mock_client}), + patch.object(GeminiBackend, "complete", capture_extra), + # capture_extra returns None, so downstream normalization will raise; + # we only care that extra_params was observed pre-raise. + contextlib.suppress(Exception), + ): + await honcho_llm_call( + model_config=ModelConfig( + model="gemini-2.5-flash", + transport="gemini", + cache_policy=policy, + ), + prompt="Hello", + max_tokens=100, + enable_retry=False, + ) - async def test_streaming_call(self): - """Test streaming LLM call""" + assert captured_extra.get("cache_policy") is policy - mock_client = AsyncMock(spec=AsyncAnthropic) - mock_stream = AsyncMock() + async def test_per_call_kwargs_override_provider_params(self): + """json_mode/verbosity from honcho_llm_call must win over provider_params defaults.""" + from openai import AsyncOpenAI - # Mock streaming chunks - mock_chunks = [ - Mock(type="content_block_delta", delta=Mock(text="Stream")), - Mock(type="content_block_delta", delta=Mock(text=" test")), - ] - mock_stream.__aenter__.return_value = mock_stream - mock_stream.__aiter__.return_value = iter(mock_chunks) + from src.llm.backends.openai import OpenAIBackend - # Mock final message with usage tokens - mock_usage = Mock(output_tokens=28) - mock_final_message = Mock(stop_reason="stop", usage=mock_usage) - mock_stream.get_final_message.return_value = mock_final_message + mock_client = AsyncMock(spec=AsyncOpenAI) + mock_response = ChatCompletion( + id="test-id", + object="chat.completion", + created=1234567890, + model="gpt-4.1", + choices=[ + Choice( + index=0, + message=ChatCompletionMessage(role="assistant", content="{}"), + finish_reason="stop", + ) + ], + usage=CompletionUsage( + prompt_tokens=10, completion_tokens=5, total_tokens=15 + ), + ) + mock_client.chat.completions.create = AsyncMock(return_value=mock_response) - mock_client.messages.stream.return_value = mock_stream + captured_extra: dict[str, Any] = {} + original_complete = OpenAIBackend.complete - with patch.dict(CLIENTS, {"anthropic": mock_client}): - settings.DIALECTIC.LEVELS["medium"].PROVIDER = "anthropic" - settings.DIALECTIC.LEVELS["medium"].MODEL = "claude-4-sonnet" - chunks: list[HonchoLLMCallStreamChunk] = [] - async for chunk in await honcho_llm_call( - llm_settings=settings.DIALECTIC.LEVELS["medium"], + async def capture_extra(self: Any, **kwargs: Any) -> Any: + captured_extra.update(kwargs.get("extra_params") or {}) + return await original_complete(self, **kwargs) + + with ( + patch.dict(CLIENTS, {"openai": mock_client}), + patch.object(OpenAIBackend, "complete", capture_extra), + ): + await honcho_llm_call( + model_config=ModelConfig( + model="gpt-4.1", + transport="openai", + provider_params={"json_mode": False, "verbosity": "low"}, + ), prompt="Hello", max_tokens=100, - stream=True, - enable_retry=False, # 
-                enable_retry=False,  # Disable retry for simpler testing
-            ):
-                chunks.append(chunk)
-
-        assert len(chunks) == 3  # 2 content + 1 final
-        assert chunks[0].content == "Stream"
-        assert chunks[1].content == " test"
-        assert chunks[2].is_done is True
-
-    async def test_retry_disabled(self):
-        """Test that retry can be disabled"""
+                json_mode=True,
+                verbosity="high",
+                enable_retry=False,
+            )
 
-        mock_client = AsyncMock(spec=AsyncAnthropic)
-        mock_response = Mock()
-        mock_response.content = [TextBlock(text="No retry response", type="text")]
-        mock_response.usage = Usage(input_tokens=5, output_tokens=5)
-        mock_response.stop_reason = "stop"
-        mock_client.messages.create = AsyncMock(return_value=mock_response)
+        assert captured_extra["json_mode"] is True
+        assert captured_extra["verbosity"] == "high"
+
+    async def test_fallback_config_thinking_params_applied_on_final_retry(
+        self,
+    ) -> None:
+        """When primary fails, the FALLBACK ModelConfig's own temperature and
+        thinking_budget_tokens must reach the backend on the final retry —
+        not the primary's values, and not whatever the caller never set.
+
+        Regression for the 'default caller kwargs from runtime_model_config too
+        early' bug: if honcho_llm_call pre-populated temperature from
+        runtime_model_config (the primary) before attempt selection, those
+        primary values would clobber the fallback's own thinking params via
+        effective_config_for_call(update={...}).
+        """
+        mock_client = AsyncMock(spec=AsyncAnthropic)
+        mock_response = Mock()
+        mock_response.content = [TextBlock(text="from fallback", type="text")]
+        mock_response.usage = Usage(input_tokens=5, output_tokens=3)
+        mock_response.stop_reason = "stop"
+
+        # Primary fails twice, then fallback succeeds on attempt 3.
+        mock_client.messages.create = AsyncMock(
+            side_effect=[
+                RuntimeError("primary attempt 1"),
+                RuntimeError("primary attempt 2"),
+                mock_response,
+            ]
+        )
+
+        fallback = ResolvedFallbackConfig(
+            model="claude-haiku-4-5",
+            transport="anthropic",
+            temperature=0.9,
+            thinking_budget_tokens=2048,
+        )
 
         with patch.dict(CLIENTS, {"anthropic": mock_client}):
-            settings.DIALECTIC.LEVELS["medium"].PROVIDER = "anthropic"
-            settings.DIALECTIC.LEVELS["medium"].MODEL = "claude-4-sonnet"
-            response = await honcho_llm_call(
-                llm_settings=settings.DIALECTIC.LEVELS["medium"],
+            await honcho_llm_call(
+                model_config=ModelConfig(
+                    model="claude-sonnet-4-5",
+                    transport="anthropic",
+                    temperature=0.1,
+                    thinking_budget_tokens=1024,
+                    fallback=fallback,
+                ),
                 prompt="Hello",
                 max_tokens=100,
-                enable_retry=False,
+                enable_retry=True,
+                retry_attempts=3,
             )
 
-        assert response.content == "No retry response"
-
-
-class TestEdgeCases:
-    """Tests for edge cases and boundary conditions"""
-
-    def test_stream_chunk_with_no_finish_reasons(self):
-        """Test stream chunk creation without finish reasons"""
-        chunk = HonchoLLMCallStreamChunk(content="test")
-        # Should use default_factory for empty list
-        assert chunk.finish_reasons == []
-        # Modifying the list shouldn't affect other instances
-        chunk.finish_reasons.append("stop")
-
-        new_chunk = HonchoLLMCallStreamChunk(content="test2")
-        assert new_chunk.finish_reasons == []  # Should still be empty
+        # Final call should carry the FALLBACK's values, not primary's.
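+        # Rough shape of the selection rule asserted below (illustrative
+        # names, not the real implementation): on the exhausted-retries
+        # attempt the fallback config is used wholesale, e.g.
+        #     active = config.fallback if is_last_attempt else config
+        # rather than merging the primary's kwargs over it.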
+        final_call = mock_client.messages.create.await_args_list[-1]
+        kwargs = final_call.kwargs
+        assert kwargs["model"] == "claude-haiku-4-5"
+        assert kwargs["temperature"] == 0.9
+        assert kwargs["thinking"] == {
+            "type": "enabled",
+            "budget_tokens": 2048,
+        }
 
 
-# Test fixtures and utilities
-@pytest.fixture
-def sample_test_model():
-    """Fixture providing a sample SampleTestModel instance"""
-    return SampleTestModel(name="Test User", age=25, active=True)
-
-
-@pytest.fixture
-def mock_anthropic_client():
-    """Fixture providing a mocked Anthropic client"""
-    mock_client = AsyncMock()
-    mock_response = Mock()
-    mock_response.content = [TextBlock(text="Mocked Anthropic response", type="text")]
-    mock_response.usage = Usage(input_tokens=10, output_tokens=5)
-    mock_response.stop_reason = "stop"
-    mock_client.messages.create.return_value = mock_response
-    return mock_client
-
-
-@pytest.fixture
-def mock_openai_client():
-    """Fixture providing a mocked OpenAI client"""
-    mock_client = AsyncMock()
-    mock_response = ChatCompletion(
-        id="test-id",
-        object="chat.completion",
-        created=1234567890,
-        model="gpt-4",
-        choices=[
-            Choice(
-                index=0,
-                message=ChatCompletionMessage(
-                    role="assistant", content="Mocked OpenAI response"
-                ),
-                finish_reason="stop",
-            )
-        ],
-        usage=CompletionUsage(prompt_tokens=10, completion_tokens=5, total_tokens=15),
-    )
-    mock_client.chat.completions.create = AsyncMock(return_value=mock_response)
-    return mock_client
+@pytest.mark.asyncio
+class TestToolLoopValidation:
+    """Lock in the fail-fast behavior on max_tool_iterations out of range."""
+
+    @pytest.mark.parametrize("bad_value", [0, -1, 101, 1_000])
+    async def test_invalid_max_tool_iterations_raises(self, bad_value: int) -> None:
+        from src.llm.tool_loop import execute_tool_loop
+
+        def _noop_plan() -> Any:  # pragma: no cover - never called
+            raise AssertionError("plan should not be invoked for invalid input")
+
+        def _noop_executor(
+            _name: str, _input: dict[str, Any]
+        ) -> str:  # pragma: no cover
+            return "ok"
+
+        def _noop_retry_callback(_state: Any) -> None:  # pragma: no cover
+            return None
+
+        with pytest.raises(ValidationException, match="max_tool_iterations"):
+            await execute_tool_loop(
+                prompt="x",
+                max_tokens=10,
+                messages=None,
+                tools=[{"name": "t", "description": "d", "input_schema": {}}],
+                tool_choice=None,
+                tool_executor=_noop_executor,
+                max_tool_iterations=bad_value,
+                response_model=None,
+                json_mode=False,
+                temperature=None,
+                stop_seqs=None,
+                verbosity=None,
+                enable_retry=False,
+                retry_attempts=3,
+                max_input_tokens=None,
+                get_attempt_plan=_noop_plan,
+                before_retry_callback=_noop_retry_callback,
+            )
diff --git a/tests/utils/test_length_finish_reason.py b/tests/utils/test_length_finish_reason.py
new file mode 100644
index 000000000..69d3210cb
--- /dev/null
+++ b/tests/utils/test_length_finish_reason.py
@@ -0,0 +1,456 @@
+"""
+Tests for JSON repair handling across all providers in honcho_llm_call_inner,
+and Gemini thinking budget support.
+
+Verifies that when an LLM hits the max token limit or returns malformed JSON,
+the truncated output is repaired and returned instead of crashing.
+""" + +import json +from typing import Any +from unittest.mock import AsyncMock, Mock, patch + +import pytest +from anthropic import AsyncAnthropic +from anthropic.types import TextBlock, Usage +from openai import AsyncOpenAI, LengthFinishReasonError +from openai.types.chat import ChatCompletion +from openai.types.chat.chat_completion import Choice +from openai.types.chat.chat_completion_message import ChatCompletionMessage +from openai.types.completion_usage import CompletionUsage +from pydantic import BaseModel, ValidationError + +from src.llm import CLIENTS, HonchoLLMCallResponse, honcho_llm_call_inner +from src.utils.representation import PromptRepresentation + +# --- Test models --- + + +class SimpleModel(BaseModel): + """Non-PromptRepresentation model for testing re-raise behavior.""" + + items: list[str] + + +# --- Helpers --- + +VALID_REPR_JSON = { + "explicit": [ + {"content": "hermes is 25 years old"}, + {"content": "hermes has a dog"}, + ] +} + + +def _make_truncated_completion(content: str) -> ChatCompletion: + """Build a ChatCompletion with finish_reason='length' and the given content.""" + return ChatCompletion( + id="test-truncated", + object="chat.completion", + created=1234567890, + model="test-model", + choices=[ + Choice( + index=0, + message=ChatCompletionMessage(role="assistant", content=content), + finish_reason="length", + ) + ], + usage=CompletionUsage( + prompt_tokens=1000, completion_tokens=2000, total_tokens=3000 + ), + ) + + +def _raise_length_error(content: str) -> AsyncMock: + """Return an AsyncMock that raises LengthFinishReasonError with truncated content.""" + completion = _make_truncated_completion(content) + return AsyncMock(side_effect=LengthFinishReasonError(completion=completion)) + + +def _make_anthropic_mock(text: str, stop_reason: str = "end_turn") -> AsyncMock: + """Build a mocked AsyncAnthropic client returning the given text.""" + mock_client = AsyncMock(spec=AsyncAnthropic) + mock_response = Mock() + mock_response.content = [TextBlock(text=text, type="text")] + mock_response.usage = Usage(input_tokens=100, output_tokens=50) + mock_response.stop_reason = stop_reason + mock_client.messages.create = AsyncMock(return_value=mock_response) + return mock_client + + +def _make_gemini_mock( + text: str | None = None, + parsed: Any = None, + finish_reason_name: str = "STOP", +) -> Mock: + """Build a mocked genai.Client returning the given text/parsed content.""" + mock_client = Mock() + + # Build response + mock_response = Mock() + mock_response.parsed = parsed + + # Candidates + mock_candidate = Mock() + mock_finish_reason = Mock() + mock_finish_reason.name = finish_reason_name + mock_candidate.finish_reason = mock_finish_reason + + # Content parts + if text is not None: + mock_part = Mock() + mock_part.text = text + mock_part.function_call = None + mock_content = Mock() + mock_content.parts = [mock_part] + mock_candidate.content = mock_content + else: + mock_candidate.content = None + + mock_response.candidates = [mock_candidate] + + # Usage + mock_usage = Mock() + mock_usage.prompt_token_count = 200 + mock_usage.candidates_token_count = 100 + mock_response.usage_metadata = mock_usage + + mock_client.aio.models.generate_content = AsyncMock(return_value=mock_response) + return mock_client + + +# --------------------------------------------------------------------------- +# OpenAI / Custom provider tests (LengthFinishReasonError path) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +class 
TestOpenAILengthFinishReasonRepair: + """Tests that LengthFinishReasonError is caught and truncated JSON is repaired.""" + + async def test_truncated_prompt_representation_repaired_openai(self) -> None: + """Truncated but repairable PromptRepresentation JSON should be repaired (openai).""" + truncated_json = json.dumps(VALID_REPR_JSON)[:-2] + + mock_client = AsyncMock(spec=AsyncOpenAI) + mock_client.chat.completions.parse = _raise_length_error(truncated_json) + + with patch.dict(CLIENTS, {"openai": mock_client}): + response = await honcho_llm_call_inner( + provider="openai", + model="test-model", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response, HonchoLLMCallResponse) + assert isinstance(response.content, PromptRepresentation) + assert len(response.content.explicit) >= 1 + assert response.finish_reasons == ["length"] + assert response.output_tokens == 2000 + + async def test_truncated_prompt_representation_repaired_openai_with_custom_base( + self, + ) -> None: + """Truncated but repairable PromptRepresentation JSON should be repaired.""" + truncated_json = json.dumps(VALID_REPR_JSON)[:-2] + + mock_client = AsyncMock(spec=AsyncOpenAI) + mock_client.chat.completions.parse = _raise_length_error(truncated_json) + + with patch.dict(CLIENTS, {"openai": mock_client}): + response = await honcho_llm_call_inner( + provider="openai", + model="test-model", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response, HonchoLLMCallResponse) + assert isinstance(response.content, PromptRepresentation) + assert len(response.content.explicit) >= 1 + assert response.finish_reasons == ["length"] + + async def test_completely_broken_json_falls_back_to_empty(self) -> None: + """Completely unrepairable JSON should fall back to empty PromptRepresentation.""" + mock_client = AsyncMock(spec=AsyncOpenAI) + mock_client.chat.completions.parse = _raise_length_error( + "this is not json at all just random text" + ) + + with patch.dict(CLIENTS, {"openai": mock_client}): + response = await honcho_llm_call_inner( + provider="openai", + model="test-model", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response.content, PromptRepresentation) + assert response.content.explicit == [] + assert response.finish_reasons == ["length"] + + async def test_empty_content_falls_back_to_empty(self) -> None: + """Empty/null content should fall back to empty PromptRepresentation.""" + mock_client = AsyncMock(spec=AsyncOpenAI) + mock_client.chat.completions.parse = _raise_length_error("") + + with patch.dict(CLIENTS, {"openai": mock_client}): + response = await honcho_llm_call_inner( + provider="openai", + model="test-model", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response.content, PromptRepresentation) + assert response.content.explicit == [] + + async def test_non_prompt_representation_reraises_on_unfixable(self) -> None: + """Non-PromptRepresentation with unrepairable JSON should raise ValidationError.""" + mock_client = AsyncMock(spec=AsyncOpenAI) + mock_client.chat.completions.parse = _raise_length_error("not json") + + with ( + patch.dict(CLIENTS, {"openai": mock_client}), + pytest.raises(ValidationError), + ): + await honcho_llm_call_inner( + provider="openai", + model="test-model", + 
prompt="Generate items", + max_tokens=2000, + response_model=SimpleModel, + json_mode=True, + ) + + async def test_token_counts_preserved(self) -> None: + """Token counts from the truncated completion should be preserved.""" + truncated_json = '{"explicit": [{"content": "fact one"}' + + mock_client = AsyncMock(spec=AsyncOpenAI) + mock_client.chat.completions.parse = _raise_length_error(truncated_json) + + with patch.dict(CLIENTS, {"openai": mock_client}): + response = await honcho_llm_call_inner( + provider="openai", + model="test-model", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert response.input_tokens == 1000 + assert response.output_tokens == 2000 + + async def test_valid_json_with_length_finish_reason(self) -> None: + """Valid JSON despite length truncation should parse fine.""" + valid_json = json.dumps(VALID_REPR_JSON) + + mock_client = AsyncMock(spec=AsyncOpenAI) + mock_client.chat.completions.parse = _raise_length_error(valid_json) + + with patch.dict(CLIENTS, {"openai": mock_client}): + response = await honcho_llm_call_inner( + provider="openai", + model="test-model", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response.content, PromptRepresentation) + assert len(response.content.explicit) == 2 + assert response.content.explicit[0].content == "hermes is 25 years old" + + +# --------------------------------------------------------------------------- +# Anthropic provider tests (JSON parse failure -> repair path) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +class TestAnthropicJsonRepair: + """Tests that Anthropic response_model parse failures trigger JSON repair.""" + + async def test_truncated_anthropic_response_repaired(self) -> None: + """Truncated Anthropic JSON response should be repaired.""" + # Anthropic prefills "{" so the response text starts after that + # The code prepends "{" back: json_content = "{" + text_content + truncated_text = json.dumps(VALID_REPR_JSON)[ + 1:-2 + ] # Remove leading { and trailing }] + + mock_client = _make_anthropic_mock(truncated_text, stop_reason="max_tokens") + + with patch.dict(CLIENTS, {"anthropic": mock_client}): + response = await honcho_llm_call_inner( + provider="anthropic", + model="claude-3-sonnet", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response.content, PromptRepresentation) + assert len(response.content.explicit) >= 1 + + async def test_broken_anthropic_response_falls_back_to_empty(self) -> None: + """Completely broken Anthropic JSON should fall back to empty PromptRepresentation.""" + mock_client = _make_anthropic_mock( + "random gibberish that is not json", stop_reason="max_tokens" + ) + + with patch.dict(CLIENTS, {"anthropic": mock_client}): + response = await honcho_llm_call_inner( + provider="anthropic", + model="claude-3-sonnet", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response.content, PromptRepresentation) + assert response.content.explicit == [] + + async def test_non_prompt_representation_reraises(self) -> None: + """Non-PromptRepresentation with broken JSON should raise.""" + mock_client = _make_anthropic_mock("not json", stop_reason="max_tokens") + + with ( + patch.dict(CLIENTS, {"anthropic": mock_client}), + 
pytest.raises(ValidationError), + ): + await honcho_llm_call_inner( + provider="anthropic", + model="claude-3-sonnet", + prompt="Generate items", + max_tokens=2000, + response_model=SimpleModel, + json_mode=True, + ) + + +# --------------------------------------------------------------------------- +# Gemini provider tests (parsed=None or type mismatch -> repair path) +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +class TestGeminiJsonRepair: + """Tests that Gemini response_model parse failures trigger JSON repair.""" + + async def test_gemini_unparsed_response_repaired(self) -> None: + """Gemini returning text but no parsed object should repair from raw text.""" + from google import genai + + valid_text = json.dumps(VALID_REPR_JSON) + mock_client = _make_gemini_mock( + text=valid_text, parsed=None, finish_reason_name="MAX_TOKENS" + ) + + with ( + patch.dict(CLIENTS, {"gemini": mock_client}), + patch.object(genai.Client, "__instancecheck__", return_value=True), + ): + # We need the match statement to hit the genai.Client case + mock_client.__class__ = genai.Client # pyright: ignore[reportAttributeAccessIssue] + response = await honcho_llm_call_inner( + provider="gemini", + model="gemini-2.5-flash", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response.content, PromptRepresentation) + assert len(response.content.explicit) == 2 + + async def test_gemini_broken_text_falls_back_to_empty(self) -> None: + """Gemini with broken text and no parsed content should fall back.""" + from google import genai + + mock_client = _make_gemini_mock( + text="broken json", parsed=None, finish_reason_name="MAX_TOKENS" + ) + mock_client.__class__ = genai.Client # pyright: ignore[reportAttributeAccessIssue] + + with patch.dict(CLIENTS, {"gemini": mock_client}): + response = await honcho_llm_call_inner( + provider="gemini", + model="gemini-2.5-flash", + prompt="Analyze messages", + max_tokens=2000, + response_model=PromptRepresentation, + json_mode=True, + ) + + assert isinstance(response.content, PromptRepresentation) + assert response.content.explicit == [] + + +# --------------------------------------------------------------------------- +# Gemini thinking budget tests +# --------------------------------------------------------------------------- + + +@pytest.mark.asyncio +class TestGeminiThinkingBudget: + """Tests that thinking_budget_tokens is passed to Gemini via ThinkingConfig.""" + + async def test_thinking_budget_passed_to_gemini(self) -> None: + """thinking_budget_tokens should be included in Gemini config.""" + from google import genai + + mock_client = _make_gemini_mock(text="Hello", parsed=None) + mock_client.__class__ = genai.Client # pyright: ignore[reportAttributeAccessIssue] + + with patch.dict(CLIENTS, {"gemini": mock_client}): + await honcho_llm_call_inner( + provider="gemini", + model="gemini-2.5-flash", + prompt="Think about this", + max_tokens=2000, + thinking_budget_tokens=4096, + ) + + # Verify generate_content was called with thinking_config + call_args = mock_client.aio.models.generate_content.call_args + config = call_args.kwargs.get("config") or call_args[1].get("config") + assert config is not None + assert "thinking_config" in config + assert config["thinking_config"]["thinking_budget"] == 4096 + + async def test_no_thinking_config_when_budget_is_none(self) -> None: + """When thinking_budget_tokens is None, thinking_config should not be 
set.""" + from google import genai + + mock_client = _make_gemini_mock(text="Hello", parsed=None) + mock_client.__class__ = genai.Client # pyright: ignore[reportAttributeAccessIssue] + + with patch.dict(CLIENTS, {"gemini": mock_client}): + await honcho_llm_call_inner( + provider="gemini", + model="gemini-2.5-flash", + prompt="No thinking needed", + max_tokens=2000, + ) + + call_args = mock_client.aio.models.generate_content.call_args + config = call_args.kwargs.get("config") or call_args[1].get("config") + if config: + assert "thinking_config" not in config diff --git a/tests/utils/test_summarizer.py b/tests/utils/test_summarizer.py index 3e8f8dc91..b842ced76 100644 --- a/tests/utils/test_summarizer.py +++ b/tests/utils/test_summarizer.py @@ -10,11 +10,14 @@ import pytest -from src.utils.clients import HonchoLLMCallResponse +from src.config import settings +from src.llm import HonchoLLMCallResponse from src.utils.summarizer import ( Summary, SummaryType, _create_summary, # pyright: ignore[reportPrivateUsage] + create_long_summary, + create_short_summary, ) # Common test arguments for _create_summary @@ -217,3 +220,61 @@ async def test_zero_message_count_empty_fallback(self): assert is_fallback is True assert summary["content"] == "" assert summary["token_count"] == 0 + + +@pytest.mark.asyncio +class TestSummaryCallerMigration: + async def test_create_short_summary_uses_model_config(self): + mock_response = HonchoLLMCallResponse( + content="short summary", + input_tokens=10, + output_tokens=5, + finish_reasons=["STOP"], + ) + + with patch( + "src.utils.summarizer.honcho_llm_call", + new_callable=AsyncMock, + return_value=mock_response, + ) as mock_llm_call: + await create_short_summary( + formatted_messages=_FORMATTED_MESSAGES, + input_tokens=_INPUT_TOKENS, + previous_summary=None, + ) + + await_args = mock_llm_call.await_args + if await_args is None: + raise AssertionError("Expected summary LLM call") + kwargs = await_args.kwargs + expected_config = settings.SUMMARY.MODEL_CONFIG + assert "model_config" in kwargs + assert kwargs["model_config"].model == expected_config.model + assert "llm_settings" not in kwargs + + async def test_create_long_summary_uses_model_config(self): + mock_response = HonchoLLMCallResponse( + content="long summary", + input_tokens=10, + output_tokens=5, + finish_reasons=["STOP"], + ) + + with patch( + "src.utils.summarizer.honcho_llm_call", + new_callable=AsyncMock, + return_value=mock_response, + ) as mock_llm_call: + await create_long_summary( + formatted_messages=_FORMATTED_MESSAGES, + previous_summary=None, + ) + + await_args = mock_llm_call.await_args + if await_args is None: + raise AssertionError("Expected summary LLM call") + kwargs = await_args.kwargs + expected_config = settings.SUMMARY.MODEL_CONFIG + assert "model_config" in kwargs + assert kwargs["model_config"].model == expected_config.model + assert "llm_settings" not in kwargs diff --git a/uv.lock b/uv.lock index 0d4647770..6e7dffa9c 100644 --- a/uv.lock +++ b/uv.lock @@ -1254,23 +1254,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/29/4b/45d90626aef8e65336bed690106d1382f7a43665e2249017e9527df8823b/greenlet-3.3.2-cp314-cp314t-win_amd64.whl", hash = "sha256:c04c5e06ec3e022cbfe2cd4a846e1d4e50087444f875ff6d2c2ad8445495cf1a", size = 237086, upload-time = "2026-02-20T20:20:45.786Z" }, ] -[[package]] -name = "groq" -version = "1.0.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "anyio" }, - { name = "distro" }, - { name = "httpx" }, - { name = "pydantic" 
},
-    { name = "sniffio" },
-    { name = "typing-extensions" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/3f/12/f4099a141677fcd2ed79dcc1fcec431e60c52e0e90c9c5d935f0ffaf8c0e/groq-1.0.0.tar.gz", hash = "sha256:66cb7bb729e6eb644daac7ce8efe945e99e4eb33657f733ee6f13059ef0c25a9", size = 146068, upload-time = "2025-12-17T23:34:23.115Z" }
-wheels = [
-    { url = "https://files.pythonhosted.org/packages/4a/88/3175759d2ef30406ea721f4d837bfa1ba4339fde3b81ba8c5640a96ed231/groq-1.0.0-py3-none-any.whl", hash = "sha256:6e22bf92ffad988f01d2d4df7729add66b8fd5dbfb2154b5bbf3af245b72c731", size = 138292, upload-time = "2025-12-17T23:34:21.957Z" },
-]
-
 [[package]]
 name = "h11"
 version = "0.16.0"
@@ -1292,7 +1275,6 @@ dependencies = [
     { name = "fastapi-pagination" },
     { name = "google-genai" },
     { name = "greenlet" },
-    { name = "groq" },
     { name = "httpx" },
     { name = "json-repair" },
     { name = "lancedb" },
@@ -1349,7 +1331,6 @@ requires-dist = [
     { name = "fastapi-pagination", specifier = ">=0.14.2" },
     { name = "google-genai", specifier = ">=1.32.0" },
     { name = "greenlet", specifier = ">=3.0.3" },
-    { name = "groq", specifier = ">=0.31.0" },
     { name = "httpx", specifier = ">=0.27.0" },
     { name = "json-repair", specifier = ">=0.49.0" },
     { name = "lancedb", specifier = ">=0.25.3" },

From 3dbf0e66fccc7d30b13030ac2710d76495e25e0e Mon Sep 17 00:00:00 2001
From: ajspig <46900795+ajspig@users.noreply.github.com>
Date: Mon, 20 Apr 2026 13:27:35 -0400
Subject: [PATCH 11/46] feat: adding honcho-cli package (#424)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* feat: adding honcho-cli package
* feat: adding more support for command-level flags, also including workarounds for getting raw SDK info
* feat: adding peer config
* feat: adding setup commands
* chore: setting up package dependencies for cli
* feat: promote init/doctor to top-level + polish wizard
* feat: make init --yes fall back to existing config
* chore: updating documentation
* chore: updating tagline
* feat: structurally updating recommended settings for CLI
* fix: style
* fix: removing redundant describe method
* fix: delete key generation commands and fixing session ID
* fix: removing defaults and changing config write path.
* chore: paginating conclusions
* chore: require workspace
* fix: polish command surfaces — scoping, validation, perf, consistency
* chore: removing session message
* fix: CLI output shape, destructive-confirm previews, skip needless round-trips
* chore: CLI polish — peer inspect config, drop dead helper, doc/help consistency
* chore: update readme
* chore: updating tests
* chore: doc updates
* fix: config command
* chore: unused code
* fix: doctor command
* fix: removing quiet tag and fixing session key ordering
* fix: config commands and session id command
* fix: removing message_count
* fix: branding circular dependency
* fix: refactor lazy imports to use common.py correctly.
* fix: removing all lazy imports
* chore: cr fixes
* fix: config, env, flag setup
* chore: updating skill
* feat: adding workspace, session, and message create
* fix: init now supports local honcho
* chore: cr
* feat(cli): CLI surface polish — reasoning flag, peer-scoped messages, help sync

Add --reasoning/-r to peer chat (minimal..max), -p peer filter to message list with newest-first ordering, and a curated welcome panel with getting-started/memory/commands sections.
Sync the welcome panel and group help strings with the actual registered commands — drop phantom 'session clone', add the 4 missing peer commands and 7 missing session commands, fix conclusion/message/ workspace group docstrings that claimed commands that don't exist. * feat(cli): themed, unified help system with pattern/example Replace the hand-rolled welcome with a layered system: - Theme typer.rich_utils (dim borders, brand color) so every --help inherits the voice. - HonchoTyperGroup subclass renders a curated 3-panel welcome (getting started / memory / commands) with recipes Typer can't auto-generate. - Unify the front door: bare 'honcho', 'honcho --help', and 'honcho help' all render the same welcome via one code path; sub-groups and leaf commands still get Typer's themed renderer. - Replace Click's 'Usage: …' line with pattern/example rows at every sub-group and leaf command, so the help voice stays consistent from top to leaves. * refactor(cli): address review — typed exceptions, chmod 600, tighter redaction, class-based help, tests - Replace module-level monkey-patch of TyperGroup/TyperCommand.get_usage with HonchoTyperGroup applied via cls= on every sub-Typer. Lives in a new _help.py module to avoid circular imports. No longer leaks behavior changes into other Typer users in the same process. - _test_connection dispatches on the SDK's typed exceptions (AuthenticationError, ConnectionError, TimeoutError, APIError) instead of substring-matching error messages. - Config.save() now chmods ~/.honcho/config.json to 0o600 after write so the plaintext API key isn't world-readable on multi-user hosts. - Tighten api_key redaction to '***' (was 'header...last4'), matching setup._redact for consistency. Short keys fully masked. - Add test_validation.py covering safe IDs, unsafe chars, path traversal, and empty input. Update test_config.py redaction cases and add 0o600 permission assertion. Fix stale patch paths in test_commands.py that pointed at honcho_cli.main instead of the command modules where get_client is actually imported. * feat(cli): add options panel to welcome menu Append a fourth panel listing the global flags (-w/-p/-s, --json, --version, --help) with their env-var counterparts. Discoverable from bare 'honcho' without needing to hunt for --help. * chore(cli): drop --version from welcome options panel * feat(cli): add pixel-honcho icon to banner Prepend a 13-char ASCII rendering of honcho-pixel.svg to the HONCHO wordmark. Uses Unicode half-blocks to pack 12 pixel rows into 6 text rows, faithfully preserving the SVG outline (two eye dots, mouth slit, tapering foot). Appears in bare 'honcho', 'honcho --help', 'honcho --version', and 'honcho init'. 
* fix: polish Honcho CLI welcome panel and error messages
* fix: honcho workspace inspect speed
* chore: minor fix to session pagination
* fix: removing NDJSON output
* chore: consolidating honcho CLI's dual argv grammar onto Pattern A (command-first)
* chore: clean up imports
* fix: four `-s` consistency fixes applied
* chore: minor changes to memory rows
* fix: changing package name to honcho-cli
* fix: removing pixel face

---------

Co-authored-by: Erosika
---
 .claude/skills/honcho-integration/SKILL.md    |  14 +
 .gitignore                                    |   1 +
 honcho-cli/README.md                          | 211 ++++++
 honcho-cli/pyproject.toml                     |  53 ++
 honcho-cli/src/honcho_cli/__init__.py         |   3 +
 honcho-cli/src/honcho_cli/_help.py            | 130 ++++
 honcho-cli/src/honcho_cli/branding.py         |  17 +
 .../src/honcho_cli/commands/__init__.py       |   0
 .../src/honcho_cli/commands/conclusion.py     | 219 ++++++
 .../src/honcho_cli/commands/config_cmd.py     |  31 +
 honcho-cli/src/honcho_cli/commands/message.py | 161 ++++
 honcho-cli/src/honcho_cli/commands/peer.py    | 308 ++++++++
 honcho-cli/src/honcho_cli/commands/session.py | 404 ++++++++++
 honcho-cli/src/honcho_cli/commands/setup.py   | 287 ++++++++
 .../src/honcho_cli/commands/workspace.py      | 322 ++++++++
 honcho-cli/src/honcho_cli/common.py           | 112 +++
 honcho-cli/src/honcho_cli/config.py           | 150 ++++
 honcho-cli/src/honcho_cli/main.py             |  96 +++
 honcho-cli/src/honcho_cli/output.py           | 104 +++
 honcho-cli/src/honcho_cli/skills/CONTEXT.md   |  50 ++
 .../src/honcho_cli/skills/honcho-debug.md     |  54 ++
 .../src/honcho_cli/skills/honcho-inspect.md   |  53 ++
 honcho-cli/src/honcho_cli/validation.py       |  44 ++
 honcho-cli/tests/__init__.py                  |   0
 honcho-cli/tests/test_commands.py             | 195 +++++
 honcho-cli/tests/test_config.py               | 113 +++
 honcho-cli/tests/test_validation.py           |  57 ++
 honcho-cli/uv.lock                            | 407 +++++++++++
 pyproject.toml                                |   2 +
 sdks/python/pyproject.toml                    |   3 +
 uv.lock                                       | 688 ++----------------
 31 files changed, 3660 insertions(+), 629 deletions(-)
 create mode 100644 honcho-cli/README.md
 create mode 100644 honcho-cli/pyproject.toml
 create mode 100644 honcho-cli/src/honcho_cli/__init__.py
 create mode 100644 honcho-cli/src/honcho_cli/_help.py
 create mode 100644 honcho-cli/src/honcho_cli/branding.py
 create mode 100644 honcho-cli/src/honcho_cli/commands/__init__.py
 create mode 100644 honcho-cli/src/honcho_cli/commands/conclusion.py
 create mode 100644 honcho-cli/src/honcho_cli/commands/config_cmd.py
 create mode 100644 honcho-cli/src/honcho_cli/commands/message.py
 create mode 100644 honcho-cli/src/honcho_cli/commands/peer.py
 create mode 100644 honcho-cli/src/honcho_cli/commands/session.py
 create mode 100644 honcho-cli/src/honcho_cli/commands/setup.py
 create mode 100644 honcho-cli/src/honcho_cli/commands/workspace.py
 create mode 100644 honcho-cli/src/honcho_cli/common.py
 create mode 100644 honcho-cli/src/honcho_cli/config.py
 create mode 100644 honcho-cli/src/honcho_cli/main.py
 create mode 100644 honcho-cli/src/honcho_cli/output.py
 create mode 100644 honcho-cli/src/honcho_cli/skills/CONTEXT.md
 create mode 100644 honcho-cli/src/honcho_cli/skills/honcho-debug.md
 create mode 100644 honcho-cli/src/honcho_cli/skills/honcho-inspect.md
 create mode 100644 honcho-cli/src/honcho_cli/validation.py
 create mode 100644 honcho-cli/tests/__init__.py
 create mode 100644 honcho-cli/tests/test_commands.py
 create mode 100644 honcho-cli/tests/test_config.py
 create mode 100644 honcho-cli/tests/test_validation.py
 create mode 100644 honcho-cli/uv.lock
diff --git a/.claude/skills/honcho-integration/SKILL.md b/.claude/skills/honcho-integration/SKILL.md
index 68f436d14..ecfdb009c 100644
--- 
a/.claude/skills/honcho-integration/SKILL.md +++ b/.claude/skills/honcho-integration/SKILL.md @@ -91,6 +91,8 @@ Based on interview responses, implement the integration: ### Phase 4: Verification +- If the Honcho CLI is available, run `honcho doctor` to confirm connectivity before testing the integration code +- Use `honcho peer list` and `honcho peer chat` to verify peers exist and the dialectic endpoint works independently of the integration - Ensure all message exchanges are stored to Honcho - Verify AI peers have `observe_me=False` (unless user specifically wants AI observation) - Check that the workspace ID is consistent across the codebase @@ -106,6 +108,16 @@ Based on interview responses, implement the integration: 2. **Get an API key** ask the user to get a Honcho API key from and add it to the environment. +3. **Verify with the CLI** (optional but recommended). If the user has the Honcho CLI installed (`pip install honcho-cli`), they can validate their setup before writing any integration code: + + ```bash + honcho init # persist API key + URL to ~/.honcho/config.json + honcho doctor # verify connectivity, config, workspace health + honcho peer chat # test the dialectic endpoint interactively + ``` + + This is the fastest way to confirm the API key and URL are correct before debugging SDK code. + ## Installation ### Python (use uv) @@ -524,6 +536,8 @@ When integrating Honcho into an existing codebase: - [ ] Pre-fetch pattern for simpler integrations - [ ] context() for conversation history - [ ] Store messages after each exchange to build user models +- [ ] (Optional) Run `honcho doctor` to verify connectivity before testing integration code +- [ ] (Optional) Use `honcho peer chat` to test dialectic queries independently ## Common Mistakes to Avoid diff --git a/.gitignore b/.gitignore index ea5ccca18..e6cbf1d7e 100644 --- a/.gitignore +++ b/.gitignore @@ -182,6 +182,7 @@ docs/node_modules timing_logs.csv +config.json config.toml .aider* diff --git a/honcho-cli/README.md b/honcho-cli/README.md new file mode 100644 index 000000000..8271d200a --- /dev/null +++ b/honcho-cli/README.md @@ -0,0 +1,211 @@ +``` +██╗ ██╗ ██████╗ ███╗ ██╗ ██████╗██╗ ██╗ ██████╗ +██║ ██║██╔═══██╗████╗ ██║██╔════╝██║ ██║██╔═══██╗ +███████║██║ ██║██╔██╗ ██║██║ ███████║██║ ██║ +██╔══██║██║ ██║██║╚██╗██║██║ ██╔══██║██║ ██║ +██║ ██║╚██████╔╝██║ ╚████║╚██████╗██║ ██║╚██████╔╝ +╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═════╝╚═╝ ╚═╝ ╚═════╝ +``` + +# honcho-cli + +A terminal for [Honcho](https://honcho.dev) — memory that reasons. + +## Install + +As a standalone tool (recommended): + +```bash +uv tool install honcho-cli +``` + +As an extra on the Honcho SDK (if you want both the SDK and the CLI in one project): + +```bash +uv add honcho-ai[cli] +# or +pip install honcho-ai[cli] +``` + +Either way, you'll get the `honcho` command on your PATH. + +## Quick Start + +```bash +honcho init # confirm/set apiKey + Honcho URL in ~/.honcho/config.json +honcho doctor # verify your config + connectivity +honcho # show banner + command list +``` + +`honcho init` reads `apiKey` and `environmentUrl` from the top-level of `~/.honcho/config.json` (the same file other Honcho tools — plugins, host integrations — share). If both are present, it confirms them with you; if either is missing (or you decline), it prompts for the missing value(s) and writes them back. Host-specific entries under `hosts` are left untouched. 
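+
+For example, a minimal `~/.honcho/config.json` after `honcho init` might look like this (the same shape shown under Configuration below; the `hosts` entry is illustrative):
+
+```json
+{
+  "apiKey": "hch-v3-...",
+  "environmentUrl": "https://api.honcho.dev",
+  "hosts": { "claude_code": { "...": "..." } }
+}
+```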
+
+Per-command scoping (workspace / peer / session) is handled via `-w` / `-p` / `-s` flags or `HONCHO_*` env vars — not persisted as CLI defaults.
+
+## Commands
+
+### Onboarding
+
+| Command | Description |
+|---------|-------------|
+| `honcho init` | Confirm/set `apiKey` + `environmentUrl` in `~/.honcho/config.json` |
+| `honcho doctor` | Health check: config, connectivity, workspace, peer, queue |
+
+### Workspaces
+
+| Command | Description |
+|---------|-------------|
+| `honcho workspace list` | List accessible workspaces |
+| `honcho workspace create <id>` | Create or get a workspace |
+| `honcho workspace inspect` | Peers, sessions, config for a workspace |
+| `honcho workspace search <query>` | Search messages across workspace |
+| `honcho workspace queue-status` | Deriver queue status (filter with `--observer` / `--sender`) |
+| `honcho workspace delete <id>` | Delete a workspace. Use `--dry-run` to preview, `--cascade` to also delete sessions, `--yes` to skip the confirm prompt |
+
+### Peers
+
+| Command | Description |
+|---------|-------------|
+| `honcho peer list` | List peers in the workspace |
+| `honcho peer create <id>` | Create or get a peer |
+| `honcho peer inspect <id>` | Card, session count, recent conclusions |
+| `honcho peer card <id>` | Raw peer card content |
+| `honcho peer chat <query>` | Query the dialectic about a peer (peer via `-p` / `HONCHO_PEER_ID`) |
+| `honcho peer representation <id>` | Formatted representation |
+| `honcho peer search <query>` | Search a peer's messages (peer via `-p` / `HONCHO_PEER_ID`) |
+| `honcho peer get-metadata <id>` / `set-metadata` | Metadata operations |
+
+### Sessions
+
+| Command | Description |
+|---------|-------------|
+| `honcho session list` | List sessions in the workspace (filter with `--peer/-p`) |
+| `honcho session create <id>` | Create or get a session (optionally `--peers` to add peers, `--metadata`) |
+| `honcho session inspect <id>` | Peers, message count, summaries, config |
+| `honcho session context <id>` | What an agent would see |
+| `honcho session summaries <id>` | Short + long summaries |
+| `honcho session peers <id>` / `add-peers` / `remove-peers` | Peer management |
+| `honcho session search <query>` | Search messages in a session |
+| `honcho session representation <id>` | Peer representation in a session |
+| `honcho session get-metadata <id>` / `set-metadata` | Metadata operations |
+| `honcho session delete <id>` | Destructive; requires `--yes` |
+
+### Messages
+
+| Command | Description |
+|---------|-------------|
+| `honcho message list` | List messages in a session (session via `-s` / `HONCHO_SESSION_ID`) |
+| `honcho message create <content>` | Create a message (requires `--peer/-p`, session via `-s`) |
+| `honcho message get <id>` | Get a single message (session via `-s` / `HONCHO_SESSION_ID`) |
+
+### Conclusions (observations)
+
+| Command | Description |
+|---------|-------------|
+| `honcho conclusion list` | List conclusions (filter with `--observer` / `--observed`) |
+| `honcho conclusion search <query>` | Semantic search (filter with `--observer` / `--observed`) |
+| `honcho conclusion create` | Create a conclusion |
+| `honcho conclusion delete <id>` | Delete a conclusion |
+
+### Config
+
+| Command | Description |
+|---------|-------------|
+| `honcho config` | Show current config (API key redacted) |
+
+## Agent Usage
+
+All commands output JSON when stdout isn't a TTY, or when `--json` is forced.
+Collection commands emit JSON arrays, and single-resource commands emit JSON objects: + +```bash +honcho peer list --json +honcho workspace inspect --json | jq '.peers' +honcho doctor --json # machine-parseable health checklist +``` + +Errors are structured: + +```json +{ + "error": { + "code": "PEER_NOT_FOUND", + "message": "Peer 'abc' not found in workspace 'my-ws'", + "details": {"workspace_id": "my-ws", "peer_id": "abc"} + } +} +``` + +Non-interactive onboarding: + +```bash +# Pre-seed via flags / env vars; init still prompts for anything missing +HONCHO_API_KEY=hch-v3-xxx honcho init --base-url https://api.honcho.dev +``` + +## Environment Variables + +All `HONCHO_*` env vars work at runtime — no config file required. + +Precedence (highest first): **flag → env var → config file → default**. + +| Variable | Flag | Description | +|----------|------|-------------| +| `HONCHO_API_KEY` | `--api-key` (init) | Admin JWT | +| `HONCHO_BASE_URL` | `--base-url` (init) | API URL | +| `HONCHO_WORKSPACE_ID` | `-w` / `--workspace` | Workspace scope | +| `HONCHO_PEER_ID` | `-p` / `--peer` | Peer scope | +| `HONCHO_SESSION_ID` | `-s` / `--session` | Session scope | +| `HONCHO_JSON` | `--json` | Force JSON output (`1` / `true`) | + +```bash +# Per-command flags +honcho peer card -w prod -p user + +# Or export once per shell +export HONCHO_WORKSPACE_ID=prod +export HONCHO_PEER_ID=user +honcho peer card + +# One-off against a different server +HONCHO_BASE_URL=http://localhost:8000 honcho workspace list + +# CI/CD — env vars only, no config file needed +export HONCHO_API_KEY=hch-v3-xxx +export HONCHO_BASE_URL=https://api.honcho.dev +honcho workspace list +``` + +## Configuration + +The CLI shares `~/.honcho/config.json` with sibling Honcho tools. It owns two +top-level keys: `apiKey` and `environmentUrl` (the full Honcho API URL, e.g. +`https://api.honcho.dev` or `http://localhost:8000`). Everything else at the +top level — `hosts`, `sessions`, `saveMessages`, `sessionStrategy`, etc. — +is left untouched. + +```json +{ + "apiKey": "hch-v3-...", + "environmentUrl": "https://api.honcho.dev", + "hosts": { "claude_code": { "...": "..." } } +} +``` + +`workspace_id` / `peer_id` / `session_id` are per-command only — never +persisted to the config file. + +## Development + +Install from source in editable mode so changes are picked up live: + +```bash +git clone https://github.com/plastic-labs/honcho +cd honcho +uv tool install --force --editable --from ./honcho-cli honcho-cli +``` + +Re-run any time — changes to `honcho-cli/src/` are reflected immediately without reinstalling. + +## License + +MIT diff --git a/honcho-cli/pyproject.toml b/honcho-cli/pyproject.toml new file mode 100644 index 000000000..c06f22e05 --- /dev/null +++ b/honcho-cli/pyproject.toml @@ -0,0 +1,53 @@ +[project] +name = "honcho-cli" +version = "0.1.0" +description = "A terminal for Honcho — memory that reasons." 
+readme = "README.md" +requires-python = ">=3.11" +license = "MIT" +authors = [ + { name = "Plastic Labs", email = "hello@plasticlabs.ai" }, +] +classifiers = [ + "Development Status :: 3 - Alpha", + "Environment :: Console", + "Intended Audience :: Developers", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Topic :: Software Development :: Libraries", +] +dependencies = [ + "typer>=0.15.0", + "honcho-ai>=2.0.0", + "rich>=13.0.0", + "httpx>=0.27.0", +] + +[project.urls] +Homepage = "https://github.com/plastic-labs/honcho" +Repository = "https://github.com/plastic-labs/honcho" + +[project.scripts] +honcho = "honcho_cli.main:app" + +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[tool.hatch.build.targets.wheel] +packages = ["src/honcho_cli"] + +[tool.pytest.ini_options] +testpaths = ["tests"] + +[project.optional-dependencies] +dev = [ + "pytest>=8.0.0", + "pytest-mock>=3.14.0", +] + +[[tool.uv.index]] +name = "testpypi" +url = "https://test.pypi.org/simple/" +publish-url = "https://test.pypi.org/legacy/" +explicit = true diff --git a/honcho-cli/src/honcho_cli/__init__.py b/honcho-cli/src/honcho_cli/__init__.py new file mode 100644 index 000000000..f38546a96 --- /dev/null +++ b/honcho-cli/src/honcho_cli/__init__.py @@ -0,0 +1,3 @@ +"""Honcho CLI — a terminal for Honcho.""" + +__version__ = "0.1.0" diff --git a/honcho-cli/src/honcho_cli/_help.py b/honcho-cli/src/honcho_cli/_help.py new file mode 100644 index 000000000..d5dcb976b --- /dev/null +++ b/honcho-cli/src/honcho_cli/_help.py @@ -0,0 +1,130 @@ +"""Themed help rendering for honcho CLI. + +Single source of truth for: + +- Rich-utils theme constants (dim borders, brand color) +- HonchoTyperGroup: subclass applied via ``cls=`` at every Typer app in + this package — replaces Click's terse ``Usage: …`` line with + pattern/example rows and prints a curated welcome at the top-level. + +Lives in its own module so every ``commands/*.py`` can import it without +pulling in ``main.py`` (which would create an import cycle). +""" + +from __future__ import annotations + +import click +import typer.rich_utils as ru +from rich import box +from rich.console import Console +from rich.panel import Panel +from rich.table import Table +from typer.core import TyperGroup + +from honcho_cli import __version__ +from honcho_cli.branding import BANNER, BRAND +from honcho_cli.output import use_json + + +# Theme Typer's rich help renderer. Module-level side effect limited to +# styling — no behavior changes that could surprise other Typer users. 
+ru.STYLE_COMMANDS_PANEL_BORDER = "dim"
+ru.STYLE_OPTIONS_PANEL_BORDER = "dim"
+ru.STYLE_ERRORS_PANEL_BORDER = "dim"
+ru.STYLE_OPTION = f"bold {BRAND}"
+ru.STYLE_SWITCH = f"bold {BRAND}"
+ru.STYLE_USAGE = "dim"
+ru.STYLE_USAGE_COMMAND = f"bold {BRAND}"
+
+
+def _cmd_table(rows: list[tuple[str, str]]) -> Table:
+    t = Table(show_header=False, box=None, padding=(0, 2, 0, 0), expand=False)
+    t.add_column("cmd", style=f"bold {BRAND}", no_wrap=True)
+    t.add_column("desc", style="default")
+    for cmd, desc in rows:
+        t.add_row(cmd, desc)
+    return t
+
+
+def _welcome_panel(title: str, rows: list[tuple[str, str]]) -> Panel:
+    return Panel(
+        _cmd_table(rows),
+        title=f"[dim]{title}[/dim]",
+        title_align="left",
+        border_style="dim",
+        box=box.ROUNDED,
+        padding=(0, 1),
+        expand=False,
+    )
+
+
+def print_welcome(console: Console) -> None:
+    """Render the curated welcome panels (banner + getting started / commands / memory / options)."""
+    if use_json():
+        return
+    console.print(f"[bold {BRAND}]{BANNER}[/bold {BRAND}]")
+    console.print(f" [dim]v{__version__}[/dim]\n", highlight=False)
+
+    start_rows = [
+        ("honcho init", "configure API key and server URL"),
+        ("honcho doctor", "verify connection and workspace health"),
+    ]
+    cmd_rows = [
+        ("[dim]pattern[/dim]", r"[dim]honcho <group> <command> \[args] \[-w workspace] \[-p peer] \[-s session][/dim]"),
+        ("[dim]example[/dim]", "[dim]honcho peer chat \"what does alice prefer?\" -p alice -w agents[/dim]"),
+        ("", ""),
+        ("workspace", "list · create · search · delete · inspect · queue-status"),
+        ("peer", "list · create · search · inspect · card · chat"),
+        ("", "get-metadata · set-metadata · representation"),
+        ("session", "list · create · search · delete · inspect · add-peers"),
+        ("", "context · get-metadata · set-metadata · peers"),
+        ("", "remove-peers · representation · summaries"),
+        ("message", "list · create · get"),
+        ("conclusion", "list · create · search · delete"),
+        ("config", "inspect current configuration"),
+    ]
+    memory_rows = [
+        ("honcho peer chat \"...\" -p <peer> -w <workspace>", "query the Dialectic about a peer"),
+        ("honcho peer inspect -p <peer> -w <workspace>", "dashboard: peer card + recent conclusions + configuration"),
+        ("honcho peer representation -p <peer> -w <workspace>", "global peer representation"),
+        ("honcho peer representation -p <peer> -w <workspace> -s <session>", "session-scoped peer representation"),
+        ("honcho peer card -p <peer> -w <workspace>", "synthesized identity: traits, preferences, instructions"),
+        ("honcho conclusion list -p <peer> -w <workspace>", "browse peer conclusions"),
+    ]
+
+    option_rows = [
+        ("-w / --workspace", "scope to a workspace"),
+        ("-p / --peer", "scope to a peer"),
+        ("-s / --session", "scope to a session"),
+        ("--json", "force JSON output for scripts and agents"),
+        ("--help", "show help for any command (e.g. honcho peer --help)"),
+    ]
+
+    console.print(_welcome_panel("getting started", start_rows))
+    console.print(_welcome_panel("commands", cmd_rows))
+    console.print(_welcome_panel("memory", memory_rows))
+    console.print(_welcome_panel("options", option_rows))
+    console.print()
+
+
+class HonchoTyperGroup(TyperGroup):
+    """Typer group with pattern/example usage and top-level welcome.
+
+    Applied via ``cls=`` on every ``typer.Typer(...)`` in this package,
+    so no class-level monkey-patching is needed.
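+
+    A minimal sketch of the opt-in used by each sub-app in this package::
+
+        app = typer.Typer(cls=HonchoTyperGroup, help="...")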
+ """ + + def get_usage(self, ctx): + """Replace Click's 'Usage: …' with pattern/example rows.""" + original = click.Command.get_usage(self, ctx) + pattern = original.replace("Usage: ", "", 1) if original.startswith("Usage: ") else original + subs = self.list_commands(ctx) + example = f"{ctx.command_path} {subs[0]}" if subs else f"{ctx.command_path} --help" + return f"pattern: {pattern}\nexample: {example}" + + def format_help(self, ctx, formatter): + """Top-level --help renders the welcome; sub-groups fall through to Typer.""" + if ctx.parent is None: + print_welcome(Console()) + return + super().format_help(ctx, formatter) diff --git a/honcho-cli/src/honcho_cli/branding.py b/honcho-cli/src/honcho_cli/branding.py new file mode 100644 index 000000000..2534a0b2b --- /dev/null +++ b/honcho-cli/src/honcho_cli/branding.py @@ -0,0 +1,17 @@ +"""Honcho CLI brand constants — colours, icons, and the ASCII banner. +""" + +BRAND = "#B6DAFD" + +BANNER = """ +██╗ ██╗ ██████╗ ███╗ ██╗ ██████╗██╗ ██╗ ██████╗ +██║ ██║██╔═══██╗████╗ ██║██╔════╝██║ ██║██╔═══██╗ +███████║██║ ██║██╔██╗ ██║██║ ███████║██║ ██║ +██╔══██║██║ ██║██║╚██╗██║██║ ██╔══██║██║ ██║ +██║ ██║╚██████╔╝██║ ╚████║╚██████╗██║ ██║╚██████╔╝ +╚═╝ ╚═╝ ╚═════╝ ╚═╝ ╚═══╝ ╚═════╝╚═╝ ╚═╝ ╚═════╝ +""".strip("\n") + +ICON_OK = "[green]✓[/green]" +ICON_FAIL = "[red]✗[/red]" +ICON_RUN = f"[{BRAND}]→[/{BRAND}]" diff --git a/honcho-cli/src/honcho_cli/commands/__init__.py b/honcho-cli/src/honcho_cli/commands/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/honcho-cli/src/honcho_cli/commands/conclusion.py b/honcho-cli/src/honcho_cli/commands/conclusion.py new file mode 100644 index 000000000..1a158de6b --- /dev/null +++ b/honcho-cli/src/honcho_cli/commands/conclusion.py @@ -0,0 +1,219 @@ +"""Conclusion commands: list, search, create, delete.""" + +from __future__ import annotations + +import json +from typing import Optional + +import typer + +from honcho_cli.commands.workspace import _handle_error +from honcho_cli.output import print_error, print_result, status, use_json +from honcho_cli.validation import validate_resource_id + +from honcho_cli._help import HonchoTyperGroup +from honcho_cli.common import add_common_options, get_client, get_resolved_config, handle_cmd_flags + +app = typer.Typer(cls=HonchoTyperGroup, help="List, search, create, and delete peer conclusions (Honcho's memory atoms).") +add_common_options(app) + + +def _require_observer(observer: str | None) -> str: + """Resolve observer peer ID; emit combined error if peer+workspace both missing.""" + config = get_resolved_config() + obs = observer or config.peer_id + if not obs: + if not config.workspace_id: + print_error( + "NO_SCOPE", + "No peer or workspace scoped. Pass --peer/-p and --workspace/-w, or set HONCHO_PEER_ID and HONCHO_WORKSPACE_ID.", + ) + else: + print_error("NO_PEER", "Peer required. 
Pass --peer/-p: honcho conclusion <command> -p <peer>")
+        raise typer.Exit(1)
+    return obs
+
+
+@app.command("list")
+def list_conclusions(
+    observer: Optional[str] = typer.Option(None, "--observer", help="Observer peer ID"),
+    observed: Optional[str] = typer.Option(None, "--observed", help="Observed peer ID"),
+    limit: int = typer.Option(10, "--limit", help="Max results"),
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """List conclusions."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer)
+    observer = _require_observer(observer)
+    client, config = get_client()
+
+    p = client.peer(observer)
+
+    try:
+        if observed:
+            scope = p.conclusions_of(observed)
+        else:
+            scope = p.conclusions
+
+        conclusions = scope.list(size=limit).items
+        items = [
+            {
+                "id": c.id,
+                "content": c.content if use_json() else c.content[:200],
+                "workspace_id": config.workspace_id,
+                "observer_id": c.observer_id,
+                "observed_id": c.observed_id,
+                "session_id": c.session_id,
+                "created_at": str(c.created_at),
+            }
+            for c in conclusions
+        ]
+        print_result(items, columns=["id", "content", "workspace_id", "observer_id", "observed_id", "session_id", "created_at"], title="Conclusions")
+    except Exception as e:
+        _handle_error(e, "conclusion", "list")
+
+
+@app.command()
+def search(
+    query: str = typer.Argument(help="Search query"),
+    observer: Optional[str] = typer.Option(None, "--observer", help="Observer peer ID"),
+    observed: Optional[str] = typer.Option(None, "--observed", help="Observed peer ID"),
+    top_k: int = typer.Option(10, help="Max results"),
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """Semantic search over conclusions."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer)
+    observer = _require_observer(observer)
+    client, config = get_client()
+
+    p = client.peer(observer)
+
+    try:
+        if observed:
+            scope = p.conclusions_of(observed)
+        else:
+            scope = p.conclusions
+
+        results = scope.query(query, top_k=top_k)
+        items = [
+            {
+                "id": c.id,
+                "content": c.content if use_json() else c.content[:200],
+                "workspace_id": config.workspace_id,
+                "observer_id": c.observer_id,
+                "observed_id": c.observed_id,
+                "session_id": c.session_id,
+                "created_at": str(c.created_at),
+            }
+            for c in results
+        ]
+        print_result(items, columns=["id", "content", "workspace_id", "session_id", "created_at"], title=f"Conclusion search: {query}")
+    except Exception as e:
+        _handle_error(e, "conclusion", "search")
+
+
+@app.command()
+def create(
+    content: str = typer.Argument(help="Conclusion content or JSON payload"),
+    observer: Optional[str] = typer.Option(None, "--observer", help="Observer peer ID"),
+    observed: Optional[str] = typer.Option(None, "--observed", help="Observed peer ID"),
+    session_id: Optional[str] = typer.Option(None, "--session", "-s", help="Session context"),
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """Create a conclusion."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer, session=session_id)
+    observer = _require_observer(observer)
+    client, config = get_client()
+
+    # If content looks like JSON, try to parse it
+    try:
+        payload = json.loads(content)
+        if isinstance(payload, dict):
+            content = payload.get("content", content)
+    except json.JSONDecodeError:
+        pass
+
+    p = client.peer(observer)
+
+    try:
+        if observed:
+            scope = p.conclusions_of(observed)
+        else:
+            scope = p.conclusions
+
+        params: dict[str, object] = {"content": content}
+        if config.session_id:
+            params["session_id"] = config.session_id
+        results = scope.create([params])
+        result = results[0] if results else None
+        if result is None:
+            print_error("CREATE_FAILED", "Conclusion create returned no results")
+            raise typer.Exit(1)
+        print_result({
+            "id": result.id,
+            "content": result.content,
+            "workspace_id": config.workspace_id,
+            "observer_id": result.observer_id,
+            "observed_id": result.observed_id,
+            "session_id": result.session_id,
+            "created_at": str(result.created_at),
+        })
+    except Exception as e:
+        _handle_error(e, "conclusion", "create")
+
+
+@app.command()
+def delete(
+    conclusion_id: str = typer.Argument(help="Conclusion ID to delete"),
+    observer: Optional[str] = typer.Option(None, "--observer", help="Observer peer ID"),
+    observed: Optional[str] = typer.Option(None, "--observed", help="Observed peer ID"),
+    yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation"),
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """Delete a conclusion."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer)
+    validate_resource_id(conclusion_id, "conclusion")
+    client, config = get_client()
+
+    if not observer:
+        observer = config.peer_id
+    if not observer:
+        print_error("NO_PEER", "Peer required. Pass --peer/-p: honcho conclusion <command> -p <peer>")
+        raise typer.Exit(1)
+
+    p = client.peer(observer)
+
+    if not yes:
+        # SDK doesn't expose a get-by-id on ConclusionScope, so we can't
+        # preview content cheaply — don't paginate the list just to
+        # decorate the prompt. Show identifying fields only.
+        if not use_json():
+            typer.echo(
+                f"  id: {conclusion_id}\n"
+                f"  observer: {observer}\n"
+                f"  observed: {observed or '(self)'}"
+            )
+        typer.confirm(f"Delete conclusion '{conclusion_id}'?", abort=True)
+
+    try:
+        if observed:
+            scope = p.conclusions_of(observed)
+        else:
+            scope = p.conclusions
+
+        scope.delete(conclusion_id)
+        status(f"Conclusion '{conclusion_id}' deleted")
+        print_result({"deleted": conclusion_id})
+    except Exception as e:
+        _handle_error(e, "conclusion", conclusion_id)
diff --git a/honcho-cli/src/honcho_cli/commands/config_cmd.py b/honcho-cli/src/honcho_cli/commands/config_cmd.py
new file mode 100644
index 000000000..814685fcb
--- /dev/null
+++ b/honcho-cli/src/honcho_cli/commands/config_cmd.py
@@ -0,0 +1,31 @@
+"""Config inspection command: ``honcho config``.
+
+Writing to ``~/.honcho/config.json`` is done only via ``honcho init``, which
+manages the two CLI-owned keys (``apiKey`` + ``environmentUrl``).
+Workspace / peer / session scoping is per-command via flags / env vars, not
+persisted defaults.
+""" + +from __future__ import annotations + +import typer + +from honcho_cli._help import HonchoTyperGroup +from honcho_cli.common import handle_cmd_flags +from honcho_cli.config import CLIConfig +from honcho_cli.output import print_result + +app = typer.Typer(cls=HonchoTyperGroup, help="Inspect CLI configuration.", invoke_without_command=True) + + +@app.callback(invoke_without_command=True) +def config( + ctx: typer.Context, + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Show current config (api key redacted).""" + if ctx.invoked_subcommand is not None: + return + handle_cmd_flags(json_output=json_output) + cfg = CLIConfig.load() + print_result(cfg.redacted()) diff --git a/honcho-cli/src/honcho_cli/commands/message.py b/honcho-cli/src/honcho_cli/commands/message.py new file mode 100644 index 000000000..b810917ae --- /dev/null +++ b/honcho-cli/src/honcho_cli/commands/message.py @@ -0,0 +1,161 @@ +"""Message commands: list, get, create.""" + +from __future__ import annotations + +import hashlib +import json +from typing import Optional + +import typer + +from honcho.api_types import MessageCreateParams + +from honcho_cli.commands.session import _get_session_id +from honcho_cli.commands.workspace import _handle_error +from honcho_cli.output import print_error, print_result, status +from honcho_cli.validation import validate_resource_id + +from honcho_cli._help import HonchoTyperGroup +from honcho_cli.common import add_common_options, get_client, handle_cmd_flags + +app = typer.Typer(cls=HonchoTyperGroup, help="List, create, and get messages within a session.") +add_common_options(app) + + +@app.command("list") +def list_messages( + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + last: int = typer.Option(20, "--last", help="Number of recent messages"), + reverse: bool = typer.Option(False, "--reverse", help="Show oldest first (default is newest first)"), + brief: bool = typer.Option(False, "--brief", help="Show only IDs, peer, token count, and created_at (no content)"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Filter by peer ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """List messages in a session. Scoped to a peer with -p.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + filters = {"peer_id": config.peer_id} if config.peer_id else None + # Fetch newest-first so [:last] always gives the most recent N messages, + # then flip to oldest-at-top / newest-at-bottom for readable display. + # --reverse keeps the raw server order (oldest first, descending in table). 
+ msgs = sess.messages(filters=filters, reverse=True).items[:last] + if not reverse: + msgs = list(reversed(msgs)) + + # Detect duplicate content + content_hashes: dict[str, list[str]] = {} + for m in msgs: + h = hashlib.md5(m.content.encode()).hexdigest() + content_hashes.setdefault(h, []).append(m.id) + dupes = {h: ids for h, ids in content_hashes.items() if len(ids) > 1} + if dupes: + dupe_count = sum(len(ids) - 1 for ids in dupes.values()) + status(f"Warning: {dupe_count} duplicate message(s) detected (identical content, different IDs)") + + if brief: + items = [ + { + "id": m.id, + "peer_id": m.peer_id, + "token_count": m.token_count, + "created_at": str(m.created_at), + } + for m in msgs + ] + print_result(items, columns=["id", "peer_id", "token_count", "created_at"], title="Messages") + else: + items = [ + { + "id": m.id, + "peer_id": m.peer_id, + "content": m.content, + "token_count": m.token_count, + "metadata": m.metadata, + "created_at": str(m.created_at), + } + for m in msgs + ] + print_result(items, columns=["id", "peer_id", "content", "created_at"], title="Messages") + except Exception as e: + _handle_error(e, "message", "list") + + +@app.command("create") +def create_message( + content: str = typer.Argument(help="Message content"), + peer_id: str = typer.Option(..., "--peer", "-p", help="Peer ID of the message sender"), + metadata: Optional[str] = typer.Option(None, "--metadata", help="JSON metadata to associate with the message"), + session_id: Optional[str] = typer.Option(None, "--session", "-s", help="Session ID"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Create a message in a session.""" + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session_id) + sid = _get_session_id(None) + validate_resource_id(peer_id, "peer") + client, config = get_client() + sess = client.session(sid) + + parsed_metadata = None + if metadata: + try: + parsed_metadata = json.loads(metadata) + except json.JSONDecodeError as e: + print_error("INVALID_JSON", f"--metadata must be valid JSON: {e}", {}) + raise typer.Exit(1) + + try: + msgs = sess.add_messages(MessageCreateParams( + peer_id=peer_id, + content=content, + metadata=parsed_metadata, + )) + msg = msgs[0] + print_result({ + "id": msg.id, + "peer_id": msg.peer_id, + "content": msg.content, + "token_count": msg.token_count, + "created_at": str(msg.created_at), + }) + except Exception as e: + _handle_error(e, "message", "create") + + +@app.command("get") +def get_message( + message_id: str = typer.Argument(help="Message ID"), + session_id: Optional[str] = typer.Option(None, "--session", "-s", help="Session ID"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Get a single message by ID.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace) + validate_resource_id(message_id, "message") + sid = _get_session_id(session_id) + client, config = get_client() + + try: + sess = client.session(sid) + msg = sess.get_message(message_id) + + print_result({ + "id": msg.id, + "peer_id": msg.peer_id, + "content": msg.content, + "token_count": msg.token_count, + "metadata": msg.metadata, + "created_at": str(msg.created_at), + }) + except SystemExit: + raise + except Exception as e: + _handle_error(e, "message", message_id) 
diff --git a/honcho-cli/src/honcho_cli/commands/peer.py b/honcho-cli/src/honcho_cli/commands/peer.py new file mode 100644 index 000000000..856fab229 --- /dev/null +++ b/honcho-cli/src/honcho_cli/commands/peer.py @@ -0,0 +1,308 @@ +"""Peer commands: list, inspect, card, chat, search, create, metadata, representation.""" + +from __future__ import annotations + +import json +from typing import Optional + +import typer + +from honcho.api_types import PeerConfig + +from honcho_cli.commands.workspace import _config_to_dict, _handle_error, _raw_list +from honcho_cli.output import print_error, print_result, use_json +from honcho_cli.validation import validate_resource_id + +from honcho_cli._help import HonchoTyperGroup +from honcho_cli.common import add_common_options, get_client, get_resolved_config, handle_cmd_flags + +app = typer.Typer(cls=HonchoTyperGroup, help="List, create, chat with, search, and manage peers and their representations.") +add_common_options(app) + + +def _get_peer_id(peer_id: str | None) -> str: + + config = get_resolved_config() + pid = peer_id or config.peer_id + if not pid: + if not config.workspace_id: + print_error( + "NO_SCOPE", + "No peer or workspace scoped. Pass --peer/-p and --workspace/-w, or set HONCHO_PEER_ID and HONCHO_WORKSPACE_ID.", + ) + else: + print_error("NO_PEER", "No peer ID provided. Pass --peer/-p or set HONCHO_PEER_ID.") + raise typer.Exit(1) + return validate_resource_id(pid, "peer") + + +@app.command("list") +def list_peers( + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """List all peers in the workspace.""" + handle_cmd_flags(json_output=json_output, workspace=workspace) + client, config = get_client() + + try: + raw_peers = _raw_list(client.peers()) + items = [ + { + "id": p.id, + "metadata": p.metadata, + "configuration": _config_to_dict(p.configuration) if p.configuration else None, + "created_at": str(p.created_at), + } + for p in raw_peers + ] + print_result(items, columns=["id", "metadata", "created_at"], title="Peers") + except Exception as e: + _handle_error(e, "peer", "list") + + +@app.command() +def inspect( + peer_id: Optional[str] = typer.Argument(None, help="Peer ID (uses default if omitted)"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Inspect a peer: card, session count, recent conclusions.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer) + pid = _get_peer_id(peer_id) + client, config = get_client() + p = client.peer(pid) + + try: + card = p.get_card() + peer_config = p.get_configuration() + # First page only; SyncPage.total (when the server supplies it) is + # authoritative for counts without walking every page. 
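+        # Illustrative: .total may report, e.g., 42 sessions even though
+        # .items holds only the first page of results.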
+        session_page = p.sessions()
+        conclusion_page = p.conclusions.list(size=10)
+
+        session_items = session_page.items
+        conclusion_items = conclusion_page.items
+
+        result = {
+            "id": pid,
+            "card": card,
+            "configuration": _config_to_dict(peer_config) if peer_config else None,
+            "session_count": session_page.total,
+            "conclusion_count": conclusion_page.total,
+            "recent_conclusions": [
+                {"id": c.id, "content": c.content if use_json() else c.content[:200], "created_at": str(c.created_at)}
+                for c in conclusion_items
+            ],
+            "sessions": [{"id": s.id} for s in session_items[:10]],
+        }
+        print_result(result)
+    except Exception as e:
+        _handle_error(e, "peer", pid)
+
+
+@app.command()
+def card(
+    peer_id: Optional[str] = typer.Argument(None, help="Peer ID (uses default if omitted)"),
+    target: Optional[str] = typer.Option(None, help="Target peer for relationship card"),
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """Get raw peer card content."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer)
+    pid = _get_peer_id(peer_id)
+    client, config = get_client()
+    p = client.peer(pid)
+
+    try:
+        result = p.get_card(target=target)
+        print_result({"peer_id": pid, "target": target, "card": result})
+    except Exception as e:
+        _handle_error(e, "peer", pid)
+
+
+@app.command()
+def chat(
+    query: str = typer.Argument(help="Question to ask about the peer"),
+    target: Optional[str] = typer.Option(None, help="Target peer for perspective"),
+    reasoning: Optional[str] = typer.Option(None, "--reasoning", "-r", help="Reasoning level: minimal, low, medium, high, max"),
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"),
+    session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """Query the dialectic about a peer."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer, session=session)
+
+    # Validate after flags are applied so --json governs the error format.
+    _REASONING_LEVELS = ("minimal", "low", "medium", "high", "max")
+    if reasoning and reasoning not in _REASONING_LEVELS:
+        print_error("INVALID_REASONING", f"--reasoning must be one of: {', '.join(_REASONING_LEVELS)}")
+        raise typer.Exit(1)
+
+    pid = _get_peer_id(None)
+    client, config = get_client()
+    p = client.peer(pid)
+
+    try:
+        response = p.chat(
+            query,
+            target=target,
+            session=config.session_id or None,
+            reasoning_level=reasoning or None,
+        )
+        print_result({"peer_id": pid, "query": query, "response": response})
+    except Exception as e:
+        _handle_error(e, "peer", pid)
+
+
+@app.command()
+def search(
+    query: str = typer.Argument(help="Search query"),
+    limit: int = typer.Option(10, help="Max results"),
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """Search a peer's messages."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer)
+    pid = _get_peer_id(None)
+    client, config = get_client()
+ p = client.peer(pid) + + try: + results = p.search(query, limit=limit) + items = [ + { + "id": m.id, + "content": m.content if use_json() else m.content[:200], + "session_id": m.session_id, + "created_at": str(m.created_at), + } + for m in results + ] + print_result(items, columns=["id", "session_id", "content", "created_at"], title=f"Peer search: {query}") + except Exception as e: + _handle_error(e, "peer", pid) + + +@app.command("create") +def create_peer( + peer_id: str = typer.Argument(help="Peer ID to create or get"), + observe_me: Optional[bool] = typer.Option(None, "--observe-me/--no-observe-me", help="Whether Honcho will form a representation of this peer"), + metadata: Optional[str] = typer.Option(None, "--metadata", help="JSON metadata to associate with the peer"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Create or get a peer.""" + handle_cmd_flags(json_output=json_output, workspace=workspace) + pid = validate_resource_id(peer_id, "peer") + client, config = get_client() + + parsed_metadata = None + if metadata: + try: + parsed_metadata = json.loads(metadata) + except json.JSONDecodeError as e: + print_error("INVALID_JSON", f"--metadata must be valid JSON: {e}", {}) + raise typer.Exit(1) + + peer_config = PeerConfig(observe_me=observe_me) if observe_me is not None else None + + try: + p = client.peer(pid, configuration=peer_config, metadata=parsed_metadata) + # Only round-trip to the server when the caller passed config or + # metadata — in that case get-or-create may have returned a + # pre-existing peer and the echoed output would lie. When no input + # was passed, skip the two extra API calls entirely. + result: dict[str, object] = {"peer_id": p.id} + if peer_config is not None or parsed_metadata is not None: + result["metadata"] = p.get_metadata() + result["configuration"] = _config_to_dict(p.get_configuration()) + print_result(result) + except Exception as e: + _handle_error(e, "peer", pid) + + +@app.command("get-metadata") +def get_metadata( + peer_id: Optional[str] = typer.Argument(None, help="Peer ID (uses default if omitted)"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Get metadata for a peer.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer) + pid = _get_peer_id(peer_id) + client, config = get_client() + p = client.peer(pid) + + try: + result = p.get_metadata() + print_result({"peer_id": pid, "metadata": result}) + except Exception as e: + _handle_error(e, "peer", pid) + + +@app.command("set-metadata") +def set_metadata( + metadata: str = typer.Argument(help="JSON metadata to set (e.g. 
'{\"key\": \"value\"}')"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Peer ID (uses default if omitted)"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Set metadata for a peer.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer) + pid = _get_peer_id(None) + client, config = get_client() + + try: + parsed = json.loads(metadata) + except json.JSONDecodeError as e: + print_error("INVALID_JSON", f"metadata must be valid JSON: {e}", {}) + raise typer.Exit(1) + + p = client.peer(pid) + + try: + p.set_metadata(parsed) + print_result({"peer_id": pid, "metadata": parsed}) + except Exception as e: + _handle_error(e, "peer", pid) + + +@app.command() +def representation( + peer_id: Optional[str] = typer.Argument(None, help="Peer ID (uses default if omitted)"), + target: Optional[str] = typer.Option(None, help="Target peer to get representation about"), + search_query: Optional[str] = typer.Option(None, help="Semantic search query to filter conclusions"), + max_conclusions: Optional[int] = typer.Option(None, help="Maximum number of conclusions to include"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Get the formatted representation for a peer.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, peer=peer, session=session) + pid = _get_peer_id(peer_id) + client, config = get_client() + p = client.peer(pid) + + try: + result = p.representation( + target=target, + session=config.session_id or None, + search_query=search_query, + max_conclusions=max_conclusions, + ) + print_result({"peer_id": pid, "target": target, "representation": result}) + except Exception as e: + _handle_error(e, "peer", pid) diff --git a/honcho-cli/src/honcho_cli/commands/session.py b/honcho-cli/src/honcho_cli/commands/session.py new file mode 100644 index 000000000..2c5d811be --- /dev/null +++ b/honcho-cli/src/honcho_cli/commands/session.py @@ -0,0 +1,404 @@ +"""Session commands: list, inspect, context, summaries, peers, search, representation, metadata.""" + +from __future__ import annotations + +import json +from typing import List, Optional + +import typer + +from honcho import HonchoError + +from honcho_cli.commands.workspace import _config_to_dict, _handle_error, _raw_list +from honcho_cli.output import print_error, print_result, status, use_json +from honcho_cli.validation import validate_resource_id + +from honcho_cli._help import HonchoTyperGroup +from honcho_cli.common import add_common_options, get_client, get_resolved_config, handle_cmd_flags + +app = typer.Typer(cls=HonchoTyperGroup, help="List, inspect, create, delete, and manage conversation sessions and their peers.") +add_common_options(app) + + +def _get_session_id(session_id: str | None) -> str: + + config = get_resolved_config() + sid = session_id or config.session_id + if not sid: + print_error("NO_SESSION", "No session ID provided. 
Pass --session/-s or set HONCHO_SESSION_ID.") + raise typer.Exit(1) + return validate_resource_id(sid, "session") + + +@app.command("list") +def list_sessions( + peer_id: Optional[str] = typer.Option(None, "--peer", "-p", help="Filter by peer"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """List sessions in the workspace.""" + handle_cmd_flags(json_output=json_output, workspace=workspace) + client, config = get_client() + + try: + if peer_id: + peer = client.peer(peer_id) + raw_sessions = _raw_list(peer.sessions()) + else: + raw_sessions = _raw_list(client.sessions()) + + items = [ + { + "id": s.id, + "is_active": s.is_active, + "metadata": s.metadata, + "created_at": str(s.created_at), + } + for s in raw_sessions + ] + print_result(items, columns=["id", "is_active", "metadata", "created_at"], title="Sessions") + except Exception as e: + _handle_error(e, "session", "list") + + +@app.command("create") +def create_session( + session_id: str = typer.Argument(help="Session ID to create or get"), + peers: Optional[str] = typer.Option(None, "--peers", help="Comma-separated peer IDs to add to the session"), + metadata: Optional[str] = typer.Option(None, "--metadata", help="JSON metadata to associate with the session"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Create or get a session.""" + handle_cmd_flags(json_output=json_output, workspace=workspace) + sid = validate_resource_id(session_id, "session") + client, config = get_client() + + parsed_metadata = None + if metadata: + try: + parsed_metadata = json.loads(metadata) + except json.JSONDecodeError as e: + print_error("INVALID_JSON", f"--metadata must be valid JSON: {e}", {}) + raise typer.Exit(1) + + peer_ids = [p.strip() for p in peers.split(",") if p.strip()] if peers else [] + for pid in peer_ids: + validate_resource_id(pid, "peer") + + try: + sess = client.session(sid, metadata=parsed_metadata) + if peer_ids: + sess.add_peers(peer_ids) + result: dict[str, object] = {"session_id": sess.id} + if parsed_metadata is not None: + result["metadata"] = parsed_metadata + if peer_ids: + result["peers"] = peer_ids + print_result(result) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command() +def inspect( + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Inspect a session: peers, message count, summaries, config.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + peers = sess.peers() + msg_page = sess.messages() + summaries = sess.summaries() + sess_config = sess.get_configuration() + + result = { + "session_id": sid, + "peers": [{"id": p.id} for p in peers], + "message_count": msg_page.total, + "summaries": { + "short": summaries.short_summary if hasattr(summaries, "short_summary") else None, + "long": summaries.long_summary if hasattr(summaries, 
"long_summary") else None, + }, + "configuration": _config_to_dict(sess_config) if sess_config else None, + } + print_result(result) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command() +def context( + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + tokens: Optional[int] = typer.Option(None, help="Token budget"), + summary: bool = typer.Option(True, help="Include summary"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Get session context (what an agent would see).""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + ctx = sess.context(tokens=tokens, summary=summary) + result = ctx.__dict__ if hasattr(ctx, "__dict__") else ctx + print_result(result) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command() +def summaries( + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Get session summaries (short + long).""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + s = sess.summaries() + result = { + "session_id": sid, + "short_summary": s.short_summary if hasattr(s, "short_summary") else None, + "long_summary": s.long_summary if hasattr(s, "long_summary") else None, + } + print_result(result) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command() +def delete( + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Delete a session and all its data. Destructive — requires --yes or interactive confirm.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + if not yes: + # Show a short preview so the user knows what's about to disappear. + # Only in interactive/TTY mode — scripted (--json) callers already + # know what they're deleting, and they still need to pass --yes. + # Narrow the except to HonchoError so auth/network failures surface + # before the user types 'y' on a destructive op. 
+ if not use_json(): + try: + peers = sess.peers() + msg_page = sess.messages() + peer_ids = [p.id for p in peers] + typer.echo( + f" session: {sid}\n" + f" peers: {', '.join(peer_ids) if peer_ids else '(none)'}\n" + f" messages: {msg_page.total}" + ) + except HonchoError as preview_err: + status(f"preview unavailable: {preview_err}") + typer.confirm(f"Delete session '{sid}' and all its messages, conclusions, and queue items?", abort=True) + + try: + sess.delete() + status(f"Session '{sid}' deleted") + print_result({"deleted": sid}) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command("peers") +def session_peers( + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """List peers in a session.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + peers = sess.peers() + items = [{"id": p.id} for p in peers] + print_result(items, columns=["id"], title=f"Session peers ({sid})") + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command("add-peers") +def add_peers( + session_id: str = typer.Argument(help="Session ID"), + peer_ids: List[str] = typer.Argument(help="Peer IDs to add to the session"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Add peers to a session.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + sess.add_peers(peer_ids) + print_result({"session_id": sid, "added_peers": peer_ids}) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command("remove-peers") +def remove_peers( + session_id: str = typer.Argument(help="Session ID"), + peer_ids: List[str] = typer.Argument(help="Peer IDs to remove from the session"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Remove peers from a session.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + sess.remove_peers(peer_ids) + print_result({"session_id": sid, "removed_peers": peer_ids}) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command() +def search( + query: str = typer.Argument(help="Search query"), + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + limit: int = typer.Option(10, help="Max results"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Search messages in a session.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, 
session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + results = sess.search(query, limit=limit) + items = [ + { + "id": m.id, + "peer_id": m.peer_id, + "content": m.content if use_json() else m.content[:200], + "created_at": str(m.created_at), + } + for m in results + ] + print_result(items, columns=["id", "peer_id", "content", "created_at"], title=f"Session search: {query}") + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command() +def representation( + peer_id: str = typer.Argument(help="Peer ID to get representation for"), + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + target: Optional[str] = typer.Option(None, help="Target peer (what peer_id knows about target)"), + search_query: Optional[str] = typer.Option(None, help="Semantic search query to filter conclusions"), + max_conclusions: Optional[int] = typer.Option(None, help="Maximum number of conclusions to include"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Get the representation of a peer within a session.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + result = sess.representation( + peer_id, + target=target, + search_query=search_query, + max_conclusions=max_conclusions, + ) + print_result({"session_id": sid, "peer_id": peer_id, "target": target, "representation": result}) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command("get-metadata") +def get_metadata( + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Get metadata for a session.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + sess = client.session(sid) + + try: + result = sess.get_metadata() + print_result({"session_id": sid, "metadata": result}) + except Exception as e: + _handle_error(e, "session", sid) + + +@app.command("set-metadata") +def set_metadata( + session_id: Optional[str] = typer.Argument(None, help="Session ID (uses default if omitted)"), + metadata: str = typer.Option(..., "--data", "-d", help="JSON metadata to set (e.g. 
'{\"key\": \"value\"}')"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Set metadata for a session.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace, session=session) + sid = _get_session_id(session_id) + client, config = get_client() + + try: + parsed = json.loads(metadata) + except json.JSONDecodeError as e: + print_error("INVALID_JSON", f"metadata must be valid JSON: {e}", {}) + raise typer.Exit(1) + + sess = client.session(sid) + + try: + sess.set_metadata(parsed) + print_result({"session_id": sid, "metadata": parsed}) + except Exception as e: + _handle_error(e, "session", sid) diff --git a/honcho-cli/src/honcho_cli/commands/setup.py b/honcho-cli/src/honcho_cli/commands/setup.py new file mode 100644 index 000000000..6073b3e45 --- /dev/null +++ b/honcho-cli/src/honcho_cli/commands/setup.py @@ -0,0 +1,287 @@ +"""Top-level onboarding and health-check commands. + +`honcho init` — confirm or set apiKey + Honcho URL in ~/.honcho/config.json +`honcho doctor` — verify connectivity, config validity, queue health +""" + +from __future__ import annotations + +import json + +import typer +from honcho import ( + APIError, + AuthenticationError, + ConnectionError as HonchoConnectionError, + Honcho, + TimeoutError as HonchoTimeoutError, +) +from rich.console import Console +from rich.panel import Panel + +from honcho_cli import __version__ +from honcho_cli.branding import BANNER, BRAND, ICON_FAIL, ICON_OK, ICON_RUN +from honcho_cli.common import get_resolved_config +from honcho_cli.config import ( + CONFIG_FILE, + DEFAULT_BASE_URL, + CLIConfig, +) +from honcho_cli.output import print_error, print_result, set_json_mode, use_json + +_console = Console(stderr=True) + + +# --------------------------------------------------------------------------- # +# shared helpers + +def _redact(api_key: str) -> str: + """Show ``***`` — enough to compare keys without leaking the body.""" + if not api_key: + return "" + if len(api_key) <= 4: + return "***" + return "***" + api_key[-4:] + + +def _read_file_values() -> tuple[str, str]: + """Return (apiKey, environmentUrl) persisted on disk (or empty strings).""" + if not CONFIG_FILE.exists(): + return "", "" + try: + with open(CONFIG_FILE, encoding="utf-8") as f: + data = json.load(f) + except (json.JSONDecodeError, OSError): + return "", "" + if not isinstance(data, dict): + return "", "" + key = data.get("apiKey") if isinstance(data.get("apiKey"), str) else "" + url = data.get("environmentUrl") if isinstance(data.get("environmentUrl"), str) else "" + return key, url + + +def _test_connection(base_url: str, api_key: str) -> tuple[bool, str]: + """Probe the Honcho API by listing workspaces. Returns (ok, detail). + + Dispatches on the SDK's typed exception hierarchy instead of matching + substrings of error messages — robust to SDK message changes and locale. + """ + try: + list(Honcho(base_url=base_url, api_key=api_key).workspaces()) + return True, "OK" + except AuthenticationError: + return False, "Unauthorized — check your API key" + except HonchoConnectionError: + return False, "Connection refused — is the server running?" 
+ except HonchoTimeoutError: + return False, "Request timed out" + except APIError as e: + return False, f"API error ({e.status}): {e}" + except Exception as e: + return False, str(e) + + +def _pick(flag_val: str | None, file_val: str) -> str: + """Return best available value. Flag/env wins over file.""" + return flag_val or file_val or "" + + +# --------------------------------------------------------------------------- # +# honcho init + +def init( + api_key: str | None = typer.Option(None, "--api-key", envvar="HONCHO_API_KEY", help="API key (admin JWT)"), + base_url: str | None = typer.Option(None, "--base-url", envvar="HONCHO_BASE_URL", help="Honcho API URL (e.g. https://api.honcho.dev, http://localhost:8000)"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Set API key and server URL in ~/.honcho/config.json. + + Press Enter to keep the current value or type a replacement. + Workspace / peer / session scoping is per-command via -w / -p / -s + or HONCHO_* env vars — never persisted. + """ + + if json_output: + set_json_mode(True) + + file_key, file_url = _read_file_values() + key_val = _pick(api_key, file_key) + url_val = _pick(base_url, file_url).strip() + + if not use_json(): + _console.print() + _console.print(Panel( + f"[bold {BRAND}]{BANNER}[/bold {BRAND}]\n\n Memory that reasons", + expand=False, subtitle=f"Honcho CLI · v{__version__}", + )) + _console.print() + _console.print() + + final_key = _prompt_api_key(key_val) + final_url = _prompt_url(url_val) + + # Persist if anything changed or if the value came from env/flag. + if final_key != file_key or final_url != file_url: + CLIConfig(base_url=final_url, api_key=final_key).save() + if not use_json(): + _console.print(f" {ICON_OK} [dim]Saved to {CONFIG_FILE}[/dim]") + + _check_connection(final_url, final_key) + + if use_json(): + print_result({"apiKey": _redact(final_key), "baseUrl": final_url}) + + +def _prompt_api_key(value: str) -> str: + """Prompt for API key. + + When a key already exists (from env var or config file), the user picks + between keeping it or entering a replacement. When no key exists, the + user can paste one or press Enter to skip (local dev with auth disabled + doesn't need a key). + """ + if use_json(): + return value + + if value: + redacted = _redact(value) + _console.print(f" [dim]Current API key: {redacted}[/dim]") + _console.print(" [dim](1)[/dim] Keep current key") + _console.print(" [dim](2)[/dim] Enter a new key") + choice = typer.prompt(" Choice", default="1", show_default=True, prompt_suffix=": ").strip() + if choice == "2": + raw = typer.prompt(" API key", default="", show_default=False, prompt_suffix=": ").strip() + return raw + return value + else: + _console.print(" [dim]Not needed for local dev — press Enter to skip[/dim]") + raw = typer.prompt(" API key", default="", show_default=False, prompt_suffix=": ").strip() + return raw + + +def _normalize_url(url: str) -> str: + """Strip whitespace from the URL.""" + return url.strip() + + +def _prompt_url(value: str) -> str: + """Prompt for Honcho URL. Shows current value as the default; Enter keeps it. + + First run defaults to DEFAULT_BASE_URL. After that, whatever is saved + in config becomes the default so the user isn't fighting back to their + custom URL every time. 
+ """ + if use_json(): + if value: + return _normalize_url(value) + print_error("MISSING_VALUE", "Honcho URL is required", {}) + raise typer.Exit(1) + + default = _normalize_url(value) if value else DEFAULT_BASE_URL + _console.print(" [dim]Use https://api.honcho.dev for the hosted Honcho instance[/dim]") + while True: + raw = typer.prompt(" Honcho URL", default=default, show_default=True, prompt_suffix=": ").strip() + url = _normalize_url(raw) + if url.startswith(("http://", "https://")): + return url + _console.print(" [red]URL must start with http:// or https://[/red]") + + +def _check_connection(base_url: str, api_key: str) -> None: + + + if not use_json(): + _console.print(f"\n {ICON_RUN} [dim]Testing connection to {base_url}...[/dim]", end=" ") + ok, detail = _test_connection(base_url, api_key) + if not ok: + if use_json(): + print_error("CONNECTION_FAILED", detail, {"base_url": base_url}) + else: + _console.print(f"{ICON_FAIL} [red]Failed[/red]: {detail}") + raise typer.Exit(1) + if not use_json(): + _console.print(f"{ICON_OK} [green]Connected[/green]") + + +# --------------------------------------------------------------------------- # +# honcho doctor + +def doctor( + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Verify config and connectivity. Scope with -w / -p to check workspace, peer, and queue health.""" + + + if json_output: + set_json_mode(True) + + checks: list[dict] = [] + + def _add(name: str, ok: bool, detail: str = "") -> None: + checks.append({"check": name, "ok": ok, "detail": detail}) + if not use_json(): + icon = ICON_OK if ok else ICON_FAIL + line = f" {icon} {name:<22}" + if detail: + line += f" [dim]{detail}[/dim]" + _console.print(line) + + if not use_json(): + _console.print(f"\n[bold {BRAND}]Honcho Doctor[/bold {BRAND}]\n") + + config = get_resolved_config() + _add("Config file", CONFIG_FILE.exists(), + str(CONFIG_FILE) if CONFIG_FILE.exists() else f"{CONFIG_FILE} not found") + _add("API key configured", bool(config.api_key), + "set" if config.api_key else "missing — run `honcho init`") + + if config.base_url and config.api_key: + _add("API connectivity", *_test_connection(config.base_url, config.api_key)) + else: + _add("API connectivity", False, "skipped — no base_url or api_key") + + # Workspace / peer / queue run only when scoped via -w / -p. 
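+    # `ws_ok` gates the queue and peer checks below so they are skipped
+    # cleanly rather than failing for the wrong reason.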
+ ws_ok, client = False, None + if config.workspace_id and config.api_key: + try: + + + client = Honcho(base_url=config.base_url, api_key=config.api_key, workspace_id=config.workspace_id) + client.get_configuration() + ws_ok = True + _add("Workspace reachable", True, config.workspace_id) + except Exception as e: + _add("Workspace reachable", False, f"{config.workspace_id}: {e}") + if ws_ok: + try: + q = client.queue_status() + _add("Queue health", True, f"{q.completed_work_units}/{q.total_work_units} completed, {q.pending_work_units} pending") + except Exception: + _add("Queue health", True, "endpoint not available (non-critical)") + + if config.peer_id: + if ws_ok and client is not None: + try: + client.peer(config.peer_id).get_card() + _add("Peer exists", True, config.peer_id) + except Exception as e: + _add("Peer exists", False, f"{config.peer_id}: {e}") + else: + _add("Peer exists", False, "skipped — workspace not reachable") + + passed = sum(1 for c in checks if c["ok"]) + total = len(checks) + + if use_json(): + print_result({"checks": checks, "passed": passed, "total": total}) + else: + color = BRAND if passed == total else ("yellow" if passed > total // 2 else "red") + hint = "" if config.workspace_id else " [dim](pass -w / -p to include workspace, peer, queue checks)[/dim]" + _console.print(f"\n [{color}]{passed}/{total}[/{color}] checks passed{hint}\n") + + # Config file + API connectivity are hard requirements. + critical = {"Config file", "API key configured", "API connectivity"} + if config.workspace_id: + critical.add("Workspace reachable") + if any(not c["ok"] for c in checks if c["check"] in critical): + raise typer.Exit(1) diff --git a/honcho-cli/src/honcho_cli/commands/workspace.py b/honcho-cli/src/honcho_cli/commands/workspace.py new file mode 100644 index 000000000..6aeda00e5 --- /dev/null +++ b/honcho-cli/src/honcho_cli/commands/workspace.py @@ -0,0 +1,322 @@ +"""Workspace commands: list, inspect, create, delete, search, queue-status.""" + +from __future__ import annotations + +import json +from typing import Optional + +import typer + +from honcho import ( + APIError, + AuthenticationError, + Honcho, + NotFoundError, + PermissionDeniedError, + ServerError, +) + +from honcho_cli.output import print_error, print_result, status, use_json +from honcho_cli.validation import validate_resource_id + +from honcho_cli._help import HonchoTyperGroup +from honcho_cli.common import add_common_options, get_client, get_resolved_config, handle_cmd_flags + +app = typer.Typer(cls=HonchoTyperGroup, help="List, create, inspect, delete, and search workspaces.") +add_common_options(app) + + +def _get_workspace_id(workspace_id: str | None) -> str: + + config = get_resolved_config() + wid = workspace_id or config.workspace_id + if not wid: + print_error("NO_WORKSPACE", "No workspace ID provided. 
Pass --workspace/-w or set HONCHO_WORKSPACE_ID.") + raise typer.Exit(1) + return validate_resource_id(wid, "workspace") + + +def _raw_list(page) -> list: + """Collect all raw API response items across all pages of a SyncPage.""" + items = list(page._raw_items) + while page.has_next_page(): + page = page.get_next_page() + if page is None: + break + items.extend(page._raw_items) + return items + + +@app.command("list") +def list_workspaces( + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """List all accessible workspaces.""" + + handle_cmd_flags(json_output=json_output) + client, config = get_client(require_workspace=False) + + try: + workspaces = list(client.workspaces()) + items = [{"id": w} for w in workspaces] + print_result(items, columns=["id"], title="Workspaces") + except Exception as e: + _handle_error(e, "workspace", "list") + + +@app.command("create") +def create_workspace( + workspace_id: str = typer.Argument(help="Workspace ID to create or get"), + metadata: Optional[str] = typer.Option(None, "--metadata", help="JSON metadata to associate with the workspace"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Create or get a workspace.""" + handle_cmd_flags(json_output=json_output) + wid = validate_resource_id(workspace_id, "workspace") + client, config = get_client(require_workspace=False) + ws_client = _with_workspace(client, wid) + + parsed_metadata = None + if metadata: + try: + parsed_metadata = json.loads(metadata) + except json.JSONDecodeError as e: + print_error("INVALID_JSON", f"--metadata must be valid JSON: {e}", {}) + raise typer.Exit(1) + + try: + # Trigger get-or-create via the workspace ensure mechanism + ws_client.get_configuration() + result: dict[str, object] = {"workspace_id": wid} + if parsed_metadata is not None: + ws_client.set_metadata(parsed_metadata) + result["metadata"] = parsed_metadata + print_result(result) + except Exception as e: + _handle_error(e, "workspace", wid) + + +@app.command() +def inspect( + workspace_id: Optional[str] = typer.Argument(None, help="Workspace ID (uses default if omitted)"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Inspect a workspace: peers, sessions, config.""" + + handle_cmd_flags(json_output=json_output, workspace=workspace) + + wid = _get_workspace_id(workspace_id) + client, config = get_client(require_workspace=False) + + # Override workspace if positional arg given + if workspace_id: + client = _with_workspace(client, workspace_id) + + try: + ws_config = client.get_configuration() + ws_metadata = client.get_metadata() + peer_page = client.peers() + session_page = client.sessions() + + raw_peers = peer_page._raw_items + raw_sessions = session_page._raw_items + + result = { + "workspace_id": wid, + "metadata": ws_metadata, + "configuration": _config_to_dict(ws_config) if ws_config else None, + "peer_count": peer_page.total, + "session_count": session_page.total, + "peers": [ + {"id": p.id, "metadata": p.metadata, "created_at": str(p.created_at)} + for p in raw_peers[:20] + ], + "sessions": [ + {"id": s.id, "is_active": s.is_active, "metadata": s.metadata, "created_at": str(s.created_at)} + for s in raw_sessions[:20] + ], + } + print_result(result) + except Exception as e: + _handle_error(e, "workspace", wid) + + +@app.command() +def delete( + workspace_id: str = 
typer.Argument(help="Workspace ID to delete"), + yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt (for scripted/agent use)"), + cascade: bool = typer.Option(False, "--cascade", help="Delete all sessions before deleting the workspace"), + dry_run: bool = typer.Option(False, "--dry-run", help="Show what would be deleted without deleting"), + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), +) -> None: + """Delete a workspace. Use --dry-run first to see what will be deleted. + + Requires --yes to skip confirmation, or will prompt interactively. + If sessions exist, requires --cascade to delete them first. + """ + + handle_cmd_flags(json_output=json_output) + + validate_resource_id(workspace_id, "workspace") + # workspace_id is a required positional and we rebuild the client with it + # immediately, so the default-workspace guard isn't needed here. + client, config = get_client(require_workspace=False) + ws_client = _with_workspace(client, workspace_id) + + # Verify workspace exists before prompting for confirmation + try: + ws_client.get_metadata() + except Exception as e: + _handle_error(e, "workspace", workspace_id) + return + + # Always fetch sessions for dry-run or cascade + raw_sessions = _raw_list(ws_client.sessions()) if (dry_run or cascade) else [] + + if dry_run: + print_result({ + "dry_run": True, + "workspace_id": workspace_id, + "sessions_to_delete": len(raw_sessions), + "session_ids": [s.id for s in raw_sessions], + "warning": "This action cannot be undone.", + }) + return + + if not yes: + if cascade and raw_sessions: + typer.confirm( + f"Delete workspace '{workspace_id}' and {len(raw_sessions)} session(s)? This cannot be undone.", + abort=True, + ) + else: + typer.confirm(f"Delete workspace '{workspace_id}'? 
This cannot be undone.", abort=True)
+
+    try:
+        deleted_sessions = []
+        if cascade and raw_sessions:
+            for s in raw_sessions:
+                ws_client.session(s.id).delete()
+                deleted_sessions.append(s.id)
+                status(f"Deleted session '{s.id}'")
+
+        ws_client.delete_workspace(workspace_id)
+        status(f"Workspace '{workspace_id}' deletion accepted (processing in background)")
+        result = {"deleted_workspace": workspace_id, "status": "accepted"}
+        if cascade:
+            result["deleted_sessions"] = deleted_sessions
+        print_result(result)
+    except Exception as e:
+        _handle_error(e, "workspace", workspace_id)
+
+
+@app.command()
+def search(
+    query: str = typer.Argument(help="Search query"),
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    limit: int = typer.Option(10, help="Max results"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """Search messages across workspace."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace)
+
+    wid = _get_workspace_id(None)
+    client, config = get_client()
+
+    try:
+        results = client.search(query, limit=limit)
+        items = [
+            {
+                "id": m.id,
+                "content": m.content if use_json() else m.content[:200],
+                "peer_id": m.peer_id,
+                "session_id": m.session_id,
+                "created_at": str(m.created_at),
+            }
+            for m in results
+        ]
+        print_result(items, columns=["id", "peer_id", "session_id", "content"], title=f"Search: {query}")
+    except Exception as e:
+        _handle_error(e, "workspace", wid)
+
+
+@app.command("queue-status")
+def queue_status(
+    workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"),
+    session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"),
+    observer: Optional[str] = typer.Option(None, help="Filter by observer peer"),
+    sender: Optional[str] = typer.Option(None, help="Filter by sender peer"),
+    json_output: bool = typer.Option(False, "--json", help="Force JSON output"),
+) -> None:
+    """Get queue processing status."""
+
+    handle_cmd_flags(json_output=json_output, workspace=workspace, session=session)
+
+    _get_workspace_id(None)
+    client, config = get_client()
+
+    try:
+        result = client.queue_status(observer=observer, sender=sender, session=config.session_id or None)
+        print_result(result.__dict__ if hasattr(result, "__dict__") else result)
+    except Exception as e:
+        _handle_error(e, "queue", "status")
+
+
+def _with_workspace(client, workspace_id: str):
+    """Return a new client pointed at a different workspace."""
+    return Honcho(
+        base_url=str(client.base_url),
+        api_key=client._http.api_key if hasattr(client._http, "api_key") else None,
+        workspace_id=workspace_id,
+    )
+
+
+def _config_to_dict(config) -> dict:
+    """Convert a config object to a dict, handling nested objects."""
+    if hasattr(config, "__dict__"):
+        result = {}
+        for k, v in config.__dict__.items():
+            if k.startswith("_"):
+                continue
+            result[k] = _config_to_dict(v) if hasattr(v, "__dict__") and not isinstance(v, str) else v
+        return result
+    return config
+
+
+def _handle_error(e: Exception, resource: str, resource_id: str) -> None:
+    """Handle SDK exceptions with structured error output.
+
+    Dispatches on the SDK's typed exception hierarchy
+    (``honcho.http.exceptions``) and falls back to its ``status`` field for
+    any APIError subclass we don't enumerate. Exceptions that aren't from
+    the SDK at all fall through to a generic ``UNKNOWN_ERROR``.
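+
+    Exit codes: 1 = client/API error, 2 = server error, 3 = auth/permission.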
+ """ + if isinstance(e, NotFoundError): + print_error( + f"{resource.upper()}_NOT_FOUND", + f"{resource.title()} '{resource_id}' not found", + {resource: resource_id}, + ) + raise typer.Exit(1) + if isinstance(e, AuthenticationError): + print_error("AUTH_ERROR", f"Authentication failed: {e}", {}) + raise typer.Exit(3) + if isinstance(e, PermissionDeniedError): + print_error("PERMISSION_ERROR", f"Permission denied: {e}", {}) + raise typer.Exit(3) + if isinstance(e, ServerError): + print_error("SERVER_ERROR", f"Server error: {e}", {resource: resource_id}) + raise typer.Exit(2) + if isinstance(e, APIError): + # Catch-all for typed API errors we haven't special-cased + # (BadRequest, Conflict, UnprocessableEntity, RateLimit, ...). + print_error( + "API_ERROR", + f"API error ({e.status}): {e}", + {resource: resource_id, "status": e.status}, + ) + raise typer.Exit(1) + print_error("UNKNOWN_ERROR", str(e), {resource: resource_id}) + raise typer.Exit(1) diff --git a/honcho-cli/src/honcho_cli/common.py b/honcho-cli/src/honcho_cli/common.py new file mode 100644 index 000000000..2680a8692 --- /dev/null +++ b/honcho-cli/src/honcho_cli/common.py @@ -0,0 +1,112 @@ +"""Shared runtime state, client factory, and command-level flag helpers. + +Flags --json, -w, -p, -s are documented at **command-level** (the canonical +form demonstrated in the welcome panel, README, and skill files): + + honcho workspace list -w granola --json + +They also parse at group-level and top-level for flexibility. All three +positions resolve identically and are idempotent — command-level is a +no-op if the same flag was already set at an outer level. +""" + +from __future__ import annotations + +from typing import Optional + +import typer + +from honcho import Honcho + +from honcho_cli.config import CLIConfig, get_client_kwargs +from honcho_cli.output import print_error, set_json_mode +from honcho_cli.validation import validate_resource_id + + +# Global overrides from flags (commands read these) +_global_overrides: dict[str, str | None] = { + "workspace": None, + "peer": None, + "session": None, +} + + +def get_resolved_config(): + """Get config with global flag overrides applied. + + Overrides flow through ``validate_resource_id`` so that a malformed + ``-w``/``-p``/``-s`` value fails fast with a structured error rather than + reaching the API and surfacing as an opaque ``UNKNOWN_ERROR``. + """ + config = CLIConfig.load() + + if _global_overrides["workspace"]: + config.workspace_id = validate_resource_id(_global_overrides["workspace"], "workspace") + if _global_overrides["peer"]: + config.peer_id = validate_resource_id(_global_overrides["peer"], "peer") + if _global_overrides["session"]: + config.session_id = validate_resource_id(_global_overrides["session"], "session") + + return config + + +def get_client(*, require_workspace: bool = True): + """Create a Honcho client from resolved config. + + By default, refuses to build a client when no workspace is scoped — the + SDK's get-or-create semantics would otherwise silently operate on an empty + workspace. Commands that legitimately run without a workspace (e.g. + ``workspace list``) pass ``require_workspace=False``. + """ + config = get_resolved_config() + if require_workspace and not config.workspace_id: + print_error( + "NO_WORKSPACE", + "No workspace scoped. 
Pass --workspace/-w or set HONCHO_WORKSPACE_ID.", + ) + raise typer.Exit(1) + return Honcho(**get_client_kwargs(config)), config + + +def handle_cmd_flags( + json_output: bool = False, + workspace: str | None = None, + peer: str | None = None, + session: str | None = None, + **_kwargs, +) -> None: + """Apply command-level flags. Idempotent if already set by group callback.""" + if json_output: + set_json_mode(True) + + if workspace: + _global_overrides["workspace"] = workspace + if peer: + _global_overrides["peer"] = peer + if session: + _global_overrides["session"] = session + + +def add_common_options(app: typer.Typer) -> None: + """Add a callback to a sub-app that accepts --json, -w, -p, -s.""" + + @app.callback(invoke_without_command=True) + def _callback( + ctx: typer.Context, + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), + workspace: Optional[str] = typer.Option(None, "--workspace", "-w", help="Override workspace ID"), + peer: Optional[str] = typer.Option(None, "--peer", "-p", help="Override peer ID"), + session: Optional[str] = typer.Option(None, "--session", "-s", help="Override session ID"), + ) -> None: + if json_output: + set_json_mode(True) + + if workspace: + _global_overrides["workspace"] = workspace + if peer: + _global_overrides["peer"] = peer + if session: + _global_overrides["session"] = session + + if ctx.invoked_subcommand is None: + typer.echo(ctx.get_help()) diff --git a/honcho-cli/src/honcho_cli/config.py b/honcho-cli/src/honcho_cli/config.py new file mode 100644 index 000000000..a0c648647 --- /dev/null +++ b/honcho-cli/src/honcho_cli/config.py @@ -0,0 +1,150 @@ +"""Configuration management for Honcho CLI. + +Config stored at ``~/.honcho/config.json`` with env var overrides. + +The CLI owns exactly two top-level keys in that file: + + apiKey -- Honcho admin JWT + environmentUrl -- Honcho API URL (full URL, e.g. https://api.honcho.dev) + +All other top-level keys (``hosts``, ``sessions``, ``saveMessages``, +``sessionStrategy``, …) are written by sibling Honcho tools and are +preserved untouched on save. + +Workspace / peer / session scoping is intentionally *not* persisted here — +pass ``-w`` / ``-p`` / ``-s`` flags or set ``HONCHO_WORKSPACE_ID`` / +``HONCHO_PEER_ID`` / ``HONCHO_SESSION_ID`` per command instead. +""" + +from __future__ import annotations + +import json +import os +from dataclasses import dataclass, fields +from pathlib import Path + +CONFIG_DIR = Path.home() / ".honcho" +CONFIG_FILE = CONFIG_DIR / "config.json" + +DEFAULT_BASE_URL = "https://api.honcho.dev" + +# Env var mapping for runtime overrides. +# +# Resolution order: flag > env var > config file > default. +ENV_MAP: dict[str, str] = { + "api_key": "HONCHO_API_KEY", + "base_url": "HONCHO_BASE_URL", + "workspace_id": "HONCHO_WORKSPACE_ID", + "peer_id": "HONCHO_PEER_ID", + "session_id": "HONCHO_SESSION_ID", +} + + +@dataclass +class CLIConfig: + """CLI configuration with layered resolution: flag > env > file > default. + + ``workspace_id`` / ``peer_id`` / ``session_id`` exist on this dataclass so + flag/env overrides flow through ``get_client_kwargs()``, but they are + never read from or written to the config file — they're per-command. 
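+
+    A sketch of the layering with hypothetical values::
+
+        # HONCHO_WORKSPACE_ID=demo honcho peer list
+        config = CLIConfig.load()    # file values overlaid with env vars
+        config.workspace_id          # "demo" (from env, never written back)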
+ """ + + base_url: str = DEFAULT_BASE_URL + api_key: str = "" + workspace_id: str = "" + peer_id: str = "" + session_id: str = "" + + @classmethod + def load(cls) -> CLIConfig: + """Load config from file, then overlay env vars.""" + config = cls() + + if CONFIG_FILE.exists(): + try: + with open(CONFIG_FILE, encoding="utf-8") as f: + data = json.load(f) + except (json.JSONDecodeError, OSError): + data = {} + + if isinstance(data, dict): + url = data.get("environmentUrl") + if isinstance(url, str) and url: + config.base_url = url + key = data.get("apiKey") + if isinstance(key, str): + config.api_key = key + + for fld_name, env_var in ENV_MAP.items(): + val = os.environ.get(env_var) + if val: + setattr(config, fld_name, val) + elif val == "": + # SDK reads these env vars directly and crashes on empty + # strings with a Pydantic ValidationError. Drop them so the + # SDK falls back to kwargs / defaults. + os.environ.pop(env_var, None) + + return config + + def save(self) -> None: + """Write ``apiKey`` + ``environmentUrl`` to config.json. + + Preserves unrelated top-level keys (``hosts``, ``sessions``, + ``saveMessages``, ``sessionStrategy``, …) that other tools write. + """ + CONFIG_DIR.mkdir(parents=True, exist_ok=True) + + data: dict = {} + if CONFIG_FILE.exists(): + try: + with open(CONFIG_FILE, encoding="utf-8") as f: + loaded = json.load(f) + if isinstance(loaded, dict): + data = loaded + except (json.JSONDecodeError, OSError): + data = {} + + data["environmentUrl"] = self.base_url + if self.api_key: + data["apiKey"] = self.api_key + else: + data.pop("apiKey", None) + + CONFIG_FILE.write_text(json.dumps(data, indent=2) + "\n") + # API key in plaintext — restrict to the owner on multi-user hosts. + try: + os.chmod(CONFIG_FILE, 0o600) + except OSError: + pass + + def redacted(self) -> dict[str, str]: + """Return config dict with api_key redacted. + + Only includes fields that have a value set — per-command fields + (workspace_id, peer_id, session_id) are omitted when empty. + """ + d: dict[str, str] = {} + for fld in fields(self): + val = getattr(self, fld.name) + if not val: + continue + if fld.name == "api_key": + # Show ``***`` only — enough to compare keys without + # leaking the header or body of the JWT. + d[fld.name] = "***" + val[-4:] if len(val) > 4 else "***" + else: + d[fld.name] = val + return d + + +def get_client_kwargs(config: CLIConfig) -> dict: + """Build kwargs for Honcho client from config.""" + kwargs: dict = {} + if config.base_url: + kwargs["base_url"] = config.base_url + if config.api_key: + kwargs["api_key"] = config.api_key + if config.workspace_id: + kwargs["workspace_id"] = config.workspace_id + return kwargs diff --git a/honcho-cli/src/honcho_cli/main.py b/honcho-cli/src/honcho_cli/main.py new file mode 100644 index 000000000..7ed8aa9b9 --- /dev/null +++ b/honcho-cli/src/honcho_cli/main.py @@ -0,0 +1,96 @@ +"""Honcho CLI — a terminal for Honcho. + +Entry point and top-level command group. 
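+
+Top-level commands: ``init``, ``doctor``. Groups: ``workspace``, ``peer``,
+``session``, ``message``, ``conclusion``, ``config``.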
+""" + +from __future__ import annotations + +import os +import sys + +import typer +from rich.console import Console + +from honcho_cli import __version__ +from honcho_cli._help import HonchoTyperGroup, print_welcome +from honcho_cli.branding import BANNER +from honcho_cli.output import set_json_mode + + +app = typer.Typer( + name="honcho", + cls=HonchoTyperGroup, + help="A terminal for Honcho — memory that reasons.", + invoke_without_command=True, + pretty_exceptions_enable=False, + add_completion=False, +) + + +def _json_requested_early() -> bool: + """Best-effort JSON detection before Typer parses flags. + + version_callback is eager and fires before set_json_mode() runs, so we + can't call use_json() here. Mirror its logic against argv/env/TTY. + """ + return ( + "--json" in sys.argv + or os.environ.get("HONCHO_JSON", "").lower() in ("1", "true") + or not sys.stdout.isatty() + ) + + +def version_callback(value: bool) -> None: + if value: + if not _json_requested_early(): + print(BANNER) + print(f" honcho-cli {__version__}") + raise typer.Exit() + + +@app.callback() +def main( + ctx: typer.Context, + json_output: bool = typer.Option(False, "--json", help="Force JSON output"), + version: bool = typer.Option(False, "--version", "-V", callback=version_callback, is_eager=True, help="Show version"), +) -> None: + """Honcho CLI — admin & debugging tool for Honcho workspaces.""" + set_json_mode(json_output) + + if ctx.invoked_subcommand is None: + print_welcome(Console()) + raise typer.Exit() + + +# Register top-level commands +from honcho_cli.commands.setup import doctor, init + +app.command()(init) +app.command()(doctor) + + +@app.command("help", hidden=True) +def help_cmd(ctx: typer.Context) -> None: + """Show help message.""" + Console().print(ctx.parent.get_help() if ctx.parent else "") + raise typer.Exit() + + +# Register command groups +from honcho_cli.commands.config_cmd import app as config_app +from honcho_cli.commands.conclusion import app as conclusion_app +from honcho_cli.commands.message import app as message_app +from honcho_cli.commands.peer import app as peer_app +from honcho_cli.commands.session import app as session_app +from honcho_cli.commands.workspace import app as workspace_app + +app.add_typer(peer_app, name="peer") +app.add_typer(session_app, name="session") +app.add_typer(message_app, name="message") +app.add_typer(conclusion_app, name="conclusion") +app.add_typer(workspace_app, name="workspace") +app.add_typer(config_app, name="config") + + +if __name__ == "__main__": + app() diff --git a/honcho-cli/src/honcho_cli/output.py b/honcho-cli/src/honcho_cli/output.py new file mode 100644 index 000000000..de7902b8f --- /dev/null +++ b/honcho-cli/src/honcho_cli/output.py @@ -0,0 +1,104 @@ +"""Output formatting: JSON, tables, and structured errors. + +Detects TTY to auto-switch between human-readable and machine-parseable output. 
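+
+JSON mode is on when any of the following holds: the ``--json`` flag was
+passed, ``HONCHO_JSON`` is ``1``/``true``, or stdout is not a TTY.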
+""" + +from __future__ import annotations + +import json +import os +import sys +from typing import Any + +from rich.console import Console +from rich.table import Table + +console = Console(stderr=True) +stdout_console = Console() + + +def is_tty() -> bool: + """Check if stdout is a TTY.""" + return sys.stdout.isatty() + + +# Global state for --json flag +_force_json = False + + +def set_json_mode(enabled: bool) -> None: + global _force_json + _force_json = enabled + + + +def use_json() -> bool: + """Should we output JSON?""" + return _force_json or os.environ.get("HONCHO_JSON", "").lower() in ("1", "true") or not is_tty() + + +def print_json(data: Any) -> None: + """Print a single JSON value to stdout.""" + print(json.dumps(data, indent=2, default=str)) + + +def print_table(columns: list[str], rows: list[list[str]], title: str | None = None) -> None: + """Print a rich table to stdout.""" + table = Table(title=title, show_header=True, header_style="bold") + for col in columns: + table.add_column(col) + for row in rows: + table.add_row(*row) + stdout_console.print(table) + + +def print_result(data: Any, columns: list[str] | None = None, title: str | None = None) -> None: + """Print data as JSON or table depending on mode. + + For lists, uses JSON arrays in JSON mode or tables in TTY mode. + For dicts, uses JSON or key-value display. + """ + if use_json(): + print_json(data) + else: + if isinstance(data, list) and columns: + rows = [] + for item in data: + row = [str(item.get(col, "")) if isinstance(item, dict) else str(item) for col in columns] + rows.append(row) + print_table(columns, rows, title=title) + elif isinstance(data, dict): + table = Table(show_header=False) + table.add_column("Field", style="bold") + table.add_column("Value") + for k, v in data.items(): + val = json.dumps(v, default=str) if isinstance(v, (dict, list)) else str(v) + table.add_row(k, val) + stdout_console.print(table) + else: + stdout_console.print(data) + + +def print_error(code: str, message: str, details: dict | None = None) -> None: + """Print structured error.""" + err = { + "error": { + "code": code, + "message": message, + } + } + if details: + err["error"]["details"] = details + + if use_json(): + print(json.dumps(err, default=str), file=sys.stderr) + else: + console.print(f"[red]Error[/red] ({code}): {message}") + if details: + for k, v in details.items(): + console.print(f" {k}: {v}") + + +def status(msg: str) -> None: + """Print a status message to stderr.""" + console.print(f"[dim]{msg}[/dim]") diff --git a/honcho-cli/src/honcho_cli/skills/CONTEXT.md b/honcho-cli/src/honcho_cli/skills/CONTEXT.md new file mode 100644 index 000000000..59e5fdcda --- /dev/null +++ b/honcho-cli/src/honcho_cli/skills/CONTEXT.md @@ -0,0 +1,50 @@ +--- +name: honcho-cli +version: 0.1.0 +description: A terminal for Honcho — memory that reasons. +--- + +# Honcho CLI — Agent Interface + +## Overview + +`honcho` is a CLI for administering and debugging Honcho workspaces. It wraps the Honcho Python SDK with agent-friendly defaults: JSON output, structured errors, input validation. + +## Output Modes + +- **TTY**: Human-readable tables (default when interactive) +- **Piped/scripted**: JSON automatically +- `--json`: Force JSON output + +## Exit Codes + +- 0: Success +- 1: Client error (bad input, not found) +- 2: Server error +- 3: Auth error + +## Config + +Shared with other Honcho tools at `~/.honcho/config.json`. The CLI owns only +`apiKey` and `environmentUrl` at the top level. 
Host-specific +entries under `hosts` are untouched. + +Run `honcho init` to confirm or set those two values. Workspace / peer / +session are per-command — pass them via flags or env vars: + +```bash +honcho peer card -w my-workspace -p my-peer +# or +export HONCHO_WORKSPACE_ID=my-workspace +export HONCHO_PEER_ID=my-peer +honcho peer card +``` + +## Command Groups + +- `honcho config` — Manage CLI configuration +- `honcho workspace` — Inspect, delete, search workspaces +- `honcho peer` — Inspect, card, chat, search peers +- `honcho session` — Inspect, messages, context, summaries +- `honcho message` — List and get messages +- `honcho conclusion` — List, search, create, delete conclusions diff --git a/honcho-cli/src/honcho_cli/skills/honcho-debug.md b/honcho-cli/src/honcho_cli/skills/honcho-debug.md new file mode 100644 index 000000000..31ce2203d --- /dev/null +++ b/honcho-cli/src/honcho_cli/skills/honcho-debug.md @@ -0,0 +1,54 @@ +--- +name: honcho-cli-debug +version: 0.1.0 +description: Debug Honcho peer representations and memory +--- + +# Honcho CLI — Debug Skills + +## Rules + +- Check queue status when derivation seems stalled +- Compare peer card with conclusions to understand memory state + +## Debugging Memory Issues + +### Peer not learning? + +```bash +# Check if observation is enabled +honcho peer inspect --json | jq '.configuration' + +# Check queue — are messages being processed? +honcho workspace queue-status --json + +# Check what conclusions exist +honcho conclusion list --observer --json +honcho conclusion search "expected topic" --observer --json +``` + +### Session context looks wrong? + +```bash +# See raw context +honcho session context --json + +# Check summaries +honcho session summaries --json + +# Check message history +honcho message list --last 50 --json +``` + +### Dialectic giving bad answers? + +```bash +# Check what the peer card says +honcho peer card --json + +# Check conclusions for the specific topic +honcho conclusion search "topic" --observer --json + +# Try the dialectic directly +honcho peer chat "what do you know about X?" --json +``` diff --git a/honcho-cli/src/honcho_cli/skills/honcho-inspect.md b/honcho-cli/src/honcho_cli/skills/honcho-inspect.md new file mode 100644 index 000000000..14c0cb94e --- /dev/null +++ b/honcho-cli/src/honcho_cli/skills/honcho-inspect.md @@ -0,0 +1,53 @@ +--- +name: honcho-cli-inspect +version: 0.1.0 +description: Inspect Honcho workspace state for debugging +--- + +# Honcho CLI — Inspection Skills + +## Rules + +- Always use `--json` when processing output programmatically +- Run `honcho peer inspect` before `honcho peer chat` to understand context +- Use `honcho session context` to see exactly what an agent receives +- Never run `honcho workspace delete` without `honcho workspace inspect` first + +## Inspection Workflow + +### 1. Understand the workspace + +```bash +honcho workspace inspect --json +``` + +### 2. Find the peer + +```bash +honcho peer list --json +honcho peer inspect --json +``` + +### 3. Check peer's memory + +```bash +honcho peer card --json +honcho conclusion list --observer --json +honcho conclusion search "topic" --observer --json +``` + +### 4. Debug a session + +```bash +honcho session inspect --json +honcho message list --last 20 --json +honcho session context --json +honcho session summaries --json +``` + +### 5. 
Search across workspace + +```bash +honcho workspace search "query" --json +honcho peer search "query" --json +``` diff --git a/honcho-cli/src/honcho_cli/validation.py b/honcho-cli/src/honcho_cli/validation.py new file mode 100644 index 000000000..b11118027 --- /dev/null +++ b/honcho-cli/src/honcho_cli/validation.py @@ -0,0 +1,44 @@ +"""Input hardening: validate resource IDs and workspace names. + +Agents hallucinate bad IDs. Catch them early with clear errors. +""" + +from __future__ import annotations + +import re + +from honcho_cli.output import print_error + +UNSAFE_CHARS = re.compile(r'[?#%\x00-\x1f\x7f/\\]') + + +def validate_resource_id(value: str, resource_type: str = "resource") -> str: + """Validate a resource ID. Returns the value if valid, raises SystemExit on invalid.""" + if not value: + _fail( + "EMPTY_ID", + f"Empty {resource_type} ID provided", + {resource_type: ""}, + ) + + if UNSAFE_CHARS.search(value): + _fail( + "INVALID_ID", + f"Invalid {resource_type} ID: contains unsafe characters (?, #, %, control chars, path separators)", + {resource_type: value}, + ) + + if ".." in value: + _fail( + "INVALID_ID", + f"Invalid {resource_type} ID: contains path traversal", + {resource_type: value}, + ) + + return value + + +def _fail(code: str, message: str, details: dict) -> None: + """Print structured error and exit.""" + print_error(code, message, details) + raise SystemExit(1) diff --git a/honcho-cli/tests/__init__.py b/honcho-cli/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/honcho-cli/tests/test_commands.py b/honcho-cli/tests/test_commands.py new file mode 100644 index 000000000..61de7d47b --- /dev/null +++ b/honcho-cli/tests/test_commands.py @@ -0,0 +1,195 @@ +"""Command-level tests: init flow, destructive confirms, JSON output contract, exit codes. + +Uses Typer's CliRunner against the real `app`. stdout is not a TTY under +CliRunner, so `use_json()` returns True and the CLI emits JSON — +which is exactly what scripts and agents consume. +""" + +from __future__ import annotations + +import json +import os +from unittest.mock import MagicMock, patch + +import pytest +from typer.testing import CliRunner + +from honcho_cli.main import app + + +@pytest.fixture +def cfg(tmp_path, monkeypatch): + """Isolated config file + clean HONCHO_* env.""" + f = tmp_path / "config.json" + monkeypatch.setattr("honcho_cli.config.CONFIG_DIR", tmp_path) + monkeypatch.setattr("honcho_cli.config.CONFIG_FILE", f) + monkeypatch.setattr("honcho_cli.commands.setup.CONFIG_FILE", f) + for k in [k for k in os.environ if k.startswith("HONCHO_")]: + monkeypatch.delenv(k) + return f + + +@pytest.fixture +def runner(): + return CliRunner() + + +# --------------------------------------------------------------------------- # +# 1. 
`honcho init` end-to-end + +class TestInit: + def test_first_run_writes_exact_shape(self, cfg, runner): + """First run with --api-key + --base-url writes apiKey + environmentUrl only.""" + with patch("honcho_cli.commands.setup._test_connection", return_value=(True, "OK")): + result = runner.invoke( + app, + ["init", "--api-key", "test-key-123", "--base-url", "http://localhost:8000"], + ) + assert result.exit_code == 0, result.stderr + assert json.loads(cfg.read_text()) == { + "environmentUrl": "http://localhost:8000", + "apiKey": "test-key-123", + } + + def test_preserves_foreign_keys(self, cfg, runner): + """Second run must not clobber sibling-tool keys (`hosts`, `sessions`, ...).""" + cfg.write_text(json.dumps({ + "apiKey": "old", + "environmentUrl": "http://old.example", + "hosts": {"claude_code": {"peerName": "user"}}, + "sessions": {"/Users/user": "home-chat"}, + "sessionStrategy": "chat-instance", + })) + with patch("honcho_cli.commands.setup._test_connection", return_value=(True, "OK")): + result = runner.invoke( + app, + ["init", "--api-key", "new-key", "--base-url", "https://api.honcho.dev"], + ) + assert result.exit_code == 0, result.stderr + on_disk = json.loads(cfg.read_text()) + assert on_disk["apiKey"] == "new-key" + assert on_disk["environmentUrl"] == "https://api.honcho.dev" + assert on_disk["hosts"] == {"claude_code": {"peerName": "user"}} + assert on_disk["sessions"] == {"/Users/user": "home-chat"} + assert on_disk["sessionStrategy"] == "chat-instance" + + +# --------------------------------------------------------------------------- # +# 2. Destructive-confirm guards + +class TestDestructiveConfirm: + def test_workspace_delete_aborts_on_no(self, cfg, runner): + """`workspace delete` without --yes: 'n' at prompt → no API call, non-zero exit.""" + cfg.write_text(json.dumps({"apiKey": "k", "environmentUrl": "http://localhost:8000"})) + fake = MagicMock() + fake.sessions.return_value = MagicMock(has_next_page=lambda: False, _raw_items=[]) + with patch("honcho_cli.commands.workspace.get_client", return_value=(fake, MagicMock())), \ + patch("honcho_cli.commands.workspace._with_workspace", return_value=fake): + result = runner.invoke(app, ["workspace", "delete", "ws1"], input="n\n") + assert result.exit_code != 0 + fake.delete_workspace.assert_not_called() + + def test_session_delete_aborts_on_no(self, cfg, runner): + cfg.write_text(json.dumps({"apiKey": "k", "environmentUrl": "http://localhost:8000"})) + session = MagicMock() + client = MagicMock() + client.session.return_value = session + config = MagicMock(session_id="s1", workspace_id="ws1") + with patch("honcho_cli.commands.workspace.get_client", return_value=(client, config)): + result = runner.invoke(app, ["session", "delete", "s1"], input="n\n") + assert result.exit_code != 0 + session.delete.assert_not_called() + + +# --------------------------------------------------------------------------- # +# 3. 
JSON output contract — scripts pipe these + +class TestJsonContract: + def test_workspace_list_json_array_shape(self, cfg, runner): + cfg.write_text(json.dumps({"apiKey": "k", "environmentUrl": "http://localhost:8000"})) + client = MagicMock() + client.workspaces.return_value = ["ws-a", "ws-b"] + with patch("honcho_cli.commands.workspace.get_client", return_value=(client, MagicMock())): + result = runner.invoke(app, ["workspace", "list"]) + assert result.exit_code == 0, result.stderr + assert json.loads(result.stdout) == [{"id": "ws-a"}, {"id": "ws-b"}] + + def test_workspace_search_preserves_full_content_in_json_mode(self, cfg, runner): + cfg.write_text(json.dumps({ + "apiKey": "k", + "environmentUrl": "http://localhost:8000", + "workspace_id": "ws1", + })) + message = MagicMock( + id="msg1", + content="x" * 250, + peer_id="peer1", + session_id="sess1", + created_at="2026-01-01T00:00:00Z", + ) + client = MagicMock() + client.search.return_value = [message] + config = MagicMock(workspace_id="ws1") + with patch("honcho_cli.commands.workspace.get_client", return_value=(client, config)): + result = runner.invoke(app, ["workspace", "search", "topic", "-w", "ws1"]) + assert result.exit_code == 0, result.stderr + payload = json.loads(result.stdout) + assert payload == [{ + "id": "msg1", + "content": "x" * 250, + "peer_id": "peer1", + "session_id": "sess1", + "created_at": "2026-01-01T00:00:00Z", + }] + + def test_message_get_returns_single_json_object(self, cfg, runner): + cfg.write_text(json.dumps({"apiKey": "k", "environmentUrl": "http://localhost:8000"})) + msg = MagicMock( + id="msg1", + peer_id="peer1", + content="hello", + token_count=7, + metadata={"kind": "demo"}, + created_at="2026-01-01T00:00:00Z", + ) + session = MagicMock() + session.get_message.return_value = msg + client = MagicMock() + client.session.return_value = session + config = MagicMock(session_id="sess1", workspace_id="ws1") + with patch("honcho_cli.commands.message.get_client", return_value=(client, config)): + result = runner.invoke(app, ["message", "get", "msg1", "-s", "sess1", "-w", "ws1"]) + assert result.exit_code == 0, result.stderr + assert json.loads(result.stdout) == { + "id": "msg1", + "peer_id": "peer1", + "content": "hello", + "token_count": 7, + "metadata": {"kind": "demo"}, + "created_at": "2026-01-01T00:00:00Z", + } + + +# --------------------------------------------------------------------------- # +# 4. 
Exit codes on error + +class TestExitCodes: + def test_no_workspace_scoped_exits_nonzero_with_code(self, cfg, runner): + """Running a workspace-scoped command with no workspace → NO_WORKSPACE on stderr, exit 1.""" + cfg.write_text(json.dumps({"apiKey": "k", "environmentUrl": "http://localhost:8000"})) + result = runner.invoke(app, ["peer", "list"]) + assert result.exit_code == 1 + assert json.loads(result.stderr)["error"]["code"] == "NO_WORKSPACE" + + def test_not_found_exits_nonzero_with_code(self, cfg, runner): + """SDK NotFoundError → structured error, exit 1.""" + from honcho import NotFoundError + + cfg.write_text(json.dumps({"apiKey": "k", "environmentUrl": "http://localhost:8000"})) + client = MagicMock() + client.peer.return_value.get_card.side_effect = NotFoundError("not found") + config = MagicMock(peer_id="missing", session_id="", workspace_id="ws1") + with patch("honcho_cli.commands.peer.get_client", return_value=(client, config)): + result = runner.invoke(app, ["peer", "inspect", "missing", "-w", "ws1"]) + assert result.exit_code == 1 + assert json.loads(result.stderr)["error"]["code"] == "PEER_NOT_FOUND" diff --git a/honcho-cli/tests/test_config.py b/honcho-cli/tests/test_config.py new file mode 100644 index 000000000..bdac2bc40 --- /dev/null +++ b/honcho-cli/tests/test_config.py @@ -0,0 +1,113 @@ +"""Tests for config management.""" + +import json +import os + +import pytest +from honcho_cli.config import CLIConfig + + +@pytest.fixture +def cfg_path(tmp_path, monkeypatch): + """Redirect CONFIG_FILE to tmp_path and clear HONCHO_* env vars.""" + f = tmp_path / "config.json" + monkeypatch.setattr("honcho_cli.config.CONFIG_FILE", f) + monkeypatch.setattr("honcho_cli.config.CONFIG_DIR", tmp_path) + for key in [k for k in os.environ if k.startswith("HONCHO_")]: + monkeypatch.delenv(key) + return f + + +class TestLoad: + def test_defaults_when_no_file(self, cfg_path): + loaded = CLIConfig.load() + assert loaded.base_url == "https://api.honcho.dev" + assert loaded.api_key == "" + assert loaded.workspace_id == "" + + def test_malformed_file_uses_defaults(self, cfg_path): + cfg_path.write_text("not-json{{{") + assert CLIConfig.load().api_key == "" + + def test_reads_environment_url(self, cfg_path): + cfg_path.write_text(json.dumps({"apiKey": "k", "environmentUrl": "http://localhost:8000"})) + loaded = CLIConfig.load() + assert loaded.base_url == "http://localhost:8000" + assert loaded.api_key == "k" + + def test_api_key_and_base_url_from_env(self, cfg_path, monkeypatch): + """HONCHO_API_KEY and HONCHO_BASE_URL override config file at runtime.""" + cfg_path.write_text(json.dumps({"environmentUrl": "https://api.honcho.dev"})) + monkeypatch.setenv("HONCHO_API_KEY", "env-key") + monkeypatch.setenv("HONCHO_BASE_URL", "http://localhost:8000") + loaded = CLIConfig.load() + assert loaded.api_key == "env-key" + assert loaded.base_url == "http://localhost:8000" + + +class TestSave: + def test_writes_only_cli_owned_keys(self, cfg_path): + """apiKey + environmentUrl are written; workspace/peer/session are not.""" + CLIConfig( + base_url="http://localhost:8000", + api_key="test-key-123", + workspace_id="my-ws", # must NOT be persisted + peer_id="user", + session_id="s1", + ).save() + assert json.loads(cfg_path.read_text()) == { + "environmentUrl": "http://localhost:8000", + "apiKey": "test-key-123", + } + + def test_preserves_foreign_keys(self, cfg_path): + """Other tools' top-level keys (hosts, sessions, ...) 
are untouched."""
+        seed = {
+            "apiKey": "old-key",
+            "environmentUrl": "https://api.honcho.dev",
+            "saveMessages": True,
+            "sessions": {"/Users/user": "home-chat"},
+            "hosts": {"claude_code": {"peerName": "user", "workspace": "agents"}},
+            "sessionStrategy": "chat-instance",
+        }
+        cfg_path.write_text(json.dumps(seed))
+        cfg = CLIConfig.load()
+        cfg.api_key = "new-key"
+        cfg.save()
+
+        on_disk = json.loads(cfg_path.read_text())
+        assert on_disk["apiKey"] == "new-key"
+        assert on_disk["environmentUrl"] == "https://api.honcho.dev"
+        for k in ("saveMessages", "sessions", "hosts", "sessionStrategy"):
+            assert on_disk[k] == seed[k]
+
+
+@pytest.mark.parametrize(
+    "api_key, expected",
+    [
+        # Long JWT: only last 4 chars visible, masked prefix.
+        ("eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.abcdef", "***cdef"),
+        # Short value > 4 chars: still only last 4.
+        ("abcdef", "***cdef"),
+        # 4 or fewer chars: fully masked — don't leak the whole key.
+        ("abcd", "***"),
+        ("x", "***"),
+    ],
+)
+def test_api_key_redaction_shows_last4_only(api_key, expected):
+    """Redacted api_key keeps at most the last 4 chars after ``***`` and never leaks the JWT header/body."""
+    assert CLIConfig(api_key=api_key).redacted()["api_key"] == expected
+
+
+def test_api_key_redaction_empty_omitted():
+    """Empty api_key is omitted from redacted output entirely."""
+    assert "api_key" not in CLIConfig(api_key="").redacted()
+
+
+def test_save_sets_600_permissions(cfg_path):
+    """Config with plaintext API key must be owner-readable only on POSIX."""
+    import stat
+    CLIConfig(base_url="http://localhost:8000", api_key="sekret").save()
+    mode = stat.S_IMODE(os.stat(cfg_path).st_mode)
+    # chmod(0o600) → rw- --- ---
+    assert mode == 0o600, f"expected 0o600, got {oct(mode)}"
diff --git a/honcho-cli/tests/test_validation.py b/honcho-cli/tests/test_validation.py
new file mode 100644
index 000000000..c117c9ec2
--- /dev/null
+++ b/honcho-cli/tests/test_validation.py
@@ -0,0 +1,57 @@
+"""Tests for resource-ID validation.
+
+Agents hallucinate IDs; ``validate_resource_id`` is the defense in depth
+between those IDs and the API. These tests pin the accept/reject rules.
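+
+The contract these tests pin, roughly::
+
+    validate_resource_id("my-peer-01", "peer")  # returns "my-peer-01"
+    validate_resource_id("../etc", "peer")      # prints INVALID_ID, raises SystemExit(1)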
+""" + +from __future__ import annotations + +import pytest + +from honcho_cli.validation import validate_resource_id + + +class TestAccepts: + @pytest.mark.parametrize( + "value", + [ + "eri", + "my-peer-01", + "workspace_name", + "UPPER", + "with.dots", + "123abc", + "a", + "long-id-with-many-parts_v2", + ], + ) + def test_safe_id_round_trips(self, value): + assert validate_resource_id(value, "peer") == value + + +class TestRejects: + def test_empty_string(self): + with pytest.raises(SystemExit): + validate_resource_id("", "peer") + + @pytest.mark.parametrize( + "value", + [ + "bad/slash", + "bad\\backslash", + "bad?query", + "bad#hash", + "bad%encoded", + "with\x00null", + "with\x1fctrl", + "with\x7fdel", + ], + ) + def test_unsafe_chars(self, value): + with pytest.raises(SystemExit): + validate_resource_id(value, "peer") + + @pytest.mark.parametrize("value", ["..", "../etc", "foo/..", "a..b"]) + def test_path_traversal(self, value): + with pytest.raises(SystemExit): + validate_resource_id(value, "peer") diff --git a/honcho-cli/uv.lock b/honcho-cli/uv.lock new file mode 100644 index 000000000..dcadb6f3b --- /dev/null +++ b/honcho-cli/uv.lock @@ -0,0 +1,407 @@ +version = 1 +revision = 3 +requires-python = ">=3.11" + +[[package]] +name = "annotated-doc" +version = "0.0.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/57/ba/046ceea27344560984e26a590f90bc7f4a75b06701f653222458922b558c/annotated_doc-0.0.4.tar.gz", hash = "sha256:fbcda96e87e9c92ad167c2e53839e57503ecfda18804ea28102353485033faa4", size = 7288, upload-time = "2025-11-10T22:07:42.062Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/1e/d3/26bf1008eb3d2daa8ef4cacc7f3bfdc11818d111f7e2d0201bc6e3b49d45/annotated_doc-0.0.4-py3-none-any.whl", hash = "sha256:571ac1dc6991c450b25a9c2d84a3705e2ae7a53467b5d111c24fa8baabbed320", size = 5303, upload-time = "2025-11-10T22:07:40.673Z" }, +] + +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + +[[package]] +name = "anyio" +version = "4.12.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "idna" }, + { name = "typing-extensions", marker = "python_full_version < '3.13'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/96/f0/5eb65b2bb0d09ac6776f2eb54adee6abe8228ea05b20a5ad0e4945de8aac/anyio-4.12.1.tar.gz", hash = "sha256:41cfcc3a4c85d3f05c932da7c26d0201ac36f72abd4435ba90d0464a3ffed703", size = 228685, upload-time = "2026-01-06T11:45:21.246Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/38/0e/27be9fdef66e72d64c0cdc3cc2823101b80585f8119b5c112c2e8f5f7dab/anyio-4.12.1-py3-none-any.whl", hash = "sha256:d405828884fc140aa80a3c667b8beed277f1dfedec42ba031bd6ac3db606ab6c", size = 113592, upload-time = "2026-01-06T11:45:19.497Z" }, +] + +[[package]] +name = "certifi" +version = "2026.2.25" +source = { registry 
= "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/2d/7bf41579a8986e348fa033a31cdd0e4121114f6bce2457e8876010b092dd/certifi-2026.2.25.tar.gz", hash = "sha256:e887ab5cee78ea814d3472169153c2d12cd43b14bd03329a39a9c6e2e80bfba7", size = 155029, upload-time = "2026-02-25T02:54:17.342Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/3c/c17fb3ca2d9c3acff52e30b309f538586f9f5b9c9cf454f3845fc9af4881/certifi-2026.2.25-py3-none-any.whl", hash = "sha256:027692e4402ad994f1c42e52a4997a9763c646b73e4096e4d5d6db8af1d6f0fa", size = 153684, upload-time = "2026-02-25T02:54:15.766Z" }, +] + +[[package]] +name = "click" +version = "8.3.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/3d/fa/656b739db8587d7b5dfa22e22ed02566950fbfbcdc20311993483657a5c0/click-8.3.1.tar.gz", hash = "sha256:12ff4785d337a1bb490bb7e9c2b1ee5da3112e94a8622f26a6c77f5d2fc6842a", size = 295065, upload-time = "2025-11-15T20:45:42.706Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/98/78/01c019cdb5d6498122777c1a43056ebb3ebfeef2076d9d026bfe15583b2b/click-8.3.1-py3-none-any.whl", hash = "sha256:981153a64e25f12d547d3426c367a4857371575ee7ad18df2a6183ab0545b2a6", size = 108274, upload-time = "2025-11-15T20:45:41.139Z" }, +] + +[[package]] +name = "colorama" +version = "0.4.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d8/53/6f443c9a4a8358a93a6792e2acffb9d9d5cb0a5cfd8802644b7b1c9a02e4/colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44", size = 27697, upload-time = "2022-10-25T02:36:22.414Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d1/d6/3965ed04c63042e047cb6a3e6ed1a63a35087b6a609aa3a15ed8ac56c221/colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6", size = 25335, upload-time = "2022-10-25T02:36:20.889Z" }, +] + +[[package]] +name = "h11" +version = "0.16.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/01/ee/02a2c011bdab74c6fb3c75474d40b3052059d95df7e73351460c8588d963/h11-0.16.0.tar.gz", hash = "sha256:4e35b956cf45792e4caa5885e69fba00bdbc6ffafbfa020300e549b208ee5ff1", size = 101250, upload-time = "2025-04-24T03:35:25.427Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/04/4b/29cac41a4d98d144bf5f6d33995617b185d14b22401f75ca86f384e87ff1/h11-0.16.0-py3-none-any.whl", hash = "sha256:63cf8bbe7522de3bf65932fda1d9c2772064ffb3dae62d55932da54b31cb6c86", size = 37515, upload-time = "2025-04-24T03:35:24.344Z" }, +] + +[[package]] +name = "honcho-ai" +version = "2.0.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "httpx" }, + { name = "pydantic" }, + { name = "typing-extensions", marker = "python_full_version < '3.12'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/93/30/d30ba159404050d53b4b1b1c4477f9591f43af18758be1fb7dab6afbfe7d/honcho_ai-2.0.1.tar.gz", hash = "sha256:6fdeebf9454e62bc523d57888e50359e67baafdb21f68621f9c14e08dc00623a", size = 46732, upload-time = "2026-02-09T21:03:26.99Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e2/de/83fda0c057cfa11d6b5ed532623184591aa7dcff4a067934ba6811026229/honcho_ai-2.0.1-py3-none-any.whl", hash = 
"sha256:94887e61d59f353e1e1e20b395858040780f5d67ca1e9d450538646544e4e42f", size = 56780, upload-time = "2026-02-09T21:03:25.992Z" }, +] + +[[package]] +name = "honcho-cli" +version = "0.1.0" +source = { editable = "." } +dependencies = [ + { name = "honcho-ai" }, + { name = "httpx" }, + { name = "rich" }, + { name = "typer" }, +] + +[package.optional-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-mock" }, +] + +[package.metadata] +requires-dist = [ + { name = "honcho-ai", specifier = ">=0.1.0" }, + { name = "httpx", specifier = ">=0.27.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, + { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, + { name = "rich", specifier = ">=13.0.0" }, + { name = "typer", specifier = ">=0.15.0" }, +] +provides-extras = ["dev"] + +[[package]] +name = "httpcore" +version = "1.0.9" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "h11" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/06/94/82699a10bca87a5556c9c59b5963f2d039dbd239f25bc2a63907a05a14cb/httpcore-1.0.9.tar.gz", hash = "sha256:6e34463af53fd2ab5d807f399a9b45ea31c3dfa2276f15a2c3f00afff6e176e8", size = 85484, upload-time = "2025-04-24T22:06:22.219Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/f5/f66802a942d491edb555dd61e3a9961140fd64c90bce1eafd741609d334d/httpcore-1.0.9-py3-none-any.whl", hash = "sha256:2d400746a40668fc9dec9810239072b40b4484b640a8c38fd654a024c7a1bf55", size = 78784, upload-time = "2025-04-24T22:06:20.566Z" }, +] + +[[package]] +name = "httpx" +version = "0.28.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "anyio" }, + { name = "certifi" }, + { name = "httpcore" }, + { name = "idna" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/b1/df/48c586a5fe32a0f01324ee087459e112ebb7224f646c0b5023f5e79e9956/httpx-0.28.1.tar.gz", hash = "sha256:75e98c5f16b0f35b567856f597f06ff2270a374470a5c2392242528e3e3e42fc", size = 141406, upload-time = "2024-12-06T15:37:23.222Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/2a/39/e50c7c3a983047577ee07d2a9e53faf5a69493943ec3f6a384bdc792deb2/httpx-0.28.1-py3-none-any.whl", hash = "sha256:d909fcccc110f8c7faf814ca82a9a4d816bc5a6dbfea25d6591d6985b8ba59ad", size = 73517, upload-time = "2024-12-06T15:37:21.509Z" }, +] + +[[package]] +name = "idna" +version = "3.11" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6f/6d/0703ccc57f3a7233505399edb88de3cbd678da106337b9fcde432b65ed60/idna-3.11.tar.gz", hash = "sha256:795dafcc9c04ed0c1fb032c2aa73654d8e8c5023a7df64a53f39190ada629902", size = 194582, upload-time = "2025-10-12T14:55:20.501Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0e/61/66938bbb5fc52dbdf84594873d5b51fb1f7c7794e9c0f5bd885f30bc507b/idna-3.11-py3-none-any.whl", hash = "sha256:771a87f49d9defaf64091e6e6fe9c18d4833f140bd19464795bc32d966ca37ea", size = 71008, upload-time = "2025-10-12T14:55:18.883Z" }, +] + +[[package]] +name = "iniconfig" +version = "2.3.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/34/14ca021ce8e5dfedc35312d08ba8bf51fdd999c576889fc2c24cb97f4f10/iniconfig-2.3.0.tar.gz", hash = "sha256:c76315c77db068650d49c5b56314774a7804df16fee4402c1f19d6d15d8c4730", size = 20503, upload-time = "2025-10-18T21:55:43.219Z" } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/cb/b1/3846dd7f199d53cb17f49cba7e651e9ce294d8497c8c150530ed11865bb8/iniconfig-2.3.0-py3-none-any.whl", hash = "sha256:f631c04d2c48c52b84d0d0549c99ff3859c98df65b3101406327ecc7d53fbf12", size = 7484, upload-time = "2025-10-18T21:55:41.639Z" }, +] + +[[package]] +name = "markdown-it-py" +version = "4.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "mdurl" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/5b/f5/4ec618ed16cc4f8fb3b701563655a69816155e79e24a17b651541804721d/markdown_it_py-4.0.0.tar.gz", hash = "sha256:cb0a2b4aa34f932c007117b194e945bd74e0ec24133ceb5bac59009cda1cb9f3", size = 73070, upload-time = "2025-08-11T12:57:52.854Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/94/54/e7d793b573f298e1c9013b8c4dade17d481164aa517d1d7148619c2cedbf/markdown_it_py-4.0.0-py3-none-any.whl", hash = "sha256:87327c59b172c5011896038353a81343b6754500a08cd7a4973bb48c6d578147", size = 87321, upload-time = "2025-08-11T12:57:51.923Z" }, +] + +[[package]] +name = "mdurl" +version = "0.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/54/cfe61301667036ec958cb99bd3efefba235e65cdeb9c84d24a8293ba1d90/mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba", size = 8729, upload-time = "2022-08-14T12:40:10.846Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/38/89ba8ad64ae25be8de66a6d463314cf1eb366222074cfda9ee839c56a4b4/mdurl-0.1.2-py3-none-any.whl", hash = "sha256:84008a41e51615a49fc9966191ff91509e3c40b939176e643fd50a5c2196b8f8", size = 9979, upload-time = "2022-08-14T12:40:09.779Z" }, +] + +[[package]] +name = "packaging" +version = "26.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/65/ee/299d360cdc32edc7d2cf530f3accf79c4fca01e96ffc950d8a52213bd8e4/packaging-26.0.tar.gz", hash = "sha256:00243ae351a257117b6a241061796684b084ed1c516a08c48a3f7e147a9d80b4", size = 143416, upload-time = "2026-01-21T20:50:39.064Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b7/b9/c538f279a4e237a006a2c98387d081e9eb060d203d8ed34467cc0f0b9b53/packaging-26.0-py3-none-any.whl", hash = "sha256:b36f1fef9334a5588b4166f8bcd26a14e521f2b55e6b9de3aaa80d3ff7a37529", size = 74366, upload-time = "2026-01-21T20:50:37.788Z" }, +] + +[[package]] +name = "pluggy" +version = "1.6.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f9/e2/3e91f31a7d2b083fe6ef3fa267035b518369d9511ffab804f839851d2779/pluggy-1.6.0.tar.gz", hash = "sha256:7dcc130b76258d33b90f61b658791dede3486c3e6bfb003ee5c9bfb396dd22f3", size = 69412, upload-time = "2025-05-15T12:30:07.975Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/54/20/4d324d65cc6d9205fabedc306948156824eb9f0ee1633355a8f7ec5c66bf/pluggy-1.6.0-py3-none-any.whl", hash = "sha256:e920276dd6813095e9377c0bc5566d94c932c33b27a3e3945d8389c374dd4746", size = 20538, upload-time = "2025-05-15T12:30:06.134Z" }, +] + +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = 
"sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, + { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = "2025-11-04T13:39:42.523Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, 
upload-time = "2025-11-04T13:39:44.553Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, + { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, 
upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = 
"2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = "https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = 
"sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size = 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = 
"https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = 
"2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, + { url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, + { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = 
"2025-11-04T13:43:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, + { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 2316992, upload-time = "2025-11-04T13:43:43.602Z" }, + { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" }, +] + +[[package]] +name = "pygments" +version = "2.19.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/b0/77/a5b8c569bf593b0140bde72ea885a803b82086995367bf2037de0159d924/pygments-2.19.2.tar.gz", hash = "sha256:636cb2477cec7f8952536970bc533bc43743542f70392ae026374600add5b887", size = 4968631, upload-time = "2025-06-21T13:39:12.283Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" }, +] + +[[package]] +name = "pytest" +version = "9.0.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "iniconfig" }, + { name = "packaging" }, + { name = "pluggy" }, + { name = "pygments" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/ab/b3226f0bd7cdcf710fbede2b3548584366da3b19b5021e74f5bde2a8fa3f/pytest-9.0.2-py3-none-any.whl", hash = "sha256:711ffd45bf766d5264d487b917733b453d917afd2b0ad65223959f59089f875b", size = 374801, upload-time = "2025-12-06T21:30:49.154Z" }, +] + +[[package]] +name = "pytest-mock" +version = "3.15.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" }, +] + +[[package]] +name = "rich" +version = "14.3.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown-it-py" }, + { name = "pygments" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/b3/c6/f3b320c27991c46f43ee9d856302c70dc2d0fb2dba4842ff739d5f46b393/rich-14.3.3.tar.gz", hash = "sha256:b8daa0b9e4eef54dd8cf7c86c03713f53241884e814f4e2f5fb342fe520f639b", size = 230582, upload-time = "2026-02-19T17:23:12.474Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/14/25/b208c5683343959b670dc001595f2f3737e051da617f66c31f7c4fa93abc/rich-14.3.3-py3-none-any.whl", hash = "sha256:793431c1f8619afa7d3b52b2cdec859562b950ea0d4b6b505397612db8d5362d", size = 310458, upload-time = "2026-02-19T17:23:13.732Z" }, +] + +[[package]] +name = "shellingham" +version = "1.5.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/58/15/8b3609fd3830ef7b27b655beb4b4e9c62313a4e8da8c676e142cc210d58e/shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de", size = 10310, upload-time = "2023-10-24T04:13:40.426Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/f9/0595336914c5619e5f28a1fb793285925a8cd4b432c9da0a987836c7f822/shellingham-1.5.4-py2.py3-none-any.whl", hash = "sha256:7ecfff8f2fd72616f7481040475a65b2bf8af90a56c89140852d1120324e8686", size = 9755, upload-time = "2023-10-24T04:13:38.866Z" }, +] + +[[package]] +name = "typer" +version = "0.24.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-doc" }, + { name = "click" }, + { name = "rich" }, + { name = "shellingham" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/f5/24/cb09efec5cc954f7f9b930bf8279447d24618bb6758d4f6adf2574c41780/typer-0.24.1.tar.gz", hash = "sha256:e39b4732d65fbdcde189ae76cf7cd48aeae72919dea1fdfc16593be016256b45", size = 118613, upload-time = "2026-02-21T16:54:40.609Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4a/91/48db081e7a63bb37284f9fbcefda7c44c277b18b0e13fbc36ea2335b71e6/typer-0.24.1-py3-none-any.whl", hash = "sha256:112c1f0ce578bfb4cab9ffdabc68f031416ebcc216536611ba21f04e9aa84c9e", size = 56085, upload-time = "2026-02-21T16:54:41.616Z" }, +] + +[[package]] +name = "typing-extensions" +version = "4.15.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/72/94/1a15dd82efb362ac84269196e94cf00f187f7ed21c242792a923cdb1c61f/typing_extensions-4.15.0.tar.gz", hash = "sha256:0cea48d173cc12fa28ecabc3b837ea3cf6f38c6d1136f85cbaaf598984861466", size = 109391, upload-time = "2025-08-25T13:49:26.313Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" }, +] + +[[package]] +name = "typing-inspection" +version = "0.4.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size 
= 14611, upload-time = "2025-10-01T02:14:40.154Z" }, +] diff --git a/pyproject.toml b/pyproject.toml index 228ca719b..0a2a49ca8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -62,10 +62,12 @@ dev = [ [tool.uv.workspace] members = [ "sdks/python", + "honcho-cli", ] [tool.uv.sources] honcho-ai = { workspace = true } +honcho-cli = { workspace = true } [tool.ruff.lint] # from https://docs.astral.sh/ruff/linter/#rule-selection example diff --git a/sdks/python/pyproject.toml b/sdks/python/pyproject.toml index acee404d5..07b532f82 100644 --- a/sdks/python/pyproject.toml +++ b/sdks/python/pyproject.toml @@ -29,6 +29,9 @@ classifiers = [ "Topic :: Software Development :: Libraries :: Python Modules", ] +[project.optional-dependencies] +cli = ["honcho-cli>=0.1.0"] + [project.urls] Homepage = "https://github.com/plastic-labs/honcho" Repository = "https://github.com/plastic-labs/honcho" diff --git a/uv.lock b/uv.lock index 6e7dffa9c..c0bbd9db3 100644 --- a/uv.lock +++ b/uv.lock @@ -1,17 +1,17 @@ version = 1 revision = 3 -requires-python = ">=3.10" +requires-python = ">=3.11" resolution-markers = [ "python_full_version >= '3.14'", "python_full_version == '3.13.*'", - "python_full_version >= '3.11' and python_full_version < '3.13'", - "python_full_version < '3.11'", + "python_full_version < '3.13'", ] [manifest] members = [ "honcho", "honcho-ai", + "honcho-cli", ] [[package]] @@ -30,7 +30,6 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "aiohappyeyeballs" }, { name = "aiosignal" }, - { name = "async-timeout", marker = "python_full_version < '3.11'" }, { name = "attrs" }, { name = "frozenlist" }, { name = "multidict" }, @@ -39,23 +38,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/50/42/32cf8e7704ceb4481406eb87161349abb46a57fee3f008ba9cb610968646/aiohttp-3.13.3.tar.gz", hash = "sha256:a949eee43d3782f2daae4f4a2819b2cb9b0c5d3b7f7a927067cc84dafdbb9f88", size = 7844556, upload-time = "2026-01-03T17:33:05.204Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/36/d6/5aec9313ee6ea9c7cde8b891b69f4ff4001416867104580670a31daeba5b/aiohttp-3.13.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d5a372fd5afd301b3a89582817fdcdb6c34124787c70dbcc616f259013e7eef7", size = 738950, upload-time = "2026-01-03T17:29:13.002Z" }, - { url = "https://files.pythonhosted.org/packages/68/03/8fa90a7e6d11ff20a18837a8e2b5dd23db01aabc475aa9271c8ad33299f5/aiohttp-3.13.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:147e422fd1223005c22b4fe080f5d93ced44460f5f9c105406b753612b587821", size = 496099, upload-time = "2026-01-03T17:29:15.268Z" }, - { url = "https://files.pythonhosted.org/packages/d2/23/b81f744d402510a8366b74eb420fc0cc1170d0c43daca12d10814df85f10/aiohttp-3.13.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:859bd3f2156e81dd01432f5849fc73e2243d4a487c4fd26609b1299534ee1845", size = 491072, upload-time = "2026-01-03T17:29:16.922Z" }, - { url = "https://files.pythonhosted.org/packages/d5/e1/56d1d1c0dd334cd203dd97706ce004c1aa24b34a813b0b8daf3383039706/aiohttp-3.13.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:dca68018bf48c251ba17c72ed479f4dafe9dbd5a73707ad8d28a38d11f3d42af", size = 1671588, upload-time = "2026-01-03T17:29:18.539Z" }, - { url = "https://files.pythonhosted.org/packages/5f/34/8d7f962604f4bc2b4e39eb1220dac7d4e4cba91fb9ba0474b4ecd67db165/aiohttp-3.13.3-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = 
"sha256:fee0c6bc7db1de362252affec009707a17478a00ec69f797d23ca256e36d5940", size = 1640334, upload-time = "2026-01-03T17:29:21.028Z" }, - { url = "https://files.pythonhosted.org/packages/94/1d/fcccf2c668d87337ddeef9881537baee13c58d8f01f12ba8a24215f2b804/aiohttp-3.13.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c048058117fd649334d81b4b526e94bde3ccaddb20463a815ced6ecbb7d11160", size = 1722656, upload-time = "2026-01-03T17:29:22.531Z" }, - { url = "https://files.pythonhosted.org/packages/aa/98/c6f3b081c4c606bc1e5f2ec102e87d6411c73a9ef3616fea6f2d5c98c062/aiohttp-3.13.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:215a685b6fbbfcf71dfe96e3eba7a6f58f10da1dfdf4889c7dd856abe430dca7", size = 1817625, upload-time = "2026-01-03T17:29:24.276Z" }, - { url = "https://files.pythonhosted.org/packages/2c/c0/cfcc3d2e11b477f86e1af2863f3858c8850d751ce8dc39c4058a072c9e54/aiohttp-3.13.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2c184bb1fe2cbd2cefba613e9db29a5ab559323f994b6737e370d3da0ac455", size = 1672604, upload-time = "2026-01-03T17:29:26.099Z" }, - { url = "https://files.pythonhosted.org/packages/1e/77/6b4ffcbcac4c6a5d041343a756f34a6dd26174ae07f977a64fe028dda5b0/aiohttp-3.13.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:75ca857eba4e20ce9f546cd59c7007b33906a4cd48f2ff6ccf1ccfc3b646f279", size = 1554370, upload-time = "2026-01-03T17:29:28.121Z" }, - { url = "https://files.pythonhosted.org/packages/f2/f0/e3ddfa93f17d689dbe014ba048f18e0c9f9b456033b70e94349a2e9048be/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:81e97251d9298386c2b7dbeb490d3d1badbdc69107fb8c9299dd04eb39bddc0e", size = 1642023, upload-time = "2026-01-03T17:29:30.002Z" }, - { url = "https://files.pythonhosted.org/packages/eb/45/c14019c9ec60a8e243d06d601b33dcc4fd92379424bde3021725859d7f99/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:c0e2d366af265797506f0283487223146af57815b388623f0357ef7eac9b209d", size = 1649680, upload-time = "2026-01-03T17:29:31.782Z" }, - { url = "https://files.pythonhosted.org/packages/9c/fd/09c9451dae5aa5c5ed756df95ff9ef549d45d4be663bafd1e4954fd836f0/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4e239d501f73d6db1522599e14b9b321a7e3b1de66ce33d53a765d975e9f4808", size = 1692407, upload-time = "2026-01-03T17:29:33.392Z" }, - { url = "https://files.pythonhosted.org/packages/a6/81/938bc2ec33c10efd6637ccb3d22f9f3160d08e8f3aa2587a2c2d5ab578eb/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:0db318f7a6f065d84cb1e02662c526294450b314a02bd9e2a8e67f0d8564ce40", size = 1543047, upload-time = "2026-01-03T17:29:34.855Z" }, - { url = "https://files.pythonhosted.org/packages/f7/23/80488ee21c8d567c83045e412e1d9b7077d27171591a4eb7822586e8c06a/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:bfc1cc2fe31a6026a8a88e4ecfb98d7f6b1fec150cfd708adbfd1d2f42257c29", size = 1715264, upload-time = "2026-01-03T17:29:36.389Z" }, - { url = "https://files.pythonhosted.org/packages/e2/83/259a8da6683182768200b368120ab3deff5370bed93880fb9a3a86299f34/aiohttp-3.13.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:af71fff7bac6bb7508956696dce8f6eec2bbb045eceb40343944b1ae62b5ef11", size = 1657275, upload-time = "2026-01-03T17:29:38.162Z" }, - { url = 
"https://files.pythonhosted.org/packages/3f/4f/2c41f800a0b560785c10fb316216ac058c105f9be50bdc6a285de88db625/aiohttp-3.13.3-cp310-cp310-win32.whl", hash = "sha256:37da61e244d1749798c151421602884db5270faf479cf0ef03af0ff68954c9dd", size = 434053, upload-time = "2026-01-03T17:29:40.074Z" }, - { url = "https://files.pythonhosted.org/packages/80/df/29cd63c7ecfdb65ccc12f7d808cac4fa2a19544660c06c61a4a48462de0c/aiohttp-3.13.3-cp310-cp310-win_amd64.whl", hash = "sha256:7e63f210bc1b57ef699035f2b4b6d9ce096b5914414a49b0997c839b2bd2223c", size = 456687, upload-time = "2026-01-03T17:29:41.819Z" }, { url = "https://files.pythonhosted.org/packages/f1/4c/a164164834f03924d9a29dc3acd9e7ee58f95857e0b467f6d04298594ebb/aiohttp-3.13.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:5b6073099fb654e0a068ae678b10feff95c5cae95bbfcbfa7af669d361a8aa6b", size = 746051, upload-time = "2026-01-03T17:29:43.287Z" }, { url = "https://files.pythonhosted.org/packages/82/71/d5c31390d18d4f58115037c432b7e0348c60f6f53b727cad33172144a112/aiohttp-3.13.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cb93e166e6c28716c8c6aeb5f99dfb6d5ccf482d29fe9bf9a794110e6d0ab64", size = 499234, upload-time = "2026-01-03T17:29:44.822Z" }, { url = "https://files.pythonhosted.org/packages/0e/c9/741f8ac91e14b1d2e7100690425a5b2b919a87a5075406582991fb7de920/aiohttp-3.13.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:28e027cf2f6b641693a09f631759b4d9ce9165099d2b5d92af9bd4e197690eea", size = 494979, upload-time = "2026-01-03T17:29:46.405Z" }, @@ -163,7 +145,6 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mako" }, { name = "sqlalchemy" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "typing-extensions" }, ] sdist = { url = "https://files.pythonhosted.org/packages/94/13/8b084e0f2efb0275a1d534838844926f798bd766566b1375174e2448cd31/alembic-1.18.4.tar.gz", hash = "sha256:cb6e1fd84b6174ab8dbb2329f86d631ba9559dd78df550b57804d607672cedbc", size = 2056725, upload-time = "2026-02-10T16:00:47.195Z" } @@ -213,7 +194,6 @@ name = "anyio" version = "4.12.1" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "idna" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] @@ -249,15 +229,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148, upload-time = "2022-10-05T19:19:30.546Z" }, ] -[[package]] -name = "backports-asyncio-runner" -version = "1.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/8e/ff/70dca7d7cb1cbc0edb2c6cc0c38b65cba36cccc491eca64cabd5fe7f8670/backports_asyncio_runner-1.2.0.tar.gz", hash = "sha256:a5aa7b2b7d8f8bfcaa2b57313f70792df84e32a2a746f585213373f900b42162", size = 69893, upload-time = "2025-07-02T02:27:15.685Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/a0/59/76ab57e3fe74484f48a53f8e337171b4a2349e506eabe136d7e01d059086/backports_asyncio_runner-1.2.0-py3-none-any.whl", hash = "sha256:0da0a936a8aeb554eccb426dc55af3ba63bcdc69fa1a600b5bb305413a4477b5", size = 12313, upload-time = "2025-07-02T02:27:14.263Z" }, -] - [[package]] name = "basedpyright" version = "1.38.1" @@ -330,18 +301,6 @@ dependencies = [ ] sdist = { url = 
"https://files.pythonhosted.org/packages/eb/56/b1ba7935a17738ae8453301356628e8147c79dbb825bcbc73dc7401f9846/cffi-2.0.0.tar.gz", hash = "sha256:44d1b5909021139fe36001ae048dbdde8214afa20200eda0f64c068cac5d5529", size = 523588, upload-time = "2025-09-08T23:24:04.541Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/d7/516d984057745a6cd96575eea814fe1edd6646ee6efd552fb7b0921dec83/cffi-2.0.0-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:0cf2d91ecc3fcc0625c2c530fe004f82c110405f101548512cce44322fa8ac44", size = 184283, upload-time = "2025-09-08T23:22:08.01Z" }, - { url = "https://files.pythonhosted.org/packages/9e/84/ad6a0b408daa859246f57c03efd28e5dd1b33c21737c2db84cae8c237aa5/cffi-2.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f73b96c41e3b2adedc34a7356e64c8eb96e03a3782b535e043a986276ce12a49", size = 180504, upload-time = "2025-09-08T23:22:10.637Z" }, - { url = "https://files.pythonhosted.org/packages/50/bd/b1a6362b80628111e6653c961f987faa55262b4002fcec42308cad1db680/cffi-2.0.0-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:53f77cbe57044e88bbd5ed26ac1d0514d2acf0591dd6bb02a3ae37f76811b80c", size = 208811, upload-time = "2025-09-08T23:22:12.267Z" }, - { url = "https://files.pythonhosted.org/packages/4f/27/6933a8b2562d7bd1fb595074cf99cc81fc3789f6a6c05cdabb46284a3188/cffi-2.0.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3e837e369566884707ddaf85fc1744b47575005c0a229de3327f8f9a20f4efeb", size = 216402, upload-time = "2025-09-08T23:22:13.455Z" }, - { url = "https://files.pythonhosted.org/packages/05/eb/b86f2a2645b62adcfff53b0dd97e8dfafb5c8aa864bd0d9a2c2049a0d551/cffi-2.0.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:5eda85d6d1879e692d546a078b44251cdd08dd1cfb98dfb77b670c97cee49ea0", size = 203217, upload-time = "2025-09-08T23:22:14.596Z" }, - { url = "https://files.pythonhosted.org/packages/9f/e0/6cbe77a53acf5acc7c08cc186c9928864bd7c005f9efd0d126884858a5fe/cffi-2.0.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:9332088d75dc3241c702d852d4671613136d90fa6881da7d770a483fd05248b4", size = 203079, upload-time = "2025-09-08T23:22:15.769Z" }, - { url = "https://files.pythonhosted.org/packages/98/29/9b366e70e243eb3d14a5cb488dfd3a0b6b2f1fb001a203f653b93ccfac88/cffi-2.0.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:fc7de24befaeae77ba923797c7c87834c73648a05a4bde34b3b7e5588973a453", size = 216475, upload-time = "2025-09-08T23:22:17.427Z" }, - { url = "https://files.pythonhosted.org/packages/21/7a/13b24e70d2f90a322f2900c5d8e1f14fa7e2a6b3332b7309ba7b2ba51a5a/cffi-2.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:cf364028c016c03078a23b503f02058f1814320a56ad535686f90565636a9495", size = 218829, upload-time = "2025-09-08T23:22:19.069Z" }, - { url = "https://files.pythonhosted.org/packages/60/99/c9dc110974c59cc981b1f5b66e1d8af8af764e00f0293266824d9c4254bc/cffi-2.0.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:e11e82b744887154b182fd3e7e8512418446501191994dbf9c9fc1f32cc8efd5", size = 211211, upload-time = "2025-09-08T23:22:20.588Z" }, - { url = "https://files.pythonhosted.org/packages/49/72/ff2d12dbf21aca1b32a40ed792ee6b40f6dc3a9cf1644bd7ef6e95e0ac5e/cffi-2.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8ea985900c5c95ce9db1745f7933eeef5d314f0565b27625d9a10ec9881e1bfb", size = 218036, upload-time = "2025-09-08T23:22:22.143Z" }, - { url = 
"https://files.pythonhosted.org/packages/e2/cc/027d7fb82e58c48ea717149b03bcadcbdc293553edb283af792bd4bcbb3f/cffi-2.0.0-cp310-cp310-win32.whl", hash = "sha256:1f72fb8906754ac8a2cc3f9f5aaa298070652a0ffae577e0ea9bd480dc3c931a", size = 172184, upload-time = "2025-09-08T23:22:23.328Z" }, - { url = "https://files.pythonhosted.org/packages/33/fa/072dd15ae27fbb4e06b437eb6e944e75b068deb09e2a2826039e49ee2045/cffi-2.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:b18a3ed7d5b3bd8d9ef7a8cb226502c6bf8308df1525e1cc676c3680e7176739", size = 182790, upload-time = "2025-09-08T23:22:24.752Z" }, { url = "https://files.pythonhosted.org/packages/12/4a/3dfd5f7850cbf0d06dc84ba9aa00db766b52ca38d8b86e3a38314d52498c/cffi-2.0.0-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:b4c854ef3adc177950a8dfc81a86f5115d2abd545751a304c5bcf2c2c7283cfe", size = 184344, upload-time = "2025-09-08T23:22:26.456Z" }, { url = "https://files.pythonhosted.org/packages/4f/8b/f0e4c441227ba756aafbe78f117485b25bb26b1c059d01f137fa6d14896b/cffi-2.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2de9a304e27f7596cd03d16f1b7c72219bd944e99cc52b84d0145aefb07cbd3c", size = 180560, upload-time = "2025-09-08T23:22:28.197Z" }, { url = "https://files.pythonhosted.org/packages/b1/b7/1200d354378ef52ec227395d95c2576330fd22a869f7a70e88e1447eb234/cffi-2.0.0-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:baf5215e0ab74c16e2dd324e8ec067ef59e41125d3eade2b863d294fd5035c92", size = 209613, upload-time = "2025-09-08T23:22:29.475Z" }, @@ -418,22 +377,6 @@ version = "3.4.4" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/13/69/33ddede1939fdd074bce5434295f38fae7136463422fe4fd3e0e89b98062/charset_normalizer-3.4.4.tar.gz", hash = "sha256:94537985111c35f28720e43603b8e7b43a6ecfb2ce1d3058bbe955b73404e21a", size = 129418, upload-time = "2025-10-14T04:42:32.879Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1f/b8/6d51fc1d52cbd52cd4ccedd5b5b2f0f6a11bbf6765c782298b0f3e808541/charset_normalizer-3.4.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:e824f1492727fa856dd6eda4f7cee25f8518a12f3c4a56a74e8095695089cf6d", size = 209709, upload-time = "2025-10-14T04:40:11.385Z" }, - { url = "https://files.pythonhosted.org/packages/5c/af/1f9d7f7faafe2ddfb6f72a2e07a548a629c61ad510fe60f9630309908fef/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4bd5d4137d500351a30687c2d3971758aac9a19208fc110ccb9d7188fbe709e8", size = 148814, upload-time = "2025-10-14T04:40:13.135Z" }, - { url = "https://files.pythonhosted.org/packages/79/3d/f2e3ac2bbc056ca0c204298ea4e3d9db9b4afe437812638759db2c976b5f/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:027f6de494925c0ab2a55eab46ae5129951638a49a34d87f4c3eda90f696b4ad", size = 144467, upload-time = "2025-10-14T04:40:14.728Z" }, - { url = "https://files.pythonhosted.org/packages/ec/85/1bf997003815e60d57de7bd972c57dc6950446a3e4ccac43bc3070721856/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:f820802628d2694cb7e56db99213f930856014862f3fd943d290ea8438d07ca8", size = 162280, upload-time = "2025-10-14T04:40:16.14Z" }, - { url = 
"https://files.pythonhosted.org/packages/3e/8e/6aa1952f56b192f54921c436b87f2aaf7c7a7c3d0d1a765547d64fd83c13/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:798d75d81754988d2565bff1b97ba5a44411867c0cf32b77a7e8f8d84796b10d", size = 159454, upload-time = "2025-10-14T04:40:17.567Z" }, - { url = "https://files.pythonhosted.org/packages/36/3b/60cbd1f8e93aa25d1c669c649b7a655b0b5fb4c571858910ea9332678558/charset_normalizer-3.4.4-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9d1bb833febdff5c8927f922386db610b49db6e0d4f4ee29601d71e7c2694313", size = 153609, upload-time = "2025-10-14T04:40:19.08Z" }, - { url = "https://files.pythonhosted.org/packages/64/91/6a13396948b8fd3c4b4fd5bc74d045f5637d78c9675585e8e9fbe5636554/charset_normalizer-3.4.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:9cd98cdc06614a2f768d2b7286d66805f94c48cde050acdbbb7db2600ab3197e", size = 151849, upload-time = "2025-10-14T04:40:20.607Z" }, - { url = "https://files.pythonhosted.org/packages/b7/7a/59482e28b9981d105691e968c544cc0df3b7d6133152fb3dcdc8f135da7a/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:077fbb858e903c73f6c9db43374fd213b0b6a778106bc7032446a8e8b5b38b93", size = 151586, upload-time = "2025-10-14T04:40:21.719Z" }, - { url = "https://files.pythonhosted.org/packages/92/59/f64ef6a1c4bdd2baf892b04cd78792ed8684fbc48d4c2afe467d96b4df57/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:244bfb999c71b35de57821b8ea746b24e863398194a4014e4c76adc2bbdfeff0", size = 145290, upload-time = "2025-10-14T04:40:23.069Z" }, - { url = "https://files.pythonhosted.org/packages/6b/63/3bf9f279ddfa641ffa1962b0db6a57a9c294361cc2f5fcac997049a00e9c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:64b55f9dce520635f018f907ff1b0df1fdc31f2795a922fb49dd14fbcdf48c84", size = 163663, upload-time = "2025-10-14T04:40:24.17Z" }, - { url = "https://files.pythonhosted.org/packages/ed/09/c9e38fc8fa9e0849b172b581fd9803bdf6e694041127933934184e19f8c3/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:faa3a41b2b66b6e50f84ae4a68c64fcd0c44355741c6374813a800cd6695db9e", size = 151964, upload-time = "2025-10-14T04:40:25.368Z" }, - { url = "https://files.pythonhosted.org/packages/d2/d1/d28b747e512d0da79d8b6a1ac18b7ab2ecfd81b2944c4c710e166d8dd09c/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:6515f3182dbe4ea06ced2d9e8666d97b46ef4c75e326b79bb624110f122551db", size = 161064, upload-time = "2025-10-14T04:40:26.806Z" }, - { url = "https://files.pythonhosted.org/packages/bb/9a/31d62b611d901c3b9e5500c36aab0ff5eb442043fb3a1c254200d3d397d9/charset_normalizer-3.4.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:cc00f04ed596e9dc0da42ed17ac5e596c6ccba999ba6bd92b0e0aef2f170f2d6", size = 155015, upload-time = "2025-10-14T04:40:28.284Z" }, - { url = "https://files.pythonhosted.org/packages/1f/f3/107e008fa2bff0c8b9319584174418e5e5285fef32f79d8ee6a430d0039c/charset_normalizer-3.4.4-cp310-cp310-win32.whl", hash = "sha256:f34be2938726fc13801220747472850852fe6b1ea75869a048d6f896838c896f", size = 99792, upload-time = "2025-10-14T04:40:29.613Z" }, - { url = "https://files.pythonhosted.org/packages/eb/66/e396e8a408843337d7315bab30dbf106c38966f1819f123257f5520f8a96/charset_normalizer-3.4.4-cp310-cp310-win_amd64.whl", hash = 
"sha256:a61900df84c667873b292c3de315a786dd8dac506704dea57bc957bd31e22c7d", size = 107198, upload-time = "2025-10-14T04:40:30.644Z" }, - { url = "https://files.pythonhosted.org/packages/b5/58/01b4f815bf0312704c267f2ccb6e5d42bcc7752340cd487bc9f8c3710597/charset_normalizer-3.4.4-cp310-cp310-win_arm64.whl", hash = "sha256:cead0978fc57397645f12578bfd2d5ea9138ea0fac82b2f63f7f7c6877986a69", size = 100262, upload-time = "2025-10-14T04:40:32.108Z" }, { url = "https://files.pythonhosted.org/packages/ed/27/c6491ff4954e58a10f69ad90aca8a1b6fe9c5d3c6f380907af3c37435b59/charset_normalizer-3.4.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6e1fcf0720908f200cd21aa4e6750a48ff6ce4afe7ff5a79a90d5ed8a08296f8", size = 206988, upload-time = "2025-10-14T04:40:33.79Z" }, { url = "https://files.pythonhosted.org/packages/94/59/2e87300fe67ab820b5428580a53cad894272dbb97f38a7a814a2a1ac1011/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5f819d5fe9234f9f82d75bdfa9aef3a3d72c4d24a6e57aeaebba32a704553aa0", size = 147324, upload-time = "2025-10-14T04:40:34.961Z" }, { url = "https://files.pythonhosted.org/packages/07/fb/0cf61dc84b2b088391830f6274cb57c82e4da8bbc2efeac8c025edb88772/charset_normalizer-3.4.4-cp311-cp311-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a59cb51917aa591b1c4e6a43c132f0cdc3c76dbad6155df4e28ee626cc77a0a3", size = 142742, upload-time = "2025-10-14T04:40:36.105Z" }, @@ -540,20 +483,6 @@ version = "7.13.4" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/24/56/95b7e30fa389756cb56630faa728da46a27b8c6eb46f9d557c68fff12b65/coverage-7.13.4.tar.gz", hash = "sha256:e5c8f6ed1e61a8b2dcdf31eb0b9bbf0130750ca79c1c49eb898e2ad86f5ccc91", size = 827239, upload-time = "2026-02-09T12:59:03.86Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/44/d4/7827d9ffa34d5d4d752eec907022aa417120936282fc488306f5da08c292/coverage-7.13.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0fc31c787a84f8cd6027eba44010517020e0d18487064cd3d8968941856d1415", size = 219152, upload-time = "2026-02-09T12:56:11.974Z" }, - { url = "https://files.pythonhosted.org/packages/35/b0/d69df26607c64043292644dbb9dc54b0856fabaa2cbb1eeee3331cc9e280/coverage-7.13.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a32ebc02a1805adf637fc8dec324b5cdacd2e493515424f70ee33799573d661b", size = 219667, upload-time = "2026-02-09T12:56:13.33Z" }, - { url = "https://files.pythonhosted.org/packages/82/a4/c1523f7c9e47b2271dbf8c2a097e7a1f89ef0d66f5840bb59b7e8814157b/coverage-7.13.4-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:e24f9156097ff9dc286f2f913df3a7f63c0e333dcafa3c196f2c18b4175ca09a", size = 246425, upload-time = "2026-02-09T12:56:14.552Z" }, - { url = "https://files.pythonhosted.org/packages/f8/02/aa7ec01d1a5023c4b680ab7257f9bfde9defe8fdddfe40be096ac19e8177/coverage-7.13.4-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:8041b6c5bfdc03257666e9881d33b1abc88daccaf73f7b6340fb7946655cd10f", size = 248229, upload-time = "2026-02-09T12:56:16.31Z" }, - { url = "https://files.pythonhosted.org/packages/35/98/85aba0aed5126d896162087ef3f0e789a225697245256fc6181b95f47207/coverage-7.13.4-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:2a09cfa6a5862bc2fc6ca7c3def5b2926194a56b8ab78ffcf617d28911123012", size = 250106, upload-time = 
"2026-02-09T12:56:18.024Z" }, - { url = "https://files.pythonhosted.org/packages/96/72/1db59bd67494bc162e3e4cd5fbc7edba2c7026b22f7c8ef1496d58c2b94c/coverage-7.13.4-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:296f8b0af861d3970c2a4d8c91d48eb4dd4771bcef9baedec6a9b515d7de3def", size = 252021, upload-time = "2026-02-09T12:56:19.272Z" }, - { url = "https://files.pythonhosted.org/packages/9d/97/72899c59c7066961de6e3daa142d459d47d104956db43e057e034f015c8a/coverage-7.13.4-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:e101609bcbbfb04605ea1027b10dc3735c094d12d40826a60f897b98b1c30256", size = 247114, upload-time = "2026-02-09T12:56:21.051Z" }, - { url = "https://files.pythonhosted.org/packages/39/1f/f1885573b5970235e908da4389176936c8933e86cb316b9620aab1585fa2/coverage-7.13.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:aa3feb8db2e87ff5e6d00d7e1480ae241876286691265657b500886c98f38bda", size = 248143, upload-time = "2026-02-09T12:56:22.585Z" }, - { url = "https://files.pythonhosted.org/packages/a8/cf/e80390c5b7480b722fa3e994f8202807799b85bc562aa4f1dde209fbb7be/coverage-7.13.4-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:4fc7fa81bbaf5a02801b65346c8b3e657f1d93763e58c0abdf7c992addd81a92", size = 246152, upload-time = "2026-02-09T12:56:23.748Z" }, - { url = "https://files.pythonhosted.org/packages/44/bf/f89a8350d85572f95412debb0fb9bb4795b1d5b5232bd652923c759e787b/coverage-7.13.4-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:33901f604424145c6e9c2398684b92e176c0b12df77d52db81c20abd48c3794c", size = 249959, upload-time = "2026-02-09T12:56:25.209Z" }, - { url = "https://files.pythonhosted.org/packages/f7/6e/612a02aece8178c818df273e8d1642190c4875402ca2ba74514394b27aba/coverage-7.13.4-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:bb28c0f2cf2782508a40cec377935829d5fcc3ad9a3681375af4e84eb34b6b58", size = 246416, upload-time = "2026-02-09T12:56:26.475Z" }, - { url = "https://files.pythonhosted.org/packages/cb/98/b5afc39af67c2fa6786b03c3a7091fc300947387ce8914b096db8a73d67a/coverage-7.13.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:9d107aff57a83222ddbd8d9ee705ede2af2cc926608b57abed8ef96b50b7e8f9", size = 247025, upload-time = "2026-02-09T12:56:27.727Z" }, - { url = "https://files.pythonhosted.org/packages/51/30/2bba8ef0682d5bd210c38fe497e12a06c9f8d663f7025e9f5c2c31ce847d/coverage-7.13.4-cp310-cp310-win32.whl", hash = "sha256:a6f94a7d00eb18f1b6d403c91a88fd58cfc92d4b16080dfdb774afc8294469bf", size = 221758, upload-time = "2026-02-09T12:56:29.051Z" }, - { url = "https://files.pythonhosted.org/packages/78/13/331f94934cf6c092b8ea59ff868eb587bc8fe0893f02c55bc6c0183a192e/coverage-7.13.4-cp310-cp310-win_amd64.whl", hash = "sha256:2cb0f1e000ebc419632bbe04366a8990b6e32c4e0b51543a6484ffe15eaeda95", size = 222693, upload-time = "2026-02-09T12:56:30.366Z" }, { url = "https://files.pythonhosted.org/packages/b4/ad/b59e5b451cf7172b8d1043dc0fa718f23aab379bc1521ee13d4bd9bfa960/coverage-7.13.4-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d490ba50c3f35dd7c17953c68f3270e7ccd1c6642e2d2afe2d8e720b98f5a053", size = 219278, upload-time = "2026-02-09T12:56:31.673Z" }, { url = "https://files.pythonhosted.org/packages/f1/17/0cb7ca3de72e5f4ef2ec2fa0089beafbcaaaead1844e8b8a63d35173d77d/coverage-7.13.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:19bc3c88078789f8ef36acb014d7241961dbf883fd2533d18cb1e7a5b4e28b11", size = 219783, upload-time = "2026-02-09T12:56:33.104Z" }, { url = 
"https://files.pythonhosted.org/packages/ab/63/325d8e5b11e0eaf6d0f6a44fad444ae58820929a9b0de943fa377fe73e85/coverage-7.13.4-cp311-cp311-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:3998e5a32e62fdf410c0dbd3115df86297995d6e3429af80b8798aad894ca7aa", size = 250200, upload-time = "2026-02-09T12:56:34.474Z" }, @@ -658,7 +587,6 @@ version = "46.0.5" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cffi", marker = "platform_python_implementation != 'PyPy'" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/60/04/ee2a9e8542e4fa2773b81771ff8349ff19cdd56b7258a0cc442639052edb/cryptography-46.0.5.tar.gz", hash = "sha256:abace499247268e3757271b2f1e244b36b06f8515cf27c4d49468fc9eb16e93d", size = 750064, upload-time = "2026-02-10T19:18:38.255Z" } wheels = [ @@ -773,18 +701,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/de/15/545e2b6cf2e3be84bc1ed85613edd75b8aea69807a71c26f4ca6a9258e82/email_validator-2.3.0-py3-none-any.whl", hash = "sha256:80f13f623413e6b197ae73bb10bf4eb0908faf509ad8362c5edeb0be7fd450b4", size = 35604, upload-time = "2025-08-26T13:09:05.858Z" }, ] -[[package]] -name = "exceptiongroup" -version = "1.3.1" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/50/79/66800aadf48771f6b62f7eb014e352e5d06856655206165d775e675a02c9/exceptiongroup-1.3.1.tar.gz", hash = "sha256:8b412432c6055b0b7d14c310000ae93352ed6754f70fa8f7c34141f91c4e3219", size = 30371, upload-time = "2025-11-21T23:01:54.787Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8a/0e/97c33bf5009bdbac74fd2beace167cab3f978feb69cc36f1ef79360d6c4e/exceptiongroup-1.3.1-py3-none-any.whl", hash = "sha256:a7a39a3bd276781e98394987d3a5701d0c4edffb633bb7a5144577f82c773598", size = 16740, upload-time = "2025-11-21T23:01:53.443Z" }, -] - [[package]] name = "execnet" version = "2.1.2" @@ -801,7 +717,6 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "redis" }, { name = "sortedcontainers" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/d8/44/c403963727d707e03f49a417712b0a23e853d33ae50729679040b6cfe281/fakeredis-2.34.0.tar.gz", hash = "sha256:72bc51a7ab39bedf5004f0cf1b5206822619c1be8c2657fd878d1f4250256c57", size = 177156, upload-time = "2026-02-16T15:56:34.318Z" } wheels = [ @@ -842,7 +757,6 @@ version = "0.0.23" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "rich-toolkit" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "typer" }, { name = "uvicorn", extra = ["standard"] }, ] @@ -896,20 +810,6 @@ version = "0.8.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/69/e7/f89d54fb04104114dd0552836dc2b47914f416cc0e200b409dd04a33de5e/fastar-0.8.0.tar.gz", hash = "sha256:f4d4d68dbf1c4c2808f0e730fac5843493fc849f70fe3ad3af60dfbaf68b9a12", size = 68524, upload-time = "2025-11-26T02:36:00.72Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c3/e2/51d9ee443aabcd5aa581d45b18b6198ced364b5cd97e5504c5d782ceb82c/fastar-0.8.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:c9f930cff014cf79d396d0541bd9f3a3f170c9b5e45d10d634d98f9ed08788c3", size = 708536, upload-time = 
"2025-11-26T02:34:35.236Z" }, - { url = "https://files.pythonhosted.org/packages/07/2a/edfc6274768b8a3859a5ca4f8c29cb7f614d7f27d2378e2c88aa91cda54e/fastar-0.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07b70f712d20622346531a4b46bb332569bea621f61314c0b7e80903a16d14cf", size = 632235, upload-time = "2025-11-26T02:34:19.367Z" }, - { url = "https://files.pythonhosted.org/packages/ef/1e/3cfbaaec464caef196700ee2ffae1c03f94f7c5e2a85d0ec0ea9cdd1da81/fastar-0.8.0-cp310-cp310-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:330639db3bfba4c6d132421a2a4aeb81e7bea8ce9159cdb6e247fbc5fae97686", size = 871386, upload-time = "2025-11-26T02:33:47.613Z" }, - { url = "https://files.pythonhosted.org/packages/82/50/224a674ad541054179e4e6e0b54bb6e162f04f698a2512b42a8085fc6b6f/fastar-0.8.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98ea7ceb6231e48d7bb0d7dc13e946baa29c7f6873eaf4afb69725d6da349033", size = 764955, upload-time = "2025-11-26T02:32:44.279Z" }, - { url = "https://files.pythonhosted.org/packages/4d/5e/4608184aa57cb6a54f62c1eb3e5133ba8d461fc7f13193c0255effbec12a/fastar-0.8.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:a90695a601a78bbca910fdf2efcdf3103c55d0de5a5c6e93556d707bf886250b", size = 765987, upload-time = "2025-11-26T02:32:59.701Z" }, - { url = "https://files.pythonhosted.org/packages/e0/53/6afd2b680dddfa10df9a16bbcf6cabfee0d92435d5c7e3f4cfe3b1712662/fastar-0.8.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:9d0bf655ff4c9320b0ca8a5b128063d5093c0c8c1645a2b5f7167143fd8531aa", size = 930900, upload-time = "2025-11-26T02:33:16.059Z" }, - { url = "https://files.pythonhosted.org/packages/ef/1e/b7a304bfcc1d06845cbfa4b464516f6fff9c8c6692f6ef80a3a86b04e199/fastar-0.8.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d8df22cdd8d58e7689aa89b2e4a07e8e5fa4f88d2d9c2621f0e88a49be97ccea", size = 821523, upload-time = "2025-11-26T02:33:30.897Z" }, - { url = "https://files.pythonhosted.org/packages/1d/da/9ef8605c6d233cd6ca3a95f7f518ac22aa064903afe6afa57733bfb7c31b/fastar-0.8.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e8a5e6ad722685128521c8fb44cf25bd38669650ba3a4b466b8903e5aa28e1a0", size = 821268, upload-time = "2025-11-26T02:34:04.003Z" }, - { url = "https://files.pythonhosted.org/packages/7e/22/ed37c78a6b4420de1677d82e79742787975c34847229c33dc376334c7283/fastar-0.8.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:31cd541231a2456e32104da891cf9962c3b40234d0465cbf9322a6bc8a1b05d5", size = 986286, upload-time = "2025-11-26T02:34:50.279Z" }, - { url = "https://files.pythonhosted.org/packages/ca/a6/366b15f432d85d4089e6e4b52a09cc2a2bcf4d7a1f0771e3d3194deccb1e/fastar-0.8.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:175db2a98d67ced106468e8987975484f8bbbd5ad99201da823b38bafb565ed5", size = 1041921, upload-time = "2025-11-26T02:35:07.292Z" }, - { url = "https://files.pythonhosted.org/packages/f4/45/45f8e6991e3ce9f8aeefdc8d4c200daada41097a36808643d1703464c3e2/fastar-0.8.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:ada877ab1c65197d772ce1b1c2e244d4799680d8b3f136a4308360f3d8661b23", size = 1047302, upload-time = "2025-11-26T02:35:24.995Z" }, - { url = "https://files.pythonhosted.org/packages/c2/e2/a587796111a3cd4b78cd61ec3fc1252d8517d81f763f4164ed5680f84810/fastar-0.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:01084cb75f13ca6a8e80bd41584322523189f8e81b472053743d6e6c3062b5a6", size = 995141, upload-time = 
"2025-11-26T02:35:42.449Z" }, - { url = "https://files.pythonhosted.org/packages/89/c0/7a8ec86695b0b77168e220cf2af1aa30592f5ecdbd0ce6d641d29c4a8bae/fastar-0.8.0-cp310-cp310-win32.whl", hash = "sha256:ca639b9909805e44364ea13cca2682b487e74826e4ad75957115ec693228d6b6", size = 456544, upload-time = "2025-11-26T02:36:23.801Z" }, - { url = "https://files.pythonhosted.org/packages/be/a9/8da4deb840121c59deabd939ce2dca3d6beec85576f3743d1144441938b5/fastar-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:fbc0f2ed0f4add7fb58034c576584d44d7eaaf93dee721dfb26dbed6e222dbac", size = 490701, upload-time = "2025-11-26T02:36:09.625Z" }, { url = "https://files.pythonhosted.org/packages/cd/15/1c764530b81b266f6d27d78d49b6bef22a73b3300cd83a280bfd244908c5/fastar-0.8.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:cd9c0d3ebf7a0a6f642f771cf41b79f7c98d40a3072a8abe1174fbd9bd615bd3", size = 708427, upload-time = "2025-11-26T02:34:36.502Z" }, { url = "https://files.pythonhosted.org/packages/41/fc/75d42c008516543219e4293e4d8ac55da57a5c63147484f10468bd1bc24e/fastar-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2875a077340fe4f8099bd3ed8fa90d9595e1ac3cd62ae19ab690d5bf550eeb35", size = 631740, upload-time = "2025-11-26T02:34:20.718Z" }, { url = "https://files.pythonhosted.org/packages/50/8d/9632984f7824ed2210157dcebd8e9821ef6d4f2b28510d0516db6625ff9b/fastar-0.8.0-cp311-cp311-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:a999263d9f87184bf2801833b2ecf105e03c0dd91cac78685673b70da564fd64", size = 871628, upload-time = "2025-11-26T02:33:49.279Z" }, @@ -985,18 +885,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c9/e2/dfa19a4b260b8ab3581b7484dcb80c09b25324f4daa6b6ae1c7640d1607a/fastar-0.8.0-cp314-cp314t-win32.whl", hash = "sha256:187f61dc739afe45ac8e47ed7fd1adc45d52eac110cf27d579155720507d6fbe", size = 455767, upload-time = "2025-11-26T02:36:34.758Z" }, { url = "https://files.pythonhosted.org/packages/51/47/df65c72afc1297797b255f90c4778b5d6f1f0f80282a134d5ab610310ed9/fastar-0.8.0-cp314-cp314t-win_amd64.whl", hash = "sha256:40e9d763cf8bf85ce2fa256e010aa795c0fe3d3bd1326d5c3084e6ce7857127e", size = 489971, upload-time = "2025-11-26T02:36:22.081Z" }, { url = "https://files.pythonhosted.org/packages/85/11/0aa8455af26f0ae89e42be67f3a874255ee5d7f0f026fc86e8d56f76b428/fastar-0.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:e59673307b6a08210987059a2bdea2614fe26e3335d0e5d1a3d95f49a05b1418", size = 460467, upload-time = "2025-11-26T02:36:07.978Z" }, - { url = "https://files.pythonhosted.org/packages/25/9f/6eaa810c240236eff2edf736cd50a17c97dbab1693cda4f7bcea09d13418/fastar-0.8.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:2127cf2e80ffd49744a160201e0e2f55198af6c028a7b3f750026e0b1f1caa4e", size = 710544, upload-time = "2025-11-26T02:34:46.195Z" }, - { url = "https://files.pythonhosted.org/packages/1d/a5/58ff9e49a1cd5fbfc8f1238226cbf83b905376a391a6622cdd396b2cfa29/fastar-0.8.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:ff85094f10003801339ac4fa9b20a3410c2d8f284d4cba2dc99de6e98c877812", size = 634020, upload-time = "2025-11-26T02:34:31.085Z" }, - { url = "https://files.pythonhosted.org/packages/80/94/f839257c6600a83fbdb5a7fcc06319599086137b25ba38ca3d2c0fe14562/fastar-0.8.0-pp310-pypy310_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:3dbca235f0bd804cca6602fe055d3892bebf95fb802e6c6c7d872fb10f7abc6c", size = 871735, upload-time = "2025-11-26T02:34:00.088Z" }, - { url = 
"https://files.pythonhosted.org/packages/eb/79/4124c54260f7ee5cb7034bfe499eff2f8512b052d54be4671e59d4f25a4f/fastar-0.8.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:722e54bfdee6c81a0005e147319e93d8797f442308032c92fa28d03ef8fda076", size = 766779, upload-time = "2025-11-26T02:32:55.109Z" }, - { url = "https://files.pythonhosted.org/packages/36/b6/043b263c4126bf6557c942d099503989af9c5c7ee5cca9a04e00f754816f/fastar-0.8.0-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:0a78e5221b94a80800930b7fd0d0e797ae73aadf7044c05ed46cb9bdf870f022", size = 766755, upload-time = "2025-11-26T02:33:11.595Z" }, - { url = "https://files.pythonhosted.org/packages/57/ff/29a5dc06f2940439ebf98661ecc98d48d3f22fed8d6a2d5dc985d1e8da24/fastar-0.8.0-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:997092d31ff451de8d0568f6773f3517cb87dcd0bc76184edb65d7154390a6f8", size = 932732, upload-time = "2025-11-26T02:33:27.122Z" }, - { url = "https://files.pythonhosted.org/packages/eb/e8/2218830f422b37aad52c24b53cb84b5d88bd6fd6ad411bd6689b1a32500d/fastar-0.8.0-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:558e8fcf8fe574541df5db14a46cd98bfbed14a811b7014a54f2b714c0cfac42", size = 822571, upload-time = "2025-11-26T02:33:42.986Z" }, - { url = "https://files.pythonhosted.org/packages/6e/fd/ba6dfeff77cddfe58d85c490b1735c002b81c0d6f826916a8b6c4f8818bc/fastar-0.8.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f1d2a54f87e2908cc19e1a6ee249620174fbefc54a219aba1eaa6f31657683c3", size = 822440, upload-time = "2025-11-26T02:34:15.439Z" }, - { url = "https://files.pythonhosted.org/packages/a7/57/54d5740c84b35de0eb12975397ecc16785b5ad8bed2dbac38b8c8a7c1edd/fastar-0.8.0-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:ef94901537be277f9ec59db939eb817960496c6351afede5b102699b5098604d", size = 987424, upload-time = "2025-11-26T02:35:02.742Z" }, - { url = "https://files.pythonhosted.org/packages/ee/c7/18115927f16deb1ddffdbd4ae992e7e33064bc6defa2b92a147948f8bc0c/fastar-0.8.0-pp310-pypy310_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:0afbb92f78bf29d5e9db76fb46cbabc429e49015cddf72ab9e761afbe88ac100", size = 1042675, upload-time = "2025-11-26T02:35:20.252Z" }, - { url = "https://files.pythonhosted.org/packages/d7/1a/ca884fc7973ec6d765e87af23a4dd25784fb0a36ac2df825f18c3630bbab/fastar-0.8.0-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:fb59c7925e7710ad178d9e1a3e65edf295d9a042a0cdcb673b4040949eb8ad0a", size = 1047098, upload-time = "2025-11-26T02:35:37.643Z" }, - { url = "https://files.pythonhosted.org/packages/44/ee/25cd645db749b206bb95e1512e57e75d56ccbbb8ec3536f52a7979deab6b/fastar-0.8.0-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:e6c4d6329da568ec36b1347b0c09c4d27f9dfdeddf9f438ddb16799ecf170098", size = 997397, upload-time = "2025-11-26T02:35:56.215Z" }, { url = "https://files.pythonhosted.org/packages/98/6e/6c46aa7f8c8734e7f96ee5141acd3877667ce66f34eea10703aa7571d191/fastar-0.8.0-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:998e3fa4b555b63eb134e6758437ed739ad1652fdd2a61dfe1dacbfddc35fe66", size = 710662, upload-time = "2025-11-26T02:34:47.593Z" }, { url = "https://files.pythonhosted.org/packages/70/27/fd622442f2fbd4ff5459677987481ef1c60e077cb4e63a2ed4d8dce6f869/fastar-0.8.0-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:5f83e60d845091f3a12bc37f412774264d161576eaf810ed8b43567eb934b7e5", size = 634049, 
upload-time = "2025-11-26T02:34:32.365Z" }, { url = "https://files.pythonhosted.org/packages/8f/ee/aa4d08aea25b5419a7277132e738ab1cd775f26aebddce11413b07e2fdff/fastar-0.8.0-pp311-pypy311_pp73-manylinux_2_12_i686.manylinux2010_i686.whl", hash = "sha256:299672e1c74d8b73c61684fac9159cfc063d35f4b165996a88facb0e26862cb5", size = 872055, upload-time = "2025-11-26T02:34:01.377Z" }, @@ -1026,22 +914,6 @@ version = "1.8.0" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/2d/f5/c831fac6cc817d26fd54c7eaccd04ef7e0288806943f7cc5bbf69f3ac1f0/frozenlist-1.8.0.tar.gz", hash = "sha256:3ede829ed8d842f6cd48fc7081d7a41001a56f1f38603f9d49bf3020d59a31ad", size = 45875, upload-time = "2025-10-06T05:38:17.865Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/83/4a/557715d5047da48d54e659203b9335be7bfaafda2c3f627b7c47e0b3aaf3/frozenlist-1.8.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:b37f6d31b3dcea7deb5e9696e529a6aa4a898adc33db82da12e4c60a7c4d2011", size = 86230, upload-time = "2025-10-06T05:35:23.699Z" }, - { url = "https://files.pythonhosted.org/packages/a2/fb/c85f9fed3ea8fe8740e5b46a59cc141c23b842eca617da8876cfce5f760e/frozenlist-1.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ef2b7b394f208233e471abc541cc6991f907ffd47dc72584acee3147899d6565", size = 49621, upload-time = "2025-10-06T05:35:25.341Z" }, - { url = "https://files.pythonhosted.org/packages/63/70/26ca3f06aace16f2352796b08704338d74b6d1a24ca38f2771afbb7ed915/frozenlist-1.8.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:a88f062f072d1589b7b46e951698950e7da00442fc1cacbe17e19e025dc327ad", size = 49889, upload-time = "2025-10-06T05:35:26.797Z" }, - { url = "https://files.pythonhosted.org/packages/5d/ed/c7895fd2fde7f3ee70d248175f9b6cdf792fb741ab92dc59cd9ef3bd241b/frozenlist-1.8.0-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f57fb59d9f385710aa7060e89410aeb5058b99e62f4d16b08b91986b9a2140c2", size = 219464, upload-time = "2025-10-06T05:35:28.254Z" }, - { url = "https://files.pythonhosted.org/packages/6b/83/4d587dccbfca74cb8b810472392ad62bfa100bf8108c7223eb4c4fa2f7b3/frozenlist-1.8.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:799345ab092bee59f01a915620b5d014698547afd011e691a208637312db9186", size = 221649, upload-time = "2025-10-06T05:35:29.454Z" }, - { url = "https://files.pythonhosted.org/packages/6a/c6/fd3b9cd046ec5fff9dab66831083bc2077006a874a2d3d9247dea93ddf7e/frozenlist-1.8.0-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:c23c3ff005322a6e16f71bf8692fcf4d5a304aaafe1e262c98c6d4adc7be863e", size = 219188, upload-time = "2025-10-06T05:35:30.951Z" }, - { url = "https://files.pythonhosted.org/packages/ce/80/6693f55eb2e085fc8afb28cf611448fb5b90e98e068fa1d1b8d8e66e5c7d/frozenlist-1.8.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:8a76ea0f0b9dfa06f254ee06053d93a600865b3274358ca48a352ce4f0798450", size = 231748, upload-time = "2025-10-06T05:35:32.101Z" }, - { url = "https://files.pythonhosted.org/packages/97/d6/e9459f7c5183854abd989ba384fe0cc1a0fb795a83c033f0571ec5933ca4/frozenlist-1.8.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:c7366fe1418a6133d5aa824ee53d406550110984de7637d65a178010f759c6ef", size = 236351, upload-time = "2025-10-06T05:35:33.834Z" }, - { url = 
"https://files.pythonhosted.org/packages/97/92/24e97474b65c0262e9ecd076e826bfd1d3074adcc165a256e42e7b8a7249/frozenlist-1.8.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:13d23a45c4cebade99340c4165bd90eeb4a56c6d8a9d8aa49568cac19a6d0dc4", size = 218767, upload-time = "2025-10-06T05:35:35.205Z" }, - { url = "https://files.pythonhosted.org/packages/ee/bf/dc394a097508f15abff383c5108cb8ad880d1f64a725ed3b90d5c2fbf0bb/frozenlist-1.8.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:e4a3408834f65da56c83528fb52ce7911484f0d1eaf7b761fc66001db1646eff", size = 235887, upload-time = "2025-10-06T05:35:36.354Z" }, - { url = "https://files.pythonhosted.org/packages/40/90/25b201b9c015dbc999a5baf475a257010471a1fa8c200c843fd4abbee725/frozenlist-1.8.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:42145cd2748ca39f32801dad54aeea10039da6f86e303659db90db1c4b614c8c", size = 228785, upload-time = "2025-10-06T05:35:37.949Z" }, - { url = "https://files.pythonhosted.org/packages/84/f4/b5bc148df03082f05d2dd30c089e269acdbe251ac9a9cf4e727b2dbb8a3d/frozenlist-1.8.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e2de870d16a7a53901e41b64ffdf26f2fbb8917b3e6ebf398098d72c5b20bd7f", size = 230312, upload-time = "2025-10-06T05:35:39.178Z" }, - { url = "https://files.pythonhosted.org/packages/db/4b/87e95b5d15097c302430e647136b7d7ab2398a702390cf4c8601975709e7/frozenlist-1.8.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:20e63c9493d33ee48536600d1a5c95eefc870cd71e7ab037763d1fbb89cc51e7", size = 217650, upload-time = "2025-10-06T05:35:40.377Z" }, - { url = "https://files.pythonhosted.org/packages/e5/70/78a0315d1fea97120591a83e0acd644da638c872f142fd72a6cebee825f3/frozenlist-1.8.0-cp310-cp310-win32.whl", hash = "sha256:adbeebaebae3526afc3c96fad434367cafbfd1b25d72369a9e5858453b1bb71a", size = 39659, upload-time = "2025-10-06T05:35:41.863Z" }, - { url = "https://files.pythonhosted.org/packages/66/aa/3f04523fb189a00e147e60c5b2205126118f216b0aa908035c45336e27e4/frozenlist-1.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:667c3777ca571e5dbeb76f331562ff98b957431df140b54c85fd4d52eea8d8f6", size = 43837, upload-time = "2025-10-06T05:35:43.205Z" }, - { url = "https://files.pythonhosted.org/packages/39/75/1135feecdd7c336938bd55b4dc3b0dfc46d85b9be12ef2628574b28de776/frozenlist-1.8.0-cp310-cp310-win_arm64.whl", hash = "sha256:80f85f0a7cc86e7a54c46d99c9e1318ff01f4687c172ede30fd52d19d1da1c8e", size = 39989, upload-time = "2025-10-06T05:35:44.596Z" }, { url = "https://files.pythonhosted.org/packages/bc/03/077f869d540370db12165c0aa51640a873fb661d8b315d1d4d67b284d7ac/frozenlist-1.8.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:09474e9831bc2b2199fad6da3c14c7b0fbdd377cce9d3d77131be28906cb7d84", size = 86912, upload-time = "2025-10-06T05:35:45.98Z" }, { url = "https://files.pythonhosted.org/packages/df/b5/7610b6bd13e4ae77b96ba85abea1c8cb249683217ef09ac9e0ae93f25a91/frozenlist-1.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:17c883ab0ab67200b5f964d2b9ed6b00971917d5d8a92df149dc2c9779208ee9", size = 50046, upload-time = "2025-10-06T05:35:47.009Z" }, { url = "https://files.pythonhosted.org/packages/6e/ef/0e8f1fe32f8a53dd26bdd1f9347efe0778b0fddf62789ea683f4cc7d787d/frozenlist-1.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:fa47e444b8ba08fffd1c18e8cdb9a75db1b6a27f17507522834ad13ed5922b93", size = 50119, upload-time = "2025-10-06T05:35:48.38Z" }, @@ -1200,14 +1072,6 @@ version = "3.3.2" source = { registry = "https://pypi.org/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/a3/51/1664f6b78fc6ebbd98019a1fd730e83fa78f2db7058f72b1463d3612b8db/greenlet-3.3.2.tar.gz", hash = "sha256:2eaf067fc6d886931c7962e8c6bede15d2f01965560f3359b27c80bde2d151f2", size = 188267, upload-time = "2026-02-20T20:54:15.531Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/38/3f/9859f655d11901e7b2996c6e3d33e0caa9a1d4572c3bc61ed0faa64b2f4c/greenlet-3.3.2-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:9bc885b89709d901859cf95179ec9f6bb67a3d2bb1f0e88456461bd4b7f8fd0d", size = 277747, upload-time = "2026-02-20T20:16:21.325Z" }, - { url = "https://files.pythonhosted.org/packages/fb/07/cb284a8b5c6498dbd7cba35d31380bb123d7dceaa7907f606c8ff5993cbf/greenlet-3.3.2-cp310-cp310-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b568183cf65b94919be4438dc28416b234b678c608cafac8874dfeeb2a9bbe13", size = 579202, upload-time = "2026-02-20T20:47:28.955Z" }, - { url = "https://files.pythonhosted.org/packages/ed/45/67922992b3a152f726163b19f890a85129a992f39607a2a53155de3448b8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:527fec58dc9f90efd594b9b700662ed3fb2493c2122067ac9c740d98080a620e", size = 590620, upload-time = "2026-02-20T20:55:55.581Z" }, - { url = "https://files.pythonhosted.org/packages/03/5f/6e2a7d80c353587751ef3d44bb947f0565ec008a2e0927821c007e96d3a7/greenlet-3.3.2-cp310-cp310-manylinux_2_24_s390x.manylinux_2_28_s390x.whl", hash = "sha256:508c7f01f1791fbc8e011bd508f6794cb95397fdb198a46cb6635eb5b78d85a7", size = 602132, upload-time = "2026-02-20T21:02:43.261Z" }, - { url = "https://files.pythonhosted.org/packages/ad/55/9f1ebb5a825215fadcc0f7d5073f6e79e3007e3282b14b22d6aba7ca6cb8/greenlet-3.3.2-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:ad0c8917dd42a819fe77e6bdfcb84e3379c0de956469301d9fd36427a1ca501f", size = 591729, upload-time = "2026-02-20T20:20:58.395Z" }, - { url = "https://files.pythonhosted.org/packages/24/b4/21f5455773d37f94b866eb3cf5caed88d6cea6dd2c6e1f9c34f463cba3ec/greenlet-3.3.2-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:97245cc10e5515dbc8c3104b2928f7f02b6813002770cfaffaf9a6e0fc2b94ef", size = 1551946, upload-time = "2026-02-20T20:49:31.102Z" }, - { url = "https://files.pythonhosted.org/packages/00/68/91f061a926abead128fe1a87f0b453ccf07368666bd59ffa46016627a930/greenlet-3.3.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:8c1fdd7d1b309ff0da81d60a9688a8bd044ac4e18b250320a96fc68d31c209ca", size = 1618494, upload-time = "2026-02-20T20:21:06.541Z" }, - { url = "https://files.pythonhosted.org/packages/ac/78/f93e840cbaef8becaf6adafbaf1319682a6c2d8c1c20224267a5c6c8c891/greenlet-3.3.2-cp310-cp310-win_amd64.whl", hash = "sha256:5d0e35379f93a6d0222de929a25ab47b5eb35b5ef4721c2b9cbcc4036129ff1f", size = 230092, upload-time = "2026-02-20T20:17:09.379Z" }, { url = "https://files.pythonhosted.org/packages/f3/47/16400cb42d18d7a6bb46f0626852c1718612e35dcb0dffa16bbaffdf5dd2/greenlet-3.3.2-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:c56692189a7d1c7606cb794be0a8381470d95c57ce5be03fb3d0ef57c7853b86", size = 278890, upload-time = "2026-02-20T20:19:39.263Z" }, { url = "https://files.pythonhosted.org/packages/a3/90/42762b77a5b6aa96cd8c0e80612663d39211e8ae8a6cd47c7f1249a66262/greenlet-3.3.2-cp311-cp311-manylinux_2_24_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ebd458fa8285960f382841da585e02201b53a5ec2bac6b156fc623b5ce4499f", size = 581120, upload-time = "2026-02-20T20:47:30.161Z" }, { url = 
"https://files.pythonhosted.org/packages/bf/6f/f3d64f4fa0a9c7b5c5b3c810ff1df614540d5aa7d519261b53fba55d4df9/greenlet-3.3.2-cp311-cp311-manylinux_2_24_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:a443358b33c4ec7b05b79a7c8b466f5d275025e750298be7340f8fc63dff2a55", size = 594363, upload-time = "2026-02-20T20:55:56.965Z" }, @@ -1292,8 +1156,7 @@ dependencies = [ { name = "python-dotenv" }, { name = "redis" }, { name = "rich" }, - { name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scikit-learn" }, { name = "sentry-sdk", extra = ["anthropic", "fastapi", "sqlalchemy"] }, { name = "sqlalchemy" }, { name = "tenacity" }, @@ -1317,8 +1180,7 @@ dev = [ { name = "pytest-cov" }, { name = "pytest-xdist" }, { name = "ruff" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "scipy" }, { name = "sqlalchemy-utils" }, ] @@ -1386,6 +1248,11 @@ dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.12'" }, ] +[package.optional-dependencies] +cli = [ + { name = "honcho-cli" }, +] + [package.dev-dependencies] dev = [ { name = "ruff" }, @@ -1393,14 +1260,44 @@ dev = [ [package.metadata] requires-dist = [ + { name = "honcho-cli", marker = "extra == 'cli'", editable = "honcho-cli" }, { name = "httpx", specifier = ">=0.28.0,<1" }, { name = "pydantic", specifier = ">=2.0.0,<3" }, { name = "typing-extensions", marker = "python_full_version < '3.12'", specifier = ">=4.12.0" }, ] +provides-extras = ["cli"] [package.metadata.requires-dev] dev = [{ name = "ruff", specifier = ">=0.11.13" }] +[[package]] +name = "honcho-cli" +version = "0.1.0" +source = { editable = "honcho-cli" } +dependencies = [ + { name = "honcho-ai" }, + { name = "httpx" }, + { name = "rich" }, + { name = "typer" }, +] + +[package.optional-dependencies] +dev = [ + { name = "pytest" }, + { name = "pytest-mock" }, +] + +[package.metadata] +requires-dist = [ + { name = "honcho-ai", editable = "sdks/python" }, + { name = "httpx", specifier = ">=0.27.0" }, + { name = "pytest", marker = "extra == 'dev'", specifier = ">=8.0.0" }, + { name = "pytest-mock", marker = "extra == 'dev'", specifier = ">=3.14.0" }, + { name = "rich", specifier = ">=13.0.0" }, + { name = "typer", specifier = ">=0.15.0" }, +] +provides-extras = ["dev"] + [[package]] name = "httpcore" version = "1.0.9" @@ -1420,13 +1317,6 @@ version = "0.7.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/b5/46/120a669232c7bdedb9d52d4aeae7e6c7dfe151e99dc70802e2fc7a5e1993/httptools-0.7.1.tar.gz", hash = "sha256:abd72556974f8e7c74a259655924a717a2365b236c882c3f6f8a45fe94703ac9", size = 258961, upload-time = "2025-10-10T03:55:08.559Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/e5/c07e0bcf4ec8db8164e9f6738c048b2e66aabf30e7506f440c4cc6953f60/httptools-0.7.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:11d01b0ff1fe02c4c32d60af61a4d613b74fad069e47e06e9067758c01e9ac78", size = 204531, upload-time = "2025-10-10T03:54:20.887Z" }, - { url = 
"https://files.pythonhosted.org/packages/7e/4f/35e3a63f863a659f92ffd92bef131f3e81cf849af26e6435b49bd9f6f751/httptools-0.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:84d86c1e5afdc479a6fdabf570be0d3eb791df0ae727e8dbc0259ed1249998d4", size = 109408, upload-time = "2025-10-10T03:54:22.455Z" }, - { url = "https://files.pythonhosted.org/packages/f5/71/b0a9193641d9e2471ac541d3b1b869538a5fb6419d52fd2669fa9c79e4b8/httptools-0.7.1-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:c8c751014e13d88d2be5f5f14fc8b89612fcfa92a9cc480f2bc1598357a23a05", size = 440889, upload-time = "2025-10-10T03:54:23.753Z" }, - { url = "https://files.pythonhosted.org/packages/eb/d9/2e34811397b76718750fea44658cb0205b84566e895192115252e008b152/httptools-0.7.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:654968cb6b6c77e37b832a9be3d3ecabb243bbe7a0b8f65fbc5b6b04c8fcabed", size = 440460, upload-time = "2025-10-10T03:54:25.313Z" }, - { url = "https://files.pythonhosted.org/packages/01/3f/a04626ebeacc489866bb4d82362c0657b2262bef381d68310134be7f40bb/httptools-0.7.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b580968316348b474b020edf3988eecd5d6eec4634ee6561e72ae3a2a0e00a8a", size = 425267, upload-time = "2025-10-10T03:54:26.81Z" }, - { url = "https://files.pythonhosted.org/packages/a5/99/adcd4f66614db627b587627c8ad6f4c55f18881549bab10ecf180562e7b9/httptools-0.7.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:d496e2f5245319da9d764296e86c5bb6fcf0cf7a8806d3d000717a889c8c0b7b", size = 424429, upload-time = "2025-10-10T03:54:28.174Z" }, - { url = "https://files.pythonhosted.org/packages/d5/72/ec8fc904a8fd30ba022dfa85f3bbc64c3c7cd75b669e24242c0658e22f3c/httptools-0.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:cbf8317bfccf0fed3b5680c559d3459cccf1abe9039bfa159e62e391c7270568", size = 86173, upload-time = "2025-10-10T03:54:29.5Z" }, { url = "https://files.pythonhosted.org/packages/9c/08/17e07e8d89ab8f343c134616d72eebfe03798835058e2ab579dcc8353c06/httptools-0.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:474d3b7ab469fefcca3697a10d11a32ee2b9573250206ba1e50d5980910da657", size = 206521, upload-time = "2025-10-10T03:54:31.002Z" }, { url = "https://files.pythonhosted.org/packages/aa/06/c9c1b41ff52f16aee526fd10fbda99fa4787938aa776858ddc4a1ea825ec/httptools-0.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:a3c3b7366bb6c7b96bd72d0dbe7f7d5eead261361f013be5f6d9590465ea1c70", size = 110375, upload-time = "2025-10-10T03:54:31.941Z" }, { url = "https://files.pythonhosted.org/packages/cc/cc/10935db22fda0ee34c76f047590ca0a8bd9de531406a3ccb10a90e12ea21/httptools-0.7.1-cp311-cp311-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:379b479408b8747f47f3b253326183d7c009a3936518cdb70db58cffd369d9df", size = 456621, upload-time = "2025-10-10T03:54:33.176Z" }, @@ -1521,7 +1411,6 @@ dependencies = [ { name = "colorama" }, { name = "py" }, { name = "tabulate" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/8b/22/74f7fcc96280eea46cf2bcbfa1354ac31de0e60a4be6f7966f12cef20893/interrogate-1.7.0.tar.gz", hash = "sha256:a320d6ec644dfd887cc58247a345054fc4d9f981100c45184470068f4b3719b0", size = 159636, upload-time = "2024-04-07T22:30:46.217Z" } wheels = [ @@ -1546,18 +1435,6 @@ version = "0.13.0" source = { registry = "https://pypi.org/simple" } sdist = { url = 
"https://files.pythonhosted.org/packages/0d/5e/4ec91646aee381d01cdb9974e30882c9cd3b8c5d1079d6b5ff4af522439a/jiter-0.13.0.tar.gz", hash = "sha256:f2839f9c2c7e2dffc1bc5929a510e14ce0a946be9365fd1219e7ef342dae14f4", size = 164847, upload-time = "2026-02-02T12:37:56.441Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d0/5a/41da76c5ea07bec1b0472b6b2fdb1b651074d504b19374d7e130e0cdfb25/jiter-0.13.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:2ffc63785fd6c7977defe49b9824ae6ce2b2e2b77ce539bdaf006c26da06342e", size = 311164, upload-time = "2026-02-02T12:35:17.688Z" }, - { url = "https://files.pythonhosted.org/packages/40/cb/4a1bf994a3e869f0d39d10e11efb471b76d0ad70ecbfb591427a46c880c2/jiter-0.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4a638816427006c1e3f0013eb66d391d7a3acda99a7b0cf091eff4497ccea33a", size = 320296, upload-time = "2026-02-02T12:35:19.828Z" }, - { url = "https://files.pythonhosted.org/packages/09/82/acd71ca9b50ecebadc3979c541cd717cce2fe2bc86236f4fa597565d8f1a/jiter-0.13.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:19928b5d1ce0ff8c1ee1b9bdef3b5bfc19e8304f1b904e436caf30bc15dc6cf5", size = 352742, upload-time = "2026-02-02T12:35:21.258Z" }, - { url = "https://files.pythonhosted.org/packages/71/03/d1fc996f3aecfd42eb70922edecfb6dd26421c874503e241153ad41df94f/jiter-0.13.0-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:309549b778b949d731a2f0e1594a3f805716be704a73bf3ad9a807eed5eb5721", size = 363145, upload-time = "2026-02-02T12:35:24.653Z" }, - { url = "https://files.pythonhosted.org/packages/f1/61/a30492366378cc7a93088858f8991acd7d959759fe6138c12a4644e58e81/jiter-0.13.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bcdabaea26cb04e25df3103ce47f97466627999260290349a88c8136ecae0060", size = 487683, upload-time = "2026-02-02T12:35:26.162Z" }, - { url = "https://files.pythonhosted.org/packages/20/4e/4223cffa9dbbbc96ed821c5aeb6bca510848c72c02086d1ed3f1da3d58a7/jiter-0.13.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:a3a377af27b236abbf665a69b2bdd680e3b5a0bd2af825cd3b81245279a7606c", size = 373579, upload-time = "2026-02-02T12:35:27.582Z" }, - { url = "https://files.pythonhosted.org/packages/fe/c9/b0489a01329ab07a83812d9ebcffe7820a38163c6d9e7da644f926ff877c/jiter-0.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fe49d3ff6db74321f144dff9addd4a5874d3105ac5ba7c5b77fac099cfae31ae", size = 362904, upload-time = "2026-02-02T12:35:28.925Z" }, - { url = "https://files.pythonhosted.org/packages/05/af/53e561352a44afcba9a9bc67ee1d320b05a370aed8df54eafe714c4e454d/jiter-0.13.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2113c17c9a67071b0f820733c0893ed1d467b5fcf4414068169e5c2cabddb1e2", size = 392380, upload-time = "2026-02-02T12:35:30.385Z" }, - { url = "https://files.pythonhosted.org/packages/76/2a/dd805c3afb8ed5b326c5ae49e725d1b1255b9754b1b77dbecdc621b20773/jiter-0.13.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:ab1185ca5c8b9491b55ebf6c1e8866b8f68258612899693e24a92c5fdb9455d5", size = 517939, upload-time = "2026-02-02T12:35:31.865Z" }, - { url = "https://files.pythonhosted.org/packages/20/2a/7b67d76f55b8fe14c937e7640389612f05f9a4145fc28ae128aaa5e62257/jiter-0.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:9621ca242547edc16400981ca3231e0c91c0c4c1ab8573a596cd9bb3575d5c2b", size = 551696, upload-time = "2026-02-02T12:35:33.306Z" }, - { url = 
"https://files.pythonhosted.org/packages/85/9c/57cdd64dac8f4c6ab8f994fe0eb04dc9fd1db102856a4458fcf8a99dfa62/jiter-0.13.0-cp310-cp310-win32.whl", hash = "sha256:a7637d92b1c9d7a771e8c56f445c7f84396d48f2e756e5978840ecba2fac0894", size = 204592, upload-time = "2026-02-02T12:35:34.58Z" }, - { url = "https://files.pythonhosted.org/packages/a7/38/f4f3ea5788b8a5bae7510a678cdc747eda0c45ffe534f9878ff37e7cf3b3/jiter-0.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c1b609e5cbd2f52bb74fb721515745b407df26d7b800458bd97cb3b972c29e7d", size = 206016, upload-time = "2026-02-02T12:35:36.435Z" }, { url = "https://files.pythonhosted.org/packages/71/29/499f8c9eaa8a16751b1c0e45e6f5f1761d180da873d417996cc7bddc8eef/jiter-0.13.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:ea026e70a9a28ebbdddcbcf0f1323128a8db66898a06eaad3a4e62d2f554d096", size = 311157, upload-time = "2026-02-02T12:35:37.758Z" }, { url = "https://files.pythonhosted.org/packages/50/f6/566364c777d2ab450b92100bea11333c64c38d32caf8dc378b48e5b20c46/jiter-0.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:66aa3e663840152d18cc8ff1e4faad3dd181373491b9cfdc6004b92198d67911", size = 319729, upload-time = "2026-02-02T12:35:39.246Z" }, { url = "https://files.pythonhosted.org/packages/73/dd/560f13ec5e4f116d8ad2658781646cca91b617ae3b8758d4a5076b278f70/jiter-0.13.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c3524798e70655ff19aec58c7d05adb1f074fecff62da857ea9be2b908b6d701", size = 354766, upload-time = "2026-02-02T12:35:40.662Z" }, @@ -1698,8 +1575,7 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "deprecation" }, { name = "lance-namespace" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy" }, { name = "overrides", marker = "python_full_version < '3.12'" }, { name = "packaging" }, { name = "pyarrow" }, @@ -1766,17 +1642,6 @@ version = "3.0.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/7e/99/7690b6d4034fffd95959cbe0c02de8deb3098cc577c67bb6a24fe5d7caa7/markupsafe-3.0.3.tar.gz", hash = "sha256:722695808f4b6457b320fdc131280796bdceb04ab50fe1795cd540799ebe1698", size = 80313, upload-time = "2025-09-27T18:37:40.426Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/e8/4b/3541d44f3937ba468b75da9eebcae497dcf67adb65caa16760b0a6807ebb/markupsafe-3.0.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:2f981d352f04553a7171b8e44369f2af4055f888dfb147d55e42d29e29e74559", size = 11631, upload-time = "2025-09-27T18:36:05.558Z" }, - { url = "https://files.pythonhosted.org/packages/98/1b/fbd8eed11021cabd9226c37342fa6ca4e8a98d8188a8d9b66740494960e4/markupsafe-3.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:e1c1493fb6e50ab01d20a22826e57520f1284df32f2d8601fdd90b6304601419", size = 12057, upload-time = "2025-09-27T18:36:07.165Z" }, - { url = "https://files.pythonhosted.org/packages/40/01/e560d658dc0bb8ab762670ece35281dec7b6c1b33f5fbc09ebb57a185519/markupsafe-3.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1ba88449deb3de88bd40044603fafffb7bc2b055d626a330323a9ed736661695", size = 22050, upload-time = "2025-09-27T18:36:08.005Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/cd/ce6e848bbf2c32314c9b237839119c5a564a59725b53157c856e90937b7a/markupsafe-3.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f42d0984e947b8adf7dd6dde396e720934d12c506ce84eea8476409563607591", size = 20681, upload-time = "2025-09-27T18:36:08.881Z" }, - { url = "https://files.pythonhosted.org/packages/c9/2a/b5c12c809f1c3045c4d580b035a743d12fcde53cf685dbc44660826308da/markupsafe-3.0.3-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:c0c0b3ade1c0b13b936d7970b1d37a57acde9199dc2aecc4c336773e1d86049c", size = 20705, upload-time = "2025-09-27T18:36:10.131Z" }, - { url = "https://files.pythonhosted.org/packages/cf/e3/9427a68c82728d0a88c50f890d0fc072a1484de2f3ac1ad0bfc1a7214fd5/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0303439a41979d9e74d18ff5e2dd8c43ed6c6001fd40e5bf2e43f7bd9bbc523f", size = 21524, upload-time = "2025-09-27T18:36:11.324Z" }, - { url = "https://files.pythonhosted.org/packages/bc/36/23578f29e9e582a4d0278e009b38081dbe363c5e7165113fad546918a232/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:d2ee202e79d8ed691ceebae8e0486bd9a2cd4794cec4824e1c99b6f5009502f6", size = 20282, upload-time = "2025-09-27T18:36:12.573Z" }, - { url = "https://files.pythonhosted.org/packages/56/21/dca11354e756ebd03e036bd8ad58d6d7168c80ce1fe5e75218e4945cbab7/markupsafe-3.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:177b5253b2834fe3678cb4a5f0059808258584c559193998be2601324fdeafb1", size = 20745, upload-time = "2025-09-27T18:36:13.504Z" }, - { url = "https://files.pythonhosted.org/packages/87/99/faba9369a7ad6e4d10b6a5fbf71fa2a188fe4a593b15f0963b73859a1bbd/markupsafe-3.0.3-cp310-cp310-win32.whl", hash = "sha256:2a15a08b17dd94c53a1da0438822d70ebcd13f8c3a95abe3a9ef9f11a94830aa", size = 14571, upload-time = "2025-09-27T18:36:14.779Z" }, - { url = "https://files.pythonhosted.org/packages/d6/25/55dc3ab959917602c96985cb1253efaa4ff42f71194bddeb61eb7278b8be/markupsafe-3.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:c4ffb7ebf07cfe8931028e3e4c85f0357459a3f9f9490886198848f4fa002ec8", size = 15056, upload-time = "2025-09-27T18:36:16.125Z" }, - { url = "https://files.pythonhosted.org/packages/d0/9e/0a02226640c255d1da0b8d12e24ac2aa6734da68bff14c05dd53b94a0fc3/markupsafe-3.0.3-cp310-cp310-win_arm64.whl", hash = "sha256:e2103a929dfa2fcaf9bb4e7c091983a49c9ac3b19c9061b6d5427dd7d14d81a1", size = 13932, upload-time = "2025-09-27T18:36:17.311Z" }, { url = "https://files.pythonhosted.org/packages/08/db/fefacb2136439fc8dd20e797950e749aa1f4997ed584c62cfb8ef7c2be0e/markupsafe-3.0.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:1cc7ea17a6824959616c525620e387f6dd30fec8cb44f649e31712db02123dad", size = 11631, upload-time = "2025-09-27T18:36:18.185Z" }, { url = "https://files.pythonhosted.org/packages/e1/2e/5898933336b61975ce9dc04decbc0a7f2fee78c30353c5efba7f2d6ff27a/markupsafe-3.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4bd4cd07944443f5a265608cc6aab442e4f74dff8088b0dfc8238647b8f6ae9a", size = 12058, upload-time = "2025-09-27T18:36:19.444Z" }, { url = "https://files.pythonhosted.org/packages/1d/09/adf2df3699d87d1d8184038df46a9c80d78c0148492323f4693df54e17bb/markupsafe-3.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:6b5420a1d9450023228968e7e6a9ce57f65d148ab56d2313fcd589eee96a7a50", size = 24287, upload-time = "2025-09-27T18:36:20.768Z" }, @@ -1858,29 +1723,8 @@ wheels = [ name = 
"multidict" version = "6.7.1" source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, -] sdist = { url = "https://files.pythonhosted.org/packages/1a/c2/c2d94cbe6ac1753f3fc980da97b3d930efe1da3af3c9f5125354436c073d/multidict-6.7.1.tar.gz", hash = "sha256:ec6652a1bee61c53a3e5776b6049172c53b6aaba34f18c9ad04f82712bac623d", size = 102010, upload-time = "2026-01-26T02:46:45.979Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/84/0b/19348d4c98980c4851d2f943f8ebafdece2ae7ef737adcfa5994ce8e5f10/multidict-6.7.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c93c3db7ea657dd4637d57e74ab73de31bccefe144d3d4ce370052035bc85fb5", size = 77176, upload-time = "2026-01-26T02:42:59.784Z" }, - { url = "https://files.pythonhosted.org/packages/ef/04/9de3f8077852e3d438215c81e9b691244532d2e05b4270e89ce67b7d103c/multidict-6.7.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:974e72a2474600827abaeda71af0c53d9ebbc3c2eb7da37b37d7829ae31232d8", size = 44996, upload-time = "2026-01-26T02:43:01.674Z" }, - { url = "https://files.pythonhosted.org/packages/31/5c/08c7f7fe311f32e83f7621cd3f99d805f45519cd06fafb247628b861da7d/multidict-6.7.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:cdea2e7b2456cfb6694fb113066fd0ec7ea4d67e3a35e1f4cbeea0b448bf5872", size = 44631, upload-time = "2026-01-26T02:43:03.169Z" }, - { url = "https://files.pythonhosted.org/packages/b7/7f/0e3b1390ae772f27501199996b94b52ceeb64fe6f9120a32c6c3f6b781be/multidict-6.7.1-cp310-cp310-manylinux1_i686.manylinux_2_28_i686.manylinux_2_5_i686.whl", hash = "sha256:17207077e29342fdc2c9a82e4b306f1127bf1ea91f8b71e02d4798a70bb99991", size = 242561, upload-time = "2026-01-26T02:43:04.733Z" }, - { url = "https://files.pythonhosted.org/packages/dd/f4/8719f4f167586af317b69dd3e90f913416c91ca610cac79a45c53f590312/multidict-6.7.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d4f49cb5661344764e4c7c7973e92a47a59b8fc19b6523649ec9dc4960e58a03", size = 242223, upload-time = "2026-01-26T02:43:06.695Z" }, - { url = "https://files.pythonhosted.org/packages/47/ab/7c36164cce64a6ad19c6d9a85377b7178ecf3b89f8fd589c73381a5eedfd/multidict-6.7.1-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:a9fc4caa29e2e6ae408d1c450ac8bf19892c5fca83ee634ecd88a53332c59981", size = 222322, upload-time = "2026-01-26T02:43:08.472Z" }, - { url = "https://files.pythonhosted.org/packages/f5/79/a25add6fb38035b5337bc5734f296d9afc99163403bbcf56d4170f97eb62/multidict-6.7.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:c5f0c21549ab432b57dcc82130f388d84ad8179824cc3f223d5e7cfbfd4143f6", size = 254005, upload-time = "2026-01-26T02:43:10.127Z" }, - { url = "https://files.pythonhosted.org/packages/4a/7b/64a87cf98e12f756fc8bd444b001232ffff2be37288f018ad0d3f0aae931/multidict-6.7.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:7dfb78d966b2c906ae1d28ccf6e6712a3cd04407ee5088cd276fe8cb42186190", size = 251173, upload-time = "2026-01-26T02:43:11.731Z" }, - { url = "https://files.pythonhosted.org/packages/4b/ac/b605473de2bb404e742f2cc3583d12aedb2352a70e49ae8fce455b50c5aa/multidict-6.7.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9b0d9b91d1aa44db9c1f1ecd0d9d2ae610b2f4f856448664e01a3b35899f3f92", size = 243273, upload-time = "2026-01-26T02:43:13.063Z" }, - 
{ url = "https://files.pythonhosted.org/packages/03/65/11492d6a0e259783720f3bc1d9ea55579a76f1407e31ed44045c99542004/multidict-6.7.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:dd96c01a9dcd4889dcfcf9eb5544ca0c77603f239e3ffab0524ec17aea9a93ee", size = 238956, upload-time = "2026-01-26T02:43:14.843Z" }, - { url = "https://files.pythonhosted.org/packages/5f/a7/7ee591302af64e7c196fb63fe856c788993c1372df765102bd0448e7e165/multidict-6.7.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:067343c68cd6612d375710f895337b3a98a033c94f14b9a99eff902f205424e2", size = 233477, upload-time = "2026-01-26T02:43:16.025Z" }, - { url = "https://files.pythonhosted.org/packages/9c/99/c109962d58756c35fd9992fed7f2355303846ea2ff054bb5f5e9d6b888de/multidict-6.7.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5884a04f4ff56c6120f6ccf703bdeb8b5079d808ba604d4d53aec0d55dc33568", size = 243615, upload-time = "2026-01-26T02:43:17.84Z" }, - { url = "https://files.pythonhosted.org/packages/d5/5f/1973e7c771c86e93dcfe1c9cc55a5481b610f6614acfc28c0d326fe6bfad/multidict-6.7.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:8affcf1c98b82bc901702eb73b6947a1bfa170823c153fe8a47b5f5f02e48e40", size = 249930, upload-time = "2026-01-26T02:43:19.06Z" }, - { url = "https://files.pythonhosted.org/packages/5d/a5/f170fc2268c3243853580203378cd522446b2df632061e0a5409817854c7/multidict-6.7.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:0d17522c37d03e85c8098ec8431636309b2682cf12e58f4dbc76121fb50e4962", size = 243807, upload-time = "2026-01-26T02:43:20.286Z" }, - { url = "https://files.pythonhosted.org/packages/de/01/73856fab6d125e5bc652c3986b90e8699a95e84b48d72f39ade6c0e74a8c/multidict-6.7.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:24c0cf81544ca5e17cfcb6e482e7a82cd475925242b308b890c9452a074d4505", size = 239103, upload-time = "2026-01-26T02:43:21.508Z" }, - { url = "https://files.pythonhosted.org/packages/e7/46/f1220bd9944d8aa40d8ccff100eeeee19b505b857b6f603d6078cb5315b0/multidict-6.7.1-cp310-cp310-win32.whl", hash = "sha256:d82dd730a95e6643802f4454b8fdecdf08667881a9c5670db85bc5a56693f122", size = 41416, upload-time = "2026-01-26T02:43:22.703Z" }, - { url = "https://files.pythonhosted.org/packages/68/00/9b38e272a770303692fc406c36e1a4c740f401522d5787691eb38a8925a8/multidict-6.7.1-cp310-cp310-win_amd64.whl", hash = "sha256:cf37cbe5ced48d417ba045aca1b21bafca67489452debcde94778a576666a1df", size = 46022, upload-time = "2026-01-26T02:43:23.77Z" }, - { url = "https://files.pythonhosted.org/packages/64/65/d8d42490c02ee07b6bbe00f7190d70bb4738b3cce7629aaf9f213ef730dd/multidict-6.7.1-cp310-cp310-win_arm64.whl", hash = "sha256:59bc83d3f66b41dac1e7460aac1d196edc70c9ba3094965c467715a70ecb46db", size = 43238, upload-time = "2026-01-26T02:43:24.882Z" }, { url = "https://files.pythonhosted.org/packages/ce/f1/a90635c4f88fb913fbf4ce660b83b7445b7a02615bda034b2f8eb38fd597/multidict-6.7.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:7ff981b266af91d7b4b3793ca3382e53229088d193a85dfad6f5f4c27fc73e5d", size = 76626, upload-time = "2026-01-26T02:43:26.485Z" }, { url = "https://files.pythonhosted.org/packages/a6/9b/267e64eaf6fc637a15b35f5de31a566634a2740f97d8d094a69d34f524a4/multidict-6.7.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:844c5bca0b5444adb44a623fb0a1310c2f4cd41f402126bb269cd44c9b3f3e1e", size = 44706, upload-time = "2026-01-26T02:43:27.607Z" }, { url = 
"https://files.pythonhosted.org/packages/dd/a4/d45caf2b97b035c57267791ecfaafbd59c68212004b3842830954bb4b02e/multidict-6.7.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f2a0a924d4c2e9afcd7ec64f9de35fcd96915149b2216e1cb2c10a56df483855", size = 44356, upload-time = "2026-01-26T02:43:28.661Z" }, @@ -2026,80 +1870,10 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ab/c4/7532325f968ecfc078e8a028e69a52e4c3f95fb800906bf6931ac1e89e2b/nodejs_wheel_binaries-24.13.1-py2.py3-none-win_arm64.whl", hash = "sha256:caec398cb9e94c560bacdcba56b3828df22a355749eb291f47431af88cbf26dc", size = 38881194, upload-time = "2026-02-12T17:31:00.214Z" }, ] -[[package]] -name = "numpy" -version = "2.2.6" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11'", -] -sdist = { url = "https://files.pythonhosted.org/packages/76/21/7d2a95e4bba9dc13d043ee156a356c0a8f0c6309dff6b21b4d71a073b8a8/numpy-2.2.6.tar.gz", hash = "sha256:e29554e2bef54a90aa5cc07da6ce955accb83f21ab5de01a62c8478897b264fd", size = 20276440, upload-time = "2025-05-17T22:38:04.611Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9a/3e/ed6db5be21ce87955c0cbd3009f2803f59fa08df21b5df06862e2d8e2bdd/numpy-2.2.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b412caa66f72040e6d268491a59f2c43bf03eb6c96dd8f0307829feb7fa2b6fb", size = 21165245, upload-time = "2025-05-17T21:27:58.555Z" }, - { url = "https://files.pythonhosted.org/packages/22/c2/4b9221495b2a132cc9d2eb862e21d42a009f5a60e45fc44b00118c174bff/numpy-2.2.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8e41fd67c52b86603a91c1a505ebaef50b3314de0213461c7a6e99c9a3beff90", size = 14360048, upload-time = "2025-05-17T21:28:21.406Z" }, - { url = "https://files.pythonhosted.org/packages/fd/77/dc2fcfc66943c6410e2bf598062f5959372735ffda175b39906d54f02349/numpy-2.2.6-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:37e990a01ae6ec7fe7fa1c26c55ecb672dd98b19c3d0e1d1f326fa13cb38d163", size = 5340542, upload-time = "2025-05-17T21:28:30.931Z" }, - { url = "https://files.pythonhosted.org/packages/7a/4f/1cb5fdc353a5f5cc7feb692db9b8ec2c3d6405453f982435efc52561df58/numpy-2.2.6-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:5a6429d4be8ca66d889b7cf70f536a397dc45ba6faeb5f8c5427935d9592e9cf", size = 6878301, upload-time = "2025-05-17T21:28:41.613Z" }, - { url = "https://files.pythonhosted.org/packages/eb/17/96a3acd228cec142fcb8723bd3cc39c2a474f7dcf0a5d16731980bcafa95/numpy-2.2.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:efd28d4e9cd7d7a8d39074a4d44c63eda73401580c5c76acda2ce969e0a38e83", size = 14297320, upload-time = "2025-05-17T21:29:02.78Z" }, - { url = "https://files.pythonhosted.org/packages/b4/63/3de6a34ad7ad6646ac7d2f55ebc6ad439dbbf9c4370017c50cf403fb19b5/numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc7b73d02efb0e18c000e9ad8b83480dfcd5dfd11065997ed4c6747470ae8915", size = 16801050, upload-time = "2025-05-17T21:29:27.675Z" }, - { url = "https://files.pythonhosted.org/packages/07/b6/89d837eddef52b3d0cec5c6ba0456c1bf1b9ef6a6672fc2b7873c3ec4e2e/numpy-2.2.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:74d4531beb257d2c3f4b261bfb0fc09e0f9ebb8842d82a7b4209415896adc680", size = 15807034, upload-time = "2025-05-17T21:29:51.102Z" }, - { url = "https://files.pythonhosted.org/packages/01/c8/dc6ae86e3c61cfec1f178e5c9f7858584049b6093f843bca541f94120920/numpy-2.2.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:8fc377d995680230e83241d8a96def29f204b5782f371c532579b4f20607a289", size = 18614185, upload-time = "2025-05-17T21:30:18.703Z" }, - { url = "https://files.pythonhosted.org/packages/5b/c5/0064b1b7e7c89137b471ccec1fd2282fceaae0ab3a9550f2568782d80357/numpy-2.2.6-cp310-cp310-win32.whl", hash = "sha256:b093dd74e50a8cba3e873868d9e93a85b78e0daf2e98c6797566ad8044e8363d", size = 6527149, upload-time = "2025-05-17T21:30:29.788Z" }, - { url = "https://files.pythonhosted.org/packages/a3/dd/4b822569d6b96c39d1215dbae0582fd99954dcbcf0c1a13c61783feaca3f/numpy-2.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:f0fd6321b839904e15c46e0d257fdd101dd7f530fe03fd6359c1ea63738703f3", size = 12904620, upload-time = "2025-05-17T21:30:48.994Z" }, - { url = "https://files.pythonhosted.org/packages/da/a8/4f83e2aa666a9fbf56d6118faaaf5f1974d456b1823fda0a176eff722839/numpy-2.2.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:f9f1adb22318e121c5c69a09142811a201ef17ab257a1e66ca3025065b7f53ae", size = 21176963, upload-time = "2025-05-17T21:31:19.36Z" }, - { url = "https://files.pythonhosted.org/packages/b3/2b/64e1affc7972decb74c9e29e5649fac940514910960ba25cd9af4488b66c/numpy-2.2.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c820a93b0255bc360f53eca31a0e676fd1101f673dda8da93454a12e23fc5f7a", size = 14406743, upload-time = "2025-05-17T21:31:41.087Z" }, - { url = "https://files.pythonhosted.org/packages/4a/9f/0121e375000b5e50ffdd8b25bf78d8e1a5aa4cca3f185d41265198c7b834/numpy-2.2.6-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3d70692235e759f260c3d837193090014aebdf026dfd167834bcba43e30c2a42", size = 5352616, upload-time = "2025-05-17T21:31:50.072Z" }, - { url = "https://files.pythonhosted.org/packages/31/0d/b48c405c91693635fbe2dcd7bc84a33a602add5f63286e024d3b6741411c/numpy-2.2.6-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:481b49095335f8eed42e39e8041327c05b0f6f4780488f61286ed3c01368d491", size = 6889579, upload-time = "2025-05-17T21:32:01.712Z" }, - { url = "https://files.pythonhosted.org/packages/52/b8/7f0554d49b565d0171eab6e99001846882000883998e7b7d9f0d98b1f934/numpy-2.2.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b64d8d4d17135e00c8e346e0a738deb17e754230d7e0810ac5012750bbd85a5a", size = 14312005, upload-time = "2025-05-17T21:32:23.332Z" }, - { url = "https://files.pythonhosted.org/packages/b3/dd/2238b898e51bd6d389b7389ffb20d7f4c10066d80351187ec8e303a5a475/numpy-2.2.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ba10f8411898fc418a521833e014a77d3ca01c15b0c6cdcce6a0d2897e6dbbdf", size = 16821570, upload-time = "2025-05-17T21:32:47.991Z" }, - { url = "https://files.pythonhosted.org/packages/83/6c/44d0325722cf644f191042bf47eedad61c1e6df2432ed65cbe28509d404e/numpy-2.2.6-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:bd48227a919f1bafbdda0583705e547892342c26fb127219d60a5c36882609d1", size = 15818548, upload-time = "2025-05-17T21:33:11.728Z" }, - { url = "https://files.pythonhosted.org/packages/ae/9d/81e8216030ce66be25279098789b665d49ff19eef08bfa8cb96d4957f422/numpy-2.2.6-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:9551a499bf125c1d4f9e250377c1ee2eddd02e01eac6644c080162c0c51778ab", size = 18620521, upload-time = "2025-05-17T21:33:39.139Z" }, - { url = "https://files.pythonhosted.org/packages/6a/fd/e19617b9530b031db51b0926eed5345ce8ddc669bb3bc0044b23e275ebe8/numpy-2.2.6-cp311-cp311-win32.whl", hash = "sha256:0678000bb9ac1475cd454c6b8c799206af8107e310843532b04d49649c717a47", size = 6525866, upload-time = 
"2025-05-17T21:33:50.273Z" }, - { url = "https://files.pythonhosted.org/packages/31/0a/f354fb7176b81747d870f7991dc763e157a934c717b67b58456bc63da3df/numpy-2.2.6-cp311-cp311-win_amd64.whl", hash = "sha256:e8213002e427c69c45a52bbd94163084025f533a55a59d6f9c5b820774ef3303", size = 12907455, upload-time = "2025-05-17T21:34:09.135Z" }, - { url = "https://files.pythonhosted.org/packages/82/5d/c00588b6cf18e1da539b45d3598d3557084990dcc4331960c15ee776ee41/numpy-2.2.6-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:41c5a21f4a04fa86436124d388f6ed60a9343a6f767fced1a8a71c3fbca038ff", size = 20875348, upload-time = "2025-05-17T21:34:39.648Z" }, - { url = "https://files.pythonhosted.org/packages/66/ee/560deadcdde6c2f90200450d5938f63a34b37e27ebff162810f716f6a230/numpy-2.2.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:de749064336d37e340f640b05f24e9e3dd678c57318c7289d222a8a2f543e90c", size = 14119362, upload-time = "2025-05-17T21:35:01.241Z" }, - { url = "https://files.pythonhosted.org/packages/3c/65/4baa99f1c53b30adf0acd9a5519078871ddde8d2339dc5a7fde80d9d87da/numpy-2.2.6-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:894b3a42502226a1cac872f840030665f33326fc3dac8e57c607905773cdcde3", size = 5084103, upload-time = "2025-05-17T21:35:10.622Z" }, - { url = "https://files.pythonhosted.org/packages/cc/89/e5a34c071a0570cc40c9a54eb472d113eea6d002e9ae12bb3a8407fb912e/numpy-2.2.6-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:71594f7c51a18e728451bb50cc60a3ce4e6538822731b2933209a1f3614e9282", size = 6625382, upload-time = "2025-05-17T21:35:21.414Z" }, - { url = "https://files.pythonhosted.org/packages/f8/35/8c80729f1ff76b3921d5c9487c7ac3de9b2a103b1cd05e905b3090513510/numpy-2.2.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f2618db89be1b4e05f7a1a847a9c1c0abd63e63a1607d892dd54668dd92faf87", size = 14018462, upload-time = "2025-05-17T21:35:42.174Z" }, - { url = "https://files.pythonhosted.org/packages/8c/3d/1e1db36cfd41f895d266b103df00ca5b3cbe965184df824dec5c08c6b803/numpy-2.2.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fd83c01228a688733f1ded5201c678f0c53ecc1006ffbc404db9f7a899ac6249", size = 16527618, upload-time = "2025-05-17T21:36:06.711Z" }, - { url = "https://files.pythonhosted.org/packages/61/c6/03ed30992602c85aa3cd95b9070a514f8b3c33e31124694438d88809ae36/numpy-2.2.6-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:37c0ca431f82cd5fa716eca9506aefcabc247fb27ba69c5062a6d3ade8cf8f49", size = 15505511, upload-time = "2025-05-17T21:36:29.965Z" }, - { url = "https://files.pythonhosted.org/packages/b7/25/5761d832a81df431e260719ec45de696414266613c9ee268394dd5ad8236/numpy-2.2.6-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:fe27749d33bb772c80dcd84ae7e8df2adc920ae8297400dabec45f0dedb3f6de", size = 18313783, upload-time = "2025-05-17T21:36:56.883Z" }, - { url = "https://files.pythonhosted.org/packages/57/0a/72d5a3527c5ebffcd47bde9162c39fae1f90138c961e5296491ce778e682/numpy-2.2.6-cp312-cp312-win32.whl", hash = "sha256:4eeaae00d789f66c7a25ac5f34b71a7035bb474e679f410e5e1a94deb24cf2d4", size = 6246506, upload-time = "2025-05-17T21:37:07.368Z" }, - { url = "https://files.pythonhosted.org/packages/36/fa/8c9210162ca1b88529ab76b41ba02d433fd54fecaf6feb70ef9f124683f1/numpy-2.2.6-cp312-cp312-win_amd64.whl", hash = "sha256:c1f9540be57940698ed329904db803cf7a402f3fc200bfe599334c9bd84a40b2", size = 12614190, upload-time = "2025-05-17T21:37:26.213Z" }, - { url = 
"https://files.pythonhosted.org/packages/f9/5c/6657823f4f594f72b5471f1db1ab12e26e890bb2e41897522d134d2a3e81/numpy-2.2.6-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0811bb762109d9708cca4d0b13c4f67146e3c3b7cf8d34018c722adb2d957c84", size = 20867828, upload-time = "2025-05-17T21:37:56.699Z" }, - { url = "https://files.pythonhosted.org/packages/dc/9e/14520dc3dadf3c803473bd07e9b2bd1b69bc583cb2497b47000fed2fa92f/numpy-2.2.6-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:287cc3162b6f01463ccd86be154f284d0893d2b3ed7292439ea97eafa8170e0b", size = 14143006, upload-time = "2025-05-17T21:38:18.291Z" }, - { url = "https://files.pythonhosted.org/packages/4f/06/7e96c57d90bebdce9918412087fc22ca9851cceaf5567a45c1f404480e9e/numpy-2.2.6-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:f1372f041402e37e5e633e586f62aa53de2eac8d98cbfb822806ce4bbefcb74d", size = 5076765, upload-time = "2025-05-17T21:38:27.319Z" }, - { url = "https://files.pythonhosted.org/packages/73/ed/63d920c23b4289fdac96ddbdd6132e9427790977d5457cd132f18e76eae0/numpy-2.2.6-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:55a4d33fa519660d69614a9fad433be87e5252f4b03850642f88993f7b2ca566", size = 6617736, upload-time = "2025-05-17T21:38:38.141Z" }, - { url = "https://files.pythonhosted.org/packages/85/c5/e19c8f99d83fd377ec8c7e0cf627a8049746da54afc24ef0a0cb73d5dfb5/numpy-2.2.6-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f92729c95468a2f4f15e9bb94c432a9229d0d50de67304399627a943201baa2f", size = 14010719, upload-time = "2025-05-17T21:38:58.433Z" }, - { url = "https://files.pythonhosted.org/packages/19/49/4df9123aafa7b539317bf6d342cb6d227e49f7a35b99c287a6109b13dd93/numpy-2.2.6-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1bc23a79bfabc5d056d106f9befb8d50c31ced2fbc70eedb8155aec74a45798f", size = 16526072, upload-time = "2025-05-17T21:39:22.638Z" }, - { url = "https://files.pythonhosted.org/packages/b2/6c/04b5f47f4f32f7c2b0e7260442a8cbcf8168b0e1a41ff1495da42f42a14f/numpy-2.2.6-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e3143e4451880bed956e706a3220b4e5cf6172ef05fcc397f6f36a550b1dd868", size = 15503213, upload-time = "2025-05-17T21:39:45.865Z" }, - { url = "https://files.pythonhosted.org/packages/17/0a/5cd92e352c1307640d5b6fec1b2ffb06cd0dabe7d7b8227f97933d378422/numpy-2.2.6-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:b4f13750ce79751586ae2eb824ba7e1e8dba64784086c98cdbbcc6a42112ce0d", size = 18316632, upload-time = "2025-05-17T21:40:13.331Z" }, - { url = "https://files.pythonhosted.org/packages/f0/3b/5cba2b1d88760ef86596ad0f3d484b1cbff7c115ae2429678465057c5155/numpy-2.2.6-cp313-cp313-win32.whl", hash = "sha256:5beb72339d9d4fa36522fc63802f469b13cdbe4fdab4a288f0c441b74272ebfd", size = 6244532, upload-time = "2025-05-17T21:43:46.099Z" }, - { url = "https://files.pythonhosted.org/packages/cb/3b/d58c12eafcb298d4e6d0d40216866ab15f59e55d148a5658bb3132311fcf/numpy-2.2.6-cp313-cp313-win_amd64.whl", hash = "sha256:b0544343a702fa80c95ad5d3d608ea3599dd54d4632df855e4c8d24eb6ecfa1c", size = 12610885, upload-time = "2025-05-17T21:44:05.145Z" }, - { url = "https://files.pythonhosted.org/packages/6b/9e/4bf918b818e516322db999ac25d00c75788ddfd2d2ade4fa66f1f38097e1/numpy-2.2.6-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:0bca768cd85ae743b2affdc762d617eddf3bcf8724435498a1e80132d04879e6", size = 20963467, upload-time = "2025-05-17T21:40:44Z" }, - { url = 
"https://files.pythonhosted.org/packages/61/66/d2de6b291507517ff2e438e13ff7b1e2cdbdb7cb40b3ed475377aece69f9/numpy-2.2.6-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:fc0c5673685c508a142ca65209b4e79ed6740a4ed6b2267dbba90f34b0b3cfda", size = 14225144, upload-time = "2025-05-17T21:41:05.695Z" }, - { url = "https://files.pythonhosted.org/packages/e4/25/480387655407ead912e28ba3a820bc69af9adf13bcbe40b299d454ec011f/numpy-2.2.6-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:5bd4fc3ac8926b3819797a7c0e2631eb889b4118a9898c84f585a54d475b7e40", size = 5200217, upload-time = "2025-05-17T21:41:15.903Z" }, - { url = "https://files.pythonhosted.org/packages/aa/4a/6e313b5108f53dcbf3aca0c0f3e9c92f4c10ce57a0a721851f9785872895/numpy-2.2.6-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:fee4236c876c4e8369388054d02d0e9bb84821feb1a64dd59e137e6511a551f8", size = 6712014, upload-time = "2025-05-17T21:41:27.321Z" }, - { url = "https://files.pythonhosted.org/packages/b7/30/172c2d5c4be71fdf476e9de553443cf8e25feddbe185e0bd88b096915bcc/numpy-2.2.6-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e1dda9c7e08dc141e0247a5b8f49cf05984955246a327d4c48bda16821947b2f", size = 14077935, upload-time = "2025-05-17T21:41:49.738Z" }, - { url = "https://files.pythonhosted.org/packages/12/fb/9e743f8d4e4d3c710902cf87af3512082ae3d43b945d5d16563f26ec251d/numpy-2.2.6-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f447e6acb680fd307f40d3da4852208af94afdfab89cf850986c3ca00562f4fa", size = 16600122, upload-time = "2025-05-17T21:42:14.046Z" }, - { url = "https://files.pythonhosted.org/packages/12/75/ee20da0e58d3a66f204f38916757e01e33a9737d0b22373b3eb5a27358f9/numpy-2.2.6-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:389d771b1623ec92636b0786bc4ae56abafad4a4c513d36a55dce14bd9ce8571", size = 15586143, upload-time = "2025-05-17T21:42:37.464Z" }, - { url = "https://files.pythonhosted.org/packages/76/95/bef5b37f29fc5e739947e9ce5179ad402875633308504a52d188302319c8/numpy-2.2.6-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:8e9ace4a37db23421249ed236fdcdd457d671e25146786dfc96835cd951aa7c1", size = 18385260, upload-time = "2025-05-17T21:43:05.189Z" }, - { url = "https://files.pythonhosted.org/packages/09/04/f2f83279d287407cf36a7a8053a5abe7be3622a4363337338f2585e4afda/numpy-2.2.6-cp313-cp313t-win32.whl", hash = "sha256:038613e9fb8c72b0a41f025a7e4c3f0b7a1b5d768ece4796b674c8f3fe13efff", size = 6377225, upload-time = "2025-05-17T21:43:16.254Z" }, - { url = "https://files.pythonhosted.org/packages/67/0e/35082d13c09c02c011cf21570543d202ad929d961c02a147493cb0c2bdf5/numpy-2.2.6-cp313-cp313t-win_amd64.whl", hash = "sha256:6031dd6dfecc0cf9f668681a37648373bddd6421fff6c66ec1624eed0180ee06", size = 12771374, upload-time = "2025-05-17T21:43:35.479Z" }, - { url = "https://files.pythonhosted.org/packages/9e/3b/d94a75f4dbf1ef5d321523ecac21ef23a3cd2ac8b78ae2aac40873590229/numpy-2.2.6-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0b605b275d7bd0c640cad4e5d30fa701a8d59302e127e5f79138ad62762c3e3d", size = 21040391, upload-time = "2025-05-17T21:44:35.948Z" }, - { url = "https://files.pythonhosted.org/packages/17/f4/09b2fa1b58f0fb4f7c7963a1649c64c4d315752240377ed74d9cd878f7b5/numpy-2.2.6-pp310-pypy310_pp73-macosx_14_0_x86_64.whl", hash = "sha256:7befc596a7dc9da8a337f79802ee8adb30a552a94f792b9c9d18c840055907db", size = 6786754, upload-time = "2025-05-17T21:44:47.446Z" }, - { url = 
"https://files.pythonhosted.org/packages/af/30/feba75f143bdc868a1cc3f44ccfa6c4b9ec522b36458e738cd00f67b573f/numpy-2.2.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ce47521a4754c8f4593837384bd3424880629f718d87c5d44f8ed763edd63543", size = 16643476, upload-time = "2025-05-17T21:45:11.871Z" }, - { url = "https://files.pythonhosted.org/packages/37/48/ac2a9584402fb6c0cd5b5d1a91dcf176b15760130dd386bbafdbfe3640bf/numpy-2.2.6-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d042d24c90c41b54fd506da306759e06e568864df8ec17ccc17e9e884634fd00", size = 12812666, upload-time = "2025-05-17T21:45:31.426Z" }, -] - [[package]] name = "numpy" version = "2.4.2" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14'", - "python_full_version == '3.13.*'", - "python_full_version >= '3.11' and python_full_version < '3.13'", -] sdist = { url = "https://files.pythonhosted.org/packages/57/fd/0005efbd0af48e55eb3c7208af93f2862d4b1a56cd78e84309a2d959208d/numpy-2.4.2.tar.gz", hash = "sha256:659a6107e31a83c4e33f763942275fd278b21d095094044eb35569e86a21ddae", size = 20723651, upload-time = "2026-01-31T23:13:10.135Z" } wheels = [ { url = "https://files.pythonhosted.org/packages/d3/44/71852273146957899753e69986246d6a176061ea183407e95418c2aa4d9a/numpy-2.4.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e7e88598032542bd49af7c4747541422884219056c268823ef6e5e89851c8825", size = 16955478, upload-time = "2026-01-31T23:10:25.623Z" }, @@ -2282,19 +2056,6 @@ version = "3.11.7" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/53/45/b268004f745ede84e5798b48ee12b05129d19235d0e15267aa57dcdb400b/orjson-3.11.7.tar.gz", hash = "sha256:9b1a67243945819ce55d24a30b59d6a168e86220452d2c96f4d1f093e71c0c49", size = 6144992, upload-time = "2026-02-02T15:38:49.29Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/de/1a/a373746fa6d0e116dd9e54371a7b54622c44d12296d5d0f3ad5e3ff33490/orjson-3.11.7-cp310-cp310-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:a02c833f38f36546ba65a452127633afce4cf0dd7296b753d3bb54e55e5c0174", size = 229140, upload-time = "2026-02-02T15:37:06.082Z" }, - { url = "https://files.pythonhosted.org/packages/52/a2/fa129e749d500f9b183e8a3446a193818a25f60261e9ce143ad61e975208/orjson-3.11.7-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b63c6e6738d7c3470ad01601e23376aa511e50e1f3931395b9f9c722406d1a67", size = 128670, upload-time = "2026-02-02T15:37:08.002Z" }, - { url = "https://files.pythonhosted.org/packages/08/93/1e82011cd1e0bd051ef9d35bed1aa7fb4ea1f0a055dc2c841b46b43a9ebd/orjson-3.11.7-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:043d3006b7d32c7e233b8cfb1f01c651013ea079e08dcef7189a29abd8befe11", size = 123832, upload-time = "2026-02-02T15:37:09.191Z" }, - { url = "https://files.pythonhosted.org/packages/fe/d8/a26b431ef962c7d55736674dddade876822f3e33223c1f47a36879350d04/orjson-3.11.7-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:57036b27ac8a25d81112eb0cc9835cd4833c5b16e1467816adc0015f59e870dc", size = 129171, upload-time = "2026-02-02T15:37:11.112Z" }, - { url = "https://files.pythonhosted.org/packages/a7/19/f47819b84a580f490da260c3ee9ade214cf4cf78ac9ce8c1c758f80fdfc9/orjson-3.11.7-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:733ae23ada68b804b222c44affed76b39e30806d38660bf1eb200520d259cc16", size = 141967, 
upload-time = "2026-02-02T15:37:12.282Z" }, - { url = "https://files.pythonhosted.org/packages/5b/cd/37ece39a0777ba077fdcdbe4cccae3be8ed00290c14bf8afdc548befc260/orjson-3.11.7-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5fdfad2093bdd08245f2e204d977facd5f871c88c4a71230d5bcbd0e43bf6222", size = 130991, upload-time = "2026-02-02T15:37:13.465Z" }, - { url = "https://files.pythonhosted.org/packages/8f/ed/f2b5d66aa9b6b5c02ff5f120efc7b38c7c4962b21e6be0f00fd99a5c348e/orjson-3.11.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cededd6738e1c153530793998e31c05086582b08315db48ab66649768f326baa", size = 133674, upload-time = "2026-02-02T15:37:14.694Z" }, - { url = "https://files.pythonhosted.org/packages/c4/6e/baa83e68d1aa09fa8c3e5b2c087d01d0a0bd45256de719ed7bc22c07052d/orjson-3.11.7-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:14f440c7268c8f8633d1b3d443a434bd70cb15686117ea6beff8fdc8f5917a1e", size = 138722, upload-time = "2026-02-02T15:37:16.501Z" }, - { url = "https://files.pythonhosted.org/packages/0c/47/7f8ef4963b772cd56999b535e553f7eb5cd27e9dd6c049baee6f18bfa05d/orjson-3.11.7-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:3a2479753bbb95b0ebcf7969f562cdb9668e6d12416a35b0dda79febf89cdea2", size = 409056, upload-time = "2026-02-02T15:37:17.895Z" }, - { url = "https://files.pythonhosted.org/packages/38/eb/2df104dd2244b3618f25325a656f85cc3277f74bbd91224752410a78f3c7/orjson-3.11.7-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:71924496986275a737f38e3f22b4e0878882b3f7a310d2ff4dc96e812789120c", size = 144196, upload-time = "2026-02-02T15:37:19.349Z" }, - { url = "https://files.pythonhosted.org/packages/b6/2a/ee41de0aa3a6686598661eae2b4ebdff1340c65bfb17fcff8b87138aab21/orjson-3.11.7-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:b4a9eefdc70bf8bf9857f0290f973dec534ac84c35cd6a7f4083be43e7170a8f", size = 134979, upload-time = "2026-02-02T15:37:20.906Z" }, - { url = "https://files.pythonhosted.org/packages/4c/fa/92fc5d3d402b87a8b28277a9ed35386218a6a5287c7fe5ee9b9f02c53fb2/orjson-3.11.7-cp310-cp310-win32.whl", hash = "sha256:ae9e0b37a834cef7ce8f99de6498f8fad4a2c0bf6bfc3d02abd8ed56aa15b2de", size = 127968, upload-time = "2026-02-02T15:37:23.178Z" }, - { url = "https://files.pythonhosted.org/packages/07/29/a576bf36d73d60df06904d3844a9df08e25d59eba64363aaf8ec2f9bff41/orjson-3.11.7-cp310-cp310-win_amd64.whl", hash = "sha256:d772afdb22555f0c58cfc741bdae44180122b3616faa1ecadb595cd526e4c993", size = 125128, upload-time = "2026-02-02T15:37:24.329Z" }, { url = "https://files.pythonhosted.org/packages/37/02/da6cb01fc6087048d7f61522c327edf4250f1683a58a839fdcc435746dd5/orjson-3.11.7-cp311-cp311-macosx_10_15_x86_64.macosx_11_0_arm64.macosx_10_15_universal2.whl", hash = "sha256:9487abc2c2086e7c8eb9a211d2ce8855bae0e92586279d0d27b341d5ad76c85c", size = 228664, upload-time = "2026-02-02T15:37:25.542Z" }, { url = "https://files.pythonhosted.org/packages/c1/c2/5885e7a5881dba9a9af51bc564e8967225a642b3e03d089289a35054e749/orjson-3.11.7-cp311-cp311-macosx_15_0_arm64.whl", hash = "sha256:79cacb0b52f6004caf92405a7e1f11e6e2de8bdf9019e4f76b44ba045125cd6b", size = 125344, upload-time = "2026-02-02T15:37:26.92Z" }, { url = "https://files.pythonhosted.org/packages/a4/1d/4e7688de0a92d1caf600dfd5fb70b4c5bfff51dfa61ac555072ef2d0d32a/orjson-3.11.7-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c2e85fe4698b6a56d5e2ebf7ae87544d668eb6bde1ad1226c13f44663f20ec9e", size = 128404, upload-time = "2026-02-02T15:37:28.108Z" }, @@ 
-2407,8 +2168,7 @@ name = "pgvector" version = "0.4.2" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/25/6c/6d8b4b03b958c02fa8687ec6063c49d952a189f8c91ebbe51e877dfab8f7/pgvector-0.4.2.tar.gz", hash = "sha256:322cac0c1dc5d41c9ecf782bd9991b7966685dee3a00bc873631391ed949513a", size = 31354, upload-time = "2025-12-05T01:07:17.87Z" } wheels = [ @@ -2421,17 +2181,6 @@ version = "12.1.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/1f/42/5c74462b4fd957fcd7b13b04fb3205ff8349236ea74c7c375766d6c82288/pillow-12.1.1.tar.gz", hash = "sha256:9ad8fa5937ab05218e2b6a4cff30295ad35afd2f83ac592e68c0d871bb0fdbc4", size = 46980264, upload-time = "2026-02-11T04:23:07.146Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1d/30/5bd3d794762481f8c8ae9c80e7b76ecea73b916959eb587521358ef0b2f9/pillow-12.1.1-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:1f1625b72740fdda5d77b4def688eb8fd6490975d06b909fd19f13f391e077e0", size = 5304099, upload-time = "2026-02-11T04:20:06.13Z" }, - { url = "https://files.pythonhosted.org/packages/bd/c1/aab9e8f3eeb4490180e357955e15c2ef74b31f64790ff356c06fb6cf6d84/pillow-12.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:178aa072084bd88ec759052feca8e56cbb14a60b39322b99a049e58090479713", size = 4657880, upload-time = "2026-02-11T04:20:09.291Z" }, - { url = "https://files.pythonhosted.org/packages/f1/0a/9879e30d56815ad529d3985aeff5af4964202425c27261a6ada10f7cbf53/pillow-12.1.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b66e95d05ba806247aaa1561f080abc7975daf715c30780ff92a20e4ec546e1b", size = 6222587, upload-time = "2026-02-11T04:20:10.82Z" }, - { url = "https://files.pythonhosted.org/packages/5a/5f/a1b72ff7139e4f89014e8d451442c74a774d5c43cd938fb0a9f878576b37/pillow-12.1.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89c7e895002bbe49cdc5426150377cbbc04767d7547ed145473f496dfa40408b", size = 8027678, upload-time = "2026-02-11T04:20:12.455Z" }, - { url = "https://files.pythonhosted.org/packages/e2/c2/c7cb187dac79a3d22c3ebeae727abee01e077c8c7d930791dc592f335153/pillow-12.1.1-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:3a5cbdcddad0af3da87cb16b60d23648bc3b51967eb07223e9fed77a82b457c4", size = 6335777, upload-time = "2026-02-11T04:20:14.441Z" }, - { url = "https://files.pythonhosted.org/packages/0c/7b/f9b09a7804ec7336effb96c26d37c29d27225783dc1501b7d62dcef6ae25/pillow-12.1.1-cp310-cp310-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9f51079765661884a486727f0729d29054242f74b46186026582b4e4769918e4", size = 7027140, upload-time = "2026-02-11T04:20:16.387Z" }, - { url = "https://files.pythonhosted.org/packages/98/b2/2fa3c391550bd421b10849d1a2144c44abcd966daadd2f7c12e19ea988c4/pillow-12.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:99c1506ea77c11531d75e3a412832a13a71c7ebc8192ab9e4b2e355555920e3e", size = 6449855, upload-time = "2026-02-11T04:20:18.554Z" }, - { url = "https://files.pythonhosted.org/packages/96/ff/9caf4b5b950c669263c39e96c78c0d74a342c71c4f43fd031bb5cb7ceac9/pillow-12.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", 
hash = "sha256:36341d06738a9f66c8287cf8b876d24b18db9bd8740fa0672c74e259ad408cff", size = 7151329, upload-time = "2026-02-11T04:20:20.646Z" }, - { url = "https://files.pythonhosted.org/packages/7b/f8/4b24841f582704da675ca535935bccb32b00a6da1226820845fac4a71136/pillow-12.1.1-cp310-cp310-win32.whl", hash = "sha256:6c52f062424c523d6c4db85518774cc3d50f5539dd6eed32b8f6229b26f24d40", size = 6325574, upload-time = "2026-02-11T04:20:22.43Z" }, - { url = "https://files.pythonhosted.org/packages/f8/f9/9f6b01c0881d7036063aa6612ef04c0e2cad96be21325a1e92d0203f8e91/pillow-12.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:c6008de247150668a705a6338156efb92334113421ceecf7438a12c9a12dab23", size = 7032347, upload-time = "2026-02-11T04:20:23.932Z" }, - { url = "https://files.pythonhosted.org/packages/79/13/c7922edded3dcdaf10c59297540b72785620abc0538872c819915746757d/pillow-12.1.1-cp310-cp310-win_arm64.whl", hash = "sha256:1a9b0ee305220b392e1124a764ee4265bd063e54a751a6b62eff69992f457fa9", size = 2453457, upload-time = "2026-02-11T04:20:25.392Z" }, { url = "https://files.pythonhosted.org/packages/2b/46/5da1ec4a5171ee7bf1a0efa064aba70ba3d6e0788ce3f5acd1375d23c8c0/pillow-12.1.1-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:e879bb6cd5c73848ef3b2b48b8af9ff08c5b71ecda8048b7dd22d8a33f60be32", size = 5304084, upload-time = "2026-02-11T04:20:27.501Z" }, { url = "https://files.pythonhosted.org/packages/78/93/a29e9bc02d1cf557a834da780ceccd54e02421627200696fcf805ebdc3fb/pillow-12.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:365b10bb9417dd4498c0e3b128018c4a624dc11c7b97d8cc54effe3b096f4c38", size = 4657866, upload-time = "2026-02-11T04:20:29.827Z" }, { url = "https://files.pythonhosted.org/packages/13/84/583a4558d492a179d31e4aae32eadce94b9acf49c0337c4ce0b70e0a01f2/pillow-12.1.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d4ce8e329c93845720cd2014659ca67eac35f6433fd3050393d85f3ecef0dad5", size = 6232148, upload-time = "2026-02-11T04:20:31.329Z" }, @@ -2562,21 +2311,6 @@ version = "0.4.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/9e/da/e9fc233cf63743258bff22b3dfa7ea5baef7b5bc324af47a0ad89b8ffc6f/propcache-0.4.1.tar.gz", hash = "sha256:f48107a8c637e80362555f37ecf49abe20370e557cc4ab374f04ec4423c97c3d", size = 46442, upload-time = "2025-10-08T19:49:02.291Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3c/0e/934b541323035566a9af292dba85a195f7b78179114f2c6ebb24551118a9/propcache-0.4.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7c2d1fa3201efaf55d730400d945b5b3ab6e672e100ba0f9a409d950ab25d7db", size = 79534, upload-time = "2025-10-08T19:46:02.083Z" }, - { url = "https://files.pythonhosted.org/packages/a1/6b/db0d03d96726d995dc7171286c6ba9d8d14251f37433890f88368951a44e/propcache-0.4.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1eb2994229cc8ce7fe9b3db88f5465f5fd8651672840b2e426b88cdb1a30aac8", size = 45526, upload-time = "2025-10-08T19:46:03.884Z" }, - { url = "https://files.pythonhosted.org/packages/e4/c3/82728404aea669e1600f304f2609cde9e665c18df5a11cdd57ed73c1dceb/propcache-0.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:66c1f011f45a3b33d7bcb22daed4b29c0c9e2224758b6be00686731e1b46f925", size = 47263, upload-time = "2025-10-08T19:46:05.405Z" }, - { url = "https://files.pythonhosted.org/packages/df/1b/39313ddad2bf9187a1432654c38249bab4562ef535ef07f5eb6eb04d0b1b/propcache-0.4.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:9a52009f2adffe195d0b605c25ec929d26b36ef986ba85244891dee3b294df21", size = 201012, upload-time = "2025-10-08T19:46:07.165Z" }, - { url = "https://files.pythonhosted.org/packages/5b/01/f1d0b57d136f294a142acf97f4ed58c8e5b974c21e543000968357115011/propcache-0.4.1-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:5d4e2366a9c7b837555cf02fb9be2e3167d333aff716332ef1b7c3a142ec40c5", size = 209491, upload-time = "2025-10-08T19:46:08.909Z" }, - { url = "https://files.pythonhosted.org/packages/a1/c8/038d909c61c5bb039070b3fb02ad5cccdb1dde0d714792e251cdb17c9c05/propcache-0.4.1-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:9d2b6caef873b4f09e26ea7e33d65f42b944837563a47a94719cc3544319a0db", size = 215319, upload-time = "2025-10-08T19:46:10.7Z" }, - { url = "https://files.pythonhosted.org/packages/08/57/8c87e93142b2c1fa2408e45695205a7ba05fb5db458c0bf5c06ba0e09ea6/propcache-0.4.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2b16ec437a8c8a965ecf95739448dd938b5c7f56e67ea009f4300d8df05f32b7", size = 196856, upload-time = "2025-10-08T19:46:12.003Z" }, - { url = "https://files.pythonhosted.org/packages/42/df/5615fec76aa561987a534759b3686008a288e73107faa49a8ae5795a9f7a/propcache-0.4.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:296f4c8ed03ca7476813fe666c9ea97869a8d7aec972618671b33a38a5182ef4", size = 193241, upload-time = "2025-10-08T19:46:13.495Z" }, - { url = "https://files.pythonhosted.org/packages/d5/21/62949eb3a7a54afe8327011c90aca7e03547787a88fb8bd9726806482fea/propcache-0.4.1-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:1f0978529a418ebd1f49dad413a2b68af33f85d5c5ca5c6ca2a3bed375a7ac60", size = 190552, upload-time = "2025-10-08T19:46:14.938Z" }, - { url = "https://files.pythonhosted.org/packages/30/ee/ab4d727dd70806e5b4de96a798ae7ac6e4d42516f030ee60522474b6b332/propcache-0.4.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:fd138803047fb4c062b1c1dd95462f5209456bfab55c734458f15d11da288f8f", size = 200113, upload-time = "2025-10-08T19:46:16.695Z" }, - { url = "https://files.pythonhosted.org/packages/8a/0b/38b46208e6711b016aa8966a3ac793eee0d05c7159d8342aa27fc0bc365e/propcache-0.4.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:8c9b3cbe4584636d72ff556d9036e0c9317fa27b3ac1f0f558e7e84d1c9c5900", size = 200778, upload-time = "2025-10-08T19:46:18.023Z" }, - { url = "https://files.pythonhosted.org/packages/cf/81/5abec54355ed344476bee711e9f04815d4b00a311ab0535599204eecc257/propcache-0.4.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:f93243fdc5657247533273ac4f86ae106cc6445a0efacb9a1bfe982fcfefd90c", size = 193047, upload-time = "2025-10-08T19:46:19.449Z" }, - { url = "https://files.pythonhosted.org/packages/ec/b6/1f237c04e32063cb034acd5f6ef34ef3a394f75502e72703545631ab1ef6/propcache-0.4.1-cp310-cp310-win32.whl", hash = "sha256:a0ee98db9c5f80785b266eb805016e36058ac72c51a064040f2bc43b61101cdb", size = 38093, upload-time = "2025-10-08T19:46:20.643Z" }, - { url = "https://files.pythonhosted.org/packages/a6/67/354aac4e0603a15f76439caf0427781bcd6797f370377f75a642133bc954/propcache-0.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:1cdb7988c4e5ac7f6d175a28a9aa0c94cb6f2ebe52756a3c0cda98d2809a9e37", size = 41638, upload-time = "2025-10-08T19:46:21.935Z" }, - { url = "https://files.pythonhosted.org/packages/e0/e1/74e55b9fd1a4c209ff1a9a824bf6c8b3d1fc5a1ac3eabe23462637466785/propcache-0.4.1-cp310-cp310-win_arm64.whl", hash = 
"sha256:d82ad62b19645419fe79dd63b3f9253e15b30e955c0170e5cebc350c1844e581", size = 38229, upload-time = "2025-10-08T19:46:23.368Z" }, { url = "https://files.pythonhosted.org/packages/8c/d4/4e2c9aaf7ac2242b9358f98dccd8f90f2605402f5afeff6c578682c2c491/propcache-0.4.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:60a8fda9644b7dfd5dece8c61d8a85e271cb958075bfc4e01083c148b61a7caf", size = 80208, upload-time = "2025-10-08T19:46:24.597Z" }, { url = "https://files.pythonhosted.org/packages/c2/21/d7b68e911f9c8e18e4ae43bdbc1e1e9bbd971f8866eb81608947b6f585ff/propcache-0.4.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c30b53e7e6bda1d547cabb47c825f3843a0a1a42b0496087bb58d8fedf9f41b5", size = 45777, upload-time = "2025-10-08T19:46:25.733Z" }, { url = "https://files.pythonhosted.org/packages/d3/1d/11605e99ac8ea9435651ee71ab4cb4bf03f0949586246476a25aadfec54a/propcache-0.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:6918ecbd897443087a3b7cd978d56546a812517dcaaca51b49526720571fa93e", size = 47647, upload-time = "2025-10-08T19:46:27.304Z" }, @@ -2708,17 +2442,6 @@ name = "psycopg-binary" version = "3.3.3" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/b4/d8/a763308a41e2ecfb6256ba0877d340c2f2b124c8b2746401863d96fa2c7a/psycopg_binary-3.3.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b3385b58b2fe408a13d084c14b8dcf468cd36cbbe774408250facc128f9fa75c", size = 4609758, upload-time = "2026-02-18T16:46:33.132Z" }, - { url = "https://files.pythonhosted.org/packages/6c/a9/f8a683e85400c1208685e7c895abc049dc13aa0b6ea989e6adf0a3681fe0/psycopg_binary-3.3.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1bef235a50a80f6aba05147002bc354559657cb6386dbd04d8e1c97d1d7cbe84", size = 4676740, upload-time = "2026-02-18T16:46:42.904Z" }, - { url = "https://files.pythonhosted.org/packages/e3/7d/03512c4aaac8a58fc3b1221f38293aa517a1950d10ef8646c72c49addc7d/psycopg_binary-3.3.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:97c839717bf8c8df3f6d983a20949c4fb22e2a34ee172e3e427ede363feda27b", size = 5496335, upload-time = "2026-02-18T16:46:51.517Z" }, - { url = "https://files.pythonhosted.org/packages/8a/bc/23319b4b1c2c0b810d225e1b6f16efbb16150074fc0ea96bfcabdf59ee09/psycopg_binary-3.3.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:48e500cf1c0984dacf1f28ea482c3cdbb4c2288d51c336c04bc64198ab21fc51", size = 5172032, upload-time = "2026-02-18T16:47:00.878Z" }, - { url = "https://files.pythonhosted.org/packages/aa/c8/6d61dc0a56654c558a37b2d9b2094e470aa12621305cc7935fd769122e32/psycopg_binary-3.3.3-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:eb36a08859b9432d94ea6b26ec41a2f98f83f14868c91321d0c1e11f672eeae7", size = 6763107, upload-time = "2026-02-18T16:47:11.784Z" }, - { url = "https://files.pythonhosted.org/packages/9e/b5/e2a3c90aa1059f5b5f593379caad7be3cc3c2ce1ddfc7730e39854e174fe/psycopg_binary-3.3.3-cp310-cp310-manylinux_2_38_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:0dde92cfde09293fb63b3f547919ba7d73bd2654573c03502b3263dd0218e44e", size = 5006494, upload-time = "2026-02-18T16:47:17.062Z" }, - { url = "https://files.pythonhosted.org/packages/5d/3e/bf126e0a1f864e191b7f3eeea667ee2ce13d582b036255fb8b12946d1f7a/psycopg_binary-3.3.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:78c9ce98caaf82ac8484d269791c1b403d7598633e0e4e2fa1097baae244e2f1", size = 4533850, upload-time = "2026-02-18T16:47:21.673Z" }, - { url = 
"https://files.pythonhosted.org/packages/f4/d8/bb5e8d395deb945629aa0c65d12ab90ec3bfcbdf56be89e2a84d001864c9/psycopg_binary-3.3.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:d593612758d0041cb13cb0003f7f8d3fabb7ad9319e651e78afae49b1cf5860e", size = 4223316, upload-time = "2026-02-18T16:47:25.82Z" }, - { url = "https://files.pythonhosted.org/packages/c2/70/33eef61b0f0fd41ebf93b9699f44067313a45016827f67b3c8cc41f0a7ab/psycopg_binary-3.3.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:f24e8e17035200a465c178e9ea945527ad0738118694184c450f1192a452ff25", size = 3954515, upload-time = "2026-02-18T16:47:30.434Z" }, - { url = "https://files.pythonhosted.org/packages/ea/db/27c2b3b9698e713e83e11e8540daa27516f9e90390ec21a41091cb15fcaf/psycopg_binary-3.3.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e7b607f0e14f2a4cf7e78a05ebd13df6144acfba87cb90842e70d3f125d9f53f", size = 4260274, upload-time = "2026-02-18T16:47:36.128Z" }, - { url = "https://files.pythonhosted.org/packages/a1/3b/71e5d603059bf5474215f573a3e2d357a4e95672b26e04d41674400d4862/psycopg_binary-3.3.3-cp310-cp310-win_amd64.whl", hash = "sha256:b27d3a23c79fa59557d2cc63a7e8bb4c7e022c018558eda36f9d7c4e6b99a6e0", size = 3557375, upload-time = "2026-02-18T16:47:42.799Z" }, { url = "https://files.pythonhosted.org/packages/be/c0/b389119dd754483d316805260f3e73cdcad97925839107cc7a296f6132b1/psycopg_binary-3.3.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:a89bb9ee11177b2995d87186b1d9fa892d8ea725e85eab28c6525e4cc14ee048", size = 4609740, upload-time = "2026-02-18T16:47:51.093Z" }, { url = "https://files.pythonhosted.org/packages/cf/e3/9976eef20f61840285174d360da4c820a311ab39d6b82fa09fbb545be825/psycopg_binary-3.3.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:9f7d0cf072c6fbac3795b08c98ef9ea013f11db609659dcfc6b1f6cc31f9e181", size = 4676837, upload-time = "2026-02-18T16:47:55.523Z" }, { url = "https://files.pythonhosted.org/packages/9f/f2/d28ba2f7404fd7f68d41e8a11df86313bd646258244cb12a8dd83b868a97/psycopg_binary-3.3.3-cp311-cp311-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:90eecd93073922f085967f3ed3a98ba8c325cbbc8c1a204e300282abd2369e13", size = 5497070, upload-time = "2026-02-18T16:47:59.929Z" }, @@ -2795,13 +2518,6 @@ version = "23.0.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/88/22/134986a4cc224d593c1afde5494d18ff629393d74cc2eddb176669f234a4/pyarrow-23.0.1.tar.gz", hash = "sha256:b8c5873e33440b2bc2f4a79d2b47017a89c5a24116c055625e6f2ee50523f019", size = 1167336, upload-time = "2026-02-16T10:14:12.39Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/bc/a8/24e5dc6855f50a62936ceb004e6e9645e4219a8065f304145d7fb8a79d5d/pyarrow-23.0.1-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:3fab8f82571844eb3c460f90a75583801d14ca0cc32b1acc8c361650e006fd56", size = 34307390, upload-time = "2026-02-16T10:08:08.654Z" }, - { url = "https://files.pythonhosted.org/packages/bc/8e/4be5617b4aaae0287f621ad31c6036e5f63118cfca0dc57d42121ff49b51/pyarrow-23.0.1-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:3f91c038b95f71ddfc865f11d5876c42f343b4495535bd262c7b321b0b94507c", size = 35853761, upload-time = "2026-02-16T10:08:17.811Z" }, - { url = "https://files.pythonhosted.org/packages/2e/08/3e56a18819462210432ae37d10f5c8eed3828be1d6c751b6e6a2e93c286a/pyarrow-23.0.1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:d0744403adabef53c985a7f8a082b502a368510c40d184df349a0a8754533258", size = 44493116, upload-time = 
"2026-02-16T10:08:25.792Z" }, - { url = "https://files.pythonhosted.org/packages/f8/82/c40b68001dbec8a3faa4c08cd8c200798ac732d2854537c5449dc859f55a/pyarrow-23.0.1-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:c33b5bf406284fd0bba436ed6f6c3ebe8e311722b441d89397c54f871c6863a2", size = 47564532, upload-time = "2026-02-16T10:08:34.27Z" }, - { url = "https://files.pythonhosted.org/packages/20/bc/73f611989116b6f53347581b02177f9f620efdf3cd3f405d0e83cdf53a83/pyarrow-23.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ddf743e82f69dcd6dbbcb63628895d7161e04e56794ef80550ac6f3315eeb1d5", size = 48183685, upload-time = "2026-02-16T10:08:42.889Z" }, - { url = "https://files.pythonhosted.org/packages/b0/cc/6c6b3ecdae2a8c3aced99956187e8302fc954cc2cca2a37cf2111dad16ce/pyarrow-23.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e052a211c5ac9848ae15d5ec875ed0943c0221e2fcfe69eee80b604b4e703222", size = 50605582, upload-time = "2026-02-16T10:08:51.641Z" }, - { url = "https://files.pythonhosted.org/packages/8d/94/d359e708672878d7638a04a0448edf7c707f9e5606cee11e15aaa5c7535a/pyarrow-23.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:5abde149bb3ce524782d838eb67ac095cd3fd6090eba051130589793f1a7f76d", size = 27521148, upload-time = "2026-02-16T10:08:58.077Z" }, { url = "https://files.pythonhosted.org/packages/b0/41/8e6b6ef7e225d4ceead8459427a52afdc23379768f54dd3566014d7618c1/pyarrow-23.0.1-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:6f0147ee9e0386f519c952cc670eb4a8b05caa594eeffe01af0e25f699e4e9bb", size = 34302230, upload-time = "2026-02-16T10:09:03.859Z" }, { url = "https://files.pythonhosted.org/packages/bf/4a/1472c00392f521fea03ae93408bf445cc7bfa1ab81683faf9bc188e36629/pyarrow-23.0.1-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:0ae6e17c828455b6265d590100c295193f93cc5675eb0af59e49dbd00d2de350", size = 35850050, upload-time = "2026-02-16T10:09:11.877Z" }, { url = "https://files.pythonhosted.org/packages/0c/b2/bd1f2f05ded56af7f54d702c8364c9c43cd6abb91b0e9933f3d77b4f4132/pyarrow-23.0.1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:fed7020203e9ef273360b9e45be52a2a47d3103caf156a30ace5247ffb51bdbd", size = 44491918, upload-time = "2026-02-16T10:09:18.144Z" }, @@ -2873,25 +2589,6 @@ version = "1.4.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/aa/b8/4ed5c7ad5ec15b08d35cc79ace6145d5c1ae426e46435f4987379439dfea/pybase64-1.4.3.tar.gz", hash = "sha256:c2ed274c9e0ba9c8f9c4083cfe265e66dd679126cd9c2027965d807352f3f053", size = 137272, upload-time = "2025-12-06T13:27:04.013Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/39/47/16d7af6fae7803f4c691856bc0d8d433ccf30e106432e2ef7707ee19a38a/pybase64-1.4.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f63aa7f29139b8a05ce5f97cdb7fad63d29071e5bdc8a638a343311fe996112a", size = 38241, upload-time = "2025-12-06T13:22:27.396Z" }, - { url = "https://files.pythonhosted.org/packages/4d/3e/268beb8d2240ab55396af4d1b45d2494935982212549b92a5f5b57079bd3/pybase64-1.4.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f5943ec1ae87a8b4fe310905bb57205ea4330c75e2c628433a7d9dd52295b588", size = 31672, upload-time = "2025-12-06T13:22:28.854Z" }, - { url = "https://files.pythonhosted.org/packages/80/14/4365fa33222edcc46b6db4973f9e22bda82adfb6ab2a01afff591f1e41c8/pybase64-1.4.3-cp310-cp310-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:5f2b8aef86f35cd5894c13681faf433a1fffc5b2e76544dcb5416a514a1a8347", size = 65978, 
upload-time = "2025-12-06T13:22:30.191Z" }, - { url = "https://files.pythonhosted.org/packages/1c/22/e89739d8bc9b96c68ead44b4eec42fe555683d9997e4ba65216d384920fc/pybase64-1.4.3-cp310-cp310-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:a6ec7e53dd09b0a8116ccf5c3265c7c7fce13c980747525be76902aef36a514a", size = 68903, upload-time = "2025-12-06T13:22:31.29Z" }, - { url = "https://files.pythonhosted.org/packages/77/e1/7e59a19f8999cdefe9eb0d56bfd701dd38263b0f6fb4a4d29fce165a1b36/pybase64-1.4.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:7528604cd69c538e1dbaafded46e9e4915a2adcd6f2a60fcef6390d87ca922ea", size = 57516, upload-time = "2025-12-06T13:22:32.395Z" }, - { url = "https://files.pythonhosted.org/packages/42/ad/f47dc7e6fe32022b176868b88b671a32dab389718c8ca905cab79280aaaf/pybase64-1.4.3-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.whl", hash = "sha256:4ec645f32b50593879031e09158f8681a1db9f5df0f72af86b3969a1c5d1fa2b", size = 54533, upload-time = "2025-12-06T13:22:33.457Z" }, - { url = "https://files.pythonhosted.org/packages/7c/9a/7ab312b5a324833953b00e47b23eb4f83d45bd5c5c854b4b4e51b2a0cf5b/pybase64-1.4.3-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.whl", hash = "sha256:634a000c5b3485ccc18bb9b244e0124f74b6fbc7f43eade815170237a7b34c64", size = 57187, upload-time = "2025-12-06T13:22:34.566Z" }, - { url = "https://files.pythonhosted.org/packages/2c/84/80acab1fcbaaae103e6b862ef5019192c8f2cd8758433595a202179a0d1d/pybase64-1.4.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.whl", hash = "sha256:309ea32ad07639a485580af1be0ad447a434deb1924e76adced63ac2319cfe15", size = 57730, upload-time = "2025-12-06T13:22:35.581Z" }, - { url = "https://files.pythonhosted.org/packages/1f/24/84256d472400ea3163d7d69c44bb7e2e1027f0f1d4d20c47629a7dc4578e/pybase64-1.4.3-cp310-cp310-manylinux_2_31_riscv64.whl", hash = "sha256:d10d517566b748d3f25f6ac7162af779360c1c6426ad5f962927ee205990d27c", size = 53036, upload-time = "2025-12-06T13:22:36.621Z" }, - { url = "https://files.pythonhosted.org/packages/a3/0f/33aecbed312ee0431798a73fa25e00dedbffdd91389ee23121fed397c550/pybase64-1.4.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a74cc0f4d835400857cc5c6d27ec854f7949491e07a04e6d66e2137812831f4c", size = 56321, upload-time = "2025-12-06T13:22:37.7Z" }, - { url = "https://files.pythonhosted.org/packages/dc/1c/a341b050746658cbec8cab3c733aeb3ef52ce8f11e60d0d47adbdf729ebf/pybase64-1.4.3-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:1b591d774ac09d5eb73c156a03277cb271438fbd8042bae4109ff3a827cd218c", size = 50114, upload-time = "2025-12-06T13:22:38.752Z" }, - { url = "https://files.pythonhosted.org/packages/ba/d3/f7e6680ae6dc4ddff39112ad66e0fa6b2ec346e73881bafc08498c560bc0/pybase64-1.4.3-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:5eb588d35a04302ef6157d17db62354a787ac6f8b1585dd0b90c33d63a97a550", size = 66570, upload-time = "2025-12-06T13:22:40.221Z" }, - { url = "https://files.pythonhosted.org/packages/4c/71/774748eecc7fe23869b7e5df028e3c4c2efa16b506b83ea3fa035ea95dc2/pybase64-1.4.3-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:df8b122d5be2c96962231cc4831d9c2e1eae6736fb12850cec4356d8b06fe6f8", size = 55700, upload-time = "2025-12-06T13:22:41.289Z" }, - { url = "https://files.pythonhosted.org/packages/b3/91/dd15075bb2fe0086193e1cd4bad80a43652c38d8a572f9218d46ba721802/pybase64-1.4.3-cp310-cp310-musllinux_1_2_riscv64.whl", hash = 
"sha256:31b7a85c661fc591bbcce82fb8adaebe2941e6a83b08444b0957b77380452a4b", size = 52491, upload-time = "2025-12-06T13:22:42.628Z" }, - { url = "https://files.pythonhosted.org/packages/7b/27/f357d63ea3774c937fc47160e040419ed528827aa3d4306d5ec9826259c0/pybase64-1.4.3-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:e6d7beaae65979fef250e25e66cf81c68a8f81910bcda1a2f43297ab486a7e4e", size = 53957, upload-time = "2025-12-06T13:22:44.615Z" }, - { url = "https://files.pythonhosted.org/packages/b3/c3/243693771701a54e67ff5ccbf4c038344f429613f5643169a7befc51f007/pybase64-1.4.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:4a6276bc3a3962d172a2b5aba544d89881c4037ea954517b86b00892c703d007", size = 68422, upload-time = "2025-12-06T13:22:45.641Z" }, - { url = "https://files.pythonhosted.org/packages/75/95/f987081bf6bc1d1eda3012dae1b06ad427732ef9933a632cb8b58f9917f8/pybase64-1.4.3-cp310-cp310-win32.whl", hash = "sha256:4bdd07ef017515204ee6eaab17e1ad05f83c0ccb5af8ae24a0fe6d9cb5bb0b7a", size = 33622, upload-time = "2025-12-06T13:22:47.348Z" }, - { url = "https://files.pythonhosted.org/packages/79/28/c169a769fe90128f16d394aad87b2096dd4bf2f035ae0927108a46b617df/pybase64-1.4.3-cp310-cp310-win_amd64.whl", hash = "sha256:5db0b6bbda15110db2740c61970a8fda3bf9c93c3166a3f57f87c7865ed1125c", size = 35799, upload-time = "2025-12-06T13:22:48.731Z" }, - { url = "https://files.pythonhosted.org/packages/ab/f2/bdbe6af0bd4f3fe5bc70e77ead7f7d523bb9d3ca3ad50ac42b9adbb9ca14/pybase64-1.4.3-cp310-cp310-win_arm64.whl", hash = "sha256:f96367dfc82598569aa02b1103ebd419298293e59e1151abda2b41728703284b", size = 31158, upload-time = "2025-12-06T13:22:50.021Z" }, { url = "https://files.pythonhosted.org/packages/2b/63/21e981e9d3f1f123e0b0ee2130112b1956cad9752309f574862c7ae77c08/pybase64-1.4.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:70b0d4a4d54e216ce42c2655315378b8903933ecfa32fced453989a92b4317b2", size = 38237, upload-time = "2025-12-06T13:22:52.159Z" }, { url = "https://files.pythonhosted.org/packages/92/fb/3f448e139516404d2a3963915cc10dc9dde7d3a67de4edba2f827adfef17/pybase64-1.4.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8127f110cdee7a70e576c5c9c1d4e17e92e76c191869085efbc50419f4ae3c72", size = 31673, upload-time = "2025-12-06T13:22:53.241Z" }, { url = "https://files.pythonhosted.org/packages/3c/fb/bb06a5b9885e7d853ac1e801c4d8abfdb4c8506deee33e53d55aa6690e67/pybase64-1.4.3-cp311-cp311-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:f9ef0388878bc15a084bd9bf73ec1b2b4ee513d11009b1506375e10a7aae5032", size = 68331, upload-time = "2025-12-06T13:22:54.197Z" }, @@ -3026,12 +2723,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/8f/43c3bb11ca9bacf81cb0b7a71500bb65b2eda6d5fe07433c09b543de97f3/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:5c29a582b0ea3936d02bd6fe9bf674ab6059e6e45ab71c78404ab2c913224414", size = 43461, upload-time = "2025-12-06T13:26:28.906Z" }, { url = "https://files.pythonhosted.org/packages/2d/4c/2a5258329200be57497d3972b5308558c6de42e3749c6cc2aa1cbe34b25a/pybase64-1.4.3-graalpy312-graalpy250_312_native-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:b6b664758c804fa919b4f1257aa8cf68e95db76fc331de5f70bfc3a34655afe1", size = 36058, upload-time = "2025-12-06T13:26:30.092Z" }, { url = 
"https://files.pythonhosted.org/packages/ea/6d/41faa414cde66ec023b0ca8402a8f11cb61731c3dc27c082909cbbd1f929/pybase64-1.4.3-graalpy312-graalpy250_312_native-win_amd64.whl", hash = "sha256:f7537fa22ae56a0bf51e4b0ffc075926ad91c618e1416330939f7ef366b58e3b", size = 36231, upload-time = "2025-12-06T13:26:31.656Z" }, - { url = "https://files.pythonhosted.org/packages/2a/cf/6e712491bd665ea8633efb0b484121893ea838d8e830e06f39f2aae37e58/pybase64-1.4.3-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:94cf50c36bb2f8618982ee5a978c4beed9db97d35944fa96e8586dd953c7994a", size = 38007, upload-time = "2025-12-06T13:26:32.804Z" }, - { url = "https://files.pythonhosted.org/packages/38/c0/9272cae1c49176337dcdbd97511e2843faae1aaf5a5fb48569093c6cd4ce/pybase64-1.4.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:01bc3ff5ca1341685c6d2d945b035f442f7b9c3b068a5c6ee8408a41fda5754e", size = 31538, upload-time = "2025-12-06T13:26:34.001Z" }, - { url = "https://files.pythonhosted.org/packages/20/f2/17546f97befe429c73f622bbd869ceebb518c40fdb0dec4c4f98312e80a5/pybase64-1.4.3-pp310-pypy310_pp73-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:03d0aa3761a99034960496280c02aa063f856a3cc9b33771bc4eab0e4e72b5c2", size = 40682, upload-time = "2025-12-06T13:26:35.168Z" }, - { url = "https://files.pythonhosted.org/packages/92/a0/464b36d5dfb61f3da17858afaeaa876a9342d58e9f17803ce7f28b5de9e8/pybase64-1.4.3-pp310-pypy310_pp73-manylinux1_x86_64.manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:7ca5b1ce768520acd6440280cdab35235b27ad2faacfcec064bc9c3377066ef1", size = 41306, upload-time = "2025-12-06T13:26:36.351Z" }, - { url = "https://files.pythonhosted.org/packages/07/c9/a748dfc0969a8d960ecf1e82c8a2a16046ffec22f8e7ece582aa3b1c6cf9/pybase64-1.4.3-pp310-pypy310_pp73-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:3caa1e2ddad1c50553ffaaa1c86b74b3f9fbd505bea9970326ab88fc68c4c184", size = 35452, upload-time = "2025-12-06T13:26:37.772Z" }, - { url = "https://files.pythonhosted.org/packages/95/b7/4d37bd3577d1aa6c732dc099087fe027c48873e223de3784b095e5653f8b/pybase64-1.4.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:bd47076f736b27a8b0f9b30d93b6bb4f5af01b0dc8971f883ed3b75934f39a99", size = 36125, upload-time = "2025-12-06T13:26:39.78Z" }, { url = "https://files.pythonhosted.org/packages/b2/76/160dded493c00d3376d4ad0f38a2119c5345de4a6693419ad39c3565959b/pybase64-1.4.3-pp311-pypy311_pp73-macosx_10_15_x86_64.whl", hash = "sha256:277de6e03cc9090fb359365c686a2a3036d23aee6cd20d45d22b8c89d1247f17", size = 37939, upload-time = "2025-12-06T13:26:41.014Z" }, { url = "https://files.pythonhosted.org/packages/b7/b8/a0f10be8d648d6f8f26e560d6e6955efa7df0ff1e009155717454d76f601/pybase64-1.4.3-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:ab1dd8b1ed2d1d750260ed58ab40defaa5ba83f76a30e18b9ebd5646f6247ae5", size = 31466, upload-time = "2025-12-06T13:26:42.539Z" }, { url = "https://files.pythonhosted.org/packages/d3/22/832a2f9e76cdf39b52e01e40d8feeb6a04cf105494f2c3e3126d0149717f/pybase64-1.4.3-pp311-pypy311_pp73-manylinux1_i686.manylinux2014_i686.manylinux_2_17_i686.manylinux_2_5_i686.whl", hash = "sha256:bd4d2293de9fd212e294c136cec85892460b17d24e8c18a6ba18750928037750", size = 40681, upload-time = "2025-12-06T13:26:43.782Z" }, @@ -3078,19 +2769,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = 
"sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, - { url = "https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, - { url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, - { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, - { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, - { url = "https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, - { url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = "2025-11-04T13:39:17.403Z" }, - { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, upload-time = "2025-11-04T13:39:19.351Z" }, - { url = "https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, - { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 2341833, upload-time = "2025-11-04T13:39:22.606Z" }, - { url = 
"https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, - { url = "https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, - { url = "https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, @@ -3169,14 +2847,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, - { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, - { url = "https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, - { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, - { url = "https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, - { url = "https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, - { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, - { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, - { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, @@ -3267,12 +2937,10 @@ version = "9.0.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "colorama", marker = "sys_platform == 'win32'" }, - { name = "exceptiongroup", marker = "python_full_version < '3.11'" }, { name = "iniconfig" }, { name = "packaging" }, { name = "pluggy" }, { name = "pygments" }, - { name = "tomli", marker = "python_full_version < '3.11'" }, ] sdist = { url = 
"https://files.pythonhosted.org/packages/d1/db/7ef3487e0fb0049ddb5ce41d3a49c235bf9ad299b6a25d5780a89f19230f/pytest-9.0.2.tar.gz", hash = "sha256:75186651a92bd89611d1d9fc20f0b4345fd827c41ccd5c299a868a05d70edf11", size = 1568901, upload-time = "2025-12-06T21:30:51.014Z" } wheels = [ @@ -3284,7 +2952,6 @@ name = "pytest-asyncio" version = "1.3.0" source = { registry = "https://pypi.org/simple" } dependencies = [ - { name = "backports-asyncio-runner", marker = "python_full_version < '3.11'" }, { name = "pytest" }, { name = "typing-extensions", marker = "python_full_version < '3.13'" }, ] @@ -3307,6 +2974,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ee/49/1377b49de7d0c1ce41292161ea0f721913fa8722c19fb9c1e3aa0367eecb/pytest_cov-7.0.0-py3-none-any.whl", hash = "sha256:3b8e9558b16cc1479da72058bdecf8073661c7f57f7d3c5f22a1c23507f2d861", size = 22424, upload-time = "2025-09-09T10:57:00.695Z" }, ] +[[package]] +name = "pytest-mock" +version = "3.15.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/68/14/eb014d26be205d38ad5ad20d9a80f7d201472e08167f0bb4361e251084a9/pytest_mock-3.15.1.tar.gz", hash = "sha256:1849a238f6f396da19762269de72cb1814ab44416fa73a8686deac10b0d87a0f", size = 34036, upload-time = "2025-09-16T16:37:27.081Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/cc/06253936f4a7fa2e0f48dfe6d851d9c56df896a9ab09ac019d70b760619c/pytest_mock-3.15.1-py3-none-any.whl", hash = "sha256:0a25e2eb88fe5168d535041d09a4529a188176ae608a6d249ee65abc0949630d", size = 10095, upload-time = "2025-09-16T16:37:25.734Z" }, +] + [[package]] name = "pytest-xdist" version = "3.8.0" @@ -3356,15 +3035,6 @@ version = "6.0.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/05/8e/961c0007c59b8dd7729d542c61a4d537767a59645b82a0b521206e1e25c2/pyyaml-6.0.3.tar.gz", hash = "sha256:d76623373421df22fb4cf8817020cbb7ef15c725b9d5e45f17e189bfc384190f", size = 130960, upload-time = "2025-09-25T21:33:16.546Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/f4/a0/39350dd17dd6d6c6507025c0e53aef67a9293a6d37d3511f23ea510d5800/pyyaml-6.0.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:214ed4befebe12df36bcc8bc2b64b396ca31be9304b8f59e25c11cf94a4c033b", size = 184227, upload-time = "2025-09-25T21:31:46.04Z" }, - { url = "https://files.pythonhosted.org/packages/05/14/52d505b5c59ce73244f59c7a50ecf47093ce4765f116cdb98286a71eeca2/pyyaml-6.0.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:02ea2dfa234451bbb8772601d7b8e426c2bfa197136796224e50e35a78777956", size = 174019, upload-time = "2025-09-25T21:31:47.706Z" }, - { url = "https://files.pythonhosted.org/packages/43/f7/0e6a5ae5599c838c696adb4e6330a59f463265bfa1e116cfd1fbb0abaaae/pyyaml-6.0.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b30236e45cf30d2b8e7b3e85881719e98507abed1011bf463a8fa23e9c3e98a8", size = 740646, upload-time = "2025-09-25T21:31:49.21Z" }, - { url = "https://files.pythonhosted.org/packages/2f/3a/61b9db1d28f00f8fd0ae760459a5c4bf1b941baf714e207b6eb0657d2578/pyyaml-6.0.3-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:66291b10affd76d76f54fad28e22e51719ef9ba22b29e1d7d03d6777a9174198", size = 840793, upload-time = "2025-09-25T21:31:50.735Z" }, - { url = 
"https://files.pythonhosted.org/packages/7a/1e/7acc4f0e74c4b3d9531e24739e0ab832a5edf40e64fbae1a9c01941cabd7/pyyaml-6.0.3-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9c7708761fccb9397fe64bbc0395abcae8c4bf7b0eac081e12b809bf47700d0b", size = 770293, upload-time = "2025-09-25T21:31:51.828Z" }, - { url = "https://files.pythonhosted.org/packages/8b/ef/abd085f06853af0cd59fa5f913d61a8eab65d7639ff2a658d18a25d6a89d/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:418cf3f2111bc80e0933b2cd8cd04f286338bb88bdc7bc8e6dd775ebde60b5e0", size = 732872, upload-time = "2025-09-25T21:31:53.282Z" }, - { url = "https://files.pythonhosted.org/packages/1f/15/2bc9c8faf6450a8b3c9fc5448ed869c599c0a74ba2669772b1f3a0040180/pyyaml-6.0.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5e0b74767e5f8c593e8c9b5912019159ed0533c70051e9cce3e8b6aa699fcd69", size = 758828, upload-time = "2025-09-25T21:31:54.807Z" }, - { url = "https://files.pythonhosted.org/packages/a3/00/531e92e88c00f4333ce359e50c19b8d1de9fe8d581b1534e35ccfbc5f393/pyyaml-6.0.3-cp310-cp310-win32.whl", hash = "sha256:28c8d926f98f432f88adc23edf2e6d4921ac26fb084b028c733d01868d19007e", size = 142415, upload-time = "2025-09-25T21:31:55.885Z" }, - { url = "https://files.pythonhosted.org/packages/2a/fa/926c003379b19fca39dd4634818b00dec6c62d87faf628d1394e137354d4/pyyaml-6.0.3-cp310-cp310-win_amd64.whl", hash = "sha256:bdb2c67c6c1390b63c6ff89f210c8fd09d9a1217a465701eac7316313c915e4c", size = 158561, upload-time = "2025-09-25T21:31:57.406Z" }, { url = "https://files.pythonhosted.org/packages/6d/16/a95b6757765b7b031c9374925bb718d55e0a9ba8a1b6a12d25962ea44347/pyyaml-6.0.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:44edc647873928551a01e7a563d7452ccdebee747728c1080d881d68af7b997e", size = 185826, upload-time = "2025-09-25T21:31:58.655Z" }, { url = "https://files.pythonhosted.org/packages/16/19/13de8e4377ed53079ee996e1ab0a9c33ec2faf808a4647b7b4c0d46dd239/pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:652cb6edd41e718550aad172851962662ff2681490a8a711af6a4d288dd96824", size = 175577, upload-time = "2025-09-25T21:32:00.088Z" }, { url = "https://files.pythonhosted.org/packages/0c/62/d2eb46264d4b157dae1275b573017abec435397aa59cbcdab6fc978a8af4/pyyaml-6.0.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:10892704fc220243f5305762e276552a0395f7beb4dbf9b14ec8fd43b57f126c", size = 775556, upload-time = "2025-09-25T21:32:01.31Z" }, @@ -3432,23 +3102,6 @@ version = "2026.2.19" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/ff/c0/d8079d4f6342e4cec5c3e7d7415b5cd3e633d5f4124f7a4626908dbe84c7/regex-2026.2.19.tar.gz", hash = "sha256:6fb8cb09b10e38f3ae17cc6dc04a1df77762bd0351b6ba9041438e7cc85ec310", size = 414973, upload-time = "2026-02-19T19:03:47.899Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/af/de/f10b4506acfd684de4e42b0aa56ccea1a778a18864da8f6d319a40591062/regex-2026.2.19-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f5a37a17d110f9d5357a43aa7e3507cb077bf3143d1c549a45c4649e90e40a70", size = 488369, upload-time = "2026-02-19T18:59:45.01Z" }, - { url = "https://files.pythonhosted.org/packages/8b/2f/b4eaef1f0b4d0bf2a73eaf07c08f6c13422918a4180c9211ce0521746d0c/regex-2026.2.19-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:676c4e6847a83a1d5732b4ed553881ad36f0a8133627bb695a89ecf3571499d3", size = 290743, upload-time = "2026-02-19T18:59:48.527Z" }, 
- { url = "https://files.pythonhosted.org/packages/76/7c/805413bd0a88d04688c0725c222cfb811bd54a2f571004c24199a1ae55d6/regex-2026.2.19-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:82336faeecac33297cd42857c3b36f12b91810e3fdd276befdd128f73a2b43fa", size = 288652, upload-time = "2026-02-19T18:59:50.2Z" }, - { url = "https://files.pythonhosted.org/packages/08/ff/2c4cd530a878b1975398e76faef4285f11e7c9ccf1aaedfd528bfcc1f580/regex-2026.2.19-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:52136f5b71f095cb74b736cc3a1b578030dada2e361ef2f07ca582240b703946", size = 781759, upload-time = "2026-02-19T18:59:51.836Z" }, - { url = "https://files.pythonhosted.org/packages/37/45/9608ab1b41f6740ff4076eabadde8e8b3f3400942b348ac41e8599ccc131/regex-2026.2.19-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:4192464fe3e6cb0ef6751f7d3b16f886d8270d359ed1590dd555539d364f0ff7", size = 850947, upload-time = "2026-02-19T18:59:53.739Z" }, - { url = "https://files.pythonhosted.org/packages/90/3a/66471b6c4f7cac17e14bf5300e46661bba2b17ffb0871bd2759e837a6f82/regex-2026.2.19-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = "sha256:e561dd47a85d2660d3d3af4e6cb2da825cf20f121e577147963f875b83d32786", size = 898794, upload-time = "2026-02-19T18:59:55.993Z" }, - { url = "https://files.pythonhosted.org/packages/c2/d2/38c53929a5931f7398e5e49f5a5a3079cb2aba30119b4350608364cfad8c/regex-2026.2.19-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:00ec994d7824bf01cd6c7d14c7a6a04d9aeaf7c42a2bc22d2359d715634d539b", size = 791922, upload-time = "2026-02-19T18:59:58.216Z" }, - { url = "https://files.pythonhosted.org/packages/8b/bd/b046e065630fa25059d9c195b7b5308ea94da45eee65d40879772500f74c/regex-2026.2.19-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:2cb00aabd96b345d56a8c2bc328c8d6c4d29935061e05078bf1f02302e12abf5", size = 783345, upload-time = "2026-02-19T18:59:59.948Z" }, - { url = "https://files.pythonhosted.org/packages/d4/8f/045c643d2fa255a985e8f87d848e4be230b711a8935e4bdc58e60b8f7b84/regex-2026.2.19-cp310-cp310-manylinux_2_31_riscv64.manylinux_2_39_riscv64.whl", hash = "sha256:f374366ed35673ea81b86a8859c457d4fae6ba092b71024857e9e237410c7404", size = 768055, upload-time = "2026-02-19T19:00:01.65Z" }, - { url = "https://files.pythonhosted.org/packages/72/9f/ab7ae9f5447559562f1a788bbc85c0e526528c5e6c20542d18e4afc86aad/regex-2026.2.19-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:f9417fd853fcd00b7d55167e692966dd12d95ba1a88bf08a62002ccd85030790", size = 774955, upload-time = "2026-02-19T19:00:03.368Z" }, - { url = "https://files.pythonhosted.org/packages/37/5c/f16fc23c56f60b6f4ff194604a6e53bb8aec7b6e8e4a23a482dee8d77235/regex-2026.2.19-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:12e86a01594031abf892686fcb309b041bf3de3d13d99eb7e2b02a8f3c687df1", size = 846010, upload-time = "2026-02-19T19:00:05.079Z" }, - { url = "https://files.pythonhosted.org/packages/51/c8/6be4c854135d7c9f35d4deeafdaf124b039ecb4ffcaeb7ed0495ad2c97ca/regex-2026.2.19-cp310-cp310-musllinux_1_2_riscv64.whl", hash = "sha256:79014115e6fdf18fd9b32e291d58181bf42d4298642beaa13fd73e69810e4cb6", size = 755938, upload-time = "2026-02-19T19:00:07.148Z" }, - { url = "https://files.pythonhosted.org/packages/d6/8d/f683d49b9663a5324b95a328e69d397f6dade7cb84154eec116bf79fe150/regex-2026.2.19-cp310-cp310-musllinux_1_2_s390x.whl", hash = 
"sha256:31aefac2506967b7dd69af2c58eca3cc8b086d4110b66d6ac6e9026f0ee5b697", size = 835773, upload-time = "2026-02-19T19:00:08.939Z" }, - { url = "https://files.pythonhosted.org/packages/16/cd/619224b90da09f167fe4497c350a0d0b30edc539ee9244bf93e604c073c3/regex-2026.2.19-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:49cef7bb2a491f91a8869c7cdd90babf0a417047ab0bf923cd038ed2eab2ccb8", size = 780075, upload-time = "2026-02-19T19:00:10.838Z" }, - { url = "https://files.pythonhosted.org/packages/5b/88/19cfb0c262d6f9d722edef29157125418bf90eb3508186bf79335afeedae/regex-2026.2.19-cp310-cp310-win32.whl", hash = "sha256:3a039474986e7a314ace6efb9ce52f5da2bdb80ac4955358723d350ec85c32ad", size = 266004, upload-time = "2026-02-19T19:00:12.371Z" }, - { url = "https://files.pythonhosted.org/packages/82/af/5b487e0287ef72545d7ae92edecdacbe3d44e531cac24fda7de5598ba8dd/regex-2026.2.19-cp310-cp310-win_amd64.whl", hash = "sha256:5b81ff4f9cad99f90c807a00c5882fbcda86d8b3edd94e709fb531fc52cb3d25", size = 277895, upload-time = "2026-02-19T19:00:13.75Z" }, - { url = "https://files.pythonhosted.org/packages/4c/19/b6715a187ffca4d2979af92a46ce922445ba41f910bf187ccd666a2d52ef/regex-2026.2.19-cp310-cp310-win_arm64.whl", hash = "sha256:a032bc01a4bc73fc3cadba793fce28eb420da39338f47910c59ffcc11a5ba5ef", size = 270465, upload-time = "2026-02-19T19:00:15.127Z" }, { url = "https://files.pythonhosted.org/packages/6f/93/43f405a98f54cc59c786efb4fc0b644615ed2392fc89d57d30da11f35b5b/regex-2026.2.19-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:93b16a18cadb938f0f2306267161d57eb33081a861cee9ffcd71e60941eb5dfc", size = 488365, upload-time = "2026-02-19T19:00:17.857Z" }, { url = "https://files.pythonhosted.org/packages/66/46/da0efce22cd8f5ae28eeb25ac69703f49edcad3331ac22440776f4ea0867/regex-2026.2.19-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:78af1e499cab704131f6f4e2f155b7f54ce396ca2acb6ef21a49507e4752e0be", size = 290737, upload-time = "2026-02-19T19:00:19.869Z" }, { url = "https://files.pythonhosted.org/packages/fb/19/f735078448132c1c974974d30d5306337bc297fe6b6f126164bff72c1019/regex-2026.2.19-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:eb20c11aa4c3793c9ad04c19a972078cdadb261b8429380364be28e867a843f2", size = 288654, upload-time = "2026-02-19T19:00:21.307Z" }, @@ -3595,20 +3248,6 @@ version = "0.7.6" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/e5/f5/8bed2310abe4ae04b67a38374a4d311dd85220f5d8da56f47ae9361be0b0/rignore-0.7.6.tar.gz", hash = "sha256:00d3546cd793c30cb17921ce674d2c8f3a4b00501cb0e3dd0e82217dbeba2671", size = 57140, upload-time = "2025-11-05T21:41:21.968Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/86/7a/b970cd0138b0ece72eb28f086e933f9ed75b795716ad3de5ab22994b3b54/rignore-0.7.6-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:f3c74a7e5ee77aea669c95fdb3933f2a6c7549893700082e759128a29cf67e45", size = 884999, upload-time = "2025-11-05T20:42:38.373Z" }, - { url = "https://files.pythonhosted.org/packages/ca/05/23faca29616d8966ada63fb0e13c214107811fa9a0aba2275e4c7ca63bd5/rignore-0.7.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b7202404958f5fe3474bac91f65350f0b1dde1a5e05089f2946549b7e91e79ec", size = 824824, upload-time = "2025-11-05T20:42:22.1Z" }, - { url = "https://files.pythonhosted.org/packages/fa/2e/05a1e61f04cf2548524224f0b5f21ca19ea58f7273a863bac10846b8ff69/rignore-0.7.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:6bde7c5835fa3905bfb7e329a4f1d7eccb676de63da7a3f934ddd5c06df20597", size = 899121, upload-time = "2025-11-05T20:40:48.94Z" }, - { url = "https://files.pythonhosted.org/packages/ff/35/71518847e10bdbf359badad8800e4681757a01f4777b3c5e03dbde8a42d8/rignore-0.7.6-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:626c3d4ba03af266694d25101bc1d8d16eda49c5feb86cedfec31c614fceca7d", size = 873813, upload-time = "2025-11-05T20:41:04.71Z" }, - { url = "https://files.pythonhosted.org/packages/f6/c8/32ae405d3e7fd4d9f9b7838f2fcca0a5005bb87fa514b83f83fd81c0df22/rignore-0.7.6-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0a43841e651e7a05a4274b9026cc408d1912e64016ede8cd4c145dae5d0635be", size = 1168019, upload-time = "2025-11-05T20:41:20.723Z" }, - { url = "https://files.pythonhosted.org/packages/25/98/013c955982bc5b4719bf9a5bea58be317eea28aa12bfd004025e3cd7c000/rignore-0.7.6-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7978c498dbf7f74d30cdb8859fe612167d8247f0acd377ae85180e34490725da", size = 942822, upload-time = "2025-11-05T20:41:36.99Z" }, - { url = "https://files.pythonhosted.org/packages/90/fb/9a3f3156c6ed30bcd597e63690353edac1fcffe9d382ad517722b56ac195/rignore-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2d22f72ab695c07d2d96d2a645208daff17084441b5d58c07378c9dd6f9c4c87", size = 959820, upload-time = "2025-11-05T20:42:06.364Z" }, - { url = "https://files.pythonhosted.org/packages/5e/b2/93bf609633021e9658acaff24cfb055d8cdaf7f5855d10ebb35307900dda/rignore-0.7.6-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d5bd8e1a91ed1a789b2cbe39eeea9204a6719d4f2cf443a9544b521a285a295f", size = 985050, upload-time = "2025-11-05T20:41:51.124Z" }, - { url = "https://files.pythonhosted.org/packages/69/bc/ec2d040469bdfd7b743df10f2201c5d285009a4263d506edbf7a06a090bb/rignore-0.7.6-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:bc1fc03efad5789365018e94ac4079f851a999bc154d1551c45179f7fcf45322", size = 1079164, upload-time = "2025-11-05T21:40:10.368Z" }, - { url = "https://files.pythonhosted.org/packages/df/26/4b635f4ea5baf4baa8ba8eee06163f6af6e76dfbe72deb57da34bb24b19d/rignore-0.7.6-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:ce2617fe28c51367fd8abfd4eeea9e61664af63c17d4ea00353d8ef56dfb95fa", size = 1139028, upload-time = "2025-11-05T21:40:27.977Z" }, - { url = "https://files.pythonhosted.org/packages/6a/54/a3147ebd1e477b06eb24e2c2c56d951ae5faa9045b7b36d7892fec5080d9/rignore-0.7.6-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:7c4ad2cee85068408e7819a38243043214e2c3047e9bd4c506f8de01c302709e", size = 1119024, upload-time = "2025-11-05T21:40:45.148Z" }, - { url = "https://files.pythonhosted.org/packages/fb/f4/27475db769a57cff18fe7e7267b36e6cdb5b1281caa185ba544171106cba/rignore-0.7.6-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:02cd240bfd59ecc3907766f4839cbba20530a2e470abca09eaa82225e4d946fb", size = 1128531, upload-time = "2025-11-05T21:41:02.734Z" }, - { url = "https://files.pythonhosted.org/packages/97/32/6e782d3b352e4349fa0e90bf75b13cb7f11d8908b36d9e2b262224b65d9a/rignore-0.7.6-cp310-cp310-win32.whl", hash = "sha256:fe2bd8fa1ff555259df54c376abc73855cb02628a474a40d51b358c3a1ddc55b", size = 646817, upload-time = "2025-11-05T21:41:47.51Z" }, - { url = "https://files.pythonhosted.org/packages/c0/8a/53185c69abb3bb362e8a46b8089999f820bf15655629ff8395107633c8ab/rignore-0.7.6-cp310-cp310-win_amd64.whl", hash = 
"sha256:d80afd6071c78baf3765ec698841071b19e41c326f994cfa69b5a1df676f5d39", size = 727001, upload-time = "2025-11-05T21:41:32.778Z" }, { url = "https://files.pythonhosted.org/packages/25/41/b6e2be3069ef3b7f24e35d2911bd6deb83d20ed5642ad81d5a6d1c015473/rignore-0.7.6-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:40be8226e12d6653abbebaffaea2885f80374c1c8f76fe5ca9e0cadd120a272c", size = 885285, upload-time = "2025-11-05T20:42:39.763Z" }, { url = "https://files.pythonhosted.org/packages/52/66/ba7f561b6062402022887706a7f2b2c2e2e2a28f1e3839202b0a2f77e36d/rignore-0.7.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:182f4e5e4064d947c756819446a7d4cdede8e756b8c81cf9e509683fe38778d7", size = 823882, upload-time = "2025-11-05T20:42:23.488Z" }, { url = "https://files.pythonhosted.org/packages/f5/81/4087453df35a90b07370647b19017029324950c1b9137d54bf1f33843f17/rignore-0.7.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:16b63047648a916a87be1e51bb5c009063f1b8b6f5afe4f04f875525507e63dc", size = 899362, upload-time = "2025-11-05T20:40:51.111Z" }, @@ -3684,18 +3323,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fc/d3/18210222b37e87e36357f7b300b7d98c6dd62b133771e71ae27acba83a4f/rignore-0.7.6-cp314-cp314t-win32.whl", hash = "sha256:c1d8f117f7da0a4a96a8daef3da75bc090e3792d30b8b12cfadc240c631353f9", size = 647033, upload-time = "2025-11-05T21:42:00.095Z" }, { url = "https://files.pythonhosted.org/packages/3e/87/033eebfbee3ec7d92b3bb1717d8f68c88e6fc7de54537040f3b3a405726f/rignore-0.7.6-cp314-cp314t-win_amd64.whl", hash = "sha256:ca36e59408bec81de75d307c568c2d0d410fb880b1769be43611472c61e85c96", size = 725647, upload-time = "2025-11-05T21:41:44.449Z" }, { url = "https://files.pythonhosted.org/packages/79/62/b88e5879512c55b8ee979c666ee6902adc4ed05007226de266410ae27965/rignore-0.7.6-cp314-cp314t-win_arm64.whl", hash = "sha256:b83adabeb3e8cf662cabe1931b83e165b88c526fa6af6b3aa90429686e474896", size = 656035, upload-time = "2025-11-05T21:41:31.13Z" }, - { url = "https://files.pythonhosted.org/packages/85/12/62d690b4644c330d7ac0f739b7f078190ab4308faa909a60842d0e4af5b2/rignore-0.7.6-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:c3d3a523af1cd4ed2c0cba8d277a32d329b0c96ef9901fb7ca45c8cfaccf31a5", size = 887462, upload-time = "2025-11-05T20:42:50.804Z" }, - { url = "https://files.pythonhosted.org/packages/05/bc/6528a0e97ed2bd7a7c329183367d1ffbc5b9762ae8348d88dae72cc9d1f5/rignore-0.7.6-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:990853566e65184a506e1e2af2d15045afad3ebaebb8859cb85b882081915110", size = 826918, upload-time = "2025-11-05T20:42:33.689Z" }, - { url = "https://files.pythonhosted.org/packages/3e/2c/7d7bad116e09a04e9e1688c6f891fa2d4fd33f11b69ac0bd92419ddebeae/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cab9ff2e436ce7240d7ee301c8ef806ed77c1fd6b8a8239ff65f9bbbcb5b8a3", size = 900922, upload-time = "2025-11-05T20:41:00.361Z" }, - { url = "https://files.pythonhosted.org/packages/09/ba/e5ea89fbde8e37a90ce456e31c5e9d85512cef5ae38e0f4d2426eb776a19/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:d1a6671b2082c13bfd9a5cf4ce64670f832a6d41470556112c4ab0b6519b2fc4", size = 876987, upload-time = "2025-11-05T20:41:16.219Z" }, - { url = "https://files.pythonhosted.org/packages/d0/fb/93d14193f0ec0c3d35b763f0a000e9780f63b2031f3d3756442c2152622d/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:2468729b4c5295c199d084ab88a40afcb7c8b974276805105239c07855bbacee", size = 1171110, upload-time = "2025-11-05T20:41:32.631Z" }, - { url = "https://files.pythonhosted.org/packages/9e/46/08436312ff96ffa29cfa4e1a987efc37e094531db46ba5e9fda9bb792afd/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:775710777fd71e5fdf54df69cdc249996a1d6f447a2b5bfb86dbf033fddd9cf9", size = 943339, upload-time = "2025-11-05T20:41:47.128Z" }, - { url = "https://files.pythonhosted.org/packages/34/28/3b3c51328f505cfaf7e53f408f78a1e955d561135d02f9cb0341ea99f69a/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4565407f4a77f72cf9d91469e75d15d375f755f0a01236bb8aaa176278cc7085", size = 961680, upload-time = "2025-11-05T20:42:18.061Z" }, - { url = "https://files.pythonhosted.org/packages/5c/9e/cbff75c8676d4f4a90bd58a1581249d255c7305141b0868f0abc0324836b/rignore-0.7.6-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc44c33f8fb2d5c9da748de7a6e6653a78aa740655e7409895e94a247ffa97c8", size = 987045, upload-time = "2025-11-05T20:42:02.315Z" }, - { url = "https://files.pythonhosted.org/packages/8c/25/d802d1d369502a7ddb8816059e7c79d2d913e17df975b863418e0aca4d8a/rignore-0.7.6-pp310-pypy310_pp73-musllinux_1_2_aarch64.whl", hash = "sha256:8f32478f05540513c11923e8838afab9efef0131d66dca7f67f0e1bbd118af6a", size = 1080310, upload-time = "2025-11-05T21:40:23.184Z" }, - { url = "https://files.pythonhosted.org/packages/43/f0/250b785c2e473b1ab763eaf2be820934c2a5409a722e94b279dddac21c7d/rignore-0.7.6-pp310-pypy310_pp73-musllinux_1_2_armv7l.whl", hash = "sha256:1b63a3dd76225ea35b01dd6596aa90b275b5d0f71d6dc28fce6dd295d98614aa", size = 1140998, upload-time = "2025-11-05T21:40:40.603Z" }, - { url = "https://files.pythonhosted.org/packages/f5/d6/bb42fd2a8bba6aea327962656e20621fd495523259db40cfb4c5f760f05c/rignore-0.7.6-pp310-pypy310_pp73-musllinux_1_2_i686.whl", hash = "sha256:fe6c41175c36554a4ef0994cd1b4dbd6d73156fca779066456b781707402048e", size = 1121178, upload-time = "2025-11-05T21:40:57.585Z" }, - { url = "https://files.pythonhosted.org/packages/97/f4/aeb548374129dce3dc191a4bb598c944d9ed663f467b9af830315d86059c/rignore-0.7.6-pp310-pypy310_pp73-musllinux_1_2_x86_64.whl", hash = "sha256:9a0c6792406ae36f4e7664dc772da909451d46432ff8485774526232d4885063", size = 1130190, upload-time = "2025-11-05T21:41:16.403Z" }, { url = "https://files.pythonhosted.org/packages/82/78/a6250ff0c49a3cdb943910ada4116e708118e9b901c878cfae616c80a904/rignore-0.7.6-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:a20b6fb61bcced9a83dfcca6599ad45182b06ba720cff7c8d891e5b78db5b65f", size = 886470, upload-time = "2025-11-05T20:42:52.314Z" }, { url = "https://files.pythonhosted.org/packages/35/af/c69c0c51b8f9f7914d95c4ea91c29a2ac067572048cae95dd6d2efdbe05d/rignore-0.7.6-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:392dcabfecbe176c9ebbcb40d85a5e86a5989559c4f988c2741da7daf1b5be25", size = 825976, upload-time = "2025-11-05T20:42:35.118Z" }, { url = "https://files.pythonhosted.org/packages/f1/d2/1b264f56132264ea609d3213ab603d6a27016b19559a1a1ede1a66a03dcd/rignore-0.7.6-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:22baa462abdc36fdd5a5e2dae423107723351b85ff093762f9261148b9d0a04a", size = 899739, upload-time = "2025-11-05T20:41:01.518Z" }, @@ -3759,67 +3386,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/fc/51/727abb13f44c1fcf6d145979e1535a35794db0f6e450a0cb46aa24732fe2/s3transfer-0.16.0-py3-none-any.whl", hash = "sha256:18e25d66fed509e3868dc1572b3f427ff947dd2c56f844a5bf09481ad3f3b2fe", size = 86830, upload-time = "2025-12-01T02:30:57.729Z" }, ] -[[package]] -name = "scikit-learn" -version = "1.7.2" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11'", -] -dependencies = [ - { name = "joblib", marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "scipy", version = "1.15.3", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "threadpoolctl", marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/98/c2/a7855e41c9d285dfe86dc50b250978105dce513d6e459ea66a6aeb0e1e0c/scikit_learn-1.7.2.tar.gz", hash = "sha256:20e9e49ecd130598f1ca38a1d85090e1a600147b9c02fa6f15d69cb53d968fda", size = 7193136, upload-time = "2025-09-09T08:21:29.075Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ba/3e/daed796fd69cce768b8788401cc464ea90b306fb196ae1ffed0b98182859/scikit_learn-1.7.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6b33579c10a3081d076ab403df4a4190da4f4432d443521674637677dc91e61f", size = 9336221, upload-time = "2025-09-09T08:20:19.328Z" }, - { url = "https://files.pythonhosted.org/packages/1c/ce/af9d99533b24c55ff4e18d9b7b4d9919bbc6cd8f22fe7a7be01519a347d5/scikit_learn-1.7.2-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:36749fb62b3d961b1ce4fedf08fa57a1986cd409eff2d783bca5d4b9b5fce51c", size = 8653834, upload-time = "2025-09-09T08:20:22.073Z" }, - { url = "https://files.pythonhosted.org/packages/58/0e/8c2a03d518fb6bd0b6b0d4b114c63d5f1db01ff0f9925d8eb10960d01c01/scikit_learn-1.7.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:7a58814265dfc52b3295b1900cfb5701589d30a8bb026c7540f1e9d3499d5ec8", size = 9660938, upload-time = "2025-09-09T08:20:24.327Z" }, - { url = "https://files.pythonhosted.org/packages/2b/75/4311605069b5d220e7cf5adabb38535bd96f0079313cdbb04b291479b22a/scikit_learn-1.7.2-cp310-cp310-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:4a847fea807e278f821a0406ca01e387f97653e284ecbd9750e3ee7c90347f18", size = 9477818, upload-time = "2025-09-09T08:20:26.845Z" }, - { url = "https://files.pythonhosted.org/packages/7f/9b/87961813c34adbca21a6b3f6b2bea344c43b30217a6d24cc437c6147f3e8/scikit_learn-1.7.2-cp310-cp310-win_amd64.whl", hash = "sha256:ca250e6836d10e6f402436d6463d6c0e4d8e0234cfb6a9a47835bd392b852ce5", size = 8886969, upload-time = "2025-09-09T08:20:29.329Z" }, - { url = "https://files.pythonhosted.org/packages/43/83/564e141eef908a5863a54da8ca342a137f45a0bfb71d1d79704c9894c9d1/scikit_learn-1.7.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7509693451651cd7361d30ce4e86a1347493554f172b1c72a39300fa2aea79e", size = 9331967, upload-time = "2025-09-09T08:20:32.421Z" }, - { url = "https://files.pythonhosted.org/packages/18/d6/ba863a4171ac9d7314c4d3fc251f015704a2caeee41ced89f321c049ed83/scikit_learn-1.7.2-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:0486c8f827c2e7b64837c731c8feff72c0bd2b998067a8a9cbc10643c31f0fe1", size = 8648645, upload-time = "2025-09-09T08:20:34.436Z" }, - { url = 
"https://files.pythonhosted.org/packages/ef/0e/97dbca66347b8cf0ea8b529e6bb9367e337ba2e8be0ef5c1a545232abfde/scikit_learn-1.7.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:89877e19a80c7b11a2891a27c21c4894fb18e2c2e077815bcade10d34287b20d", size = 9715424, upload-time = "2025-09-09T08:20:36.776Z" }, - { url = "https://files.pythonhosted.org/packages/f7/32/1f3b22e3207e1d2c883a7e09abb956362e7d1bd2f14458c7de258a26ac15/scikit_learn-1.7.2-cp311-cp311-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:8da8bf89d4d79aaec192d2bda62f9b56ae4e5b4ef93b6a56b5de4977e375c1f1", size = 9509234, upload-time = "2025-09-09T08:20:38.957Z" }, - { url = "https://files.pythonhosted.org/packages/9f/71/34ddbd21f1da67c7a768146968b4d0220ee6831e4bcbad3e03dd3eae88b6/scikit_learn-1.7.2-cp311-cp311-win_amd64.whl", hash = "sha256:9b7ed8d58725030568523e937c43e56bc01cadb478fc43c042a9aca1dacb3ba1", size = 8894244, upload-time = "2025-09-09T08:20:41.166Z" }, - { url = "https://files.pythonhosted.org/packages/a7/aa/3996e2196075689afb9fce0410ebdb4a09099d7964d061d7213700204409/scikit_learn-1.7.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:8d91a97fa2b706943822398ab943cde71858a50245e31bc71dba62aab1d60a96", size = 9259818, upload-time = "2025-09-09T08:20:43.19Z" }, - { url = "https://files.pythonhosted.org/packages/43/5d/779320063e88af9c4a7c2cf463ff11c21ac9c8bd730c4a294b0000b666c9/scikit_learn-1.7.2-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:acbc0f5fd2edd3432a22c69bed78e837c70cf896cd7993d71d51ba6708507476", size = 8636997, upload-time = "2025-09-09T08:20:45.468Z" }, - { url = "https://files.pythonhosted.org/packages/5c/d0/0c577d9325b05594fdd33aa970bf53fb673f051a45496842caee13cfd7fe/scikit_learn-1.7.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e5bf3d930aee75a65478df91ac1225ff89cd28e9ac7bd1196853a9229b6adb0b", size = 9478381, upload-time = "2025-09-09T08:20:47.982Z" }, - { url = "https://files.pythonhosted.org/packages/82/70/8bf44b933837ba8494ca0fc9a9ab60f1c13b062ad0197f60a56e2fc4c43e/scikit_learn-1.7.2-cp312-cp312-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:b4d6e9deed1a47aca9fe2f267ab8e8fe82ee20b4526b2c0cd9e135cea10feb44", size = 9300296, upload-time = "2025-09-09T08:20:50.366Z" }, - { url = "https://files.pythonhosted.org/packages/c6/99/ed35197a158f1fdc2fe7c3680e9c70d0128f662e1fee4ed495f4b5e13db0/scikit_learn-1.7.2-cp312-cp312-win_amd64.whl", hash = "sha256:6088aa475f0785e01bcf8529f55280a3d7d298679f50c0bb70a2364a82d0b290", size = 8731256, upload-time = "2025-09-09T08:20:52.627Z" }, - { url = "https://files.pythonhosted.org/packages/ae/93/a3038cb0293037fd335f77f31fe053b89c72f17b1c8908c576c29d953e84/scikit_learn-1.7.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:0b7dacaa05e5d76759fb071558a8b5130f4845166d88654a0f9bdf3eb57851b7", size = 9212382, upload-time = "2025-09-09T08:20:54.731Z" }, - { url = "https://files.pythonhosted.org/packages/40/dd/9a88879b0c1104259136146e4742026b52df8540c39fec21a6383f8292c7/scikit_learn-1.7.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:abebbd61ad9e1deed54cca45caea8ad5f79e1b93173dece40bb8e0c658dbe6fe", size = 8592042, upload-time = "2025-09-09T08:20:57.313Z" }, - { url = "https://files.pythonhosted.org/packages/46/af/c5e286471b7d10871b811b72ae794ac5fe2989c0a2df07f0ec723030f5f5/scikit_learn-1.7.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:502c18e39849c0ea1a5d681af1dbcf15f6cce601aebb657aabbfe84133c1907f", size = 9434180, upload-time = 
"2025-09-09T08:20:59.671Z" }, - { url = "https://files.pythonhosted.org/packages/f1/fd/df59faa53312d585023b2da27e866524ffb8faf87a68516c23896c718320/scikit_learn-1.7.2-cp313-cp313-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:7a4c328a71785382fe3fe676a9ecf2c86189249beff90bf85e22bdb7efaf9ae0", size = 9283660, upload-time = "2025-09-09T08:21:01.71Z" }, - { url = "https://files.pythonhosted.org/packages/a7/c7/03000262759d7b6f38c836ff9d512f438a70d8a8ddae68ee80de72dcfb63/scikit_learn-1.7.2-cp313-cp313-win_amd64.whl", hash = "sha256:63a9afd6f7b229aad94618c01c252ce9e6fa97918c5ca19c9a17a087d819440c", size = 8702057, upload-time = "2025-09-09T08:21:04.234Z" }, - { url = "https://files.pythonhosted.org/packages/55/87/ef5eb1f267084532c8e4aef98a28b6ffe7425acbfd64b5e2f2e066bc29b3/scikit_learn-1.7.2-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:9acb6c5e867447b4e1390930e3944a005e2cb115922e693c08a323421a6966e8", size = 9558731, upload-time = "2025-09-09T08:21:06.381Z" }, - { url = "https://files.pythonhosted.org/packages/93/f8/6c1e3fc14b10118068d7938878a9f3f4e6d7b74a8ddb1e5bed65159ccda8/scikit_learn-1.7.2-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:2a41e2a0ef45063e654152ec9d8bcfc39f7afce35b08902bfe290c2498a67a6a", size = 9038852, upload-time = "2025-09-09T08:21:08.628Z" }, - { url = "https://files.pythonhosted.org/packages/83/87/066cafc896ee540c34becf95d30375fe5cbe93c3b75a0ee9aa852cd60021/scikit_learn-1.7.2-cp313-cp313t-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:98335fb98509b73385b3ab2bd0639b1f610541d3988ee675c670371d6a87aa7c", size = 9527094, upload-time = "2025-09-09T08:21:11.486Z" }, - { url = "https://files.pythonhosted.org/packages/9c/2b/4903e1ccafa1f6453b1ab78413938c8800633988c838aa0be386cbb33072/scikit_learn-1.7.2-cp313-cp313t-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:191e5550980d45449126e23ed1d5e9e24b2c68329ee1f691a3987476e115e09c", size = 9367436, upload-time = "2025-09-09T08:21:13.602Z" }, - { url = "https://files.pythonhosted.org/packages/b5/aa/8444be3cfb10451617ff9d177b3c190288f4563e6c50ff02728be67ad094/scikit_learn-1.7.2-cp313-cp313t-win_amd64.whl", hash = "sha256:57dc4deb1d3762c75d685507fbd0bc17160144b2f2ba4ccea5dc285ab0d0e973", size = 9275749, upload-time = "2025-09-09T08:21:15.96Z" }, - { url = "https://files.pythonhosted.org/packages/d9/82/dee5acf66837852e8e68df6d8d3a6cb22d3df997b733b032f513d95205b7/scikit_learn-1.7.2-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:fa8f63940e29c82d1e67a45d5297bdebbcb585f5a5a50c4914cc2e852ab77f33", size = 9208906, upload-time = "2025-09-09T08:21:18.557Z" }, - { url = "https://files.pythonhosted.org/packages/3c/30/9029e54e17b87cb7d50d51a5926429c683d5b4c1732f0507a6c3bed9bf65/scikit_learn-1.7.2-cp314-cp314-macosx_12_0_arm64.whl", hash = "sha256:f95dc55b7902b91331fa4e5845dd5bde0580c9cd9612b1b2791b7e80c3d32615", size = 8627836, upload-time = "2025-09-09T08:21:20.695Z" }, - { url = "https://files.pythonhosted.org/packages/60/18/4a52c635c71b536879f4b971c2cedf32c35ee78f48367885ed8025d1f7ee/scikit_learn-1.7.2-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:9656e4a53e54578ad10a434dc1f993330568cfee176dff07112b8785fb413106", size = 9426236, upload-time = "2025-09-09T08:21:22.645Z" }, - { url = "https://files.pythonhosted.org/packages/99/7e/290362f6ab582128c53445458a5befd471ed1ea37953d5bcf80604619250/scikit_learn-1.7.2-cp314-cp314-manylinux_2_27_aarch64.manylinux_2_28_aarch64.whl", hash = 
"sha256:96dc05a854add0e50d3f47a1ef21a10a595016da5b007c7d9cd9d0bffd1fcc61", size = 9312593, upload-time = "2025-09-09T08:21:24.65Z" }, - { url = "https://files.pythonhosted.org/packages/8e/87/24f541b6d62b1794939ae6422f8023703bbf6900378b2b34e0b4384dfefd/scikit_learn-1.7.2-cp314-cp314-win_amd64.whl", hash = "sha256:bb24510ed3f9f61476181e4db51ce801e2ba37541def12dc9333b946fc7a9cf8", size = 8820007, upload-time = "2025-09-09T08:21:26.713Z" }, -] - [[package]] name = "scikit-learn" version = "1.8.0" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14'", - "python_full_version == '3.13.*'", - "python_full_version >= '3.11' and python_full_version < '3.13'", -] dependencies = [ - { name = "joblib", marker = "python_full_version >= '3.11'" }, - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "scipy", version = "1.17.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "threadpoolctl", marker = "python_full_version >= '3.11'" }, + { name = "joblib" }, + { name = "numpy" }, + { name = "scipy" }, + { name = "threadpoolctl" }, ] sdist = { url = "https://files.pythonhosted.org/packages/0e/d4/40988bf3b8e34feec1d0e6a051446b1f66225f8529b9309becaeef62b6c4/scikit_learn-1.8.0.tar.gz", hash = "sha256:9bccbb3b40e3de10351f8f5068e105d0f4083b1a65fa07b6634fbc401a6287fd", size = 7335585, upload-time = "2025-12-10T07:08:53.618Z" } wheels = [ @@ -3861,76 +3436,12 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/60/22/d7b2ebe4704a5e50790ba089d5c2ae308ab6bb852719e6c3bd4f04c3a363/scikit_learn-1.8.0-cp314-cp314t-win_arm64.whl", hash = "sha256:f28dd15c6bb0b66ba09728cf09fd8736c304be29409bd8445a080c1280619e8c", size = 8002647, upload-time = "2025-12-10T07:08:51.601Z" }, ] -[[package]] -name = "scipy" -version = "1.15.3" -source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version < '3.11'", -] -dependencies = [ - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/0f/37/6964b830433e654ec7485e45a00fc9a27cf868d622838f6b6d9c5ec0d532/scipy-1.15.3.tar.gz", hash = "sha256:eae3cf522bc7df64b42cad3925c876e1b0b6c35c1337c93e12c0f366f55b0eaf", size = 59419214, upload-time = "2025-05-08T16:13:05.955Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/78/2f/4966032c5f8cc7e6a60f1b2e0ad686293b9474b65246b0c642e3ef3badd0/scipy-1.15.3-cp310-cp310-macosx_10_13_x86_64.whl", hash = "sha256:a345928c86d535060c9c2b25e71e87c39ab2f22fc96e9636bd74d1dbf9de448c", size = 38702770, upload-time = "2025-05-08T16:04:20.849Z" }, - { url = "https://files.pythonhosted.org/packages/a0/6e/0c3bf90fae0e910c274db43304ebe25a6b391327f3f10b5dcc638c090795/scipy-1.15.3-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:ad3432cb0f9ed87477a8d97f03b763fd1d57709f1bbde3c9369b1dff5503b253", size = 30094511, upload-time = "2025-05-08T16:04:27.103Z" }, - { url = "https://files.pythonhosted.org/packages/ea/b1/4deb37252311c1acff7f101f6453f0440794f51b6eacb1aad4459a134081/scipy-1.15.3-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:aef683a9ae6eb00728a542b796f52a5477b78252edede72b8327a886ab63293f", size = 22368151, upload-time = "2025-05-08T16:04:31.731Z" }, - { url = 
"https://files.pythonhosted.org/packages/38/7d/f457626e3cd3c29b3a49ca115a304cebb8cc6f31b04678f03b216899d3c6/scipy-1.15.3-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:1c832e1bd78dea67d5c16f786681b28dd695a8cb1fb90af2e27580d3d0967e92", size = 25121732, upload-time = "2025-05-08T16:04:36.596Z" }, - { url = "https://files.pythonhosted.org/packages/db/0a/92b1de4a7adc7a15dcf5bddc6e191f6f29ee663b30511ce20467ef9b82e4/scipy-1.15.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:263961f658ce2165bbd7b99fa5135195c3a12d9bef045345016b8b50c315cb82", size = 35547617, upload-time = "2025-05-08T16:04:43.546Z" }, - { url = "https://files.pythonhosted.org/packages/8e/6d/41991e503e51fc1134502694c5fa7a1671501a17ffa12716a4a9151af3df/scipy-1.15.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9e2abc762b0811e09a0d3258abee2d98e0c703eee49464ce0069590846f31d40", size = 37662964, upload-time = "2025-05-08T16:04:49.431Z" }, - { url = "https://files.pythonhosted.org/packages/25/e1/3df8f83cb15f3500478c889be8fb18700813b95e9e087328230b98d547ff/scipy-1.15.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:ed7284b21a7a0c8f1b6e5977ac05396c0d008b89e05498c8b7e8f4a1423bba0e", size = 37238749, upload-time = "2025-05-08T16:04:55.215Z" }, - { url = "https://files.pythonhosted.org/packages/93/3e/b3257cf446f2a3533ed7809757039016b74cd6f38271de91682aa844cfc5/scipy-1.15.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:5380741e53df2c566f4d234b100a484b420af85deb39ea35a1cc1be84ff53a5c", size = 40022383, upload-time = "2025-05-08T16:05:01.914Z" }, - { url = "https://files.pythonhosted.org/packages/d1/84/55bc4881973d3f79b479a5a2e2df61c8c9a04fcb986a213ac9c02cfb659b/scipy-1.15.3-cp310-cp310-win_amd64.whl", hash = "sha256:9d61e97b186a57350f6d6fd72640f9e99d5a4a2b8fbf4b9ee9a841eab327dc13", size = 41259201, upload-time = "2025-05-08T16:05:08.166Z" }, - { url = "https://files.pythonhosted.org/packages/96/ab/5cc9f80f28f6a7dff646c5756e559823614a42b1939d86dd0ed550470210/scipy-1.15.3-cp311-cp311-macosx_10_13_x86_64.whl", hash = "sha256:993439ce220d25e3696d1b23b233dd010169b62f6456488567e830654ee37a6b", size = 38714255, upload-time = "2025-05-08T16:05:14.596Z" }, - { url = "https://files.pythonhosted.org/packages/4a/4a/66ba30abe5ad1a3ad15bfb0b59d22174012e8056ff448cb1644deccbfed2/scipy-1.15.3-cp311-cp311-macosx_12_0_arm64.whl", hash = "sha256:34716e281f181a02341ddeaad584205bd2fd3c242063bd3423d61ac259ca7eba", size = 30111035, upload-time = "2025-05-08T16:05:20.152Z" }, - { url = "https://files.pythonhosted.org/packages/4b/fa/a7e5b95afd80d24313307f03624acc65801846fa75599034f8ceb9e2cbf6/scipy-1.15.3-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:3b0334816afb8b91dab859281b1b9786934392aa3d527cd847e41bb6f45bee65", size = 22384499, upload-time = "2025-05-08T16:05:24.494Z" }, - { url = "https://files.pythonhosted.org/packages/17/99/f3aaddccf3588bb4aea70ba35328c204cadd89517a1612ecfda5b2dd9d7a/scipy-1.15.3-cp311-cp311-macosx_14_0_x86_64.whl", hash = "sha256:6db907c7368e3092e24919b5e31c76998b0ce1684d51a90943cb0ed1b4ffd6c1", size = 25152602, upload-time = "2025-05-08T16:05:29.313Z" }, - { url = "https://files.pythonhosted.org/packages/56/c5/1032cdb565f146109212153339f9cb8b993701e9fe56b1c97699eee12586/scipy-1.15.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:721d6b4ef5dc82ca8968c25b111e307083d7ca9091bc38163fb89243e85e3889", size = 35503415, upload-time = "2025-05-08T16:05:34.699Z" }, - { url = 
"https://files.pythonhosted.org/packages/bd/37/89f19c8c05505d0601ed5650156e50eb881ae3918786c8fd7262b4ee66d3/scipy-1.15.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:39cb9c62e471b1bb3750066ecc3a3f3052b37751c7c3dfd0fd7e48900ed52982", size = 37652622, upload-time = "2025-05-08T16:05:40.762Z" }, - { url = "https://files.pythonhosted.org/packages/7e/31/be59513aa9695519b18e1851bb9e487de66f2d31f835201f1b42f5d4d475/scipy-1.15.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:795c46999bae845966368a3c013e0e00947932d68e235702b5c3f6ea799aa8c9", size = 37244796, upload-time = "2025-05-08T16:05:48.119Z" }, - { url = "https://files.pythonhosted.org/packages/10/c0/4f5f3eeccc235632aab79b27a74a9130c6c35df358129f7ac8b29f562ac7/scipy-1.15.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:18aaacb735ab38b38db42cb01f6b92a2d0d4b6aabefeb07f02849e47f8fb3594", size = 40047684, upload-time = "2025-05-08T16:05:54.22Z" }, - { url = "https://files.pythonhosted.org/packages/ab/a7/0ddaf514ce8a8714f6ed243a2b391b41dbb65251affe21ee3077ec45ea9a/scipy-1.15.3-cp311-cp311-win_amd64.whl", hash = "sha256:ae48a786a28412d744c62fd7816a4118ef97e5be0bee968ce8f0a2fba7acf3bb", size = 41246504, upload-time = "2025-05-08T16:06:00.437Z" }, - { url = "https://files.pythonhosted.org/packages/37/4b/683aa044c4162e10ed7a7ea30527f2cbd92e6999c10a8ed8edb253836e9c/scipy-1.15.3-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6ac6310fdbfb7aa6612408bd2f07295bcbd3fda00d2d702178434751fe48e019", size = 38766735, upload-time = "2025-05-08T16:06:06.471Z" }, - { url = "https://files.pythonhosted.org/packages/7b/7e/f30be3d03de07f25dc0ec926d1681fed5c732d759ac8f51079708c79e680/scipy-1.15.3-cp312-cp312-macosx_12_0_arm64.whl", hash = "sha256:185cd3d6d05ca4b44a8f1595af87f9c372bb6acf9c808e99aa3e9aa03bd98cf6", size = 30173284, upload-time = "2025-05-08T16:06:11.686Z" }, - { url = "https://files.pythonhosted.org/packages/07/9c/0ddb0d0abdabe0d181c1793db51f02cd59e4901da6f9f7848e1f96759f0d/scipy-1.15.3-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:05dc6abcd105e1a29f95eada46d4a3f251743cfd7d3ae8ddb4088047f24ea477", size = 22446958, upload-time = "2025-05-08T16:06:15.97Z" }, - { url = "https://files.pythonhosted.org/packages/af/43/0bce905a965f36c58ff80d8bea33f1f9351b05fad4beaad4eae34699b7a1/scipy-1.15.3-cp312-cp312-macosx_14_0_x86_64.whl", hash = "sha256:06efcba926324df1696931a57a176c80848ccd67ce6ad020c810736bfd58eb1c", size = 25242454, upload-time = "2025-05-08T16:06:20.394Z" }, - { url = "https://files.pythonhosted.org/packages/56/30/a6f08f84ee5b7b28b4c597aca4cbe545535c39fe911845a96414700b64ba/scipy-1.15.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c05045d8b9bfd807ee1b9f38761993297b10b245f012b11b13b91ba8945f7e45", size = 35210199, upload-time = "2025-05-08T16:06:26.159Z" }, - { url = "https://files.pythonhosted.org/packages/0b/1f/03f52c282437a168ee2c7c14a1a0d0781a9a4a8962d84ac05c06b4c5b555/scipy-1.15.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:271e3713e645149ea5ea3e97b57fdab61ce61333f97cfae392c28ba786f9bb49", size = 37309455, upload-time = "2025-05-08T16:06:32.778Z" }, - { url = "https://files.pythonhosted.org/packages/89/b1/fbb53137f42c4bf630b1ffdfc2151a62d1d1b903b249f030d2b1c0280af8/scipy-1.15.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6cfd56fc1a8e53f6e89ba3a7a7251f7396412d655bca2aa5611c8ec9a6784a1e", size = 36885140, upload-time = "2025-05-08T16:06:39.249Z" }, - { url = 
"https://files.pythonhosted.org/packages/2e/2e/025e39e339f5090df1ff266d021892694dbb7e63568edcfe43f892fa381d/scipy-1.15.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:0ff17c0bb1cb32952c09217d8d1eed9b53d1463e5f1dd6052c7857f83127d539", size = 39710549, upload-time = "2025-05-08T16:06:45.729Z" }, - { url = "https://files.pythonhosted.org/packages/e6/eb/3bf6ea8ab7f1503dca3a10df2e4b9c3f6b3316df07f6c0ded94b281c7101/scipy-1.15.3-cp312-cp312-win_amd64.whl", hash = "sha256:52092bc0472cfd17df49ff17e70624345efece4e1a12b23783a1ac59a1b728ed", size = 40966184, upload-time = "2025-05-08T16:06:52.623Z" }, - { url = "https://files.pythonhosted.org/packages/73/18/ec27848c9baae6e0d6573eda6e01a602e5649ee72c27c3a8aad673ebecfd/scipy-1.15.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:2c620736bcc334782e24d173c0fdbb7590a0a436d2fdf39310a8902505008759", size = 38728256, upload-time = "2025-05-08T16:06:58.696Z" }, - { url = "https://files.pythonhosted.org/packages/74/cd/1aef2184948728b4b6e21267d53b3339762c285a46a274ebb7863c9e4742/scipy-1.15.3-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:7e11270a000969409d37ed399585ee530b9ef6aa99d50c019de4cb01e8e54e62", size = 30109540, upload-time = "2025-05-08T16:07:04.209Z" }, - { url = "https://files.pythonhosted.org/packages/5b/d8/59e452c0a255ec352bd0a833537a3bc1bfb679944c4938ab375b0a6b3a3e/scipy-1.15.3-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:8c9ed3ba2c8a2ce098163a9bdb26f891746d02136995df25227a20e71c396ebb", size = 22383115, upload-time = "2025-05-08T16:07:08.998Z" }, - { url = "https://files.pythonhosted.org/packages/08/f5/456f56bbbfccf696263b47095291040655e3cbaf05d063bdc7c7517f32ac/scipy-1.15.3-cp313-cp313-macosx_14_0_x86_64.whl", hash = "sha256:0bdd905264c0c9cfa74a4772cdb2070171790381a5c4d312c973382fc6eaf730", size = 25163884, upload-time = "2025-05-08T16:07:14.091Z" }, - { url = "https://files.pythonhosted.org/packages/a2/66/a9618b6a435a0f0c0b8a6d0a2efb32d4ec5a85f023c2b79d39512040355b/scipy-1.15.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:79167bba085c31f38603e11a267d862957cbb3ce018d8b38f79ac043bc92d825", size = 35174018, upload-time = "2025-05-08T16:07:19.427Z" }, - { url = "https://files.pythonhosted.org/packages/b5/09/c5b6734a50ad4882432b6bb7c02baf757f5b2f256041da5df242e2d7e6b6/scipy-1.15.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9deabd6d547aee2c9a81dee6cc96c6d7e9a9b1953f74850c179f91fdc729cb7", size = 37269716, upload-time = "2025-05-08T16:07:25.712Z" }, - { url = "https://files.pythonhosted.org/packages/77/0a/eac00ff741f23bcabd352731ed9b8995a0a60ef57f5fd788d611d43d69a1/scipy-1.15.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:dde4fc32993071ac0c7dd2d82569e544f0bdaff66269cb475e0f369adad13f11", size = 36872342, upload-time = "2025-05-08T16:07:31.468Z" }, - { url = "https://files.pythonhosted.org/packages/fe/54/4379be86dd74b6ad81551689107360d9a3e18f24d20767a2d5b9253a3f0a/scipy-1.15.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f77f853d584e72e874d87357ad70f44b437331507d1c311457bed8ed2b956126", size = 39670869, upload-time = "2025-05-08T16:07:38.002Z" }, - { url = "https://files.pythonhosted.org/packages/87/2e/892ad2862ba54f084ffe8cc4a22667eaf9c2bcec6d2bff1d15713c6c0703/scipy-1.15.3-cp313-cp313-win_amd64.whl", hash = "sha256:b90ab29d0c37ec9bf55424c064312930ca5f4bde15ee8619ee44e69319aab163", size = 40988851, upload-time = "2025-05-08T16:08:33.671Z" }, - { url = 
"https://files.pythonhosted.org/packages/1b/e9/7a879c137f7e55b30d75d90ce3eb468197646bc7b443ac036ae3fe109055/scipy-1.15.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3ac07623267feb3ae308487c260ac684b32ea35fd81e12845039952f558047b8", size = 38863011, upload-time = "2025-05-08T16:07:44.039Z" }, - { url = "https://files.pythonhosted.org/packages/51/d1/226a806bbd69f62ce5ef5f3ffadc35286e9fbc802f606a07eb83bf2359de/scipy-1.15.3-cp313-cp313t-macosx_12_0_arm64.whl", hash = "sha256:6487aa99c2a3d509a5227d9a5e889ff05830a06b2ce08ec30df6d79db5fcd5c5", size = 30266407, upload-time = "2025-05-08T16:07:49.891Z" }, - { url = "https://files.pythonhosted.org/packages/e5/9b/f32d1d6093ab9eeabbd839b0f7619c62e46cc4b7b6dbf05b6e615bbd4400/scipy-1.15.3-cp313-cp313t-macosx_14_0_arm64.whl", hash = "sha256:50f9e62461c95d933d5c5ef4a1f2ebf9a2b4e83b0db374cb3f1de104d935922e", size = 22540030, upload-time = "2025-05-08T16:07:54.121Z" }, - { url = "https://files.pythonhosted.org/packages/e7/29/c278f699b095c1a884f29fda126340fcc201461ee8bfea5c8bdb1c7c958b/scipy-1.15.3-cp313-cp313t-macosx_14_0_x86_64.whl", hash = "sha256:14ed70039d182f411ffc74789a16df3835e05dc469b898233a245cdfd7f162cb", size = 25218709, upload-time = "2025-05-08T16:07:58.506Z" }, - { url = "https://files.pythonhosted.org/packages/24/18/9e5374b617aba742a990581373cd6b68a2945d65cc588482749ef2e64467/scipy-1.15.3-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0a769105537aa07a69468a0eefcd121be52006db61cdd8cac8a0e68980bbb723", size = 34809045, upload-time = "2025-05-08T16:08:03.929Z" }, - { url = "https://files.pythonhosted.org/packages/e1/fe/9c4361e7ba2927074360856db6135ef4904d505e9b3afbbcb073c4008328/scipy-1.15.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9db984639887e3dffb3928d118145ffe40eff2fa40cb241a306ec57c219ebbbb", size = 36703062, upload-time = "2025-05-08T16:08:09.558Z" }, - { url = "https://files.pythonhosted.org/packages/b7/8e/038ccfe29d272b30086b25a4960f757f97122cb2ec42e62b460d02fe98e9/scipy-1.15.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:40e54d5c7e7ebf1aa596c374c49fa3135f04648a0caabcb66c52884b943f02b4", size = 36393132, upload-time = "2025-05-08T16:08:15.34Z" }, - { url = "https://files.pythonhosted.org/packages/10/7e/5c12285452970be5bdbe8352c619250b97ebf7917d7a9a9e96b8a8140f17/scipy-1.15.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5e721fed53187e71d0ccf382b6bf977644c533e506c4d33c3fb24de89f5c3ed5", size = 38979503, upload-time = "2025-05-08T16:08:21.513Z" }, - { url = "https://files.pythonhosted.org/packages/81/06/0a5e5349474e1cbc5757975b21bd4fad0e72ebf138c5592f191646154e06/scipy-1.15.3-cp313-cp313t-win_amd64.whl", hash = "sha256:76ad1fb5f8752eabf0fa02e4cc0336b4e8f021e2d5f061ed37d6d264db35e3ca", size = 40308097, upload-time = "2025-05-08T16:08:27.627Z" }, -] - [[package]] name = "scipy" version = "1.17.1" source = { registry = "https://pypi.org/simple" } -resolution-markers = [ - "python_full_version >= '3.14'", - "python_full_version == '3.13.*'", - "python_full_version >= '3.11' and python_full_version < '3.13'", -] dependencies = [ - { name = "numpy", version = "2.4.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, + { name = "numpy" }, ] sdist = { url = "https://files.pythonhosted.org/packages/7a/97/5a3609c4f8d58b039179648e62dd220f89864f56f7357f5d4f45c29eb2cc/scipy-1.17.1.tar.gz", hash = "sha256:95d8e012d8cb8816c226aef832200b1d45109ed4464303e997c5b13122b297c0", size = 30573822, upload-time = 
"2026-02-23T00:26:24.851Z" } wheels = [ @@ -4066,13 +3577,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/06/aa/9ce0f3e7a9829ead5c8ce549392f33a12c4555a6c0609bb27d882e9c7ddf/sqlalchemy-2.0.46.tar.gz", hash = "sha256:cf36851ee7219c170bb0793dbc3da3e80c582e04a5437bc601bfe8c85c9216d7", size = 9865393, upload-time = "2026-01-21T18:03:45.119Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/40/26/66ba59328dc25e523bfcb0f8db48bdebe2035e0159d600e1f01c0fc93967/sqlalchemy-2.0.46-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:895296687ad06dc9b11a024cf68e8d9d3943aa0b4964278d2553b86f1b267735", size = 2155051, upload-time = "2026-01-21T18:27:28.965Z" }, - { url = "https://files.pythonhosted.org/packages/21/cd/9336732941df972fbbfa394db9caa8bb0cf9fe03656ec728d12e9cbd6edc/sqlalchemy-2.0.46-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ab65cb2885a9f80f979b85aa4e9c9165a31381ca322cbde7c638fe6eefd1ec39", size = 3234666, upload-time = "2026-01-21T18:32:28.72Z" }, - { url = "https://files.pythonhosted.org/packages/38/62/865ae8b739930ec433cd4123760bee7f8dafdc10abefd725a025604fb0de/sqlalchemy-2.0.46-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:52fe29b3817bd191cc20bad564237c808967972c97fa683c04b28ec8979ae36f", size = 3232917, upload-time = "2026-01-21T18:44:54.064Z" }, - { url = "https://files.pythonhosted.org/packages/24/38/805904b911857f2b5e00fdea44e9570df62110f834378706939825579296/sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:09168817d6c19954d3b7655da6ba87fcb3a62bb575fb396a81a8b6a9fadfe8b5", size = 3185790, upload-time = "2026-01-21T18:32:30.581Z" }, - { url = "https://files.pythonhosted.org/packages/69/4f/3260bb53aabd2d274856337456ea52f6a7eccf6cce208e558f870cec766b/sqlalchemy-2.0.46-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:be6c0466b4c25b44c5d82b0426b5501de3c424d7a3220e86cd32f319ba56798e", size = 3207206, upload-time = "2026-01-21T18:44:55.93Z" }, - { url = "https://files.pythonhosted.org/packages/ce/b3/67c432d7f9d88bb1a61909b67e29f6354d59186c168fb5d381cf438d3b73/sqlalchemy-2.0.46-cp310-cp310-win32.whl", hash = "sha256:1bc3f601f0a818d27bfe139f6766487d9c88502062a2cd3a7ee6c342e81d5047", size = 2115296, upload-time = "2026-01-21T18:33:12.498Z" }, - { url = "https://files.pythonhosted.org/packages/4a/8c/25fb284f570f9d48e6c240f0269a50cec9cf009a7e08be4c0aaaf0654972/sqlalchemy-2.0.46-cp310-cp310-win_amd64.whl", hash = "sha256:e0c05aff5c6b1bb5fb46a87e0f9d2f733f83ef6cbbbcd5c642b6c01678268061", size = 2138540, upload-time = "2026-01-21T18:33:14.22Z" }, { url = "https://files.pythonhosted.org/packages/69/ac/b42ad16800d0885105b59380ad69aad0cce5a65276e269ce2729a2343b6a/sqlalchemy-2.0.46-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:261c4b1f101b4a411154f1da2b76497d73abbfc42740029205d4d01fa1052684", size = 2154851, upload-time = "2026-01-21T18:27:30.54Z" }, { url = "https://files.pythonhosted.org/packages/a0/60/d8710068cb79f64d002ebed62a7263c00c8fd95f4ebd4b5be8f7ca93f2bc/sqlalchemy-2.0.46-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:181903fe8c1b9082995325f1b2e84ac078b1189e2819380c2303a5f90e114a62", size = 3311241, upload-time = "2026-01-21T18:32:33.45Z" }, { url = "https://files.pythonhosted.org/packages/2b/0f/20c71487c7219ab3aa7421c7c62d93824c97c1460f2e8bb72404b0192d13/sqlalchemy-2.0.46-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", 
hash = "sha256:590be24e20e2424a4c3c1b0835e9405fa3d0af5823a1a9fc02e5dff56471515f", size = 3310741, upload-time = "2026-01-21T18:44:57.887Z" }, @@ -4174,13 +3678,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806, upload-time = "2025-10-06T20:22:45.419Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/89/b3/2cb7c17b6c4cf8ca983204255d3f1d95eda7213e247e6947a0ee2c747a2c/tiktoken-0.12.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3de02f5a491cfd179aec916eddb70331814bd6bf764075d39e21d5862e533970", size = 1051991, upload-time = "2025-10-06T20:21:34.098Z" }, - { url = "https://files.pythonhosted.org/packages/27/0f/df139f1df5f6167194ee5ab24634582ba9a1b62c6b996472b0277ec80f66/tiktoken-0.12.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b6cfb6d9b7b54d20af21a912bfe63a2727d9cfa8fbda642fd8322c70340aad16", size = 995798, upload-time = "2025-10-06T20:21:35.579Z" }, - { url = "https://files.pythonhosted.org/packages/ef/5d/26a691f28ab220d5edc09b9b787399b130f24327ef824de15e5d85ef21aa/tiktoken-0.12.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:cde24cdb1b8a08368f709124f15b36ab5524aac5fa830cc3fdce9c03d4fb8030", size = 1129865, upload-time = "2025-10-06T20:21:36.675Z" }, - { url = "https://files.pythonhosted.org/packages/b2/94/443fab3d4e5ebecac895712abd3849b8da93b7b7dec61c7db5c9c7ebe40c/tiktoken-0.12.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:6de0da39f605992649b9cfa6f84071e3f9ef2cec458d08c5feb1b6f0ff62e134", size = 1152856, upload-time = "2025-10-06T20:21:37.873Z" }, - { url = "https://files.pythonhosted.org/packages/54/35/388f941251b2521c70dd4c5958e598ea6d2c88e28445d2fb8189eecc1dfc/tiktoken-0.12.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:6faa0534e0eefbcafaccb75927a4a380463a2eaa7e26000f0173b920e98b720a", size = 1195308, upload-time = "2025-10-06T20:21:39.577Z" }, - { url = "https://files.pythonhosted.org/packages/f8/00/c6681c7f833dd410576183715a530437a9873fa910265817081f65f9105f/tiktoken-0.12.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:82991e04fc860afb933efb63957affc7ad54f83e2216fe7d319007dab1ba5892", size = 1255697, upload-time = "2025-10-06T20:21:41.154Z" }, - { url = "https://files.pythonhosted.org/packages/5f/d2/82e795a6a9bafa034bf26a58e68fe9a89eeaaa610d51dbeb22106ba04f0a/tiktoken-0.12.0-cp310-cp310-win_amd64.whl", hash = "sha256:6fb2995b487c2e31acf0a9e17647e3b242235a20832642bb7a9d1a181c0c1bb1", size = 879375, upload-time = "2025-10-06T20:21:43.201Z" }, { url = "https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565, upload-time = "2025-10-06T20:21:44.566Z" }, { url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284, upload-time = "2025-10-06T20:21:45.622Z" }, { url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", 
size = 1129201, upload-time = "2025-10-06T20:21:47.074Z" }, @@ -4372,7 +3869,6 @@ source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "click" }, { name = "h11" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/32/ce/eeb58ae4ac36fe09e3842eb02e0eb676bf2c53ae062b98f1b2531673efdd/uvicorn-0.41.0.tar.gz", hash = "sha256:09d11cf7008da33113824ee5a1c6422d89fbc2ff476540d69a34c87fab8b571a", size = 82633, upload-time = "2026-02-16T23:07:24.1Z" } wheels = [ @@ -4396,12 +3892,6 @@ version = "0.22.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/06/f0/18d39dbd1971d6d62c4629cc7fa67f74821b0dc1f5a77af43719de7936a7/uvloop-0.22.1.tar.gz", hash = "sha256:6c84bae345b9147082b17371e3dd5d42775bddce91f885499017f4607fdaf39f", size = 2443250, upload-time = "2025-10-16T22:17:19.342Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/eb/14/ecceb239b65adaaf7fde510aa8bd534075695d1e5f8dadfa32b5723d9cfb/uvloop-0.22.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:ef6f0d4cc8a9fa1f6a910230cd53545d9a14479311e87e3cb225495952eb672c", size = 1343335, upload-time = "2025-10-16T22:16:11.43Z" }, - { url = "https://files.pythonhosted.org/packages/ba/ae/6f6f9af7f590b319c94532b9567409ba11f4fa71af1148cab1bf48a07048/uvloop-0.22.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7cd375a12b71d33d46af85a3343b35d98e8116134ba404bd657b3b1d15988792", size = 742903, upload-time = "2025-10-16T22:16:12.979Z" }, - { url = "https://files.pythonhosted.org/packages/09/bd/3667151ad0702282a1f4d5d29288fce8a13c8b6858bf0978c219cd52b231/uvloop-0.22.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ac33ed96229b7790eb729702751c0e93ac5bc3bcf52ae9eccbff30da09194b86", size = 3648499, upload-time = "2025-10-16T22:16:14.451Z" }, - { url = "https://files.pythonhosted.org/packages/b3/f6/21657bb3beb5f8c57ce8be3b83f653dd7933c2fd00545ed1b092d464799a/uvloop-0.22.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:481c990a7abe2c6f4fc3d98781cc9426ebd7f03a9aaa7eb03d3bfc68ac2a46bd", size = 3700133, upload-time = "2025-10-16T22:16:16.272Z" }, - { url = "https://files.pythonhosted.org/packages/09/e0/604f61d004ded805f24974c87ddd8374ef675644f476f01f1df90e4cdf72/uvloop-0.22.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:a592b043a47ad17911add5fbd087c76716d7c9ccc1d64ec9249ceafd735f03c2", size = 3512681, upload-time = "2025-10-16T22:16:18.07Z" }, - { url = "https://files.pythonhosted.org/packages/bb/ce/8491fd370b0230deb5eac69c7aae35b3be527e25a911c0acdffb922dc1cd/uvloop-0.22.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:1489cf791aa7b6e8c8be1c5a080bae3a672791fcb4e9e12249b05862a2ca9cec", size = 3615261, upload-time = "2025-10-16T22:16:19.596Z" }, { url = "https://files.pythonhosted.org/packages/c7/d5/69900f7883235562f1f50d8184bb7dd84a2fb61e9ec63f3782546fdbd057/uvloop-0.22.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c60ebcd36f7b240b30788554b6f0782454826a0ed765d8430652621b5de674b9", size = 1352420, upload-time = "2025-10-16T22:16:21.187Z" }, { url = "https://files.pythonhosted.org/packages/a8/73/c4e271b3bce59724e291465cc936c37758886a4868787da0278b3b56b905/uvloop-0.22.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3b7f102bf3cb1995cfeaee9321105e8f5da76fdb104cdad8986f85461a1b7b77", size = 748677, upload-time = "2025-10-16T22:16:22.558Z" }, { url = 
"https://files.pythonhosted.org/packages/86/94/9fb7fad2f824d25f8ecac0d70b94d0d48107ad5ece03769a9c543444f78a/uvloop-0.22.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:53c85520781d84a4b8b230e24a5af5b0778efdb39142b424990ff1ef7c48ba21", size = 3753819, upload-time = "2025-10-16T22:16:23.903Z" }, @@ -4442,7 +3932,6 @@ dependencies = [ { name = "distlib" }, { name = "filelock" }, { name = "platformdirs" }, - { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] sdist = { url = "https://files.pythonhosted.org/packages/ed/54/809199edc537dbace273495ac0884d13df26436e910a5ed4d0ec0a69806b/virtualenv-20.39.0.tar.gz", hash = "sha256:a15f0cebd00d50074fd336a169d53422436a12dfe15149efec7072cfe817df8b", size = 5869141, upload-time = "2026-02-23T18:09:13.349Z" } wheels = [ @@ -4458,18 +3947,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/c2/c9/8869df9b2a2d6c59d79220a4db37679e74f807c559ffe5265e08b227a210/watchfiles-1.1.1.tar.gz", hash = "sha256:a173cb5c16c4f40ab19cecf48a534c409f7ea983ab8fed0741304a1c0a31b3f2", size = 94440, upload-time = "2025-10-14T15:06:21.08Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a7/1a/206e8cf2dd86fddf939165a57b4df61607a1e0add2785f170a3f616b7d9f/watchfiles-1.1.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:eef58232d32daf2ac67f42dea51a2c80f0d03379075d44a587051e63cc2e368c", size = 407318, upload-time = "2025-10-14T15:04:18.753Z" }, - { url = "https://files.pythonhosted.org/packages/b3/0f/abaf5262b9c496b5dad4ed3c0e799cbecb1f8ea512ecb6ddd46646a9fca3/watchfiles-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03fa0f5237118a0c5e496185cafa92878568b652a2e9a9382a5151b1a0380a43", size = 394478, upload-time = "2025-10-14T15:04:20.297Z" }, - { url = "https://files.pythonhosted.org/packages/b1/04/9cc0ba88697b34b755371f5ace8d3a4d9a15719c07bdc7bd13d7d8c6a341/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8ca65483439f9c791897f7db49202301deb6e15fe9f8fe2fed555bf986d10c31", size = 449894, upload-time = "2025-10-14T15:04:21.527Z" }, - { url = "https://files.pythonhosted.org/packages/d2/9c/eda4615863cd8621e89aed4df680d8c3ec3da6a4cf1da113c17decd87c7f/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:f0ab1c1af0cb38e3f598244c17919fb1a84d1629cc08355b0074b6d7f53138ac", size = 459065, upload-time = "2025-10-14T15:04:22.795Z" }, - { url = "https://files.pythonhosted.org/packages/84/13/f28b3f340157d03cbc8197629bc109d1098764abe1e60874622a0be5c112/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3bc570d6c01c206c46deb6e935a260be44f186a2f05179f52f7fcd2be086a94d", size = 488377, upload-time = "2025-10-14T15:04:24.138Z" }, - { url = "https://files.pythonhosted.org/packages/86/93/cfa597fa9389e122488f7ffdbd6db505b3b915ca7435ecd7542e855898c2/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:e84087b432b6ac94778de547e08611266f1f8ffad28c0ee4c82e028b0fc5966d", size = 595837, upload-time = "2025-10-14T15:04:25.057Z" }, - { url = "https://files.pythonhosted.org/packages/57/1e/68c1ed5652b48d89fc24d6af905d88ee4f82fa8bc491e2666004e307ded1/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:620bae625f4cb18427b1bb1a2d9426dc0dd5a5ba74c7c2cdb9de405f7b129863", size = 473456, upload-time = "2025-10-14T15:04:26.497Z" }, - { url = 
"https://files.pythonhosted.org/packages/d5/dc/1a680b7458ffa3b14bb64878112aefc8f2e4f73c5af763cbf0bd43100658/watchfiles-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:544364b2b51a9b0c7000a4b4b02f90e9423d97fbbf7e06689236443ebcad81ab", size = 455614, upload-time = "2025-10-14T15:04:27.539Z" }, - { url = "https://files.pythonhosted.org/packages/61/a5/3d782a666512e01eaa6541a72ebac1d3aae191ff4a31274a66b8dd85760c/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:bbe1ef33d45bc71cf21364df962af171f96ecaeca06bd9e3d0b583efb12aec82", size = 630690, upload-time = "2025-10-14T15:04:28.495Z" }, - { url = "https://files.pythonhosted.org/packages/9b/73/bb5f38590e34687b2a9c47a244aa4dd50c56a825969c92c9c5fc7387cea1/watchfiles-1.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:1a0bb430adb19ef49389e1ad368450193a90038b5b752f4ac089ec6942c4dff4", size = 622459, upload-time = "2025-10-14T15:04:29.491Z" }, - { url = "https://files.pythonhosted.org/packages/f1/ac/c9bb0ec696e07a20bd58af5399aeadaef195fb2c73d26baf55180fe4a942/watchfiles-1.1.1-cp310-cp310-win32.whl", hash = "sha256:3f6d37644155fb5beca5378feb8c1708d5783145f2a0f1c4d5a061a210254844", size = 272663, upload-time = "2025-10-14T15:04:30.435Z" }, - { url = "https://files.pythonhosted.org/packages/11/a0/a60c5a7c2ec59fa062d9a9c61d02e3b6abd94d32aac2d8344c4bdd033326/watchfiles-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:a36d8efe0f290835fd0f33da35042a1bb5dc0e83cbc092dcf69bce442579e88e", size = 287453, upload-time = "2025-10-14T15:04:31.53Z" }, { url = "https://files.pythonhosted.org/packages/1f/f8/2c5f479fb531ce2f0564eda479faecf253d886b1ab3630a39b7bf7362d46/watchfiles-1.1.1-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:f57b396167a2565a4e8b5e56a5a1c537571733992b226f4f1197d79e94cf0ae5", size = 406529, upload-time = "2025-10-14T15:04:32.899Z" }, { url = "https://files.pythonhosted.org/packages/fe/cd/f515660b1f32f65df671ddf6f85bfaca621aee177712874dc30a97397977/watchfiles-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:421e29339983e1bebc281fab40d812742268ad057db4aee8c4d2bce0af43b741", size = 394384, upload-time = "2025-10-14T15:04:33.761Z" }, { url = "https://files.pythonhosted.org/packages/7b/c3/28b7dc99733eab43fca2d10f55c86e03bd6ab11ca31b802abac26b23d161/watchfiles-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6e43d39a741e972bab5d8100b5cdacf69db64e34eb19b6e9af162bccf63c5cc6", size = 448789, upload-time = "2025-10-14T15:04:34.679Z" }, @@ -4542,10 +4019,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4f/55/2af26693fd15165c4ff7857e38330e1b61ab8c37d15dc79118cdba115b7a/watchfiles-1.1.1-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8c91ed27800188c2ae96d16e3149f199d62f86c7af5f5f4d2c61a3ed8cd3666c", size = 455072, upload-time = "2025-10-14T15:05:48.928Z" }, { url = "https://files.pythonhosted.org/packages/66/1d/d0d200b10c9311ec25d2273f8aad8c3ef7cc7ea11808022501811208a750/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:311ff15a0bae3714ffb603e6ba6dbfba4065ab60865d15a6ec544133bdb21099", size = 629104, upload-time = "2025-10-14T15:05:49.908Z" }, { url = "https://files.pythonhosted.org/packages/e3/bd/fa9bb053192491b3867ba07d2343d9f2252e00811567d30ae8d0f78136fe/watchfiles-1.1.1-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:a916a2932da8f8ab582f242c065f5c81bed3462849ca79ee357dd9551b0e9b01", size = 622112, upload-time = "2025-10-14T15:05:50.941Z" }, - { url = 
"https://files.pythonhosted.org/packages/ba/4c/a888c91e2e326872fa4705095d64acd8aa2fb9c1f7b9bd0588f33850516c/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:17ef139237dfced9da49fb7f2232c86ca9421f666d78c264c7ffca6601d154c3", size = 409611, upload-time = "2025-10-14T15:06:05.809Z" }, - { url = "https://files.pythonhosted.org/packages/1e/c7/5420d1943c8e3ce1a21c0a9330bcf7edafb6aa65d26b21dbb3267c9e8112/watchfiles-1.1.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:672b8adf25b1a0d35c96b5888b7b18699d27d4194bac8beeae75be4b7a3fc9b2", size = 396889, upload-time = "2025-10-14T15:06:07.035Z" }, - { url = "https://files.pythonhosted.org/packages/0c/e5/0072cef3804ce8d3aaddbfe7788aadff6b3d3f98a286fdbee9fd74ca59a7/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:77a13aea58bc2b90173bc69f2a90de8e282648939a00a602e1dc4ee23e26b66d", size = 451616, upload-time = "2025-10-14T15:06:08.072Z" }, - { url = "https://files.pythonhosted.org/packages/83/4e/b87b71cbdfad81ad7e83358b3e447fedd281b880a03d64a760fe0a11fc2e/watchfiles-1.1.1-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:0b495de0bb386df6a12b18335a0285dda90260f51bdb505503c02bcd1ce27a8b", size = 458413, upload-time = "2025-10-14T15:06:09.209Z" }, { url = "https://files.pythonhosted.org/packages/d3/8e/e500f8b0b77be4ff753ac94dc06b33d8f0d839377fee1b78e8c8d8f031bf/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:db476ab59b6765134de1d4fe96a1a9c96ddf091683599be0f26147ea1b2e4b88", size = 408250, upload-time = "2025-10-14T15:06:10.264Z" }, { url = "https://files.pythonhosted.org/packages/bd/95/615e72cd27b85b61eec764a5ca51bd94d40b5adea5ff47567d9ebc4d275a/watchfiles-1.1.1-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:89eef07eee5e9d1fda06e38822ad167a044153457e6fd997f8a858ab7564a336", size = 396117, upload-time = "2025-10-14T15:06:11.28Z" }, { url = "https://files.pythonhosted.org/packages/c9/81/e7fe958ce8a7fb5c73cc9fb07f5aeaf755e6aa72498c57d760af760c91f8/watchfiles-1.1.1-pp311-pypy311_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ce19e06cbda693e9e7686358af9cd6f5d61312ab8b00488bc36f5aabbaf77e24", size = 450493, upload-time = "2025-10-14T15:06:12.321Z" }, @@ -4558,17 +4031,6 @@ version = "15.0.1" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/21/e6/26d09fab466b7ca9c7737474c52be4f76a40301b08362eb2dbc19dcc16c1/websockets-15.0.1.tar.gz", hash = "sha256:82544de02076bafba038ce055ee6412d68da13ab47f0c60cab827346de828dee", size = 177016, upload-time = "2025-03-05T20:03:41.606Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/1e/da/6462a9f510c0c49837bbc9345aca92d767a56c1fb2939e1579df1e1cdcf7/websockets-15.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d63efaa0cd96cf0c5fe4d581521d9fa87744540d4bc999ae6e08595a1014b45b", size = 175423, upload-time = "2025-03-05T20:01:35.363Z" }, - { url = "https://files.pythonhosted.org/packages/1c/9f/9d11c1a4eb046a9e106483b9ff69bce7ac880443f00e5ce64261b47b07e7/websockets-15.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac60e3b188ec7574cb761b08d50fcedf9d77f1530352db4eef1707fe9dee7205", size = 173080, upload-time = "2025-03-05T20:01:37.304Z" }, - { url = "https://files.pythonhosted.org/packages/d5/4f/b462242432d93ea45f297b6179c7333dd0402b855a912a04e7fc61c0d71f/websockets-15.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = 
"sha256:5756779642579d902eed757b21b0164cd6fe338506a8083eb58af5c372e39d9a", size = 173329, upload-time = "2025-03-05T20:01:39.668Z" }, - { url = "https://files.pythonhosted.org/packages/6e/0c/6afa1f4644d7ed50284ac59cc70ef8abd44ccf7d45850d989ea7310538d0/websockets-15.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fdfe3e2a29e4db3659dbd5bbf04560cea53dd9610273917799f1cde46aa725e", size = 182312, upload-time = "2025-03-05T20:01:41.815Z" }, - { url = "https://files.pythonhosted.org/packages/dd/d4/ffc8bd1350b229ca7a4db2a3e1c482cf87cea1baccd0ef3e72bc720caeec/websockets-15.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4c2529b320eb9e35af0fa3016c187dffb84a3ecc572bcee7c3ce302bfeba52bf", size = 181319, upload-time = "2025-03-05T20:01:43.967Z" }, - { url = "https://files.pythonhosted.org/packages/97/3a/5323a6bb94917af13bbb34009fac01e55c51dfde354f63692bf2533ffbc2/websockets-15.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ac1e5c9054fe23226fb11e05a6e630837f074174c4c2f0fe442996112a6de4fb", size = 181631, upload-time = "2025-03-05T20:01:46.104Z" }, - { url = "https://files.pythonhosted.org/packages/a6/cc/1aeb0f7cee59ef065724041bb7ed667b6ab1eeffe5141696cccec2687b66/websockets-15.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:5df592cd503496351d6dc14f7cdad49f268d8e618f80dce0cd5a36b93c3fc08d", size = 182016, upload-time = "2025-03-05T20:01:47.603Z" }, - { url = "https://files.pythonhosted.org/packages/79/f9/c86f8f7af208e4161a7f7e02774e9d0a81c632ae76db2ff22549e1718a51/websockets-15.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:0a34631031a8f05657e8e90903e656959234f3a04552259458aac0b0f9ae6fd9", size = 181426, upload-time = "2025-03-05T20:01:48.949Z" }, - { url = "https://files.pythonhosted.org/packages/c7/b9/828b0bc6753db905b91df6ae477c0b14a141090df64fb17f8a9d7e3516cf/websockets-15.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3d00075aa65772e7ce9e990cab3ff1de702aa09be3940d1dc88d5abf1ab8a09c", size = 181360, upload-time = "2025-03-05T20:01:50.938Z" }, - { url = "https://files.pythonhosted.org/packages/89/fb/250f5533ec468ba6327055b7d98b9df056fb1ce623b8b6aaafb30b55d02e/websockets-15.0.1-cp310-cp310-win32.whl", hash = "sha256:1234d4ef35db82f5446dca8e35a7da7964d02c127b095e172e54397fb6a6c256", size = 176388, upload-time = "2025-03-05T20:01:52.213Z" }, - { url = "https://files.pythonhosted.org/packages/1c/46/aca7082012768bb98e5608f01658ff3ac8437e563eca41cf068bd5849a5e/websockets-15.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:39c1fec2c11dc8d89bba6b2bf1556af381611a173ac2b511cf7231622058af41", size = 176830, upload-time = "2025-03-05T20:01:53.922Z" }, { url = "https://files.pythonhosted.org/packages/9f/32/18fcd5919c293a398db67443acd33fde142f283853076049824fc58e6f75/websockets-15.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:823c248b690b2fd9303ba00c4f66cd5e2d8c3ba4aa968b2779be9532a4dad431", size = 175423, upload-time = "2025-03-05T20:01:56.276Z" }, { url = "https://files.pythonhosted.org/packages/76/70/ba1ad96b07869275ef42e2ce21f07a5b0148936688c2baf7e4a1f60d5058/websockets-15.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:678999709e68425ae2593acf2e3ebcbcf2e69885a5ee78f9eb80e6e371f1bf57", size = 173082, upload-time = "2025-03-05T20:01:57.563Z" }, { url = 
"https://files.pythonhosted.org/packages/86/f2/10b55821dd40eb696ce4704a87d57774696f9451108cff0d2824c97e0f97/websockets-15.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:d50fd1ee42388dcfb2b3676132c78116490976f1300da28eb629272d5d93e905", size = 173330, upload-time = "2025-03-05T20:01:59.063Z" }, @@ -4602,12 +4064,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/33/2b/1f168cb6041853eef0362fb9554c3824367c5560cbdaad89ac40f8c2edfc/websockets-15.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:558d023b3df0bffe50a04e710bc87742de35060580a293c2a984299ed83bc4e4", size = 182195, upload-time = "2025-03-05T20:02:51.561Z" }, { url = "https://files.pythonhosted.org/packages/86/eb/20b6cdf273913d0ad05a6a14aed4b9a85591c18a987a3d47f20fa13dcc47/websockets-15.0.1-cp313-cp313-win32.whl", hash = "sha256:ba9e56e8ceeeedb2e080147ba85ffcd5cd0711b89576b83784d8605a7df455fa", size = 176393, upload-time = "2025-03-05T20:02:53.814Z" }, { url = "https://files.pythonhosted.org/packages/1b/6c/c65773d6cab416a64d191d6ee8a8b1c68a09970ea6909d16965d26bfed1e/websockets-15.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:e09473f095a819042ecb2ab9465aee615bd9c2028e4ef7d933600a8401c79561", size = 176837, upload-time = "2025-03-05T20:02:55.237Z" }, - { url = "https://files.pythonhosted.org/packages/02/9e/d40f779fa16f74d3468357197af8d6ad07e7c5a27ea1ca74ceb38986f77a/websockets-15.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:0c9e74d766f2818bb95f84c25be4dea09841ac0f734d1966f415e4edfc4ef1c3", size = 173109, upload-time = "2025-03-05T20:03:17.769Z" }, - { url = "https://files.pythonhosted.org/packages/bc/cd/5b887b8585a593073fd92f7c23ecd3985cd2c3175025a91b0d69b0551372/websockets-15.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1009ee0c7739c08a0cd59de430d6de452a55e42d6b522de7aa15e6f67db0b8e1", size = 173343, upload-time = "2025-03-05T20:03:19.094Z" }, - { url = "https://files.pythonhosted.org/packages/fe/ae/d34f7556890341e900a95acf4886833646306269f899d58ad62f588bf410/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:76d1f20b1c7a2fa82367e04982e708723ba0e7b8d43aa643d3dcd404d74f1475", size = 174599, upload-time = "2025-03-05T20:03:21.1Z" }, - { url = "https://files.pythonhosted.org/packages/71/e6/5fd43993a87db364ec60fc1d608273a1a465c0caba69176dd160e197ce42/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f29d80eb9a9263b8d109135351caf568cc3f80b9928bccde535c235de55c22d9", size = 174207, upload-time = "2025-03-05T20:03:23.221Z" }, - { url = "https://files.pythonhosted.org/packages/2b/fb/c492d6daa5ec067c2988ac80c61359ace5c4c674c532985ac5a123436cec/websockets-15.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b359ed09954d7c18bbc1680f380c7301f92c60bf924171629c5db97febb12f04", size = 174155, upload-time = "2025-03-05T20:03:25.321Z" }, - { url = "https://files.pythonhosted.org/packages/68/a1/dcb68430b1d00b698ae7a7e0194433bce4f07ded185f0ee5fb21e2a2e91e/websockets-15.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:cad21560da69f4ce7658ca2cb83138fb4cf695a2ba3e475e0559e05991aa8122", size = 176884, upload-time = "2025-03-05T20:03:27.934Z" }, { url = "https://files.pythonhosted.org/packages/fa/a8/5b41e0da817d64113292ab1f8247140aac61cbf6cfd085d6a0fa77f4984f/websockets-15.0.1-py3-none-any.whl", hash = "sha256:f7a866fbc1e97b5c617ee4116daaa09b722101d4a3c170c787450ba409f9736f", size = 
169743, upload-time = "2025-03-05T20:03:39.41Z" }, ] @@ -4617,16 +4073,6 @@ version = "1.17.3" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/95/8f/aeb76c5b46e273670962298c23e7ddde79916cb74db802131d49a85e4b7d/wrapt-1.17.3.tar.gz", hash = "sha256:f66eb08feaa410fe4eebd17f2a2c8e2e46d3476e9f8c783daa8e09e0faa666d0", size = 55547, upload-time = "2025-08-12T05:53:21.714Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/3f/23/bb82321b86411eb51e5a5db3fb8f8032fd30bd7c2d74bfe936136b2fa1d6/wrapt-1.17.3-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:88bbae4d40d5a46142e70d58bf664a89b6b4befaea7b2ecc14e03cedb8e06c04", size = 53482, upload-time = "2025-08-12T05:51:44.467Z" }, - { url = "https://files.pythonhosted.org/packages/45/69/f3c47642b79485a30a59c63f6d739ed779fb4cc8323205d047d741d55220/wrapt-1.17.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6b13af258d6a9ad602d57d889f83b9d5543acd471eee12eb51f5b01f8eb1bc2", size = 38676, upload-time = "2025-08-12T05:51:32.636Z" }, - { url = "https://files.pythonhosted.org/packages/d1/71/e7e7f5670c1eafd9e990438e69d8fb46fa91a50785332e06b560c869454f/wrapt-1.17.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:fd341868a4b6714a5962c1af0bd44f7c404ef78720c7de4892901e540417111c", size = 38957, upload-time = "2025-08-12T05:51:54.655Z" }, - { url = "https://files.pythonhosted.org/packages/de/17/9f8f86755c191d6779d7ddead1a53c7a8aa18bccb7cea8e7e72dfa6a8a09/wrapt-1.17.3-cp310-cp310-manylinux1_x86_64.manylinux_2_28_x86_64.manylinux_2_5_x86_64.whl", hash = "sha256:f9b2601381be482f70e5d1051a5965c25fb3625455a2bf520b5a077b22afb775", size = 81975, upload-time = "2025-08-12T05:52:30.109Z" }, - { url = "https://files.pythonhosted.org/packages/f2/15/dd576273491f9f43dd09fce517f6c2ce6eb4fe21681726068db0d0467096/wrapt-1.17.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:343e44b2a8e60e06a7e0d29c1671a0d9951f59174f3709962b5143f60a2a98bd", size = 83149, upload-time = "2025-08-12T05:52:09.316Z" }, - { url = "https://files.pythonhosted.org/packages/0c/c4/5eb4ce0d4814521fee7aa806264bf7a114e748ad05110441cd5b8a5c744b/wrapt-1.17.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:33486899acd2d7d3066156b03465b949da3fd41a5da6e394ec49d271baefcf05", size = 82209, upload-time = "2025-08-12T05:52:10.331Z" }, - { url = "https://files.pythonhosted.org/packages/31/4b/819e9e0eb5c8dc86f60dfc42aa4e2c0d6c3db8732bce93cc752e604bb5f5/wrapt-1.17.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:e6f40a8aa5a92f150bdb3e1c44b7e98fb7113955b2e5394122fa5532fec4b418", size = 81551, upload-time = "2025-08-12T05:52:31.137Z" }, - { url = "https://files.pythonhosted.org/packages/f8/83/ed6baf89ba3a56694700139698cf703aac9f0f9eb03dab92f57551bd5385/wrapt-1.17.3-cp310-cp310-win32.whl", hash = "sha256:a36692b8491d30a8c75f1dfee65bef119d6f39ea84ee04d9f9311f83c5ad9390", size = 36464, upload-time = "2025-08-12T05:53:01.204Z" }, - { url = "https://files.pythonhosted.org/packages/2f/90/ee61d36862340ad7e9d15a02529df6b948676b9a5829fd5e16640156627d/wrapt-1.17.3-cp310-cp310-win_amd64.whl", hash = "sha256:afd964fd43b10c12213574db492cb8f73b2f0826c8df07a68288f8f19af2ebe6", size = 38748, upload-time = "2025-08-12T05:53:00.209Z" }, - { url = "https://files.pythonhosted.org/packages/bd/c3/cefe0bd330d389c9983ced15d326f45373f4073c9f4a8c2f99b50bfea329/wrapt-1.17.3-cp310-cp310-win_arm64.whl", hash = "sha256:af338aa93554be859173c39c85243970dc6a289fa907402289eeae7543e1ae18", size = 36810, 
upload-time = "2025-08-12T05:52:51.906Z" }, { url = "https://files.pythonhosted.org/packages/52/db/00e2a219213856074a213503fdac0511203dceefff26e1daa15250cc01a0/wrapt-1.17.3-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:273a736c4645e63ac582c60a56b0acb529ef07f78e08dc6bfadf6a46b19c0da7", size = 53482, upload-time = "2025-08-12T05:51:45.79Z" }, { url = "https://files.pythonhosted.org/packages/5e/30/ca3c4a5eba478408572096fe9ce36e6e915994dd26a4e9e98b4f729c06d9/wrapt-1.17.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:5531d911795e3f935a9c23eb1c8c03c211661a5060aab167065896bbf62a5f85", size = 38674, upload-time = "2025-08-12T05:51:34.629Z" }, { url = "https://files.pythonhosted.org/packages/31/25/3e8cc2c46b5329c5957cec959cb76a10718e1a513309c31399a4dad07eb3/wrapt-1.17.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0610b46293c59a3adbae3dee552b648b984176f8562ee0dba099a56cfbe4df1f", size = 38959, upload-time = "2025-08-12T05:51:56.074Z" }, @@ -4691,22 +4137,6 @@ dependencies = [ ] sdist = { url = "https://files.pythonhosted.org/packages/57/63/0c6ebca57330cd313f6102b16dd57ffaf3ec4c83403dcb45dbd15c6f3ea1/yarl-1.22.0.tar.gz", hash = "sha256:bebf8557577d4401ba8bd9ff33906f1376c877aa78d1fe216ad01b4d6745af71", size = 187169, upload-time = "2025-10-06T14:12:55.963Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d1/43/a2204825342f37c337f5edb6637040fa14e365b2fcc2346960201d457579/yarl-1.22.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c7bd6683587567e5a49ee6e336e0612bec8329be1b7d4c8af5687dcdeb67ee1e", size = 140517, upload-time = "2025-10-06T14:08:42.494Z" }, - { url = "https://files.pythonhosted.org/packages/44/6f/674f3e6f02266428c56f704cd2501c22f78e8b2eeb23f153117cc86fb28a/yarl-1.22.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5cdac20da754f3a723cceea5b3448e1a2074866406adeb4ef35b469d089adb8f", size = 93495, upload-time = "2025-10-06T14:08:46.2Z" }, - { url = "https://files.pythonhosted.org/packages/b8/12/5b274d8a0f30c07b91b2f02cba69152600b47830fcfb465c108880fcee9c/yarl-1.22.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:07a524d84df0c10f41e3ee918846e1974aba4ec017f990dc735aad487a0bdfdf", size = 94400, upload-time = "2025-10-06T14:08:47.855Z" }, - { url = "https://files.pythonhosted.org/packages/e2/7f/df1b6949b1fa1aa9ff6de6e2631876ad4b73c4437822026e85d8acb56bb1/yarl-1.22.0-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:e1b329cb8146d7b736677a2440e422eadd775d1806a81db2d4cded80a48efc1a", size = 347545, upload-time = "2025-10-06T14:08:49.683Z" }, - { url = "https://files.pythonhosted.org/packages/84/09/f92ed93bd6cd77872ab6c3462df45ca45cd058d8f1d0c9b4f54c1704429f/yarl-1.22.0-cp310-cp310-manylinux2014_armv7l.manylinux_2_17_armv7l.manylinux_2_31_armv7l.whl", hash = "sha256:75976c6945d85dbb9ee6308cd7ff7b1fb9409380c82d6119bd778d8fcfe2931c", size = 319598, upload-time = "2025-10-06T14:08:51.215Z" }, - { url = "https://files.pythonhosted.org/packages/c3/97/ac3f3feae7d522cf7ccec3d340bb0b2b61c56cb9767923df62a135092c6b/yarl-1.22.0-cp310-cp310-manylinux2014_ppc64le.manylinux_2_17_ppc64le.manylinux_2_28_ppc64le.whl", hash = "sha256:80ddf7a5f8c86cb3eb4bc9028b07bbbf1f08a96c5c0bc1244be5e8fefcb94147", size = 363893, upload-time = "2025-10-06T14:08:53.144Z" }, - { url = "https://files.pythonhosted.org/packages/06/49/f3219097403b9c84a4d079b1d7bda62dd9b86d0d6e4428c02d46ab2c77fc/yarl-1.22.0-cp310-cp310-manylinux2014_s390x.manylinux_2_17_s390x.manylinux_2_28_s390x.whl", hash = 
"sha256:d332fc2e3c94dad927f2112395772a4e4fedbcf8f80efc21ed7cdfae4d574fdb", size = 371240, upload-time = "2025-10-06T14:08:55.036Z" }, - { url = "https://files.pythonhosted.org/packages/35/9f/06b765d45c0e44e8ecf0fe15c9eacbbde342bb5b7561c46944f107bfb6c3/yarl-1.22.0-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0cf71bf877efeac18b38d3930594c0948c82b64547c1cf420ba48722fe5509f6", size = 346965, upload-time = "2025-10-06T14:08:56.722Z" }, - { url = "https://files.pythonhosted.org/packages/c5/69/599e7cea8d0fcb1694323b0db0dda317fa3162f7b90166faddecf532166f/yarl-1.22.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:663e1cadaddae26be034a6ab6072449a8426ddb03d500f43daf952b74553bba0", size = 342026, upload-time = "2025-10-06T14:08:58.563Z" }, - { url = "https://files.pythonhosted.org/packages/95/6f/9dfd12c8bc90fea9eab39832ee32ea48f8e53d1256252a77b710c065c89f/yarl-1.22.0-cp310-cp310-musllinux_1_2_armv7l.whl", hash = "sha256:6dcbb0829c671f305be48a7227918cfcd11276c2d637a8033a99a02b67bf9eda", size = 335637, upload-time = "2025-10-06T14:09:00.506Z" }, - { url = "https://files.pythonhosted.org/packages/57/2e/34c5b4eb9b07e16e873db5b182c71e5f06f9b5af388cdaa97736d79dd9a6/yarl-1.22.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:f0d97c18dfd9a9af4490631905a3f131a8e4c9e80a39353919e2cfed8f00aedc", size = 359082, upload-time = "2025-10-06T14:09:01.936Z" }, - { url = "https://files.pythonhosted.org/packages/31/71/fa7e10fb772d273aa1f096ecb8ab8594117822f683bab7d2c5a89914c92a/yarl-1.22.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:437840083abe022c978470b942ff832c3940b2ad3734d424b7eaffcd07f76737", size = 357811, upload-time = "2025-10-06T14:09:03.445Z" }, - { url = "https://files.pythonhosted.org/packages/26/da/11374c04e8e1184a6a03cf9c8f5688d3e5cec83ed6f31ad3481b3207f709/yarl-1.22.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:a899cbd98dce6f5d8de1aad31cb712ec0a530abc0a86bd6edaa47c1090138467", size = 351223, upload-time = "2025-10-06T14:09:05.401Z" }, - { url = "https://files.pythonhosted.org/packages/82/8f/e2d01f161b0c034a30410e375e191a5d27608c1f8693bab1a08b089ca096/yarl-1.22.0-cp310-cp310-win32.whl", hash = "sha256:595697f68bd1f0c1c159fcb97b661fc9c3f5db46498043555d04805430e79bea", size = 82118, upload-time = "2025-10-06T14:09:11.148Z" }, - { url = "https://files.pythonhosted.org/packages/62/46/94c76196642dbeae634c7a61ba3da88cd77bed875bf6e4a8bed037505aa6/yarl-1.22.0-cp310-cp310-win_amd64.whl", hash = "sha256:cb95a9b1adaa48e41815a55ae740cfda005758104049a640a398120bf02515ca", size = 86852, upload-time = "2025-10-06T14:09:12.958Z" }, - { url = "https://files.pythonhosted.org/packages/af/af/7df4f179d3b1a6dcb9a4bd2ffbc67642746fcafdb62580e66876ce83fff4/yarl-1.22.0-cp310-cp310-win_arm64.whl", hash = "sha256:b85b982afde6df99ecc996990d4ad7ccbdbb70e2a4ba4de0aecde5922ba98a0b", size = 82012, upload-time = "2025-10-06T14:09:14.664Z" }, { url = "https://files.pythonhosted.org/packages/4d/27/5ab13fc84c76a0250afd3d26d5936349a35be56ce5785447d6c423b26d92/yarl-1.22.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:1ab72135b1f2db3fed3997d7e7dc1b80573c67138023852b6efb336a5eae6511", size = 141607, upload-time = "2025-10-06T14:09:16.298Z" }, { url = "https://files.pythonhosted.org/packages/6a/a1/d065d51d02dc02ce81501d476b9ed2229d9a990818332242a882d5d60340/yarl-1.22.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:669930400e375570189492dc8d8341301578e8493aec04aebc20d4717f899dd6", size = 94027, upload-time = "2025-10-06T14:09:17.786Z" }, { url = 
"https://files.pythonhosted.org/packages/c1/da/8da9f6a53f67b5106ffe902c6fa0164e10398d4e150d85838b82f424072a/yarl-1.22.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:792a2af6d58177ef7c19cbf0097aba92ca1b9cb3ffdd9c7470e156c8f9b5e028", size = 94963, upload-time = "2025-10-06T14:09:19.662Z" }, From 51497f174f9b2d0a2f81ea4b7b41621cc35669d4 Mon Sep 17 00:00:00 2001 From: Alfredo Colangelo <6275423+colangelo@users.noreply.github.com> Date: Mon, 20 Apr 2026 22:13:16 +0200 Subject: [PATCH 12/46] feat(mcp): read HONCHO_API_URL env var to support self-hosted Honcho (#575) The MCP Worker hardcoded https://api.honcho.dev for every request, forcing anyone running a self-hosted Honcho instance to patch the source before deploying their own Worker alongside it. Route the baseUrl through the Worker env so operators can set HONCHO_API_URL (via .dev.vars for local development or wrangler secret for deployed Workers) and point the Worker at their instance. The variable is intentionally not exposed as a request header: that would let public clients steer traffic to internal URLs, which is a latency and security regression. When HONCHO_API_URL is unset, the Worker falls back to https://api.honcho.dev, so existing deployments are unaffected. Closes #508 --- mcp/.dev.vars.example | 4 ++++ mcp/.gitignore | 1 + mcp/README.md | 22 ++++++++++++++++++++++ mcp/src/config.ts | 16 +++++++++++++--- mcp/src/index.ts | 6 +++--- 5 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 mcp/.dev.vars.example diff --git a/mcp/.dev.vars.example b/mcp/.dev.vars.example new file mode 100644 index 000000000..b61d97bfc --- /dev/null +++ b/mcp/.dev.vars.example @@ -0,0 +1,4 @@ +# Uncomment and point at a self-hosted Honcho to bypass the managed API. +# wrangler dev reads this file automatically when it exists as `.dev.vars`. +# For deployed Workers, use: wrangler secret put HONCHO_API_URL +# HONCHO_API_URL=http://127.0.0.1:28000 diff --git a/mcp/.gitignore b/mcp/.gitignore index 51fb86912..492977a58 100644 --- a/mcp/.gitignore +++ b/mcp/.gitignore @@ -12,6 +12,7 @@ dist/ .env .env.local .env.production +.dev.vars # IDE files .vscode/ diff --git a/mcp/README.md b/mcp/README.md index e7e3cd14a..ba90710b4 100644 --- a/mcp/README.md +++ b/mcp/README.md @@ -69,6 +69,28 @@ Built on: - **[@modelcontextprotocol/sdk](https://www.npmjs.com/package/@modelcontextprotocol/sdk)** — `McpServer` for tool registration - **[@honcho-ai/sdk](https://www.npmjs.com/package/@honcho-ai/sdk)** v2 — Honcho TypeScript SDK +## Self-Hosted Honcho + +If you run Honcho yourself (for privacy, latency, or offline use), deploy the +MCP Worker alongside your instance and set `HONCHO_API_URL` in its +environment. + +**Local dev (`bun run dev`):** create `mcp/.dev.vars`: + +``` +HONCHO_API_URL=http://127.0.0.1:28000 +``` + +**Deployed Worker:** + +```bash +wrangler secret put HONCHO_API_URL +# paste your URL when prompted +``` + +When `HONCHO_API_URL` is unset the Worker routes to `https://api.honcho.dev`, +so this change is backward-compatible. + ## Development ### Setup diff --git a/mcp/src/config.ts b/mcp/src/config.ts index bd440343b..58e4307e5 100644 --- a/mcp/src/config.ts +++ b/mcp/src/config.ts @@ -8,11 +8,21 @@ export interface HonchoConfig { workspaceId: string; } +export interface Env { + HONCHO_API_URL?: string; +} + /** - * Parse configuration from request headers. + * Parse configuration from request headers and Worker env bindings. * Throws on missing required fields so callers get clear errors. 
+ *
+ * The Honcho API URL is read from the `HONCHO_API_URL` env var when set,
+ * allowing operators to run this Worker alongside a self-hosted Honcho
+ * instance (see the "Self-Hosted Honcho" section in README.md). It is
+ * intentionally not exposed as a request header: routing public requests
+ * to an internal URL would be a latency and security regression.
  */
-export function parseConfig(request: Request): HonchoConfig {
+export function parseConfig(request: Request, env: Env = {}): HonchoConfig {
   const authHeader = request.headers.get("Authorization");
   const trimmedAuthHeader = authHeader?.trim();
   if (!trimmedAuthHeader?.startsWith("Bearer ")) {
@@ -37,7 +47,7 @@ export function parseConfig(request: Request): HonchoConfig {
     apiKey,
     userName,
     assistantName: request.headers.get("X-Honcho-Assistant-Name")?.trim() || "Assistant",
-    baseUrl: "https://api.honcho.dev",
+    baseUrl: env.HONCHO_API_URL?.trim() || "https://api.honcho.dev",
     workspaceId: request.headers.get("X-Honcho-Workspace-ID")?.trim() || "default",
   };
 }
diff --git a/mcp/src/index.ts b/mcp/src/index.ts
index 15733beb0..4e8951989 100644
--- a/mcp/src/index.ts
+++ b/mcp/src/index.ts
@@ -1,5 +1,5 @@
 import { createMcpHandler } from "agents/mcp";
-import { parseConfig, createClient } from "./config.js";
+import { parseConfig, createClient, type Env } from "./config.js";
 import { createServer } from "./server.js";
 
 const CORS_ORIGIN = "*";
@@ -16,7 +16,7 @@ const CORS_HEADERS = {
 export default {
   async fetch(
     request: Request,
-    env: unknown,
+    env: Env,
     executionCtx: ExecutionContext,
   ): Promise<Response> {
     if (request.method === "OPTIONS") {
@@ -25,7 +25,7 @@ export default {
     let config;
 
     try {
-      config = parseConfig(request);
+      config = parseConfig(request, env);
     } catch (e) {
       const message = e instanceof Error ?
e.message : "Invalid request"; From 2c50791642e5e7822b9526f6196d6a9f04c3e505 Mon Sep 17 00:00:00 2001 From: Rajat Ahuja Date: Mon, 20 Apr 2026 16:30:35 -0400 Subject: [PATCH 13/46] fix: add namespace, model, and provider to langfuse metadata so we can filter (#565) --- src/llm/api.py | 9 +++++++- src/llm/runtime.py | 29 ++++++++++++++++++++++++ tests/utils/test_clients.py | 45 ++++++++++++++++++++++++++++++++++++- 3 files changed, 81 insertions(+), 2 deletions(-) diff --git a/src/llm/api.py b/src/llm/api.py index 4639cad46..2a8f05299 100644 --- a/src/llm/api.py +++ b/src/llm/api.py @@ -31,6 +31,7 @@ effective_temperature, plan_attempt, resolve_runtime_model_config, + update_current_langfuse_observation, ) from .tool_loop import execute_tool_loop from .types import ( @@ -193,13 +194,19 @@ async def honcho_llm_call( current_attempt.set(1) def _get_attempt_plan() -> AttemptPlan: - return plan_attempt( + plan = plan_attempt( runtime_model_config=runtime_model_config, attempt=current_attempt.get(), retry_attempts=retry_attempts, call_thinking_budget_tokens=thinking_budget_tokens, call_reasoning_effort=reasoning_effort, ) + update_current_langfuse_observation( + plan.provider, + plan.model, + name=track_name, + ) + return plan async def _call_with_provider_selection() -> ( HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk] diff --git a/src/llm/runtime.py b/src/llm/runtime.py index 27bc56d92..2c29f397a 100644 --- a/src/llm/runtime.py +++ b/src/llm/runtime.py @@ -21,6 +21,7 @@ ModelConfig, ModelTransport, resolve_model_config, + settings, ) from .registry import backend_for_provider, client_for_model_config @@ -32,6 +33,33 @@ current_attempt: ContextVar[int] = ContextVar("current_attempt", default=0) +def update_current_langfuse_observation( + provider: ModelTransport, + model: str, + *, + name: str | None = None, +) -> None: + """Best-effort annotation of the current Langfuse span with LLM routing.""" + if not settings.LANGFUSE_PUBLIC_KEY: + return + + try: + from langfuse import get_client + + update_kwargs: dict[str, Any] = { + "metadata": { + "namespace": settings.NAMESPACE, + "provider": provider, + "model": model, + } + } + if name is not None: + update_kwargs["name"] = name + get_client().update_current_span(**update_kwargs) + except Exception as exc: # pragma: no cover - best-effort telemetry + logger.debug("Failed to update Langfuse span metadata: %s", exc) + + @dataclass(frozen=True) class AttemptPlan: """Per-attempt plan produced by `plan_attempt`. 
@@ -204,4 +232,5 @@ def resolve_backend_for_plan(plan: AttemptPlan) -> Any: "resolve_backend_for_plan", "resolve_runtime_model_config", "select_model_config_for_attempt", + "update_current_langfuse_observation", ] diff --git a/tests/utils/test_clients.py b/tests/utils/test_clients.py index 213a370cb..f5506790e 100644 --- a/tests/utils/test_clients.py +++ b/tests/utils/test_clients.py @@ -23,7 +23,12 @@ from openai.types.completion_usage import CompletionUsage from pydantic import BaseModel, Field -from src.config import ConfiguredModelSettings, ModelConfig, ResolvedFallbackConfig +from src.config import ( + ConfiguredModelSettings, + ModelConfig, + ResolvedFallbackConfig, + settings, +) from src.exceptions import LLMError, ValidationException from src.llm import ( CLIENTS, @@ -905,6 +910,44 @@ async def test_retry_disabled(self): assert response.content == "No retry response" + async def test_track_name_updates_langfuse_span_name(self): + """track_name should rename the top-level Langfuse span.""" + + mock_llm_client = AsyncMock(spec=AsyncAnthropic) + mock_response = Mock() + mock_response.content = [TextBlock(text="Named response", type="text")] + mock_response.usage = Usage(input_tokens=5, output_tokens=5) + mock_response.stop_reason = "stop" + mock_llm_client.messages.create = AsyncMock(return_value=mock_response) + + mock_langfuse_client = Mock() + + with ( + patch.dict(CLIENTS, {"anthropic": mock_llm_client}), + patch.object(settings, "LANGFUSE_PUBLIC_KEY", "test-public-key"), + patch("langfuse.get_client", return_value=mock_langfuse_client), + ): + response = await honcho_llm_call( + model_config=ConfiguredModelSettings( + model="claude-4-sonnet", + transport="anthropic", + ), + prompt="Hello", + max_tokens=100, + enable_retry=False, + track_name="Dialectic Agent", + ) + + assert response.content == "Named response" + mock_langfuse_client.update_current_span.assert_called_once_with( + name="Dialectic Agent", + metadata={ + "namespace": settings.NAMESPACE, + "provider": "anthropic", + "model": "claude-4-sonnet", + }, + ) + class TestEdgeCases: """Tests for edge cases and boundary conditions""" From 1c3e3f8816eb2bd4657af6f90c4dbf47923b2bd6 Mon Sep 17 00:00:00 2001 From: qxxaa Date: Mon, 20 Apr 2026 21:35:13 +0100 Subject: [PATCH 14/46] fix: embed() sends string input instead of array, breaking OpenAI-compatible providers (#586) * fix: wrap single embed() input in array for OpenAI-compatible provider compatibility * Fix input format in embedding test assertion --- src/embedding_client.py | 2 +- tests/llm/test_embedding_client.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/embedding_client.py b/src/embedding_client.py index e163cc66c..d6c8b46dc 100644 --- a/src/embedding_client.py +++ b/src/embedding_client.py @@ -99,7 +99,7 @@ async def embed(self, query: str) -> list[float]: return self._validate_embedding_dimensions(response.embeddings[0].values) else: # openai response = await self.client.embeddings.create( - model=self.model, input=query + model=self.model, input=[query] ) return self._validate_embedding_dimensions(response.data[0].embedding) diff --git a/tests/llm/test_embedding_client.py b/tests/llm/test_embedding_client.py index 463b3dd6d..14def1760 100644 --- a/tests/llm/test_embedding_client.py +++ b/tests/llm/test_embedding_client.py @@ -51,7 +51,7 @@ def __init__(self, *, api_key: str | None, base_url: str | None) -> None: assert embedding == [0.1] * 8 assert fake_embeddings.calls == [ - {"model": "text-embedding-3-small", "input": "hello world"} 
+ {"model": "text-embedding-3-small", "input": ["hello world"]} ] From 7fae16b351f4db3ef5672bf51c625fa19c6fe5b7 Mon Sep 17 00:00:00 2001 From: Rajat Ahuja Date: Mon, 20 Apr 2026 16:56:51 -0400 Subject: [PATCH 15/46] handle turbopuffer server errors (#561) * fix: catch InternalServerError from turbopuffer * fix: remove unused VectorUpsertResult * fix: downgrade vector store sync errors to warnings * fix: remove upsert_with_retry * fix: (vector) add silent path and explicit path for vector db server errors --------- Co-authored-by: Vineeth Voruganti <13438633+VVoruganti@users.noreply.github.com> --- src/crud/document.py | 53 ++++++++++---- src/crud/message.py | 30 ++++++-- src/reconciler/sync_vectors.py | 21 +++++- src/vector_store/__init__.py | 22 +----- src/vector_store/lancedb.py | 8 +- src/vector_store/turbopuffer.py | 41 +++++++++-- src/vector_store/utils.py | 57 --------------- tests/conftest.py | 7 +- tests/deriver/test_vector_reconciliation.py | 19 ++--- tests/vector_store/test_turbopuffer.py | 81 +++++++++++++++++++++ 10 files changed, 211 insertions(+), 128 deletions(-) delete mode 100644 src/vector_store/utils.py create mode 100644 tests/vector_store/test_turbopuffer.py diff --git a/src/crud/document.py b/src/crud/document.py index 7de9dfd6e..ba652eea7 100644 --- a/src/crud/document.py +++ b/src/crud/document.py @@ -17,13 +17,16 @@ from src.crud.session import get_session from src.dependencies import tracked_db from src.embedding_client import embedding_client -from src.exceptions import ResourceNotFoundException, ValidationException +from src.exceptions import ( + ResourceNotFoundException, + ValidationException, + VectorStoreError, +) from src.utils.filter import apply_filter from src.vector_store import ( VectorRecord, VectorStore, get_external_vector_store, - upsert_with_retry, ) logger = getLogger(__name__) @@ -560,11 +563,9 @@ async def create_documents( ) ) - # Upsert to external vector store with retry and update sync state + # Upsert to external vector store and update sync state try: - await upsert_with_retry( - external_vector_store, namespace, vector_records - ) + await external_vector_store.upsert_many(namespace, vector_records) # Success: mark as synced await db.execute( update(models.Document) @@ -577,9 +578,21 @@ async def create_documents( ) await db.commit() + except VectorStoreError: + # Vector store unavailable - increment sync_attempts for reconciliation + logger.warning("Vector store unavailable; leaving docs unsynced") + await db.execute( + update(models.Document) + .where(models.Document.id.in_(doc_ids)) + .values( + sync_attempts=models.Document.sync_attempts + 1, + last_sync_at=func.now(), + ) + ) + await db.commit() + except Exception: - # Failed after retries - increment sync_attempts for reconciliation - logger.exception("Failed to upsert vectors after retries") + logger.exception("Unexpected error upserting vectors") await db.execute( update(models.Document) .where(models.Document.id.in_(doc_ids)) @@ -846,11 +859,9 @@ async def create_observations( ) ) - # Upsert to external vector store with retry and update sync state + # Upsert to external vector store and update sync state try: - await upsert_with_retry( - external_vector_store, namespace, vector_records - ) + await external_vector_store.upsert_many(namespace, vector_records) # Success: mark as synced await db.execute( update(models.Document) @@ -863,10 +874,24 @@ async def create_observations( ) await db.commit() + except VectorStoreError: + logger.warning( + "Vector store unavailable for 
namespace %s; leaving observations unsynced", + namespace, + ) + await db.execute( + update(models.Document) + .where(models.Document.id.in_(doc_ids)) + .values( + sync_attempts=models.Document.sync_attempts + 1, + last_sync_at=func.now(), + ) + ) + await db.commit() + except Exception: - # Failed after retries - increment sync_attempts for reconciliation logger.exception( - f"Failed to upsert vectors for {namespace} after retries" + "Unexpected error upserting vectors for %s", namespace ) await db.execute( update(models.Document) diff --git a/src/crud/message.py b/src/crud/message.py index 08c4c8610..3334c6e87 100644 --- a/src/crud/message.py +++ b/src/crud/message.py @@ -11,9 +11,10 @@ from src.config import settings from src.dependencies import tracked_db from src.embedding_client import embedding_client +from src.exceptions import VectorStoreError from src.utils.filter import apply_filter from src.utils.formatting import ILIKE_ESCAPE_CHAR, escape_ilike_pattern -from src.vector_store import VectorRecord, get_external_vector_store, upsert_with_retry +from src.vector_store import VectorRecord, get_external_vector_store from .session import get_or_create_session @@ -349,11 +350,11 @@ async def create_messages( ) ) - # Upsert to external vector store with retry and update sync state + # Upsert to external vector store and update sync state if vector_records: try: - await upsert_with_retry( - external_vector_store, namespace, vector_records + await external_vector_store.upsert_many( + namespace, vector_records ) # Success: mark as synced if we have DB rows if embedding_ids: @@ -368,10 +369,9 @@ async def create_messages( ) await db.commit() - except Exception: - # Failed after retries - increment sync_attempts for reconciliation - logger.exception( - "Failed to upsert message vectors after retries" + except VectorStoreError: + logger.warning( + "Vector store unavailable; leaving message vectors unsynced" ) if embedding_ids: await db.execute( @@ -385,6 +385,20 @@ async def create_messages( ) await db.commit() + except Exception: + logger.exception("Unexpected error upserting message vectors") + if embedding_ids: + await db.execute( + update(models.MessageEmbedding) + .where(models.MessageEmbedding.id.in_(embedding_ids)) + .values( + sync_attempts=models.MessageEmbedding.sync_attempts + + 1, + last_sync_at=func.now(), + ) + ) + await db.commit() + except Exception: logger.exception( "Failed to generate message embeddings for %s messages in workspace %s and session %s.", diff --git a/src/reconciler/sync_vectors.py b/src/reconciler/sync_vectors.py index 32de0d19c..4a17e40e2 100644 --- a/src/reconciler/sync_vectors.py +++ b/src/reconciler/sync_vectors.py @@ -19,6 +19,7 @@ from src.config import settings from src.dependencies import tracked_db from src.embedding_client import embedding_client +from src.exceptions import VectorStoreError from src.vector_store import VectorRecord, VectorStore, get_external_vector_store logger = logging.getLogger(__name__) @@ -259,8 +260,16 @@ async def _sync_documents( .values(sync_state="synced", last_sync_at=func.now(), sync_attempts=0) ) synced_count += len(docs_to_sync) + except VectorStoreError: + logger.warning( + "Vector store unavailable while syncing namespace %s", namespace + ) + await _bump_document_sync_attempts(db, docs_to_sync) + failed_count += len(docs_to_sync) except Exception: - logger.exception("Failed to sync documents to namespace %s", namespace) + logger.exception( + "Unexpected error syncing documents to namespace %s", namespace + ) await 
_bump_document_sync_attempts(db, docs_to_sync) failed_count += len(docs_to_sync) @@ -399,9 +408,17 @@ async def _sync_message_embeddings( .values(sync_state="synced", last_sync_at=func.now(), sync_attempts=0) ) synced_count += len(embs_to_sync) + except VectorStoreError: + logger.warning( + "Vector store unavailable while syncing message embeddings to namespace %s", + namespace, + ) + await _bump_message_embedding_sync_attempts(db, embs_to_sync) + failed_count += len(embs_to_sync) except Exception: logger.exception( - "Failed to sync message embeddings to namespace %s", namespace + "Unexpected error syncing message embeddings to namespace %s", + namespace, ) await _bump_message_embedding_sync_attempts(db, embs_to_sync) failed_count += len(embs_to_sync) diff --git a/src/vector_store/__init__.py b/src/vector_store/__init__.py index c77064a7e..5a22abd95 100644 --- a/src/vector_store/__init__.py +++ b/src/vector_store/__init__.py @@ -50,17 +50,6 @@ class VectorQueryResult(BaseModel): metadata: dict[str, Any] = Field(default_factory=dict) -class VectorUpsertResult(BaseModel): - """Result for a vector upsert operation.""" - - model_config: ClassVar[ConfigDict] = ConfigDict( - extra="forbid", - frozen=True, - ) - - ok: bool - - class VectorStore(ABC): """ Abstract base class for vector store implementations. @@ -123,7 +112,7 @@ async def upsert_many( self, namespace: str, vectors: list[VectorRecord], - ) -> VectorUpsertResult: + ) -> None: """ Upsert multiple vectors into the store. @@ -131,8 +120,8 @@ async def upsert_many( namespace: The namespace to store the vectors in vectors: List of VectorRecord objects to upsert - Returns: - Result describing primary/secondary outcomes. + Raises: + Exception: If the write fails. """ ... @@ -192,9 +181,6 @@ async def close(self) -> None: ... -from src.vector_store.utils import upsert_with_retry # noqa: E402 - - def _create_store_by_type(store_type: str) -> VectorStore: """Create a vector store instance by type name.""" if store_type == "turbopuffer": @@ -251,9 +237,7 @@ async def close_external_vector_store() -> None: "VectorStore", "VectorRecord", "VectorQueryResult", - "VectorUpsertResult", "get_external_vector_store", "close_external_vector_store", - "upsert_with_retry", "_hash_namespace_components", ] diff --git a/src/vector_store/lancedb.py b/src/vector_store/lancedb.py index 77c3c6cd3..f63b8cfd9 100644 --- a/src/vector_store/lancedb.py +++ b/src/vector_store/lancedb.py @@ -17,7 +17,7 @@ from src.config import settings from src.exceptions import VectorStoreError -from . import VectorQueryResult, VectorRecord, VectorStore, VectorUpsertResult +from . import VectorQueryResult, VectorRecord, VectorStore logger = logging.getLogger(__name__) @@ -156,7 +156,7 @@ async def upsert_many( self, namespace: str, vectors: list[VectorRecord], - ) -> VectorUpsertResult: + ) -> None: """ Upsert multiple vectors into LanceDB. 
@@ -165,7 +165,7 @@ async def upsert_many( vectors: List of VectorRecord objects to upsert """ if not vectors: - return VectorUpsertResult(ok=True) + return try: rows = [self._row_to_dict(v) for v in vectors] @@ -180,7 +180,7 @@ async def upsert_many( ) logger.debug(f"Upserted {len(vectors)} vectors to namespace {namespace}") - return VectorUpsertResult(ok=True) + return except Exception as e: logger.exception( f"Failed to upsert {len(vectors)} vectors to namespace {namespace}" diff --git a/src/vector_store/turbopuffer.py b/src/vector_store/turbopuffer.py index 39b93f5f0..c84c87744 100644 --- a/src/vector_store/turbopuffer.py +++ b/src/vector_store/turbopuffer.py @@ -8,13 +8,14 @@ from collections.abc import Sequence from typing import Any, Literal, cast -from turbopuffer import AsyncTurbopuffer, NotFoundError +from turbopuffer import AsyncTurbopuffer, InternalServerError, NotFoundError from turbopuffer.lib.namespace import AsyncNamespace from turbopuffer.types import Filter from src.config import settings +from src.exceptions import VectorStoreError -from . import VectorQueryResult, VectorRecord, VectorStore, VectorUpsertResult +from . import VectorQueryResult, VectorRecord, VectorStore logger = logging.getLogger(__name__) @@ -62,7 +63,7 @@ async def upsert_many( self, namespace: str, vectors: list[VectorRecord], - ) -> VectorUpsertResult: + ) -> None: """ Upsert multiple vectors into Turbopuffer. @@ -71,7 +72,7 @@ async def upsert_many( vectors: List of VectorRecord objects to upsert """ if not vectors: - return VectorUpsertResult(ok=True) + return ns = self._get_namespace(namespace) @@ -89,7 +90,18 @@ async def upsert_many( upsert_rows=rows, distance_metric=DISTANCE_METRIC, ) - return VectorUpsertResult(ok=True) + return + except InternalServerError as exc: + # Turbopuffer unavailable. SDK implicitly retries 5xx responses, + # so raise a vector store error and let callers leave writes unsynced. + logger.warning( + "Turbopuffer unavailable for upsert to namespace %s (%s after retries)", + namespace, + exc.status_code, + ) + raise VectorStoreError( + f"Turbopuffer unavailable for upsert to namespace {namespace}" + ) from exc except Exception: logger.exception( f"Failed to upsert {len(vectors)} vectors to namespace {namespace}" @@ -183,6 +195,16 @@ async def query( ) return [] + except InternalServerError as exc: + # Turbopuffer unavailable. SDK implicitly retries 5xx responses, + # so we should return []. + logger.warning( + "Turbopuffer unavailable for query on namespace %s (%s after retries), returning empty results", + namespace, + exc.status_code, + ) + return [] + except Exception: logger.exception(f"Failed to query namespace {namespace}") raise @@ -247,6 +269,15 @@ async def delete_many(self, namespace: str, ids: list[str]) -> None: except NotFoundError: # Namespace doesn't exist - nothing to delete logger.debug(f"Namespace {namespace} does not exist, nothing to delete") + except InternalServerError as exc: + logger.warning( + "Turbopuffer unavailable for delete from namespace %s (%s after retries)", + namespace, + exc.status_code, + ) + raise VectorStoreError( + f"Turbopuffer unavailable while deleting vectors in namespace {namespace}" + ) from exc except Exception: logger.exception( f"Failed to delete {len(ids)} vectors from namespace {namespace}" diff --git a/src/vector_store/utils.py b/src/vector_store/utils.py deleted file mode 100644 index ae613adaf..000000000 --- a/src/vector_store/utils.py +++ /dev/null @@ -1,57 +0,0 @@ -""" -Vector store utility functions. 
-""" - -from __future__ import annotations - -import logging -from typing import TYPE_CHECKING - -from tenacity import ( - AsyncRetrying, - retry_if_exception_type, - stop_after_attempt, - wait_exponential, -) - -if TYPE_CHECKING: - from src.vector_store import VectorRecord, VectorStore, VectorUpsertResult - -logger = logging.getLogger(__name__) - - -async def upsert_with_retry( - vector_store: VectorStore, - namespace: str, - vector_records: list[VectorRecord], - max_attempts: int = 3, -) -> VectorUpsertResult | None: - """ - Upsert vectors with exponential backoff retry. - - Args: - vector_store: The vector store to upsert into - namespace: The namespace for the vectors - vector_records: List of VectorRecord objects to upsert - max_attempts: Maximum number of retry attempts (default 3) - - Returns: - VectorUpsertResult on success, or None if vector_records is empty - - Raises: - Exception: If all retries fail - """ - if not vector_records: - return None - - result: VectorUpsertResult | None = None - async for attempt in AsyncRetrying( - stop=stop_after_attempt(max_attempts), - wait=wait_exponential(multiplier=0.5, min=0.5, max=2.0), - retry=retry_if_exception_type(Exception), - reraise=True, - ): - with attempt: - result = await vector_store.upsert_many(namespace, vector_records) - - return result diff --git a/tests/conftest.py b/tests/conftest.py index cd739b7c6..3c9b8e63f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -509,21 +509,18 @@ def mock_vector_store(request: pytest.FixtureRequest): from src.vector_store import ( VectorQueryResult, VectorRecord, - VectorUpsertResult, _hash_namespace_components, # pyright: ignore[reportPrivateUsage] ) # Create a mock vector store that stores vectors in memory vector_storage: dict[str, dict[str, tuple[list[float], dict[str, Any]]]] = {} - async def mock_upsert_many( - namespace: str, vectors: list[VectorRecord] - ) -> VectorUpsertResult: + async def mock_upsert_many(namespace: str, vectors: list[VectorRecord]) -> None: if namespace not in vector_storage: vector_storage[namespace] = {} for vector in vectors: vector_storage[namespace][vector.id] = (vector.embedding, vector.metadata) - return VectorUpsertResult(ok=True) + return async def mock_query( namespace: str, embedding: list[float], **kwargs: Any diff --git a/tests/deriver/test_vector_reconciliation.py b/tests/deriver/test_vector_reconciliation.py index 3b8e0e92e..cc6375977 100644 --- a/tests/deriver/test_vector_reconciliation.py +++ b/tests/deriver/test_vector_reconciliation.py @@ -27,7 +27,6 @@ from src.vector_store import ( VectorRecord, VectorStore, - VectorUpsertResult, _hash_namespace_components, # pyright: ignore[reportPrivateUsage] ) @@ -84,9 +83,7 @@ async def test_pending_to_synced_on_success( mock_vector_store.get_vector_namespace = MagicMock( return_value=f"honcho.doc.{_hash_namespace_components(workspace.name, peer1.name, peer1.name)}" ) - mock_vector_store.upsert_many = AsyncMock( - return_value=VectorUpsertResult(ok=True) - ) + mock_vector_store.upsert_many = AsyncMock(return_value=None) # Run sync synced, failed = await _sync_documents(db_session, docs, mock_vector_store) @@ -309,13 +306,11 @@ def mock_get_namespace( ) -> str: return f"honcho.doc.{_hash_namespace_components(workspace, observer, observed)}" - async def mock_upsert( - namespace: str, vectors: list[VectorRecord] - ) -> VectorUpsertResult: + async def mock_upsert(namespace: str, vectors: list[VectorRecord]) -> None: if namespace not in namespace_calls: namespace_calls[namespace] = [] 
namespace_calls[namespace].extend(vectors) - return VectorUpsertResult(ok=True) + return mock_vector_store.get_vector_namespace = mock_get_namespace mock_vector_store.upsert_many = mock_upsert @@ -440,9 +435,7 @@ async def test_documents_without_embeddings_are_reembedded( mock_vector_store.get_vector_namespace = MagicMock( return_value=f"honcho.doc.{_hash_namespace_components(workspace.name, peer1.name, peer1.name)}" ) - mock_vector_store.upsert_many = AsyncMock( - return_value=VectorUpsertResult(ok=True) - ) + mock_vector_store.upsert_many = AsyncMock(return_value=None) # Run sync synced, failed = await _sync_documents(db_session, docs, mock_vector_store) @@ -512,9 +505,7 @@ async def track_batch_embed(contents: list[str]) -> list[list[float]]: mock_vector_store.get_vector_namespace = MagicMock( return_value=f"honcho.doc.{_hash_namespace_components(workspace.name, peer1.name, peer1.name)}" ) - mock_vector_store.upsert_many = AsyncMock( - return_value=VectorUpsertResult(ok=True) - ) + mock_vector_store.upsert_many = AsyncMock(return_value=None) # Run sync await _sync_documents(db_session, docs, mock_vector_store) diff --git a/tests/vector_store/test_turbopuffer.py b/tests/vector_store/test_turbopuffer.py new file mode 100644 index 000000000..73bf3c204 --- /dev/null +++ b/tests/vector_store/test_turbopuffer.py @@ -0,0 +1,81 @@ +"""Tests for TurbopufferVectorStore error handling on 5xx responses.""" + +from __future__ import annotations + +from unittest.mock import AsyncMock, MagicMock + +import httpx +import pytest +from turbopuffer import InternalServerError + +from src.config import settings +from src.exceptions import VectorStoreError +from src.vector_store import VectorRecord +from src.vector_store.turbopuffer import TurbopufferVectorStore + + +def _internal_server_error(status_code: int = 503) -> InternalServerError: + request = httpx.Request( + "POST", "https://api.turbopuffer.com/v2/namespaces/ns/write" + ) + response = httpx.Response(status_code, request=request) + return InternalServerError("turbopuffer unavailable", response=response, body=None) + + +@pytest.fixture +def store(monkeypatch: pytest.MonkeyPatch) -> TurbopufferVectorStore: + monkeypatch.setattr(settings.VECTOR_STORE, "TURBOPUFFER_API_KEY", "test-key") + monkeypatch.setattr(settings.VECTOR_STORE, "TURBOPUFFER_REGION", "gcp-us-east4") + return TurbopufferVectorStore() + + +@pytest.fixture +def record() -> VectorRecord: + return VectorRecord( + id="doc_1", embedding=[0.1, 0.2, 0.3, 0.4], metadata={"foo": "bar"} + ) + + +@pytest.mark.asyncio +async def test_upsert_many_raises_vector_store_error_on_5xx( + store: TurbopufferVectorStore, + record: VectorRecord, +) -> None: + namespace_mock = MagicMock() + namespace_mock.write = AsyncMock(side_effect=_internal_server_error(503)) + store._get_namespace = MagicMock(return_value=namespace_mock) # pyright: ignore[reportPrivateUsage] + + with pytest.raises(VectorStoreError) as excinfo: + await store.upsert_many("honcho.doc.test", [record]) + + assert "honcho.doc.test" in str(excinfo.value) + assert isinstance(excinfo.value.__cause__, InternalServerError) + namespace_mock.write.assert_awaited_once() + + +@pytest.mark.asyncio +async def test_upsert_many_short_circuits_on_empty( + store: TurbopufferVectorStore, +) -> None: + namespace_mock = MagicMock() + namespace_mock.write = AsyncMock() + store._get_namespace = MagicMock(return_value=namespace_mock) # pyright: ignore[reportPrivateUsage] + + await store.upsert_many("honcho.doc.test", []) + + 
namespace_mock.write.assert_not_awaited() + + +@pytest.mark.asyncio +async def test_upsert_many_succeeds_without_raising( + store: TurbopufferVectorStore, + record: VectorRecord, +) -> None: + namespace_mock = MagicMock() + namespace_mock.write = AsyncMock() + store._get_namespace = MagicMock(return_value=namespace_mock) # pyright: ignore[reportPrivateUsage] + + result = await store.upsert_many("honcho.doc.test", [record]) + + assert result is None + namespace_mock.write.assert_awaited_once() From ca1dc858ecde1c93feb027598cbdfccf49480636 Mon Sep 17 00:00:00 2001 From: ajspig <46900795+ajspig@users.noreply.github.com> Date: Mon, 20 Apr 2026 23:25:12 -0400 Subject: [PATCH 16/46] cli docs (#589) * docs: adding cli doc * docs: adding generated script and content and github workflow * chore: removing workflow * fix: (docs) re-format and add details to cli-reference docs --------- Co-authored-by: Vineeth Voruganti <13438633+VVoruganti@users.noreply.github.com> --- docs/docs.json | 3 +- docs/snippets/cli-commands.mdx | 547 ++++++++++++++++++++++++ docs/v3/documentation/reference/cli.mdx | 220 ++++++++++ honcho-cli/scripts/generate_cli_docs.py | 261 +++++++++++ 4 files changed, 1030 insertions(+), 1 deletion(-) create mode 100644 docs/snippets/cli-commands.mdx create mode 100644 docs/v3/documentation/reference/cli.mdx create mode 100644 honcho-cli/scripts/generate_cli_docs.py diff --git a/docs/docs.json b/docs/docs.json index 0b3573678..54262e3f1 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -84,7 +84,8 @@ "group": "Reference", "pages": [ "v3/documentation/reference/platform", - "v3/documentation/reference/sdk" + "v3/documentation/reference/sdk", + "v3/documentation/reference/cli" ] } ] diff --git a/docs/snippets/cli-commands.mdx b/docs/snippets/cli-commands.mdx new file mode 100644 index 000000000..4eb9d2d39 --- /dev/null +++ b/docs/snippets/cli-commands.mdx @@ -0,0 +1,547 @@ +{/* + GENERATED by honcho-cli/scripts/generate_cli_docs.py — do not edit. + Re-generate with: uv run --package honcho-cli python honcho-cli/scripts/generate_cli_docs.py + Source of truth: honcho-cli/src/honcho_cli/commands/ +*/} + +## honcho conclusion + +List, search, create, and delete peer conclusions (Honcho's memory atoms). + + + +Create a conclusion. + +```bash +honcho conclusion create +``` + + + + Observer peer ID. + + + Observed peer ID. + + + Session context. Short alias: `-s`. + + + +Delete a conclusion. + +```bash +honcho conclusion delete +``` + + + + Observer peer ID. + + + Observed peer ID. + + + Skip confirmation. Short alias: `-y`. + + + +List conclusions. + +```bash +honcho conclusion list +``` + + + Observer peer ID. + + + Observed peer ID. + + + Max results. + + + +Semantic search over conclusions. + +```bash +honcho conclusion search +``` + + + + Observer peer ID. + + + Observed peer ID. + + + Max results. + + + + +## honcho config + +Inspect CLI configuration. + +```bash +honcho config +``` + + +## honcho doctor + +Verify config and connectivity. Scope with -w / -p to check workspace, peer, and queue health. + +```bash +honcho doctor +``` + + +## honcho help + +Show help message. + +```bash +honcho help +``` + + +## honcho init + +Set API key and server URL in ~/.honcho/config.json. + +Press Enter to keep the current value or type a replacement. +Workspace / peer / session scoping is per-command via -w / -p / -s +or HONCHO_* env vars — never persisted. + +```bash +honcho init +``` + + + API key (admin JWT). + + + Honcho API URL (e.g. https://api.honcho.dev, http://localhost:8000). 
+ + +## honcho message + +List, create, and get messages within a session. + + + +Create a message in a session. + +```bash +honcho message create +``` + + + + Peer ID of the message sender. Short alias: `-p`. + + + JSON metadata to associate with the message. + + + Session ID. Short alias: `-s`. + + + +Get a single message by ID. + +```bash +honcho message get +``` + + + + Session ID. Short alias: `-s`. + + + +List messages in a session. Scoped to a peer with -p. + +```bash +honcho message list [] +``` + + + + Number of recent messages. + + + Show oldest first (default is newest first). + + + Show only IDs, peer, token count, and created_at (no content). + + + Filter by peer ID. Short alias: `-p`. + + + + +## honcho peer + +List, create, chat with, search, and manage peers and their representations. + + + +Get raw peer card content. + +```bash +honcho peer card [] +``` + + + + Target peer for relationship card. + + + +Query the dialectic about a peer. + +```bash +honcho peer chat +``` + + + + Target peer for perspective. + + + Reasoning level: minimal, low, medium, high, max. Short alias: `-r`. + + + +Create or get a peer. + +```bash +honcho peer create +``` + + + + Whether Honcho will form a representation of this peer. Negate with `--no-observe-me`. + + + JSON metadata to associate with the peer. + + + +Get metadata for a peer. + +```bash +honcho peer get-metadata [] +``` + + + + +Inspect a peer: card, session count, recent conclusions. + +```bash +honcho peer inspect [] +``` + + + + +List all peers in the workspace. + +```bash +honcho peer list +``` + + + +Get the formatted representation for a peer. + +```bash +honcho peer representation [] +``` + + + + Target peer to get representation about. + + + Semantic search query to filter conclusions. + + + Maximum number of conclusions to include. + + + +Search a peer's messages. + +```bash +honcho peer search +``` + + + + Max results. + + + +Set metadata for a peer. + +```bash +honcho peer set-metadata +``` + + + + Peer ID (uses default if omitted). Short alias: `-p`. + + + + +## honcho session + +List, inspect, create, delete, and manage conversation sessions and their peers. + + + +Add peers to a session. + +```bash +honcho session add-peers +``` + + + + + +Get session context (what an agent would see). + +```bash +honcho session context [] +``` + + + + Token budget. + + + Include summary. Negate with `--no-summary`. + + + +Create or get a session. + +```bash +honcho session create +``` + + + + Comma-separated peer IDs to add to the session. + + + JSON metadata to associate with the session. + + + +Delete a session and all its data. Destructive — requires --yes or interactive confirm. + +```bash +honcho session delete [] +``` + + + + Skip confirmation. Short alias: `-y`. + + + +Get metadata for a session. + +```bash +honcho session get-metadata [] +``` + + + + +Inspect a session: peers, message count, summaries, config. + +```bash +honcho session inspect [] +``` + + + + +List sessions in the workspace. + +```bash +honcho session list +``` + + + Filter by peer. Short alias: `-p`. + + + +List peers in a session. + +```bash +honcho session peers [] +``` + + + + +Remove peers from a session. + +```bash +honcho session remove-peers +``` + + + + + +Get the representation of a peer within a session. + +```bash +honcho session representation [] +``` + + + + + Target peer (what peer_id knows about target). + + + Semantic search query to filter conclusions. + + + Maximum number of conclusions to include. + + + +Search messages in a session. 
+ +```bash +honcho session search [] +``` + + + + + Max results. + + + +Set metadata for a session. + +```bash +honcho session set-metadata [] +``` + + + + JSON metadata to set (e.g. '\{"key": "value"\}'). Short alias: `-d`. + + + +Get session summaries (short + long). + +```bash +honcho session summaries [] +``` + + + + + +## honcho workspace + +List, create, inspect, delete, and search workspaces. + + + +Create or get a workspace. + +```bash +honcho workspace create +``` + + + + JSON metadata to associate with the workspace. + + + +Delete a workspace. Use --dry-run first to see what will be deleted. + +Requires --yes to skip confirmation, or will prompt interactively. +If sessions exist, requires --cascade to delete them first. + +```bash +honcho workspace delete +``` + + + + Skip confirmation prompt (for scripted/agent use). Short alias: `-y`. + + + Delete all sessions before deleting the workspace. + + + Show what would be deleted without deleting. + + + +Inspect a workspace: peers, sessions, config. + +```bash +honcho workspace inspect [] +``` + + + + +List all accessible workspaces. + +```bash +honcho workspace list +``` + + + +Get queue processing status. + +```bash +honcho workspace queue-status +``` + + + Filter by observer peer. + + + Filter by sender peer. + + + +Search messages across workspace. + +```bash +honcho workspace search +``` + + + + Max results. + + + diff --git a/docs/v3/documentation/reference/cli.mdx b/docs/v3/documentation/reference/cli.mdx new file mode 100644 index 000000000..7b89bfeb8 --- /dev/null +++ b/docs/v3/documentation/reference/cli.mdx @@ -0,0 +1,220 @@ +--- +title: 'CLI Reference' +description: 'Command-line interface for Honcho — inspect workspaces, peers, sessions, and memory from your terminal' +icon: 'terminal' +--- + +import CliCommands from "/snippets/cli-commands.mdx"; + +## Install + + +```bash uv (recommended) +uv tool install honcho-cli +``` + +```bash uvx (ephemeral) +uvx honcho-cli +``` + + +## Quick Start + +```bash +honcho init # confirm/set apiKey + Honcho URL in ~/.honcho/config.json +honcho doctor # verify your config + connectivity +honcho # show banner + command list +``` + +## Configuration + +The CLI resolves config in this order: **flag → env var → config file → default**. + +| Value | File key | Env var | Flag | Persisted? | +|-------------|-------------------|------------------------|------------------------|------------| +| API key | `apiKey` | `HONCHO_API_KEY` | — | Yes | +| API URL | `environmentUrl` | `HONCHO_BASE_URL` | — | Yes | +| Workspace | — | `HONCHO_WORKSPACE_ID` | `-w` / `--workspace` | No | +| Peer | — | `HONCHO_PEER_ID` | `-p` / `--peer` | No | +| Session | — | `HONCHO_SESSION_ID` | `-s` / `--session` | No | +| JSON output | — | `HONCHO_JSON` | `--json` | No | + +### Persisted config + +The CLI shares `~/.honcho/config.json` with sibling Honcho tools. It owns only +`apiKey` and `environmentUrl` at the top level — everything else (`hosts`, +`sessions`, etc.) is written by other tools and left untouched on save. + +```json +{ + "apiKey": "hch-v3-...", + "environmentUrl": "https://api.honcho.dev", + "hosts": { "claude_code": { "...": "..." } } +} +``` + +Per-command scoping (workspace / peer / session) is handled via `-w` / `-p` / `-s` +flags or `HONCHO_*` env vars. **Not** persisted as CLI defaults. This is +deliberate: every invocation is explicit about what it operates on. 
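+
+To confirm on disk that a CLI save touched only its own keys (an illustrative
+check — it assumes `jq`, which the scripting examples below also use):
+
+```bash
+# Top-level keys: the CLI owns apiKey and environmentUrl; hosts belongs to
+# sibling tools and survives a save untouched.
+jq 'keys' ~/.honcho/config.json
+# ["apiKey", "environmentUrl", "hosts"]
+```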
+
+### Runtime overrides
+
+Workspace, peer, and session scoping are **per-command only** — pass flags or
+`HONCHO_*` env vars on every invocation.
+
+```bash
+# Per-command flags
+honcho peer card -w prod -p user
+
+# Or export once per shell
+export HONCHO_WORKSPACE_ID=prod
+export HONCHO_PEER_ID=user
+honcho peer card
+
+# One-off against a different server
+HONCHO_BASE_URL=http://localhost:8000 honcho workspace list
+
+# CI/CD — env vars only, no config file needed
+export HONCHO_API_KEY=hch-v3-xxx
+export HONCHO_BASE_URL=https://api.honcho.dev
+honcho workspace list
+```
+
+## Output & exit codes
+
+Every command adapts its output to the context:
+
+- **TTY** — human-readable tables via Rich.
+- **Piped or redirected** — JSON automatically (detected via `isatty`).
+- **`--json` flag / `HONCHO_JSON=1`** — force JSON regardless of terminal.
+
+Collection commands emit JSON arrays; single-resource commands emit JSON objects. Errors are always structured:
+
+```json
+{
+  "error": {
+    "code": "PEER_NOT_FOUND",
+    "message": "Peer 'abc' not found in workspace 'my-ws'",
+    "details": {"workspace_id": "my-ws", "peer_id": "abc"}
+  }
+}
+```
+
+| Exit code | Meaning |
+|-----------|---------|
+| `0` | Success |
+| `1` | Client error (bad input, resource not found) |
+| `2` | Server error |
+| `3` | Auth error (missing or invalid API key) |
+
+CI pipelines and agent runtimes can branch on these without parsing stderr.
+
+## Command reference
+
+<CliCommands />
+
+## Workflows
+
+### Inspect an unfamiliar workspace
+
+When you pick up a workspace and need to orient — start broad, narrow to the peer and session you care about.
+
+<Steps>
+  <Step title="Understand the workspace">
+    ```bash
+    honcho workspace inspect --json
+    honcho peer list --json
+    ```
+  </Step>
+  <Step title="Find the peer">
+    ```bash
+    honcho peer inspect --json
+    honcho peer card --json
+    ```
+  </Step>
+  <Step title="Check the conclusions">
+    ```bash
+    honcho conclusion list --observer <peer_id> --json
+    honcho conclusion search "topic" --observer <peer_id> --json
+    ```
+  </Step>
+  <Step title="Drill into the session">
+    ```bash
+    honcho session inspect --json
+    honcho message list --last 20 --json
+    honcho session context --json
+    honcho session summaries --json
+    ```
+  </Step>
+</Steps>
+
+<Tip>
+  `honcho session context` shows exactly what an agent would receive at inference time — check it before `honcho peer chat` if a response surprises you.
+</Tip>
+
+### A peer isn't learning
+
+If new messages aren't producing new conclusions, work down the diagnostic ladder.
+
+```bash
+# Is observation enabled for this peer?
+honcho peer inspect --json | jq '.configuration'
+
+# Is the deriver actually processing?
+honcho workspace queue-status --json
+
+# Do any conclusions exist at all? Any for the expected topic?
+honcho conclusion list --observer <peer_id> --json
+honcho conclusion search "expected topic" --observer <peer_id> --json
+```
+
+### Session context looks wrong
+
+When an agent's responses don't reflect what you expect it to know.
+
+```bash
+honcho session context --json
+honcho session summaries --json
+honcho message list --last 50 --json
+```
+
+### Dialectic returns bad answers
+
+When `honcho peer chat` or the dialectic API is hallucinating or missing context.
+
+```bash
+# What does the peer card actually say?
+honcho peer card --json
+
+# Any conclusions for this topic?
+honcho conclusion search "topic" --observer <peer_id> --json
+
+# Reproduce the query against the CLI
+honcho peer chat "what do you know about X?" --json
+```
+
+## Scripting & automation
+
+Pipe commands into `jq` for inline transforms, or set `HONCHO_*` env vars for a CI/CD environment with no config file:
+
+```bash
+# Pipe to jq
+honcho peer list --json | jq '.[].id'
+honcho workspace inspect --json | jq '.peers'
+
+# Machine-parseable health check — exit code for CI, details for logs
+honcho doctor --json
+
+# CI/CD — env vars only, no ~/.honcho/config.json
+export HONCHO_API_KEY=hch-v3-xxx
+export HONCHO_BASE_URL=https://api.honcho.dev
+honcho workspace list
+```
+
+Non-interactive onboarding:
+
+```bash
+# Pre-seed via flags / env vars; init still prompts for anything missing
+HONCHO_API_KEY=hch-v3-xxx honcho init --base-url https://api.honcho.dev
+```
diff --git a/honcho-cli/scripts/generate_cli_docs.py b/honcho-cli/scripts/generate_cli_docs.py
new file mode 100644
index 000000000..8fa4e223e
--- /dev/null
+++ b/honcho-cli/scripts/generate_cli_docs.py
@@ -0,0 +1,261 @@
+"""Generate ``docs/snippets/cli-commands.mdx`` from the Typer app.
+
+Walks the ``honcho`` Typer app and emits a Mintlify snippet using native
+Mintlify components: ``<AccordionGroup>`` / ``<Accordion>`` for subcommand
+grouping and ``<ParamField>`` for each argument and option. The output is a
+single snippet included by ``docs/v3/documentation/reference/cli.mdx``.
+
+Usage::
+
+    uv run --package honcho-cli python honcho-cli/scripts/generate_cli_docs.py
+
+    # Or as a drift check (non-zero exit if the committed snippet is stale):
+    uv run --package honcho-cli python honcho-cli/scripts/generate_cli_docs.py --check
+"""
+
+from __future__ import annotations
+
+import sys
+from argparse import ArgumentParser
+from pathlib import Path
+
+import click
+import typer.main
+
+from honcho_cli.main import app
+
+REPO_ROOT = Path(__file__).resolve().parents[2]
+OUTPUT = REPO_ROOT / "docs" / "snippets" / "cli-commands.mdx"
+
+HEADER = """{/*
+  GENERATED by honcho-cli/scripts/generate_cli_docs.py — do not edit.
+  Re-generate with: uv run --package honcho-cli python honcho-cli/scripts/generate_cli_docs.py
+  Source of truth: honcho-cli/src/honcho_cli/commands/
+*/}
+
+"""
+
+# Documented once in cli.mdx's Configuration table. Skip at the per-command
+# level so each Accordion only shows options specific to that subcommand.
+GLOBAL_OPTIONS: set[tuple[str, str]] = {
+    ("--workspace", "Override workspace ID"),
+    ("--peer", "Override peer ID"),
+    ("--session", "Override session ID"),
+    ("--json", "Force JSON output"),
+}
+
+
+def _escape_mdx(text: str) -> str:
+    """Escape MDX-sensitive characters in prose so Mintlify's parser doesn't
+    mistake ``{...}`` for a JSX expression or ``<...>`` for a JSX tag."""
+    return (
+        text.replace("\\", "\\\\")
+        .replace("{", "\\{")
+        .replace("}", "\\}")
+        .replace("<", "\\<")
+    )
+
+
+def _attr(value: str) -> str:
+    """Escape a string for use inside a JSX double-quoted attribute value."""
+    return value.replace("\\", "\\\\").replace('"', "'")
+
+
+def _long_opt(param: click.Option) -> str | None:
+    return next((o for o in param.opts if o.startswith("--")), None)
+
+
+def _short_opt(param: click.Option) -> str | None:
+    return next(
+        (o for o in param.opts if o.startswith("-") and not o.startswith("--")),
+        None,
+    )
+
+
+def _is_global(param: click.Parameter) -> bool:
+    if not isinstance(param, click.Option) or not param.help:
+        return False
+    return (_long_opt(param), param.help) in GLOBAL_OPTIONS
+
+
+def _param_type(param: click.Parameter) -> str:
+    if isinstance(param, click.Option) and param.is_flag:
+        return "boolean"
+    if isinstance(param.type, click.Choice):
+        return "string"
+    name = getattr(param.type, "name", "")
+    if name in ("integer", "int"):
+        return "number"
+    if name in ("float", "decimal"):
+        return "number"
+    if name == "boolean":
+        return "boolean"
+    return "string"
+
+
+def _param_path(param: click.Parameter) -> str:
+    if isinstance(param, click.Argument):
+        return param.name or ""
+    return _long_opt(param) or (param.opts[0] if param.opts else "")
+
+
+def _param_required(param: click.Parameter) -> bool:
+    if isinstance(param, click.Argument):
+        return param.required
+    if isinstance(param, click.Option):
+        return bool(param.required)
+    return False
+
+
+def _default_attr(param: click.Parameter) -> str | None:
+    default = param.default
+    if default is None or default is False or callable(default):
+        return None
+    if isinstance(default, (list, tuple)) and not default:
+        return None
+    if default is True:
+        return "true"
+    return _attr(str(default))
+
+
+def _ensure_period(text: str) -> str:
+    return text if text.endswith((".", "?", "!", ":")) else text + "."
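+
+# Worked example of the helpers above (illustrative comment; the first value
+# appears verbatim in the generated snippet): _escape_mdx turns the help text
+#     JSON metadata to set (e.g. '{"key": "value"}')
+# into
+#     JSON metadata to set (e.g. '\{"key": "value"\}')
+# _attr swaps embedded double quotes for single quotes so the result is safe
+# inside a title="..." attribute, and _ensure_period("Max results") returns
+# "Max results.".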
+
+
+def _param_body(param: click.Parameter) -> str:
+    parts: list[str] = []
+    if isinstance(param, click.Option):
+        if param.help:
+            parts.append(_ensure_period(_escape_mdx(param.help.strip())))
+        short = _short_opt(param)
+        if short:
+            parts.append(f"Short alias: `{short}`.")
+        if param.secondary_opts:
+            neg = " / ".join(f"`{o}`" for o in param.secondary_opts)
+            parts.append(f"Negate with {neg}.")
+    if isinstance(param.type, click.Choice):
+        choices = ", ".join(f"`{c}`" for c in param.type.choices)
+        parts.append(f"One of: {choices}.")
+    return " ".join(parts)
+
+
+def _render_param(param: click.Parameter) -> list[str]:
+    props = [
+        f'path="{_attr(_param_path(param))}"',
+        f'type="{_param_type(param)}"',
+    ]
+    if _param_required(param):
+        props.append("required")
+    default_attr = _default_attr(param)
+    if default_attr is not None:
+        props.append(f'default="{default_attr}"')
+    body = _param_body(param).strip()
+    open_tag = f"<ParamField {' '.join(props)}>"
+    if body:
+        return [open_tag, f"  {body}", "</ParamField>"]
+    return [open_tag.replace(">", " />")]
+
+
+def _params_of(
+    cmd: click.Command, *, strip_globals: bool
+) -> list[click.Parameter]:
+    args = [p for p in cmd.params if isinstance(p, click.Argument)]
+    opts = [
+        p
+        for p in cmd.params
+        if isinstance(p, click.Option)
+        and not p.hidden
+        and not (strip_globals and _is_global(p))
+    ]
+    return args + opts
+
+
+def _invocation_line(cmd: click.Command, path: list[str]) -> str:
+    args = [p for p in cmd.params if isinstance(p, click.Argument)]
+    parts = [" ".join(path)]
+    for a in args:
+        placeholder = f"<{a.name}>"
+        if not a.required:
+            placeholder = f"[{placeholder}]"
+        parts.append(placeholder)
+    return " ".join(parts)
+
+
+def _render_accordion(cmd: click.Command, path: list[str]) -> list[str]:
+    lines = [f'<Accordion title="{" ".join(path)}">']
+    if cmd.help:
+        lines.append(_escape_mdx(cmd.help.strip()))
+    lines.append("")
+    lines.append("```bash")
+    lines.append(_invocation_line(cmd, path))
+    lines.append("```")
+    lines.append("")
+    for p in _params_of(cmd, strip_globals=True):
+        lines.extend(_render_param(p))
+    lines.append("</Accordion>")
+    return lines
+
+
+def _render_top(cmd: click.Command, path: list[str]) -> list[str]:
+    lines = [f"## {' '.join(path)}", ""]
+    if cmd.help:
+        lines.append(_escape_mdx(cmd.help.strip()))
+        lines.append("")
+
+    if isinstance(cmd, click.Group) and cmd.commands:
+        lines.append("<AccordionGroup>")
+        for sub_name in sorted(cmd.commands):
+            lines.extend(
+                _render_accordion(cmd.commands[sub_name], path + [sub_name])
+            )
+        lines.append("</AccordionGroup>")
+        lines.append("")
+        return lines
+
+    lines.append("```bash")
+    lines.append(_invocation_line(cmd, path))
+    lines.append("```")
+    lines.append("")
+    for p in _params_of(cmd, strip_globals=True):
+        lines.extend(_render_param(p))
+    lines.append("")
+    return lines
+
+
+def build() -> str:
+    root: click.Command = typer.main.get_command(app)
+    if not isinstance(root, click.Group):
+        raise SystemExit("Expected root command to be a Group")
+
+    body: list[str] = []
+    for name in sorted(root.commands):
+        body.extend(_render_top(root.commands[name], ["honcho", name]))
+    return HEADER + "\n".join(body) + "\n"
+
+
+def main() -> int:
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--check",
+        action="store_true",
+        help="Exit non-zero if the committed snippet differs from generated output.",
+    )
+    ns = parser.parse_args()
+    generated = build()
+    if ns.check:
+        current = OUTPUT.read_text() if OUTPUT.exists() else ""
+        if current != generated:
+            print(
+                f"::error::{OUTPUT} is stale.
Re-run without --check to regenerate.", + file=sys.stderr, + ) + return 1 + return 0 + OUTPUT.parent.mkdir(parents=True, exist_ok=True) + OUTPUT.write_text(generated) + print(f"Wrote {OUTPUT}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main()) From ae05ab5bc8f8f9f80c6d885b121a5d00116879ed Mon Sep 17 00:00:00 2001 From: ajspig <46900795+ajspig@users.noreply.github.com> Date: Tue, 21 Apr 2026 12:41:16 -0400 Subject: [PATCH 17/46] fix: moving cli skill (#591) * fix: moving cli skills to root * chore: updating cli readme * chore: updating language * chore: updating docs --- .claude/skills/honcho-cli/SKILL.md | 117 ++++++++++++++++++ .../documentation/introduction/vibecoding.mdx | 25 ++++ honcho-cli/README.md | 20 +-- honcho-cli/src/honcho_cli/skills/CONTEXT.md | 50 -------- .../src/honcho_cli/skills/honcho-debug.md | 54 -------- .../src/honcho_cli/skills/honcho-inspect.md | 53 -------- 6 files changed, 152 insertions(+), 167 deletions(-) create mode 100644 .claude/skills/honcho-cli/SKILL.md delete mode 100644 honcho-cli/src/honcho_cli/skills/CONTEXT.md delete mode 100644 honcho-cli/src/honcho_cli/skills/honcho-debug.md delete mode 100644 honcho-cli/src/honcho_cli/skills/honcho-inspect.md diff --git a/.claude/skills/honcho-cli/SKILL.md b/.claude/skills/honcho-cli/SKILL.md new file mode 100644 index 000000000..e2276669e --- /dev/null +++ b/.claude/skills/honcho-cli/SKILL.md @@ -0,0 +1,117 @@ +--- +name: honcho-cli +description: Inspect and debug Honcho workspaces via the `honcho` CLI. Use when investigating peer representations, memory state, session context, queue status, or dialectic quality — any task that requires introspection of a Honcho deployment. +allowed-tools: Bash(honcho:*), Bash(jq:*), Read, Grep +--- + +# Honcho CLI + +`honcho` wraps the Honcho Python SDK with agent-friendly defaults: JSON output, structured errors, input validation. Use it to inspect workspace state, debug peer memory, and diagnose the dialectic. + +## Output & config + +- **TTY**: human-readable tables (default when interactive) +- **Piped / `--json`**: JSON — collection commands emit arrays, single-resource commands emit objects +- **Exit codes**: `0` success · `1` client error (bad input, not found) · `2` server error · `3` auth error +- **Config**: `~/.honcho/config.json` (shared with other Honcho tools). The CLI owns `apiKey` and `environmentUrl` at the top level; run `honcho init` to confirm or set them. Per-command scope (workspace / peer / session) is via `-w` / `-p` / `-s` flags or `HONCHO_*` env vars. + +## Command groups + +- `honcho config` — CLI configuration +- `honcho workspace` — inspect, delete, search +- `honcho peer` — inspect, card, chat, search +- `honcho session` — inspect, messages, context, summaries +- `honcho message` — list and get +- `honcho conclusion` — list, search, create, delete + +## Rules + +- Always pass `--json` when processing output programmatically. +- Run `honcho peer inspect` before `honcho peer chat` to understand context. +- Use `honcho session context` to see exactly what an agent receives. +- Never run `honcho workspace delete` without `honcho workspace inspect` first. +- Check queue status when derivation seems stalled. +- Compare peer card with conclusions to understand memory state. + +## Inspection tour + +When orienting to a Honcho deployment, walk outside-in: + +### 1. Understand the workspace + +```bash +honcho workspace inspect --json +``` + +### 2. Find the peer + +```bash +honcho peer list --json +honcho peer inspect --json +``` + +### 3. 
Check peer's memory + +```bash +honcho peer card --json +honcho conclusion list --observer --json +honcho conclusion search "topic" --observer --json +``` + +### 4. Debug a session + +```bash +honcho session inspect --json +honcho message list --last 20 --json +honcho session context --json +honcho session summaries --json +``` + +### 5. Search across workspace + +```bash +honcho workspace search "query" --json +honcho peer search "query" --json +``` + +## Debugging playbook + +### Peer not learning? + +```bash +# Is observation enabled? +honcho peer inspect --json | jq '.configuration' + +# Is the deriver queue processing messages? +honcho workspace queue-status --json + +# What conclusions exist? +honcho conclusion list --observer --json +honcho conclusion search "expected topic" --observer --json +``` + +### Session context looks wrong? + +```bash +# Raw context an agent would receive +honcho session context --json + +# Summaries feeding the context +honcho session summaries --json + +# Recent message history +honcho message list --last 50 --json +``` + +### Dialectic giving bad answers? + +```bash +# What the peer card says +honcho peer card --json + +# Conclusions on the specific topic +honcho conclusion search "topic" --observer --json + +# Exercise the dialectic directly +honcho peer chat "what do you know about X?" --json +``` diff --git a/docs/v3/documentation/introduction/vibecoding.mdx b/docs/v3/documentation/introduction/vibecoding.mdx index 86af64834..5c5bde1a3 100644 --- a/docs/v3/documentation/introduction/vibecoding.mdx +++ b/docs/v3/documentation/introduction/vibecoding.mdx @@ -66,6 +66,24 @@ claude mcp add honcho \ --- +## CLI + +Inspect and debug a running Honcho deployment from your terminal. The honcho CLI wraps the Python SDK with agent-friendly defaults — JSON output, structured errors, and commands for every primitive (workspaces, peers, sessions, messages, conclusions). + +**Get started:** + +```bash +uv tool install honcho-cli +honcho init # configure apiKey + environmentUrl +honcho doctor # verify connectivity +``` + +The CLI also ships an agent skill. Install it with `npx skills add plastic-labs/honcho` and pick `honcho-cli` from the list. + +See the [full CLI reference](/v3/documentation/reference/cli) for all commands, flags, and environment variables. + +--- + ## Claude Code Plugin Use Honcho to build with Honcho! The [plugin](/v3/guides/integrations/claudecode) provides Claude Code persistent memory that survives context wipes and session restarts. @@ -109,6 +127,12 @@ curl -o ~/.claude/skills/honcho-integration.md https://raw.githubusercontent.com Invoke with `/honcho-integration` in your coding agent. +#### honcho-cli + +**For inspection & debugging.** Teaches your coding agent the right commands and flags for the [honcho CLI](#cli) — peer memory, session context, queue status, dialectic quality. + +Invoke implicitly when you ask your agent to inspect a Honcho deployment. + #### migrate-honcho-py / migrate-honcho-ts **For SDK upgrades.** Migrates code from v1.6.0 to v2.0.0 (required for Honcho 3.0.0+). Use when upgrading the SDK or seeing errors about removed APIs like `observations`, `Representation`, `.core`, or `get_config`. 
@@ -140,6 +164,7 @@ I want to start building with Honcho - an open source memory library for buildin
 - Core repo: https://github.com/plastic-labs/honcho
 - Python SDK: https://github.com/plastic-labs/honcho-python
 - TypeScript SDK: https://github.com/plastic-labs/honcho-node
+- CLI (inspect & debug a deployment): https://github.com/plastic-labs/honcho/tree/main/honcho-cli
 - Discord bot starter: https://github.com/plastic-labs/discord-python-starter
 - Telegram bot example: https://github.com/plastic-labs/telegram-python-starter
 
diff --git a/honcho-cli/README.md b/honcho-cli/README.md
index 8271d200a..804e22858 100644
--- a/honcho-cli/README.md
+++ b/honcho-cli/README.md
@@ -19,16 +19,6 @@ As a standalone tool (recommended):
 uv tool install honcho-cli
 ```
 
-As an extra on the Honcho SDK (if you want both the SDK and the CLI in one project):
-
-```bash
-uv add honcho-ai[cli]
-# or
-pip install honcho-ai[cli]
-```
-
-Either way, you'll get the `honcho` command on your PATH.
-
 ## Quick Start
 
 ```bash
@@ -142,6 +132,16 @@ Non-interactive onboarding:
 HONCHO_API_KEY=hch-v3-xxx honcho init --base-url https://api.honcho.dev
 ```
 
+## Agent Skill
+
+`honcho-cli` ships with a skill that teaches agents the right commands and conventions for inspecting and debugging a Honcho deployment. Install it anywhere skills are accepted (Claude Code, other skill-aware agents):
+
+```bash
+npx skills add plastic-labs/honcho
+```
+
+The picker lists every skill for Honcho — select `honcho-cli`.
+
 ## Environment Variables
 
 All `HONCHO_*` env vars work at runtime — no config file required.
 
diff --git a/honcho-cli/src/honcho_cli/skills/CONTEXT.md b/honcho-cli/src/honcho_cli/skills/CONTEXT.md
deleted file mode 100644
index 59e5fdcda..000000000
--- a/honcho-cli/src/honcho_cli/skills/CONTEXT.md
+++ /dev/null
@@ -1,50 +0,0 @@
----
-name: honcho-cli
-version: 0.1.0
-description: A terminal for Honcho — memory that reasons.
----
-
-# Honcho CLI — Agent Interface
-
-## Overview
-
-`honcho` is a CLI for administering and debugging Honcho workspaces. It wraps the Honcho Python SDK with agent-friendly defaults: JSON output, structured errors, input validation.
-
-## Output Modes
-
-- **TTY**: Human-readable tables (default when interactive)
-- **Piped/scripted**: JSON automatically
-- `--json`: Force JSON output
-
-## Exit Codes
-
-- 0: Success
-- 1: Client error (bad input, not found)
-- 2: Server error
-- 3: Auth error
-
-## Config
-
-Shared with other Honcho tools at `~/.honcho/config.json`. The CLI owns only
-`apiKey` and `environmentUrl` at the top level. Host-specific
-entries under `hosts` are untouched.
-
-Run `honcho init` to confirm or set those two values. 
Workspace / peer / -session are per-command — pass them via flags or env vars: - -```bash -honcho peer card -w my-workspace -p my-peer -# or -export HONCHO_WORKSPACE_ID=my-workspace -export HONCHO_PEER_ID=my-peer -honcho peer card -``` - -## Command Groups - -- `honcho config` — Manage CLI configuration -- `honcho workspace` — Inspect, delete, search workspaces -- `honcho peer` — Inspect, card, chat, search peers -- `honcho session` — Inspect, messages, context, summaries -- `honcho message` — List and get messages -- `honcho conclusion` — List, search, create, delete conclusions diff --git a/honcho-cli/src/honcho_cli/skills/honcho-debug.md b/honcho-cli/src/honcho_cli/skills/honcho-debug.md deleted file mode 100644 index 31ce2203d..000000000 --- a/honcho-cli/src/honcho_cli/skills/honcho-debug.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -name: honcho-cli-debug -version: 0.1.0 -description: Debug Honcho peer representations and memory ---- - -# Honcho CLI — Debug Skills - -## Rules - -- Check queue status when derivation seems stalled -- Compare peer card with conclusions to understand memory state - -## Debugging Memory Issues - -### Peer not learning? - -```bash -# Check if observation is enabled -honcho peer inspect --json | jq '.configuration' - -# Check queue — are messages being processed? -honcho workspace queue-status --json - -# Check what conclusions exist -honcho conclusion list --observer --json -honcho conclusion search "expected topic" --observer --json -``` - -### Session context looks wrong? - -```bash -# See raw context -honcho session context --json - -# Check summaries -honcho session summaries --json - -# Check message history -honcho message list --last 50 --json -``` - -### Dialectic giving bad answers? - -```bash -# Check what the peer card says -honcho peer card --json - -# Check conclusions for the specific topic -honcho conclusion search "topic" --observer --json - -# Try the dialectic directly -honcho peer chat "what do you know about X?" --json -``` diff --git a/honcho-cli/src/honcho_cli/skills/honcho-inspect.md b/honcho-cli/src/honcho_cli/skills/honcho-inspect.md deleted file mode 100644 index 14c0cb94e..000000000 --- a/honcho-cli/src/honcho_cli/skills/honcho-inspect.md +++ /dev/null @@ -1,53 +0,0 @@ ---- -name: honcho-cli-inspect -version: 0.1.0 -description: Inspect Honcho workspace state for debugging ---- - -# Honcho CLI — Inspection Skills - -## Rules - -- Always use `--json` when processing output programmatically -- Run `honcho peer inspect` before `honcho peer chat` to understand context -- Use `honcho session context` to see exactly what an agent receives -- Never run `honcho workspace delete` without `honcho workspace inspect` first - -## Inspection Workflow - -### 1. Understand the workspace - -```bash -honcho workspace inspect --json -``` - -### 2. Find the peer - -```bash -honcho peer list --json -honcho peer inspect --json -``` - -### 3. Check peer's memory - -```bash -honcho peer card --json -honcho conclusion list --observer --json -honcho conclusion search "topic" --observer --json -``` - -### 4. Debug a session - -```bash -honcho session inspect --json -honcho message list --last 20 --json -honcho session context --json -honcho session summaries --json -``` - -### 5. 
Search across workspace - -```bash -honcho workspace search "query" --json -honcho peer search "query" --json -``` From 1e7a3461e5e83df9fe4825bf0457a47c57ae13fe Mon Sep 17 00:00:00 2001 From: lilyplasticlabs Date: Tue, 21 Apr 2026 14:56:06 -0400 Subject: [PATCH 18/46] docs(sillytavern): apply DEV-1482 review findings (DEV-1430) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Applies eight review findings from the DEV-1482 integration review. All scoped to docs/v3/guides/integrations/sillytavern.mdx; no code changes. - DOC-3: curl -fsSL in install command (fails loud on 4xx/5xx) - DOC-4: Note now reflects installer auto-config + manual-fallback - DOC-6: LLM-backend prerequisite callout at top of Quick Start - DOC-14: restart step warns about live-session clobbering - DOC-5: Global Config intro names resolution order + precedence; disambiguates "sillytavern" workspace vs hosts.sillytavern key - DOC-7: new Peer Observability subsection (asymmetric default) - DOC-2: route count in Architecture diagram 7 → 9 - DOC-8: troubleshooting row for "plugin on disk, drawer absent" Findings index + rationale: plastic-labs/sillytavern-honcho#3 --- docs/v3/guides/integrations/sillytavern.mdx | 27 ++++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index ce418907b..a25f7bc9d 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -11,6 +11,10 @@ The extension has two parts: a **client extension** (browser) that hooks into Si ## Quick Start + +Honcho stores memory; it doesn't generate text. You'll also need an LLM backend (OpenAI, Claude, OpenRouter, local llama.cpp, etc.) connected via SillyTavern's plug icon in the top nav. Configure that separately before expecting replies in chat. + + ### Step 1: Get Your Honcho API Key 1. Go to **[app.honcho.dev](https://app.honcho.dev)** @@ -23,7 +27,7 @@ From your **SillyTavern directory**: **macOS / Linux:** ```bash -bash <(curl -s https://raw.githubusercontent.com/plastic-labs/sillytavern-honcho/main/install.sh) +bash <(curl -fsSL https://raw.githubusercontent.com/plastic-labs/sillytavern-honcho/main/install.sh) ``` **Windows (PowerShell):** @@ -32,7 +36,7 @@ irm https://raw.githubusercontent.com/plastic-labs/sillytavern-honcho/main/insta ``` -Server plugins must be enabled. The installer checks for this, but if you haven't already, add `enableServerPlugins: true` to your `config.yaml`. +Server plugins must be enabled for the Honcho plugin to load. The installer configures this automatically on fresh checkouts; if you skipped the installer or started SillyTavern before running it, manually set `enableServerPlugins: true` in `config.yaml` and restart. The installer: @@ -43,7 +47,7 @@ The installer: ### Step 3: Restart SillyTavern -Stop and restart SillyTavern so the server plugin loads. +Stop and restart SillyTavern so the server plugin loads. If you have in-progress chats open, save them first — the running session ends when you stop the server. 
### Step 4: Configure @@ -56,7 +60,11 @@ Open **Extensions** (puzzle piece icon) and expand **Honcho Memory**: ## Global Config (Multi-Tool Setups) -If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the extension auto-populates settings from `~/.honcho/config.json`: +If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the extension reads from `~/.honcho/config.json` on startup when resolvable keys are present. Resolution order: `hosts.sillytavern.apiKey` → root-level `apiKey` → fall through to the Extensions-panel key if neither resolves. The Extensions-panel key (SillyTavern's secret manager) takes priority at request time, so entering one in the UI overrides the config file without touching it. + +Note that "sillytavern" appears in two namespaces here: as the Honcho workspace ID (a Honcho concept — what you see in the dashboard) and as the `hosts.sillytavern` host key in the config file (a local convention for which tool's settings are which). + +Flat form: ```json { @@ -67,7 +75,9 @@ If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the ex } ``` -Config reads fall back from `hosts.sillytavern` to root-level globals. Writes are always scoped to `hosts.sillytavern` -- the extension never mutates settings for other tools. +Writes are always scoped to `hosts.sillytavern` — the extension never mutates settings for other tools. + +Nested form (when multiple tools share the file): ```jsonc { @@ -112,6 +122,10 @@ In tool call mode, the extension registers three function tools that the LLM can This mode works best with models that support function calling. The LLM decides when to query memory rather than firing on every turn. +### Peer Observability + +By default, only the user peer accumulates derived memory — Honcho observes the user's messages and derives conclusions across sessions. The AI character's persona comes from its character card, not from peer derivation. If you want the character to have its own Honcho-derived state, configure it as an additional peer in session setup. 
+ ### Peer Modes | Mode | Behavior | @@ -143,7 +157,7 @@ Browser (Client Extension) Server (Plugin) +-----------------------+ +------------------------------+ | index.js | fetch() | plugin/index.js | | | ------------> | | -| - Settings UI | /api/plugins/ | - Express router (7 routes) | +| - Settings UI | /api/plugins/ | - Express router (9 routes) | | - Event hooks | honcho-proxy | - Honcho SDK (@honcho-ai/sdk)| | - Prompt injection | | - API key from ST secrets or | | - Tool registration | | ~/.honcho/config.json | @@ -161,6 +175,7 @@ The server plugin reads API credentials from SillyTavern's secrets store first, | 403 on plugin requests | Set Honcho API key in extension settings or `~/.honcho/config.json` | | SDK import error | Run `cd plugins/honcho-proxy && npm install` | | Extension loads but nothing happens | Enable the checkbox and ensure workspace ID is set | +| Plugin on disk but "Honcho Memory" drawer doesn't appear at all | Set `enableServerPlugins: true` in `config.yaml`; the panel can't show plugins the server never loaded | --- From 2ffe30bd4f954ccecb21e2a894b95d78ebba41cb Mon Sep 17 00:00:00 2001 From: Erosika Date: Tue, 21 Apr 2026 18:17:17 -0400 Subject: [PATCH 19/46] docs(sillytavern): post-review polish pass (DEV-1430) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Clarify installer step 4 — the plugin seeds config.json if absent - 'Puzzle piece' -> 'three-cubes' for the Extensions icon (current ST UI) - API key step notes the UI-overrides-config precedence explicitly - 'Honcho workspace ID' -> 'default Honcho workspace ID (configurable)' - Add Note after Context-modes table — Context only is session-scoped and returns empty until enough messages accumulate; Reasoning is the better default for fresh peers - Next Steps gains two cards: Install SillyTavern (upstream docs) and the Claude Code setup skill (skills/setup/SKILL.md) Follow-ups tracked separately — tool rename (observation -> conclusion, matching the /conclusion endpoint), architecture Excalidraw. --- docs/v3/guides/integrations/sillytavern.mdx | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index a25f7bc9d..d87b32269 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -43,7 +43,7 @@ The installer: 1. Clones the extension into `public/scripts/extensions/third-party/sillytavern-honcho` 2. Symlinks the server plugin to `plugins/honcho-proxy` 3. Installs the `@honcho-ai/sdk` dependency -4. Detects your `~/.honcho/config.json` if it exists +4. Detects your `~/.honcho/config.json` if it exists (if not, the plugin seeds a minimal one on first run) ### Step 3: Restart SillyTavern @@ -51,10 +51,10 @@ Stop and restart SillyTavern so the server plugin loads. If you have in-progress ### Step 4: Configure -Open **Extensions** (puzzle piece icon) and expand **Honcho Memory**: +Open **Extensions** (three-cubes icon, top-right) and expand **Honcho Memory**: 1. Check **Enable Honcho Memory** -2. Click the API key field to set your key +2. Click the API key field to set your key (if auto-detected from `~/.honcho/config.json`, you can still enter one here to override) 3. Enter your workspace ID 4. 
Status indicator should show **Ready** @@ -62,7 +62,7 @@ Open **Extensions** (puzzle piece icon) and expand **Honcho Memory**: If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the extension reads from `~/.honcho/config.json` on startup when resolvable keys are present. Resolution order: `hosts.sillytavern.apiKey` → root-level `apiKey` → fall through to the Extensions-panel key if neither resolves. The Extensions-panel key (SillyTavern's secret manager) takes priority at request time, so entering one in the UI overrides the config file without touching it. -Note that "sillytavern" appears in two namespaces here: as the Honcho workspace ID (a Honcho concept — what you see in the dashboard) and as the `hosts.sillytavern` host key in the config file (a local convention for which tool's settings are which). +Note that "sillytavern" appears in two namespaces here: as the default Honcho workspace ID (a Honcho concept — what you see in the dashboard; configurable) and as the `hosts.sillytavern` host key in the config file (a local convention for which tool's settings are which). Flat form: @@ -110,6 +110,10 @@ The **enrichment mode** controls what layers on top of the base context: Both the context and reasoning layers use stale-while-revalidate with configurable refresh intervals. After the first turn of a session, there is zero added latency. + +**Context only** mode relies on `session.context()`, which is session-scoped — it returns empty output until the session has enough messages for Honcho to derive a representation and summary. For fresh sessions or peers with little history, Reasoning mode is a better default: it queries `peer.chat()` across all of the peer's history, not just the current session. + + ### Tool Call Mode In tool call mode, the extension registers three function tools that the LLM can invoke: @@ -182,6 +186,14 @@ The server plugin reads API credentials from SillyTavern's secrets store first, ## Next Steps + + New to SillyTavern? Start here — install guide for macOS, Linux, Windows, Docker. + + + + Agent-assisted install — idempotent, structural patches, end-to-end verification. + + Source code, issues, and install scripts. From 7d1ce9c1f41deaf8b6d2594d6b785ecb2132d9c1 Mon Sep 17 00:00:00 2001 From: qxxaa Date: Thu, 23 Apr 2026 17:22:16 +0100 Subject: [PATCH 20/46] fix: remove hardcoded stop_sequences override from Deriver model config (#587) * Update deriver.py * Simplify model configuration in deriver.py Removed stop_sequences from model configuration. 
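
The effective change, sketched (the full diff follows below):

    # before: a hardcoded override was applied on every deriver call
    model_config = base_model_config.model_copy(
        update={"stop_sequences": [" \n", "\n\n\n\n"]}
    )
    # after: the configured model settings are used as-is
    model_config = base_model_config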
--- src/deriver/deriver.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/deriver/deriver.py b/src/deriver/deriver.py index 1fcc5ad24..979a9288c 100644 --- a/src/deriver/deriver.py +++ b/src/deriver/deriver.py @@ -125,11 +125,7 @@ async def process_representation_tasks_batch( # validation on settings means max_tokens will always be > 0 base_model_config = _get_deriver_model_config() max_tokens = base_model_config.max_output_tokens or settings.LLM.DEFAULT_MAX_TOKENS - model_config = base_model_config.model_copy( - update={ - "stop_sequences": [" \n", "\n\n\n\n"], - } - ) + model_config = base_model_config # Single LLM call llm_start = time.perf_counter() From d8d625f4709efc5433914ec26336004029c18c53 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 23 Apr 2026 12:43:49 -0400 Subject: [PATCH 21/46] docs(sillytavern): update for PR#10 surface + review fixes - Add Prerequisites section with SillyTavern install link + Node >= 18 requirement (was buried in Next Steps; users hit install step with no awareness ST needed to exist first). - Expand restart step into a callout: restart required for server-plugin reload, not for client-side edits. - Configure step now documents the three editable inputs (API key, Workspace ID, Your peer name) and where each saves. - Fix 'three-cubes icon' -> 'puzzle piece icon'. - Installer step list fleshed out: 6 steps (was 4), including config.yaml bootstrap and enableServerPlugins flip. Dropped the false claim that the plugin seeds a minimal ~/.honcho/config.json on first run. - Global Config section rewritten: resolution order now generalized to apiKey / workspace / peerName (was apiKey-only); documents panel write-back to hosts.sillytavern.*; dropped aiPeer references (it's a telemetry-only field, not user-facing). - Add a Disable / Enable global config subsection covering the opt-out toggle and the Inherit / Push local / Cancel diff dialog. - Troubleshooting: two new rows (stale peer name on new chat, cancelled diff dialog). --- docs/v3/guides/integrations/sillytavern.mdx | 76 ++++++++++++--------- 1 file changed, 45 insertions(+), 31 deletions(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index d87b32269..5097a1a11 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -9,19 +9,15 @@ Give your SillyTavern characters long-term memory. Honcho remembers who you are, The extension has two parts: a **client extension** (browser) that hooks into SillyTavern events, and a **server plugin** (Node.js) that proxies requests to the Honcho API. -## Quick Start +## Prerequisites - -Honcho stores memory; it doesn't generate text. You'll also need an LLM backend (OpenAI, Claude, OpenRouter, local llama.cpp, etc.) connected via SillyTavern's plug icon in the top nav. Configure that separately before expecting replies in chat. - +- **SillyTavern running locally** (Node ≥ 18). New to it? See the [SillyTavern installation docs](https://docs.sillytavern.app/installation/). +- A **Honcho API key** from [app.honcho.dev](https://app.honcho.dev). +- An **LLM backend** (OpenAI, Claude, OpenRouter, local llama.cpp, etc.) connected via SillyTavern's plug icon in the top nav. Honcho stores memory; it doesn't generate text. -### Step 1: Get Your Honcho API Key - -1. Go to **[app.honcho.dev](https://app.honcho.dev)** -2. Sign up or log in -3. 
Copy your API key
-
-### Step 2: Install
+### Step 1: Install
 
 From your **SillyTavern directory**:
 
@@ -35,36 +31,43 @@ irm https://raw.githubusercontent.com/plastic-labs/sillytavern-honcho/main/insta
 ```
 
-
-Server plugins must be enabled for the Honcho plugin to load. The installer configures this automatically on fresh checkouts; if you skipped the installer or started SillyTavern before running it, manually set `enableServerPlugins: true` in `config.yaml` and restart.
-
-
 The installer:
 
 1. Clones the extension into `public/scripts/extensions/third-party/sillytavern-honcho`
 2. Symlinks the server plugin to `plugins/honcho-proxy`
 3. Installs the `@honcho-ai/sdk` dependency
-4. Detects your `~/.honcho/config.json` if it exists (if not, the plugin seeds a minimal one on first run)
+4. Bootstraps `config.yaml` if it doesn't exist (briefly runs `npm start` to generate defaults)
+5. Sets `enableServerPlugins: true` in `config.yaml`
+6. Detects your `~/.honcho/config.json` if it exists
 
-### Step 3: Restart SillyTavern
+### Step 2: Restart SillyTavern
 
-Stop and restart SillyTavern so the server plugin loads. If you have in-progress chats open, save them first — the running session ends when you stop the server.
+
+**Restart is required.** SillyTavern loads server plugins at startup only. After running `install.sh`, stop SillyTavern (⌃C) and start it again. You'll need to restart again whenever `plugin/index.js` changes. Client-side (browser) updates pick up on a hard refresh — no restart needed for those.
+
 
-### Step 4: Configure
+### Step 3: Configure
 
-Open **Extensions** (three-cubes icon, top-right) and expand **Honcho Memory**:
+Open **Extensions** (puzzle piece icon) and expand **Honcho Memory**:
 
 1. Check **Enable Honcho Memory**
-2. Click the API key field to set your key (if auto-detected from `~/.honcho/config.json`, you can still enter one here to override)
-3. Enter your workspace ID
-4. Status indicator should show **Ready**
+2. Click the API key field to set your key (auto-populated if present in `~/.honcho/config.json`; the UI value overrides the file)
+3. Enter your **Workspace ID** (saves to `hosts.sillytavern.workspace`)
+4. Enter **Your peer name** (saves to `hosts.sillytavern.peerName`; auto-synced from your SillyTavern persona on first boot)
+5. Status indicator should show **Ready**
 
 ## Global Config (Multi-Tool Setups)
 
-If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the extension reads from `~/.honcho/config.json` on startup when resolvable keys are present. Resolution order: `hosts.sillytavern.apiKey` → root-level `apiKey` → fall through to the Extensions-panel key if neither resolves. The Extensions-panel key (SillyTavern's secret manager) takes priority at request time, so entering one in the UI overrides the config file without touching it.
+If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the extension reads from `~/.honcho/config.json` on startup.
+
+**Resolution order** for `apiKey`, `workspace`, and `peerName`:
 
-Note that "sillytavern" appears in two namespaces here: as the default Honcho workspace ID (a Honcho concept — what you see in the dashboard; configurable) and as the `hosts.sillytavern` host key in the config file (a local convention for which tool's settings are which).
+1. `hosts.sillytavern.<field>` (host-specific override)
+2. Root-level `<field>` (shared default across tools)
+3. 
For `apiKey` only: fall through to the Extensions-panel key (SillyTavern's secret manager), which takes priority at request time — entering one in the UI overrides the file without touching it. -Flat form: +**Writes** are always scoped to `hosts.sillytavern` — the extension never mutates settings for other tools. Panel edits for Workspace ID and peer name save back to `hosts.sillytavern.workspace` and `hosts.sillytavern.peerName` respectively (debounced). Empty values clear the host override and fall through to the root value. + +Flat form (single-tool setup): ```json { @@ -75,25 +78,34 @@ Flat form: } ``` -Writes are always scoped to `hosts.sillytavern` — the extension never mutates settings for other tools. - -Nested form (when multiple tools share the file): +Nested form (multiple tools sharing the file): ```jsonc { "apiKey": "hch-v2-...", "peerName": "alice", + "workspace": "default", "hosts": { "sillytavern": { "workspace": "sillytavern", - "aiPeer": "Assistant" // Updated automatically per character + "peerName": "alice-rp" }, - "claude_code": { "..." : "..." }, - "cursor": { "..." : "..." } + "claude_code": { "...": "..." }, + "cursor": { "...": "..." } } } ``` +### Disable / Enable global config + +A toggle button in the panel opts out of auto-detection entirely. When disabled, the extension skips the config fetch on load, hides the source line, and saves peer-name edits to the local SillyTavern extension settings instead of the global file. Re-enabling opens a diff-aware dialog if local and global values diverge, offering: + +- **Inherit** — pull global values into this SillyTavern install +- **Push local** — overwrite the global host entry with your local values +- **Cancel** — leave both untouched + +Empty local values are never pushed, so a blank field cannot accidentally delete a host key. + ## How It Works ### Context Architecture @@ -180,6 +192,8 @@ The server plugin reads API credentials from SillyTavern's secrets store first, | SDK import error | Run `cd plugins/honcho-proxy && npm install` | | Extension loads but nothing happens | Enable the checkbox and ensure workspace ID is set | | Plugin on disk but "Honcho Memory" drawer doesn't appear at all | Set `enableServerPlugins: true` in `config.yaml`; the panel can't show plugins the server never loaded | +| Peer name on a new chat still shows an old value | Clear the panel override field (falls back to root / ST persona) or set a new value | +| Re-enabling global config didn't populate the UI | You canceled on the diff dialog — click Enable again and choose Inherit | --- From d7fdf6d48a4bcc5c4480da5046dcd44dad2a94fa Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 23 Apr 2026 12:50:16 -0400 Subject: [PATCH 22/46] docs(sillytavern): clarify write scope MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The plugin also writes to a root-level `sessions` map (ST dir → last Honcho session ID), not only to `hosts.sillytavern.*`. The earlier phrasing overstated the isolation claim. --- docs/v3/guides/integrations/sillytavern.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index 5097a1a11..bf9140865 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -65,7 +65,7 @@ If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the ex 2. Root-level `` (shared default across tools) 3. 
For `apiKey` only: fall through to the Extensions-panel key (SillyTavern's secret manager), which takes priority at request time — entering one in the UI overrides the file without touching it. -**Writes** are always scoped to `hosts.sillytavern` — the extension never mutates settings for other tools. Panel edits for Workspace ID and peer name save back to `hosts.sillytavern.workspace` and `hosts.sillytavern.peerName` respectively (debounced). Empty values clear the host override and fall through to the root value. +**Writes** are scoped to `hosts.sillytavern.*` and a root-level `sessions` map (SillyTavern dir → last Honcho session ID). The extension never mutates other tools' host entries or root fields they own. Panel edits for Workspace ID and peer name save back to `hosts.sillytavern.workspace` and `hosts.sillytavern.peerName` respectively (debounced). Empty values clear the host override and fall through to the root value. Flat form (single-tool setup): From f30eb1b4421616df1c3d7d39d22898ed180b44e3 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 23 Apr 2026 12:52:04 -0400 Subject: [PATCH 23/46] docs(sillytavern): drop internal sessions-map detail --- docs/v3/guides/integrations/sillytavern.mdx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index bf9140865..e8923794b 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -65,7 +65,7 @@ If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the ex 2. Root-level `` (shared default across tools) 3. For `apiKey` only: fall through to the Extensions-panel key (SillyTavern's secret manager), which takes priority at request time — entering one in the UI overrides the file without touching it. -**Writes** are scoped to `hosts.sillytavern.*` and a root-level `sessions` map (SillyTavern dir → last Honcho session ID). The extension never mutates other tools' host entries or root fields they own. Panel edits for Workspace ID and peer name save back to `hosts.sillytavern.workspace` and `hosts.sillytavern.peerName` respectively (debounced). Empty values clear the host override and fall through to the root value. +**Writes** are scoped to `hosts.sillytavern.*`. The extension never mutates other tools' entries. Panel edits for Workspace ID and peer name save back to `hosts.sillytavern.workspace` and `hosts.sillytavern.peerName` respectively (debounced). Empty values clear the host override and fall through to the root value. Flat form (single-tool setup): From ee7ef1f1677f21465f5982c015284cfd3926c7e1 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 23 Apr 2026 12:55:11 -0400 Subject: [PATCH 24/46] docs(sillytavern): move Global Config after How It Works --- docs/v3/guides/integrations/sillytavern.mdx | 102 ++++++++++---------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index e8923794b..9749e69e2 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -55,57 +55,6 @@ Open **Extensions** (puzzle piece icon) and expand **Honcho Memory**: 4. Enter **Your peer name** (saves to `hosts.sillytavern.peerName`; auto-synced from your SillyTavern persona on first boot) 5. 
Status indicator should show **Ready** -## Global Config (Multi-Tool Setups) - -If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the extension reads from `~/.honcho/config.json` on startup. - -**Resolution order** for `apiKey`, `workspace`, and `peerName`: - -1. `hosts.sillytavern.` (host-specific override) -2. Root-level `` (shared default across tools) -3. For `apiKey` only: fall through to the Extensions-panel key (SillyTavern's secret manager), which takes priority at request time — entering one in the UI overrides the file without touching it. - -**Writes** are scoped to `hosts.sillytavern.*`. The extension never mutates other tools' entries. Panel edits for Workspace ID and peer name save back to `hosts.sillytavern.workspace` and `hosts.sillytavern.peerName` respectively (debounced). Empty values clear the host override and fall through to the root value. - -Flat form (single-tool setup): - -```json -{ - "apiKey": "your-honcho-api-key", - "peerName": "your-name", - "workspace": "sillytavern", - "enabled": true -} -``` - -Nested form (multiple tools sharing the file): - -```jsonc -{ - "apiKey": "hch-v2-...", - "peerName": "alice", - "workspace": "default", - "hosts": { - "sillytavern": { - "workspace": "sillytavern", - "peerName": "alice-rp" - }, - "claude_code": { "...": "..." }, - "cursor": { "...": "..." } - } -} -``` - -### Disable / Enable global config - -A toggle button in the panel opts out of auto-detection entirely. When disabled, the extension skips the config fetch on load, hides the source line, and saves peer-name edits to the local SillyTavern extension settings instead of the global file. Re-enabling opens a diff-aware dialog if local and global values diverge, offering: - -- **Inherit** — pull global values into this SillyTavern install -- **Push local** — overwrite the global host entry with your local values -- **Cancel** — leave both untouched - -Empty local values are never pushed, so a blank field cannot accidentally delete a host key. - ## How It Works ### Context Architecture @@ -166,6 +115,57 @@ By default, only the user peer accumulates derived memory — Honcho observes th | User sends message | Stores message in Honcho session | | AI responds | Stores response in Honcho session | +## Global Config (Multi-Tool Setups) + +If you already use Honcho with other tools (Claude Code, Cursor, Hermes), the extension reads from `~/.honcho/config.json` on startup. + +**Resolution order** for `apiKey`, `workspace`, and `peerName`: + +1. `hosts.sillytavern.` (host-specific override) +2. Root-level `` (shared default across tools) +3. For `apiKey` only: fall through to the Extensions-panel key (SillyTavern's secret manager), which takes priority at request time — entering one in the UI overrides the file without touching it. + +**Writes** are scoped to `hosts.sillytavern.*`. The extension never mutates other tools' entries. Panel edits for Workspace ID and peer name save back to `hosts.sillytavern.workspace` and `hosts.sillytavern.peerName` respectively (debounced). Empty values clear the host override and fall through to the root value. 
+ +Flat form (single-tool setup): + +```json +{ + "apiKey": "your-honcho-api-key", + "peerName": "your-name", + "workspace": "sillytavern", + "enabled": true +} +``` + +Nested form (multiple tools sharing the file): + +```jsonc +{ + "apiKey": "hch-v2-...", + "peerName": "alice", + "workspace": "default", + "hosts": { + "sillytavern": { + "workspace": "sillytavern", + "peerName": "alice-rp" + }, + "claude_code": { "...": "..." }, + "cursor": { "...": "..." } + } +} +``` + +### Disable / Enable global config + +A toggle button in the panel opts out of auto-detection entirely. When disabled, the extension skips the config fetch on load, hides the source line, and saves peer-name edits to the local SillyTavern extension settings instead of the global file. Re-enabling opens a diff-aware dialog if local and global values diverge, offering: + +- **Inherit** — pull global values into this SillyTavern install +- **Push local** — overwrite the global host entry with your local values +- **Cancel** — leave both untouched + +Empty local values are never pushed, so a blank field cannot accidentally delete a host key. + ## Architecture ``` From 3d37b343cbdd2d6a4f631c3ed923506fb39d53b1 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 23 Apr 2026 13:02:25 -0400 Subject: [PATCH 25/46] docs(sillytavern): fix tool count (2, not 3) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit honcho_save_observation is not registered in the extension — only honcho_query_memory and honcho_search_history exist in code. --- docs/v3/guides/integrations/sillytavern.mdx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index 9749e69e2..1ef07c878 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -77,12 +77,11 @@ Both the context and reasoning layers use stale-while-revalidate with configurab ### Tool Call Mode -In tool call mode, the extension registers three function tools that the LLM can invoke: +In tool call mode, the extension registers two function tools that the LLM can invoke: | Tool | Description | | --- | --- | | `honcho_query_memory` | Dialectic chat query -- ask Honcho what it knows | -| `honcho_save_observation` | Save an insight about the user to memory | | `honcho_search_history` | Semantic search across session messages | This mode works best with models that support function calling. The LLM decides when to query memory rather than firing on every turn. 
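
An illustrative dispatch for the two registered tools (Python-flavored
pseudocode; the extension itself is JavaScript, and these handler and argument
names are assumptions rather than its actual identifiers):

    def handle_tool_call(name, args):
        if name == "honcho_query_memory":
            return peer.chat(args["query"])       # dialectic query against memory
        if name == "honcho_search_history":
            return session.search(args["query"])  # semantic search over session messages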
From b389627194c794e843a84b16d4c9aa92bc794c5f Mon Sep 17 00:00:00 2001 From: ajspig <46900795+ajspig@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:28:12 -0400 Subject: [PATCH 26/46] docs: adding opencode (#596) * docs: adding opencode * docs: align opencode guide with latest plugin changes * chore: updating language --------- Co-authored-by: adavyas --- docs/docs.json | 1 + .../core-concepts/design-patterns.mdx | 2 +- .../documentation/introduction/vibecoding.mdx | 14 +- docs/v3/guides/integrations/opencode.mdx | 196 ++++++++++++++++++ docs/v3/guides/overview.mdx | 3 + 5 files changed, 214 insertions(+), 2 deletions(-) create mode 100644 docs/v3/guides/integrations/opencode.mdx diff --git a/docs/docs.json b/docs/docs.json index 54262e3f1..961fc094e 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -103,6 +103,7 @@ "group": "Integrations", "pages": [ "v3/guides/integrations/claude-code", + "v3/guides/integrations/opencode", "v3/guides/integrations/crewai", "v3/guides/integrations/langgraph", "v3/guides/integrations/mcp", diff --git a/docs/v3/documentation/core-concepts/design-patterns.mdx b/docs/v3/documentation/core-concepts/design-patterns.mdx index be7d2caaa..0e35114e4 100644 --- a/docs/v3/documentation/core-concepts/design-patterns.mdx +++ b/docs/v3/documentation/core-concepts/design-patterns.mdx @@ -5,7 +5,7 @@ icon: "cubes" --- -If you're using a coding agent (Claude Code, Cursor, etc.), the **`/honcho-integration` skill** walks you through these decisions interactively. It explores your codebase, interviews you about peers and sessions, and generates the integration code. The patterns below are the same ones the skill uses. +If you're using a coding agent (Claude Code, OpenCode, Cursor, etc.), the **`/honcho-integration` skill** walks you through these decisions interactively. It explores your codebase, interviews you about peers and sessions, and generates the integration code. The patterns below are the same ones the skill uses. ## Quick Reference diff --git a/docs/v3/documentation/introduction/vibecoding.mdx b/docs/v3/documentation/introduction/vibecoding.mdx index 5c5bde1a3..d48c33340 100644 --- a/docs/v3/documentation/introduction/vibecoding.mdx +++ b/docs/v3/documentation/introduction/vibecoding.mdx @@ -100,9 +100,21 @@ See the [full Claude Code integration guide](/v3/guides/integrations/claudecode) --- +## OpenCode Plugin + +The [OpenCode plugin](/v3/guides/integrations/opencode) gives OpenCode sessions persistent memory that survives context wipes, session restarts, and fresh chats. + +```bash +bunx @honcho-ai/opencode-honcho install +``` + +Then run `/honcho:setup` inside OpenCode. See the [full OpenCode integration guide](/v3/guides/integrations/opencode) for setup details. + +--- + ## Agent Skills -We provide agent skills for coding assistants like Claude Code, Cursor, Windsurf, and others. +We provide agent skills for coding assistants like Claude Code, OpenCode, Cursor, Windsurf, and others. ```bash Install via npx (Recommended) diff --git a/docs/v3/guides/integrations/opencode.mdx b/docs/v3/guides/integrations/opencode.mdx new file mode 100644 index 000000000..837bb1064 --- /dev/null +++ b/docs/v3/guides/integrations/opencode.mdx @@ -0,0 +1,196 @@ +--- +title: "OpenCode" +icon: 'code' +description: "Add AI-native memory to OpenCode" +sidebarTitle: 'OpenCode' +--- + +Give OpenCode long-term memory that survives context wipes, session restarts, and fresh chats. 
OpenCode remembers what you're working on, your durable preferences, and prior context across every project you touch.
+
+## Quick Start
+
+### Step 1: Get Your Honcho API Key
+
+1. Go to **[app.honcho.dev](https://app.honcho.dev)**
+2. Sign up or log in
+3. Copy your API key (starts with `hch-`)
+
+### Step 2: Install the Plugin
+
+
+This plugin requires [Bun](https://bun.sh) and the [OpenCode CLI](https://opencode.ai). If `opencode` isn't on your `PATH`, install it first, then restart your shell.
+
+
+Run the installer:
+
+```bash
+bunx @honcho-ai/opencode-honcho install
+```
+
+The installer:
+
+- registers `@honcho-ai/opencode-honcho` with OpenCode
+- enables both the native server and TUI plugin targets
+- writes the Honcho command templates into your global OpenCode config
+- activates the plugin globally for every OpenCode project
+
+### Step 3: Run Setup in OpenCode
+
+1. Start OpenCode
+2. Run `/honcho:setup`
+3. Keep the default **Honcho Cloud** option unless you want a self-hosted or local endpoint
+4. Paste your Honcho API key
+5. Run `/honcho:status` to verify the runtime
+
+### Step 4: (Optional) Kickstart with an Interview
+
+```
+/honcho:interview
+```
+
+OpenCode will interview you about stable preferences and project context, then persist what it learns to Honcho so every future session can draw on it.
+
+## What You Get
+
+- **Persistent Memory** — OpenCode retains durable context across sessions
+- **Cloud or Local Deployments** — Point at Honcho Cloud or a self-hosted / local instance
+- **Workspace Mapping** — OpenCode projects map cleanly to Honcho workspaces
+- **Flexible Session Mapping** — Scope sessions per directory, repo, branch, chat instance, or globally
+- **Durable Writes** — Save stable conclusions and retain session context across OpenCode runs
+- **Memory Retrieval** — Search session messages, query Honcho's reasoning, and inject relevant context into prompts
+- **Agent Tools** — First-class tools for search, chat, and conclusion-writing inside OpenCode
+
+## Configuration
+
+Configuration lives in a single file at `~/.honcho/config.json`, shared with other Honcho hosts (Claude Code, Cursor, etc.). OpenCode reads and writes this file directly, and OpenCode-specific defaults live under `hosts.opencode`. Edit the file directly, use `/honcho:config` to change it from OpenCode's chat, or call the `honcho_set_config` tool for other settings.
+
+```jsonc
+{
+  "apiKey": "hch-...",
+  "peerName": "alice",
+  "baseUrl": "https://api.honcho.dev",
+  "hosts": {
+    "opencode": {
+      "workspace": "opencode",
+      "aiPeer": "opencode",
+      "recallMode": "hybrid",
+      "observationMode": "directional",
+      "sessionStrategy": "per-directory"
+    }
+  }
+}
+```
+
+Top-level shared fields are `apiKey`, `peerName`, and `baseUrl`. OpenCode's host-scoped settings live under `hosts.opencode`: `workspace`, `aiPeer`, `recallMode`, `observationMode`, and `sessionStrategy`.
+
+### Cloud vs Local
+
+For **Honcho Cloud**:
+
+- `apiKey` is required
+- `baseUrl` should stay at `https://api.honcho.dev`
+
+For **self-hosted or local Honcho**:
+
+- `baseUrl` should point to your deployment (e.g. `http://127.0.0.1:8000`)
+- `apiKey` is only required if the deployment is authenticated
+
+
+If OpenCode is running inside Docker or another remote environment, `localhost` won't refer to your host machine. The `baseUrl` must be reachable from the OpenCode runtime. 
+ + +### Recall Modes + +| Mode | Behavior | Best for | +| --- | --- | --- | +| `hybrid` (default) | Context injection **and** tool access | Most users — balanced memory coverage | +| `context` | Only inject memory into system prompts | Predictable prompts, no tool calls | +| `tools` | Only expose memory as tools | Explicit, on-demand retrieval | + +### Session Strategies + +| Strategy | Behavior | Best for | +| --- | --- | --- | +| `per-directory` (default) | One session per working directory | Most projects | +| `per-repo` | One session per repository | Repos with multiple entry directories | +| `git-branch` | Session follows the current git branch | Branch-specific workflows | +| `per-session` | New session per OpenCode session id | Short-lived isolated work | +| `chat-instance` | Session tied to the current chat instance | Highly ephemeral usage | +| `global` | One session for everything | Shared memory across all work | + +## Operator Commands + +| Command | Description | +| --- | --- | +| `/honcho:setup` | First-time setup for cloud or local Honcho | +| `/honcho:status` | Show effective Honcho status for the current OpenCode project | +| `/honcho:settings` | Show effective config values and config paths | +| `/honcho:config` | Change `recallMode` | +| `/honcho:interview` | Capture durable preferences or project context into memory | + +## Agent Tools + +The plugin exposes these tools inside OpenCode: + +| Tool | Description | +| --- | --- | +| `honcho_setup` | Validate setup and persist shared credentials or endpoint settings | +| `honcho_status` | Show effective runtime status | +| `honcho_get_config` | Read effective and persisted settings | +| `honcho_set_config` | Update a persisted shared setting | +| `honcho_search` | Search Honcho session messages | +| `honcho_chat` | Query Honcho for reasoning-backed context | +| `honcho_create_conclusion` | Save a durable memory conclusion | + +## Plugin Surfaces + +The plugin hooks into these OpenCode plugin capabilities: + +- `event` +- `chat.message` +- `tool.execute.after` +- `command.execute.before` +- `experimental.chat.system.transform` +- `experimental.session.compacting` +- `shell.env` +- `tool` + +## Building with Teammates + +Because `~/.honcho/config.json` is shared across Honcho hosts, teammates can collaborate by pointing at the same workspace while keeping their own identities. Sessions are automatically prefixed by `peerName` to avoid collisions. + +**Alice** (`~/.honcho/config.json`): +```json +{ + "apiKey": "hch-team-key...", + "peerName": "alice", + "hosts": { + "opencode": { "workspace": "team-acme", "aiPeer": "opencode" } + } +} +``` + +**Bob** (`~/.honcho/config.json`): +```json +{ + "apiKey": "hch-team-key...", + "peerName": "bob", + "hosts": { + "opencode": { "workspace": "team-acme", "aiPeer": "opencode" } + } +} +``` + +Both write to `team-acme`; Honcho's dialectic reasoning draws on context from both peers. + +## Next Steps + + + + Source code, issues, and README. + + + + Learn about peers, sessions, and dialectic reasoning. 
+ + diff --git a/docs/v3/guides/overview.mdx b/docs/v3/guides/overview.mdx index d3ca8c321..4411629db 100644 --- a/docs/v3/guides/overview.mdx +++ b/docs/v3/guides/overview.mdx @@ -14,6 +14,9 @@ Add persistent memory to AI assistants and agents: Long-term memory that survives context wipes, session restarts, and project switches + + Persistent memory for OpenCode sessions, with per-directory, per-repo, or branch-scoped session mapping + Add Honcho memory to Claude Desktop, Cursor, Windsurf, Cline, and any MCP client From 6d26666df244ad28abce1d1a843e98bff1daebb0 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 23 Apr 2026 13:33:59 -0400 Subject: [PATCH 27/46] docs: drop architecture ASCII from sillytavern guide --- docs/v3/guides/integrations/sillytavern.mdx | 16 ---------------- 1 file changed, 16 deletions(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index 1ef07c878..c303f31f0 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -165,22 +165,6 @@ A toggle button in the panel opts out of auto-detection entirely. When disabled, Empty local values are never pushed, so a blank field cannot accidentally delete a host key. -## Architecture - -``` -Browser (Client Extension) Server (Plugin) -+-----------------------+ +------------------------------+ -| index.js | fetch() | plugin/index.js | -| | ------------> | | -| - Settings UI | /api/plugins/ | - Express router (9 routes) | -| - Event hooks | honcho-proxy | - Honcho SDK (@honcho-ai/sdk)| -| - Prompt injection | | - API key from ST secrets or | -| - Tool registration | | ~/.honcho/config.json | -+-----------------------+ +------------------------------+ -``` - -The server plugin reads API credentials from SillyTavern's secrets store first, falling back to `~/.honcho/config.json`. It re-reads the global config before every write to prevent race conditions with concurrent tools. - ## Troubleshooting | Symptom | Fix | From b81762f5012c57bedaffd1df36e0f1903152515e Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 23 Apr 2026 14:26:59 -0400 Subject: [PATCH 28/46] docs(sillytavern): group chat + session behavior, add missing tool MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - New Group Chats subsection: documents per-character peer routing (each group member gets their own peer, not a collapsed group- peer) and lazy peer registration for characters joining mid-chat. - Session Naming: documents the freeze-on-first-assign invariant (changing the naming mode doesn't reroute existing chats) and the Reset button for explicit session rollover. - Tool table: add honcho_save_conclusion — prior fix undercounted (2 -> 3 tools). The extension registers all three. 
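
The freeze invariant from the Session Naming bullet above, sketched
(illustrative pseudocode; these are not the extension's actual identifiers):

    session_id = chat_metadata.get("honcho_session_id")
    if session_id is None:  # assigned exactly once, on first use
        session_id = build_session_id(naming_mode, character, chat)
        chat_metadata["honcho_session_id"] = session_id
    # later changes to naming mode or character name never reroute this chat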
--- docs/v3/guides/integrations/sillytavern.mdx | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index c303f31f0..e062a9e24 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -77,11 +77,12 @@ Both the context and reasoning layers use stale-while-revalidate with configurab ### Tool Call Mode -In tool call mode, the extension registers two function tools that the LLM can invoke: +In tool call mode, the extension registers three function tools that the LLM can invoke: | Tool | Description | | --- | --- | | `honcho_query_memory` | Dialectic chat query -- ask Honcho what it knows | +| `honcho_save_conclusion` | Save a key insight or biographical detail about the user to persistent memory | | `honcho_search_history` | Semantic search across session messages | This mode works best with models that support function calling. The LLM decides when to query memory rather than firing on every turn. @@ -97,6 +98,10 @@ By default, only the user peer accumulates derived memory — Honcho observes th | **Single peer** | One user peer shared across all characters | | **Per-persona** | Each character gets its own isolated memory | +### Group Chats + +Group chats register **one peer per character** in the group, not a single collapsed `group-` peer. Each character's messages land under their own peer so their derived representations stay distinct. Characters who join the group mid-chat are lazy-added to the session on their first message. + ### Session Naming | Mode | Behavior | @@ -105,6 +110,10 @@ By default, only the user peer accumulates derived memory — Honcho observes th | **Per-character** | One session per character (persistent) | | **Custom** | User-defined session name | +**Session IDs are frozen once assigned.** Changing the naming mode, the custom session name, or the character name only affects *new* chats — existing chats stay linked to their original Honcho session so history, summaries, and derivations don't fragment. + +The Active session field in the panel is read-only. To start fresh, use the **Reset** button next to it — it orphans the current session (messages remain in Honcho but the chat unlinks from them) and creates a new session on the next message. + ### Event Flow | SillyTavern Event | Action | From 5f9cb3f3c19350a5b36b6cab9666e84122b96d1f Mon Sep 17 00:00:00 2001 From: Sanjay Santhanam <51058514+Sanjays2402@users.noreply.github.com> Date: Thu, 23 Apr 2026 12:55:16 -0700 Subject: [PATCH 29/46] fix(surprisal): use correct filter format for level observations (#581) The Surprisal module passes `{"level": levels}` directly to `get_all_documents()`, but `apply_filter()` expects operator syntax: `{"level": {"in": levels}}`. Without the `in` operator, the filter is silently ignored, causing `_fetch_level_observations()` to return 0 results. This makes the entire Surprisal phase of the Dream cycle a no-op. 
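
Side by side (operator syntax is what apply_filter() expects):

    filters={"level": levels}            # no operator key -> silently ignored
    filters={"level": {"in": levels}}    # matches documents with level IN (...)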
Fixes #559 --- src/dreamer/surprisal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/dreamer/surprisal.py b/src/dreamer/surprisal.py index 093b2d7b7..e05ce84de 100644 --- a/src/dreamer/surprisal.py +++ b/src/dreamer/surprisal.py @@ -261,7 +261,7 @@ async def _fetch_recent_observations( workspace_name=workspace_name, observer=observer, observed=observed, - filters={"level": levels} if levels else None, + filters={"level": {"in": levels}} if levels else None, limit=limit, ) From 28dcb136abcb77fc2231b62e5031ae455df950f1 Mon Sep 17 00:00:00 2001 From: Erosika Date: Thu, 23 Apr 2026 15:56:21 -0400 Subject: [PATCH 30/46] docs(sillytavern): unify peer modes and session naming, move group chats last, drop event flow --- docs/v3/guides/integrations/sillytavern.mdx | 27 +++++++-------------- 1 file changed, 9 insertions(+), 18 deletions(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index e062a9e24..013a4da74 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -91,37 +91,28 @@ This mode works best with models that support function calling. The LLM decides By default, only the user peer accumulates derived memory — Honcho observes the user's messages and derives conclusions across sessions. The AI character's persona comes from its character card, not from peer derivation. If you want the character to have its own Honcho-derived state, configure it as an additional peer in session setup. -### Peer Modes +### Peer Modes and Session Naming -| Mode | Behavior | +Peer mode controls memory partitioning; session naming controls conversation partitioning. Pair them to get the isolation you want. + +| Peer Mode | Behavior | | --- | --- | | **Single peer** | One user peer shared across all characters | | **Per-persona** | Each character gets its own isolated memory | -### Group Chats - -Group chats register **one peer per character** in the group, not a single collapsed `group-` peer. Each character's messages land under their own peer so their derived representations stay distinct. Characters who join the group mid-chat are lazy-added to the session on their first message. - -### Session Naming - -| Mode | Behavior | +| Session Naming | Behavior | | --- | --- | | **Auto** | Per-chat hash (unique per conversation) | | **Per-character** | One session per character (persistent) | | **Custom** | User-defined session name | -**Session IDs are frozen once assigned.** Changing the naming mode, the custom session name, or the character name only affects *new* chats — existing chats stay linked to their original Honcho session so history, summaries, and derivations don't fragment. +Session IDs are frozen once assigned. Changing the naming mode, the custom session name, or the character name only affects new chats — existing chats stay linked to their original Honcho session so history, summaries, and derivations don't fragment. -The Active session field in the panel is read-only. To start fresh, use the **Reset** button next to it — it orphans the current session (messages remain in Honcho but the chat unlinks from them) and creates a new session on the next message. +The Active session field in the panel is read-only. To start fresh, hit **Reset** next to it — the current session gets orphaned (messages stay in Honcho, the chat unlinks) and a new session starts on the next message. 
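A toy sketch of the freeze-on-first-assign behavior described above, assuming nothing about the extension's internals beyond what the prose states; all names here are made up:

```python
import hashlib


def session_name(mode: str, chat_id: str, character: str, custom: str = "") -> str:
    """Compute a session name for a *new* chat under the given naming mode."""
    if mode == "auto":
        return "st-" + hashlib.sha256(chat_id.encode()).hexdigest()[:12]
    if mode == "per_character":
        return f"st-char-{character.lower()}"
    return custom  # "custom" mode


class ChatSessionLink:
    """Freeze-on-first-assign: the session id is computed once per chat,
    then pinned, so later naming-mode changes only affect new chats."""

    def __init__(self) -> None:
        self._assigned: dict[str, str] = {}  # chat id -> frozen session name

    def resolve(self, chat_id: str, mode: str, character: str, custom: str = "") -> str:
        if chat_id not in self._assigned:
            self._assigned[chat_id] = session_name(mode, chat_id, character, custom)
        return self._assigned[chat_id]

    def reset(self, chat_id: str) -> None:
        # Orphan the current session: only the link is dropped; the next
        # message assigns a fresh session.
        self._assigned.pop(chat_id, None)


links = ChatSessionLink()
first = links.resolve("chat-1", "auto", "Seraphina")
# Switching naming mode does NOT reroute an existing chat:
assert links.resolve("chat-1", "per_character", "Seraphina") == first
links.reset("chat-1")
assert links.resolve("chat-1", "per_character", "Seraphina") != first
```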
-### Event Flow +### Group Chats -| SillyTavern Event | Action | -| --- | --- | -| Chat opened | Creates or retrieves Honcho session + peers | -| Before generation | Injects memory context into the prompt | -| User sends message | Stores message in Honcho session | -| AI responds | Stores response in Honcho session | +Group chats register one peer per character, not a single collapsed `group-` peer. Each character's messages land under their own peer, so their derived representations stay distinct. Characters who join mid-chat get lazy-added to the session on their first message. ## Global Config (Multi-Tool Setups) From f351db6055b258fe2d7de2fe875086179c227db6 Mon Sep 17 00:00:00 2001 From: Rajat Ahuja Date: Thu, 23 Apr 2026 16:09:22 -0400 Subject: [PATCH 31/46] fix: rm stop sequence from tests (#607) --- tests/deriver/test_deriver_processing.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/tests/deriver/test_deriver_processing.py b/tests/deriver/test_deriver_processing.py index 5822f4d54..184bc1538 100644 --- a/tests/deriver/test_deriver_processing.py +++ b/tests/deriver/test_deriver_processing.py @@ -55,11 +55,7 @@ async def test_process_representation_tasks_batch_uses_model_config(self): if await_args is None: raise AssertionError("Expected deriver LLM call") kwargs = await_args.kwargs - expected_config = settings.DERIVER.MODEL_CONFIG.model_copy( - update={ - "stop_sequences": [" \n", "\n\n\n\n"], - } - ) + expected_config = settings.DERIVER.MODEL_CONFIG assert "model_config" in kwargs assert kwargs["model_config"].model == expected_config.model assert kwargs["model_config"].thinking_effort == expected_config.thinking_effort From 07e7a99f3c1877f0c25f8b2753907c9ff125d2e7 Mon Sep 17 00:00:00 2001 From: adavyas <121313528+adavyas@users.noreply.github.com> Date: Thu, 23 Apr 2026 13:11:06 -0700 Subject: [PATCH 32/46] docs: add opencode docs (#606) * docs: adding opencode * docs: align opencode guide with latest plugin changes * chore: updating language * docs: remove interview command from opencode guide --------- Co-authored-by: ajspig --- docs/v3/guides/integrations/opencode.mdx | 9 --------- 1 file changed, 9 deletions(-) diff --git a/docs/v3/guides/integrations/opencode.mdx b/docs/v3/guides/integrations/opencode.mdx index 837bb1064..07a3ac5e1 100644 --- a/docs/v3/guides/integrations/opencode.mdx +++ b/docs/v3/guides/integrations/opencode.mdx @@ -42,14 +42,6 @@ The installer: 4. Paste your Honcho API key 5. Run `/honcho:status` to verify the runtime -### Step 4: (Optional) Kickstart with an Interview - -``` -/honcho:interview -``` - -OpenCode will interview you about stable preferences and project context, then persist what it learns to Honcho so every future session can draw on it. 
- ## What You Get - **Persistent Memory** — OpenCode retains durable context across sessions @@ -126,7 +118,6 @@ If OpenCode is running inside Docker or another remote environment, `localhost` | `/honcho:status` | Show effective Honcho status for the current OpenCode project | | `/honcho:settings` | Show effective config values and config paths | | `/honcho:config` | Change `recallMode` | -| `/honcho:interview` | Capture durable preferences or project context into memory | ## Agent Tools From a3e8000778bc873072facba0b0f512087fbc567d Mon Sep 17 00:00:00 2001 From: adavyas <121313528+adavyas@users.noreply.github.com> Date: Fri, 24 Apr 2026 08:53:02 -0700 Subject: [PATCH 33/46] docs: add Windows opencode install instructions (#611) --- docs/v3/guides/integrations/opencode.mdx | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docs/v3/guides/integrations/opencode.mdx b/docs/v3/guides/integrations/opencode.mdx index 07a3ac5e1..6b37ca58a 100644 --- a/docs/v3/guides/integrations/opencode.mdx +++ b/docs/v3/guides/integrations/opencode.mdx @@ -27,6 +27,18 @@ Run the installer: bunx @honcho-ai/opencode-honcho install ``` + + +Windows Command Prompt: + +```cmd +git clone --branch main https://github.com/plastic-labs/opencode-honcho.git +cd opencode-honcho +bun install && bun run build && bun .\dist\cli.js install --plugin-spec "%CD%" --force +``` + + + The installer: - registers `@honcho-ai/opencode-honcho` with OpenCode From 9d68149ded1d8fd20509929200e07605eaeac3ac Mon Sep 17 00:00:00 2001 From: Erosika Date: Fri, 24 Apr 2026 13:57:13 -0400 Subject: [PATCH 34/46] docs(sillytavern): correct panel labels, split installer per-platform, surface other knobs --- docs/v3/guides/integrations/sillytavern.mdx | 38 +++++++++------------ 1 file changed, 17 insertions(+), 21 deletions(-) diff --git a/docs/v3/guides/integrations/sillytavern.mdx b/docs/v3/guides/integrations/sillytavern.mdx index 013a4da74..6e09b164a 100644 --- a/docs/v3/guides/integrations/sillytavern.mdx +++ b/docs/v3/guides/integrations/sillytavern.mdx @@ -31,13 +31,15 @@ bash <(curl -fsSL https://raw.githubusercontent.com/plastic-labs/sillytavern-hon irm https://raw.githubusercontent.com/plastic-labs/sillytavern-honcho/main/install.ps1 | iex ``` -The installer: +The installer (macOS / Linux): 1. Clones the extension into `public/scripts/extensions/third-party/sillytavern-honcho` 2. Symlinks the server plugin to `plugins/honcho-proxy` 3. Installs the `@honcho-ai/sdk` dependency 4. Bootstraps `config.yaml` if it doesn't exist (briefly runs `npm start` to generate defaults) 5. Sets `enableServerPlugins: true` in `config.yaml` -6. Detects your `~/.honcho/config.json` if it exists +6. Detects your `~/.honcho/config.json` and warns if no resolvable `apiKey` + +The Windows installer does steps 1–3 (using a directory junction via `mklink /J` instead of a symlink) and checks for `~/.honcho/config.json`, but does **not** bootstrap `config.yaml` or flip `enableServerPlugins`. If `config.yaml` is missing, start SillyTavern once to generate it; then set `enableServerPlugins: true` manually before restarting. ### Step 2: Restart SillyTavern @@ -47,10 +49,10 @@ The installer: ### Step 3: Configure -Open **Extensions** (puzzle piece icon) and expand **Honcho Memory**: +Open **Extensions** (puzzle blocks icon) and expand **Honcho Memory**: 1. Check **Enable Honcho Memory** -2. Click the API key field to set your key (auto-populated if present in `~/.honcho/config.json`; the UI value overrides the file) +2. Click the API key field to set your key. 
The plugin falls back to `~/.honcho/config.json` at request time if no panel key is set; a panel value always wins. 3. Enter your **Workspace ID** (saves to `hosts.sillytavern.workspace`) 4. Enter **Your peer name** (saves to `hosts.sillytavern.peerName`; auto-synced from your SillyTavern persona on first boot) 5. Status indicator should show **Ready** @@ -66,10 +68,10 @@ The **enrichment mode** controls what layers on top of the base context: | Mode | Behavior | | --- | --- | | **Context only** | Base layer only -- peer representation + session summary | -| **Reasoning** (default) | Base layer + dialectic `peer.chat()` queries on a configurable interval | +| **Reasoning** (default) | Base layer + dialectic `peer.chat()` queries on a configurable per-turn cadence | | **Tool call** | Base layer + function tools the LLM can call on demand | -Both the context and reasoning layers use stale-while-revalidate with configurable refresh intervals. After the first turn of a session, there is zero added latency. +Both the context and reasoning layers use stale-while-revalidate with a configurable cadence ("Refresh every N turns" and "Reason every N turns"). After the first turn of a session, there is zero added latency. **Context only** mode relies on `session.context()`, which is session-scoped — it returns empty output until the session has enough messages for Honcho to derive a representation and summary. For fresh sessions or peers with little history, Reasoning mode is a better default: it queries `peer.chat()` across all of the peer's history, not just the current session. @@ -87,9 +89,13 @@ In tool call mode, the extension registers three function tools that the LLM can This mode works best with models that support function calling. The LLM decides when to query memory rather than firing on every turn. +### Other Panel Knobs + +The Extensions panel also exposes: **Context settings** (token budget, refresh cadence in turns, include session summary), **Injection position** (After/Before main prompt, or In-chat @ Depth with a numeric depth field), a **Prompt Template** textarea that wraps Honcho output via a `{{text}}` placeholder, and a **Reasoning queries** textarea (`{{message}}` placeholder) for customizing the dialectic prompts used in Reasoning mode. + ### Peer Observability -By default, only the user peer accumulates derived memory — Honcho observes the user's messages and derives conclusions across sessions. The AI character's persona comes from its character card, not from peer derivation. If you want the character to have its own Honcho-derived state, configure it as an additional peer in session setup. +By default, only the user peer accumulates derived memory — Honcho observes the user's messages and derives conclusions across sessions. The AI character's persona comes from its character card, not from peer derivation. 
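The refresh cadence described under Context Architecture can be sketched in a few lines. This is an illustration of stale-while-revalidate keyed to turn counts, with invented names, not the extension's implementation:

```python
import asyncio
import time


class SWRCache:
    """Stale-while-revalidate keyed to a turn cadence: the first call blocks
    to fill the cache; later calls return instantly, and every N turns the
    value is refreshed in the background."""

    def __init__(self, fetch, every_n_turns: int = 2) -> None:
        self._fetch = fetch              # async () -> str
        self._every = every_n_turns
        self._value = None
        self._turns_since_refresh = 0

    async def get(self) -> str:
        if self._value is None:
            self._value = await self._fetch()          # first turn: block once
            return self._value
        self._turns_since_refresh += 1
        if self._turns_since_refresh >= self._every:   # cadence reached:
            self._turns_since_refresh = 0
            asyncio.create_task(self._refresh())       # refresh off the hot path
        return self._value                             # always serve immediately

    async def _refresh(self) -> None:
        self._value = await self._fetch()


async def demo() -> None:
    calls = 0

    async def slow_fetch() -> str:
        nonlocal calls
        calls += 1
        await asyncio.sleep(0.1)
        return f"context v{calls}"

    cache = SWRCache(slow_fetch, every_n_turns=2)
    assert await cache.get() == "context v1"   # blocks ~100ms
    t0 = time.monotonic()
    await cache.get()                          # turn 2: cached, instant
    await cache.get()                          # turn 3: cached + background refresh
    assert time.monotonic() - t0 < 0.05        # zero added latency after turn 1
    await asyncio.sleep(0.2)                   # let the refresh land
    assert await cache.get() == "context v2"


asyncio.run(demo())
```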
### Peer Modes and Session Naming @@ -97,13 +103,13 @@ Peer mode controls memory partitioning; session naming controls conversation par | Peer Mode | Behavior | | --- | --- | -| **Single peer** | One user peer shared across all characters | -| **Per-persona** | Each character gets its own isolated memory | +| **Single peer for all personas** | One user peer shared across all personas | +| **Separate peer per persona** | Each persona gets its own isolated memory | | Session Naming | Behavior | | --- | --- | | **Auto** | Per-chat hash (unique per conversation) | -| **Per-character** | One session per character (persistent) | +| **Per character** | One session per character (persistent) | | **Custom** | User-defined session name | Session IDs are frozen once assigned. Changing the naming mode, the custom session name, or the character name only affects new chats — existing chats stay linked to their original Honcho session so history, summaries, and derivations don't fragment. @@ -155,16 +161,6 @@ Nested form (multiple tools sharing the file): } ``` -### Disable / Enable global config - -A toggle button in the panel opts out of auto-detection entirely. When disabled, the extension skips the config fetch on load, hides the source line, and saves peer-name edits to the local SillyTavern extension settings instead of the global file. Re-enabling opens a diff-aware dialog if local and global values diverge, offering: - -- **Inherit** — pull global values into this SillyTavern install -- **Push local** — overwrite the global host entry with your local values -- **Cancel** — leave both untouched - -Empty local values are never pushed, so a blank field cannot accidentally delete a host key. - ## Troubleshooting | Symptom | Fix | @@ -187,7 +183,7 @@ Empty local values are never pushed, so a blank field cannot accidentally delete New to SillyTavern? Start here — install guide for macOS, Linux, Windows, Docker. - + Agent-assisted install — idempotent, structural patches, end-to-end verification. From 8a95edb79b996794f9ad5c5357a11d8629f4dda9 Mon Sep 17 00:00:00 2001 From: adavyas <121313528+adavyas@users.noreply.github.com> Date: Tue, 28 Apr 2026 12:05:41 -0700 Subject: [PATCH 35/46] docs: update opencode install command (#623) * docs: update opencode install command * docs: use native opencode plugin install --- docs/v3/guides/integrations/opencode.mdx | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/docs/v3/guides/integrations/opencode.mdx b/docs/v3/guides/integrations/opencode.mdx index 6b37ca58a..2e046b1be 100644 --- a/docs/v3/guides/integrations/opencode.mdx +++ b/docs/v3/guides/integrations/opencode.mdx @@ -18,32 +18,26 @@ Give OpenCode long-term memory that survives context wipes, session restarts, an ### Step 2: Install the Plugin -This plugin requires [Bun](https://bun.sh) and the [OpenCode CLI](https://opencode.ai). If `opencode` isn't on your `PATH`, install it first, then restart your shell. +This plugin requires the [OpenCode CLI](https://opencode.ai). If `opencode` isn't on your `PATH`, install it first, then restart your shell. 
-Run the installer: +Install the plugin: ```bash -bunx @honcho-ai/opencode-honcho install +opencode plugin "@honcho-ai/opencode-honcho" --global ``` - +To update an existing plugin install: -Windows Command Prompt: - -```cmd -git clone --branch main https://github.com/plastic-labs/opencode-honcho.git -cd opencode-honcho -bun install && bun run build && bun .\dist\cli.js install --plugin-spec "%CD%" --force +```bash +opencode plugin "@honcho-ai/opencode-honcho" --force ``` - - -The installer: +OpenCode: - registers `@honcho-ai/opencode-honcho` with OpenCode -- enables both the native server and TUI plugin targets -- writes the Honcho command templates into your global OpenCode config +- resolves the package's native server and TUI plugin targets +- updates plugin entries in your global OpenCode config - activates the plugin globally for every OpenCode project ### Step 3: Run Setup in OpenCode From b778d82319ff9766524b7abb839461e6d4e4d506 Mon Sep 17 00:00:00 2001 From: Rajat Ahuja Date: Tue, 28 Apr 2026 15:15:18 -0400 Subject: [PATCH 36/46] fix: add levels to AgentToolConclusionsDeletedEvent (#612) --- src/crud/__init__.py | 2 + src/crud/document.py | 42 +++++++++++++++ src/telemetry/events/agent.py | 6 ++- src/utils/agent_tools.py | 31 ++++++----- tests/integration/test_telemetry.py | 2 + tests/telemetry/conftest.py | 1 + tests/telemetry/test_events.py | 2 +- tests/utils/test_agent_tools.py | 82 +++++++++++++++++++++++++++++ 8 files changed, 152 insertions(+), 16 deletions(-) diff --git a/src/crud/__init__.py b/src/crud/__init__.py index ff8f87ac0..58bcd9f01 100644 --- a/src/crud/__init__.py +++ b/src/crud/__init__.py @@ -9,6 +9,7 @@ create_observations, delete_document, delete_document_by_id, + delete_documents, fetch_documents_by_ids, get_all_documents, get_child_observations, @@ -95,6 +96,7 @@ "query_external_vector_document_ids", "delete_document", "delete_document_by_id", + "delete_documents", # Message "create_messages", "get_messages", diff --git a/src/crud/document.py b/src/crud/document.py index ba652eea7..97a57e369 100644 --- a/src/crud/document.py +++ b/src/crud/document.py @@ -661,6 +661,48 @@ async def delete_document( await db.commit() +async def delete_documents( + db: AsyncSession, + workspace_name: str, + document_ids: Sequence[str], + *, + observer: str, + observed: str, + session_name: str | None = None, +) -> list[tuple[str, str]]: + """ + Soft-delete multiple documents in a single UPDATE ... RETURNING statement. + + Returns (id, level) tuples for rows that actually got deleted — i.e. rows + that matched the workspace/observer/observed filter and were not already + soft-deleted. IDs that didn't match are silently skipped; callers can diff + the returned ids against the input to detect misses. 
+ """ + if not document_ids: + return [] + + conditions = [ + models.Document.id.in_(document_ids), + models.Document.workspace_name == workspace_name, + models.Document.observer == observer, + models.Document.observed == observed, + models.Document.deleted_at.is_(None), + ] + if session_name is not None: + conditions.append(models.Document.session_name == session_name) + + stmt = ( + update(models.Document) + .where(*conditions) + .values(deleted_at=func.now()) + .returning(models.Document.id, models.Document.level) + ) + result = await db.execute(stmt) + rows = result.all() + await db.commit() + return [(row.id, row.level) for row in rows] + + async def delete_document_by_id( db: AsyncSession, workspace_name: str, diff --git a/src/telemetry/events/agent.py b/src/telemetry/events/agent.py index 77abe2c57..fe7ff72b8 100644 --- a/src/telemetry/events/agent.py +++ b/src/telemetry/events/agent.py @@ -121,7 +121,7 @@ class AgentToolConclusionsDeletedEvent(BaseEvent): """ _event_type: ClassVar[str] = "agent.tool.conclusions.deleted" - _schema_version: ClassVar[int] = 1 + _schema_version: ClassVar[int] = 2 _category: ClassVar[str] = "agent" # Run identification @@ -141,6 +141,10 @@ class AgentToolConclusionsDeletedEvent(BaseEvent): # What was deleted conclusion_count: int = Field(..., description="Number of conclusions deleted") + levels: list[str] = Field( + default_factory=list, + description="Level of each deleted conclusion (e.g., ['explicit', 'deductive', 'deductive'])", + ) def get_resource_id(self) -> str: """Resource ID includes run_id and iteration for uniqueness.""" diff --git a/src/utils/agent_tools.py b/src/utils/agent_tools.py index 36168f345..d3bd93ab5 100644 --- a/src/utils/agent_tools.py +++ b/src/utils/agent_tools.py @@ -1812,22 +1812,24 @@ async def _handle_delete_observations( if not observation_ids: return "ERROR: observation_ids list is empty" - deleted_count = 0 async with ctx.db_lock, tracked_db("tool.delete_observations") as db: - for obs_id in observation_ids: - try: - await crud.delete_document( - db, - workspace_name=ctx.workspace_name, - document_id=obs_id, - observer=ctx.observer, - observed=ctx.observed, - ) - deleted_count += 1 - except Exception as e: - logger.warning("Failed to delete observation %s: %s", obs_id, e) + deleted = await crud.delete_documents( + db, + workspace_name=ctx.workspace_name, + document_ids=observation_ids, + observer=ctx.observer, + observed=ctx.observed, + ) - # Emit telemetry event if context is available + deleted_ids = {doc_id for doc_id, _ in deleted} + for obs_id in observation_ids: + if obs_id not in deleted_ids: + logger.warning( + "Failed to delete observation %s (not found, already deleted, or wrong scope)", + obs_id, + ) + + deleted_count = len(deleted) if deleted_count > 0 and ctx.run_id and ctx.agent_type and ctx.parent_category: emit( AgentToolConclusionsDeletedEvent( @@ -1839,6 +1841,7 @@ async def _handle_delete_observations( observer=ctx.observer, observed=ctx.observed, conclusion_count=deleted_count, + levels=[level for _, level in deleted], ) ) diff --git a/tests/integration/test_telemetry.py b/tests/integration/test_telemetry.py index 96b45c53c..9a627885d 100644 --- a/tests/integration/test_telemetry.py +++ b/tests/integration/test_telemetry.py @@ -246,6 +246,7 @@ def create_conclusions_deleted_event( observer="assistant", observed="user_peer", conclusion_count=3, + levels=["explicit", "deductive", "explicit"], ) @@ -651,6 +652,7 @@ async def test_conclusions_deleted_event( received = mock_transport.received_events[0] 
assert received["type"] == "agent.tool.conclusions.deleted" assert received["data"]["conclusion_count"] == 3 + assert received["data"]["levels"] == ["explicit", "deductive", "explicit"] @pytest.mark.asyncio async def test_peer_card_updated_event( diff --git a/tests/telemetry/conftest.py b/tests/telemetry/conftest.py index bf296ef3b..a52194e29 100644 --- a/tests/telemetry/conftest.py +++ b/tests/telemetry/conftest.py @@ -184,6 +184,7 @@ def sample_conclusions_deleted_event( observer="assistant", observed="user_peer", conclusion_count=3, + levels=["explicit", "deductive", "explicit"], ) diff --git a/tests/telemetry/test_events.py b/tests/telemetry/test_events.py index 94f34aaaf..c60c3923f 100644 --- a/tests/telemetry/test_events.py +++ b/tests/telemetry/test_events.py @@ -422,7 +422,7 @@ def test_event_type(self): def test_schema_version(self): """schema_version() returns correct value.""" - assert AgentToolConclusionsDeletedEvent.schema_version() == 1 + assert AgentToolConclusionsDeletedEvent.schema_version() == 2 def test_category(self): """category() returns correct value.""" diff --git a/tests/utils/test_agent_tools.py b/tests/utils/test_agent_tools.py index bb0ff9006..4114cfe71 100644 --- a/tests/utils/test_agent_tools.py +++ b/tests/utils/test_agent_tools.py @@ -140,6 +140,9 @@ def _make_context( include_observation_ids: bool = False, history_token_limit: int = 8192, session_name: str | None = None, + run_id: str | None = None, + agent_type: str | None = None, + parent_category: str | None = None, ) -> ToolContext: return ToolContext( workspace_name=workspace.name, @@ -150,6 +153,9 @@ def _make_context( include_observation_ids=include_observation_ids, history_token_limit=history_token_limit, db_lock=shared_lock, + run_id=run_id, + agent_type=agent_type, + parent_category=parent_category, ) return _make_context @@ -412,6 +418,82 @@ async def test_delete_invalid_id_handled_gracefully( # Should report 0 deleted (graceful handling) assert "Deleted 0 observations" in result + async def test_delete_batch_emits_levels_for_successful_only( + self, + db_session: AsyncSession, + tool_test_data: Any, + make_tool_context: Callable[..., ToolContext], + monkeypatch: pytest.MonkeyPatch, + ): + """Batch delete with mixed levels emits levels only for rows actually deleted.""" + workspace, peer1, peer2, session, _messages, documents = tool_test_data + + # Add two extra documents with non-explicit levels so the batch spans levels. + deductive_doc = models.Document( + workspace_name=workspace.name, + observer=peer1.name, + observed=peer2.name, + content="Works in tech", + embedding=[0.42] * 1536, + session_name=session.name, + level="deductive", + metadata={}, + ) + inductive_doc = models.Document( + workspace_name=workspace.name, + observer=peer1.name, + observed=peer2.name, + content="Tends to be an early riser", + embedding=[0.43] * 1536, + session_name=session.name, + level="inductive", + metadata={}, + ) + db_session.add_all([deductive_doc, inductive_doc]) + await db_session.flush() + await db_session.refresh(deductive_doc) + await db_session.refresh(inductive_doc) + await db_session.commit() + + # Capture emitted telemetry events. 
+ from src.telemetry.events import AgentToolConclusionsDeletedEvent + from src.telemetry.events.base import BaseEvent + from src.utils import agent_tools as agent_tools_module + + captured: list[BaseEvent] = [] + + def _capture(event: BaseEvent) -> None: + captured.append(event) + + monkeypatch.setattr(agent_tools_module, "emit", _capture) + + ctx = make_tool_context( + include_observation_ids=True, + run_id="test_run", + agent_type="deduction", + parent_category="dream", + ) + + explicit_doc_id = documents[0].id + ids_to_delete = [ + explicit_doc_id, + deductive_doc.id, + inductive_doc.id, + "nonexistent_id_12345", + ] + + result = await _handle_delete_observations( + ctx, {"observation_ids": ids_to_delete} + ) + + assert "Deleted 3 observations" in result + assert len(captured) == 1 + event = captured[0] + assert isinstance(event, AgentToolConclusionsDeletedEvent) + assert event.conclusion_count == 3 + # RETURNING order is not guaranteed; compare as multiset. + assert sorted(event.levels) == sorted(["explicit", "deductive", "inductive"]) + @pytest.mark.asyncio class TestGetRecentObservations: From 03a2374ea15334eacf5632a0fbbeafab76abcad5 Mon Sep 17 00:00:00 2001 From: Rajat Ahuja Date: Tue, 28 Apr 2026 16:01:33 -0400 Subject: [PATCH 37/46] fix: give vector sync a substantial retry budget (#604) --- src/reconciler/sync_vectors.py | 41 +++++--- tests/conftest.py | 1 + tests/deriver/test_vector_reconciliation.py | 109 ++++++++++++++++++++ 3 files changed, 135 insertions(+), 16 deletions(-) diff --git a/src/reconciler/sync_vectors.py b/src/reconciler/sync_vectors.py index 4a17e40e2..34a2fab4f 100644 --- a/src/reconciler/sync_vectors.py +++ b/src/reconciler/sync_vectors.py @@ -11,8 +11,10 @@ from dataclasses import dataclass from typing import cast -from sqlalchemy import and_, delete, select, update +from sqlalchemy import and_, delete, or_, select, update from sqlalchemy.ext.asyncio import AsyncSession +from sqlalchemy.orm.attributes import InstrumentedAttribute +from sqlalchemy.sql import ColumnElement from sqlalchemy.sql.functions import func from src import models @@ -27,7 +29,20 @@ # Constants RECONCILIATION_BATCH_SIZE = 50 RECONCILIATION_TIME_BUDGET_SECONDS = 240 # Leave headroom for other maintenance work -MAX_SYNC_ATTEMPTS = 5 # After this many failures, mark as failed +MAX_SYNC_ATTEMPTS = 20 # After this many failures, mark as failed +# Flat wait between sync attempts. With MAX_SYNC_ATTEMPTS=20 this gives ~3 hours +# of outage headroom before a row is marked failed. 
+SYNC_BACKOFF = datetime.timedelta(minutes=10) + + +def _backoff_eligible( + last_sync_at: InstrumentedAttribute[datetime.datetime | None], +) -> ColumnElement[bool]: + """Rows are eligible for sync if never attempted or past the backoff window.""" + return or_( + last_sync_at.is_(None), + last_sync_at < func.now() - SYNC_BACKOFF, + ) @dataclass @@ -73,6 +88,7 @@ async def _get_documents_needing_sync( and_( models.Document.deleted_at.is_(None), models.Document.sync_state == "pending", # Only pending items + _backoff_eligible(models.Document.last_sync_at), ) ) .order_by(models.Document.last_sync_at.asc().nullsfirst()) @@ -101,7 +117,12 @@ async def _get_message_embeddings_needing_sync( """ stmt = ( select(models.MessageEmbedding) - .where(models.MessageEmbedding.sync_state == "pending") + .where( + and_( + models.MessageEmbedding.sync_state == "pending", + _backoff_eligible(models.MessageEmbedding.last_sync_at), + ) + ) .order_by(models.MessageEmbedding.last_sync_at.asc().nullsfirst()) .limit(batch_size) .with_for_update(skip_locked=True) @@ -494,19 +515,7 @@ async def _reconcile_message_embeddings_batch( if not embs: return False - try: - synced, failed = await _sync_message_embeddings( - db, embs, external_vector_store - ) - except Exception: - logger.exception( - "Message embedding reconciliation failed for %s embeddings", - len(embs), - ) - await _bump_message_embedding_sync_attempts(db, embs) - synced = 0 - failed = len(embs) - + synced, failed = await _sync_message_embeddings(db, embs, external_vector_store) metrics.message_embeddings_synced += synced metrics.message_embeddings_failed += failed await db.commit() diff --git a/tests/conftest.py b/tests/conftest.py index 3c9b8e63f..80389d34f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -799,6 +799,7 @@ async def mock_tracked_db_context(_: str | None = None): patch("src.utils.search.tracked_db", mock_tracked_db_context), patch("src.crud.document.tracked_db", mock_tracked_db_context), patch("src.crud.message.tracked_db", mock_tracked_db_context), + patch("src.reconciler.sync_vectors.tracked_db", mock_tracked_db_context), patch("src.dialectic.core.tracked_db", mock_tracked_db_context), patch("src.dreamer.specialists.tracked_db", mock_tracked_db_context), patch("src.dreamer.surprisal.tracked_db", mock_tracked_db_context), diff --git a/tests/deriver/test_vector_reconciliation.py b/tests/deriver/test_vector_reconciliation.py index cc6375977..20958e00f 100644 --- a/tests/deriver/test_vector_reconciliation.py +++ b/tests/deriver/test_vector_reconciliation.py @@ -20,6 +20,7 @@ ReconciliationMetrics, _get_documents_needing_sync, # pyright: ignore[reportPrivateUsage] _get_message_embeddings_needing_sync, # pyright: ignore[reportPrivateUsage] + _reconcile_message_embeddings_batch, # pyright: ignore[reportPrivateUsage] _sync_documents, # pyright: ignore[reportPrivateUsage] _sync_message_embeddings, # pyright: ignore[reportPrivateUsage] run_vector_reconciliation_cycle, @@ -377,6 +378,60 @@ async def test_batch_size_respected( # Verify batch size respected assert len(batch) == 100 + async def test_documents_respect_retry_backoff( + self, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + ) -> None: + """Pending documents should only be fetched once their backoff has elapsed.""" + workspace, peer1 = sample_data + + collection = models.Collection( + workspace_name=workspace.name, + observer=peer1.name, + observed=peer1.name, + ) + db_session.add(collection) + await db_session.commit() + + session = 
models.Session( + name=str(generate_nanoid()), workspace_name=workspace.name + ) + db_session.add(session) + await db_session.commit() + + now = datetime.datetime.now(datetime.timezone.utc) + ineligible_doc = models.Document( + content="too soon", + workspace_name=workspace.name, + observer=peer1.name, + observed=peer1.name, + session_name=session.name, + sync_state="pending", + sync_attempts=1, + last_sync_at=now - datetime.timedelta(minutes=9, seconds=59), + embedding=[1.0] * 1536, + ) + eligible_doc = models.Document( + content="ready", + workspace_name=workspace.name, + observer=peer1.name, + observed=peer1.name, + session_name=session.name, + sync_state="pending", + sync_attempts=1, + last_sync_at=now - datetime.timedelta(minutes=10, seconds=1), + embedding=[2.0] * 1536, + ) + db_session.add_all([ineligible_doc, eligible_doc]) + await db_session.commit() + + pending = await _get_documents_needing_sync(db_session) + pending_ids = {doc.id for doc in pending} + + assert eligible_doc.id in pending_ids + assert ineligible_doc.id not in pending_ids + @pytest.mark.asyncio class TestReEmbedding: @@ -705,6 +760,33 @@ async def test_pending_embeddings_are_selected_without_vectors( pending = await _get_message_embeddings_needing_sync(db_session) assert any(emb.id == pending_emb.id for emb in pending) + async def test_message_embeddings_respect_retry_backoff( + self, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + ) -> None: + """Pending embeddings should only be fetched once their backoff has elapsed.""" + workspace, peer = sample_data + ineligible_emb = await self._create_pending_message_embedding( + db_session, workspace, peer + ) + eligible_emb = await self._create_pending_message_embedding( + db_session, workspace, peer + ) + + now = datetime.datetime.now(datetime.timezone.utc) + ineligible_emb.sync_attempts = 1 + ineligible_emb.last_sync_at = now - datetime.timedelta(minutes=9, seconds=59) + eligible_emb.sync_attempts = 1 + eligible_emb.last_sync_at = now - datetime.timedelta(minutes=10, seconds=1) + await db_session.commit() + + pending = await _get_message_embeddings_needing_sync(db_session) + pending_ids = {emb.id for emb in pending} + + assert eligible_emb.id in pending_ids + assert ineligible_emb.id not in pending_ids + async def test_missing_embeddings_reembedded_and_synced( self, db_session: AsyncSession, @@ -764,6 +846,33 @@ async def test_upsert_failure_marks_attempt_and_continues( assert pending_emb.sync_state in {"pending", "failed"} assert pending_emb.sync_attempts == 1 + async def test_unexpected_batch_exception_does_not_bump_unattempted_rows( + self, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + mock_vector_store: VectorStore, + ) -> None: + """Unexpected wrapper-level failures should not penalize the whole batch.""" + workspace, peer = sample_data + pending_emb = await self._create_pending_message_embedding( + db_session, workspace, peer + ) + + metrics = ReconciliationMetrics() + with ( + patch( + "src.reconciler.sync_vectors._sync_message_embeddings", + side_effect=RuntimeError("unexpected"), + ), + pytest.raises(RuntimeError, match="unexpected"), + ): + await _reconcile_message_embeddings_batch(mock_vector_store, metrics) + + await db_session.refresh(pending_emb) + assert pending_emb.sync_state == "pending" + assert pending_emb.sync_attempts == 0 + assert pending_emb.last_sync_at is None + @pytest.mark.asyncio class TestEndToEndReconciliation: From 94ade07c12c6cb03719086e623e2d1143a200328 Mon Sep 17 
00:00:00 2001 From: banteg <4562643+banteg@users.noreply.github.com> Date: Wed, 29 Apr 2026 21:19:44 +0400 Subject: [PATCH 38/46] fix(config): use auto tool choice for dialectic defaults (#630) --- .env.template | 4 ++-- config.toml.example | 4 ++-- src/config.py | 4 ++-- tests/llm/test_model_config.py | 7 +++++++ 4 files changed, 13 insertions(+), 6 deletions(-) diff --git a/.env.template b/.env.template index 123af642c..2b11a80c0 100644 --- a/.env.template +++ b/.env.template @@ -142,11 +142,11 @@ LLM_OPENAI_API_KEY=your-api-key-here # DIALECTIC_LEVELS__minimal__MODEL_CONFIG__MODEL=gpt-5.4-mini # DIALECTIC_LEVELS__minimal__MAX_TOOL_ITERATIONS=1 # DIALECTIC_LEVELS__minimal__MAX_OUTPUT_TOKENS=250 -# DIALECTIC_LEVELS__minimal__TOOL_CHOICE=any +# DIALECTIC_LEVELS__minimal__TOOL_CHOICE=auto # DIALECTIC_LEVELS__low__MODEL_CONFIG__TRANSPORT=openai # DIALECTIC_LEVELS__low__MODEL_CONFIG__MODEL=gpt-5.4-mini # DIALECTIC_LEVELS__low__MAX_TOOL_ITERATIONS=5 -# DIALECTIC_LEVELS__low__TOOL_CHOICE=any +# DIALECTIC_LEVELS__low__TOOL_CHOICE=auto # DIALECTIC_LEVELS__medium__MODEL_CONFIG__TRANSPORT=openai # DIALECTIC_LEVELS__medium__MODEL_CONFIG__MODEL=gpt-5.4-mini # DIALECTIC_LEVELS__medium__MAX_TOOL_ITERATIONS=2 diff --git a/config.toml.example b/config.toml.example index 236f34025..4c90e27cc 100644 --- a/config.toml.example +++ b/config.toml.example @@ -131,7 +131,7 @@ SESSION_HISTORY_MAX_TOKENS = 4096 [dialectic.levels.minimal] MAX_TOOL_ITERATIONS = 1 MAX_OUTPUT_TOKENS = 250 -TOOL_CHOICE = "any" +TOOL_CHOICE = "auto" [dialectic.levels.minimal.model_config] transport = "openai" @@ -139,7 +139,7 @@ model = "gpt-5.4-mini" [dialectic.levels.low] MAX_TOOL_ITERATIONS = 5 -TOOL_CHOICE = "any" +TOOL_CHOICE = "auto" [dialectic.levels.low.model_config] transport = "openai" diff --git a/src/config.py b/src/config.py index cae0c5dee..1703b5cd2 100644 --- a/src/config.py +++ b/src/config.py @@ -837,12 +837,12 @@ def _default_model_config() -> ConfiguredModelSettings: MODEL_CONFIG=_default_model_config(), MAX_TOOL_ITERATIONS=1, MAX_OUTPUT_TOKENS=250, - TOOL_CHOICE="any", + TOOL_CHOICE="auto", ), "low": DialecticLevelSettings( MODEL_CONFIG=_default_model_config(), MAX_TOOL_ITERATIONS=5, - TOOL_CHOICE="any", + TOOL_CHOICE="auto", ), "medium": DialecticLevelSettings( MODEL_CONFIG=_default_model_config(), diff --git a/tests/llm/test_model_config.py b/tests/llm/test_model_config.py index 20d4aa7c8..0f0664ff4 100644 --- a/tests/llm/test_model_config.py +++ b/tests/llm/test_model_config.py @@ -300,6 +300,9 @@ def test_config_toml_example_uses_nested_model_config_sections() -> None: minimal_level = DialecticLevelSettings.model_validate( config_data["dialectic"]["levels"]["minimal"] ) + low_level = DialecticLevelSettings.model_validate( + config_data["dialectic"]["levels"]["low"] + ) max_level = DialecticLevelSettings.model_validate( config_data["dialectic"]["levels"]["max"] ) @@ -331,6 +334,8 @@ def test_config_toml_example_uses_nested_model_config_sections() -> None: assert deriver_config.thinking_budget_tokens is None assert minimal_level.MODEL_CONFIG.model == "gpt-5.4-mini" assert minimal_level.MODEL_CONFIG.transport == "openai" + assert minimal_level.TOOL_CHOICE == "auto" + assert low_level.TOOL_CHOICE == "auto" assert max_level.MODEL_CONFIG.model == "gpt-5.4-mini" assert max_level.MODEL_CONFIG.transport == "openai" assert max_level.MODEL_CONFIG.thinking_budget_tokens is None @@ -350,6 +355,8 @@ def test_env_template_uses_nested_model_config_keys() -> None: assert "EMBEDDING_VECTOR_DIMENSIONS" in env_template assert 
"DERIVER_MODEL_CONFIG__MODEL" in env_template assert "DIALECTIC_LEVELS__minimal__MODEL_CONFIG__MODEL" in env_template + assert "DIALECTIC_LEVELS__minimal__TOOL_CHOICE=auto" in env_template + assert "DIALECTIC_LEVELS__low__TOOL_CHOICE=auto" in env_template assert "SUMMARY_MODEL_CONFIG__MODEL" in env_template assert "DREAM_DEDUCTION_MODEL_CONFIG__MODEL" in env_template From a05c2f8ec1f03893d9805a39a30a15f2f061c2f0 Mon Sep 17 00:00:00 2001 From: Phil Date: Wed, 29 Apr 2026 13:18:00 -0600 Subject: [PATCH 39/46] fix(deriver): ignore blank observations before embedding (#615) * fix(deriver): ignore blank observations before embedding * Address PR review on observation normalization * Harden mock await arg access in tests * Unify blank observation filtering across tool paths * Move soft-delete query test back to fixture class --- src/crud/representation.py | 29 +++- src/utils/agent_tools.py | 20 ++- tests/crud/test_representation_manager.py | 185 ++++++++++++++++++++++ tests/utils/test_agent_tools.py | 91 +++++++++++ 4 files changed, 317 insertions(+), 8 deletions(-) diff --git a/src/crud/representation.py b/src/crud/representation.py index de97d0d34..9b689d066 100644 --- a/src/crud/representation.py +++ b/src/crud/representation.py @@ -26,6 +26,21 @@ logger = logging.getLogger(__name__) +def _observation_text(obs: ExplicitObservation | DeductiveObservation) -> str: + """Return the canonical text payload for an explicit or deductive observation.""" + return obs.conclusion if isinstance(obs, DeductiveObservation) else obs.content + + +def _normalized_observation( + obs: ExplicitObservation | DeductiveObservation, +) -> ExplicitObservation | DeductiveObservation: + """Return an observation with its persisted/embed text normalized.""" + text = _observation_text(obs).strip() + if isinstance(obs, DeductiveObservation): + return obs.model_copy(update={"conclusion": text}) + return obs.model_copy(update={"content": text}) + + class RepresentationManager: """Unified manager for representation and document queries.""" @@ -67,15 +82,19 @@ async def save_representation( logger.debug("No observations to save") return new_documents - all_observations = representation.deductive + representation.explicit + all_observations = [ + _normalized_observation(obs) + for obs in representation.deductive + representation.explicit + if _observation_text(obs).strip() + ] + if not all_observations: + logger.debug("No non-empty observations to save") + return new_documents # Batch embed all observations batch_embed_start = time.perf_counter() - observation_texts = [ - obs.conclusion if isinstance(obs, DeductiveObservation) else obs.content - for obs in all_observations - ] + observation_texts = [_observation_text(obs) for obs in all_observations] try: embeddings = await embedding_client.simple_batch_embed(observation_texts) except ValueError as e: diff --git a/src/utils/agent_tools.py b/src/utils/agent_tools.py index d3bd93ab5..0d95cf6d0 100644 --- a/src/utils/agent_tools.py +++ b/src/utils/agent_tools.py @@ -33,6 +33,13 @@ MAX_PEER_CARD_FACTS = 40 +def _normalized_observation_input( + obs: schemas.ObservationInput, +) -> schemas.ObservationInput: + """Return an observation input with content normalized for persistence/embedding.""" + return obs.model_copy(update={"content": obs.content.strip()}) + + def _base_observation_properties() -> dict[str, Any]: return { "content": { @@ -800,6 +807,13 @@ async def create_observations( logger.warning("create_observations called with empty list") return 
ObservationsCreatedResult(created_count=0, created_levels=[], failed=[]) + normalized_observations = [ + _normalized_observation_input(obs) for obs in observations if obs.content.strip() + ] + if not normalized_observations: + logger.info("No non-empty observations to create") + return ObservationsCreatedResult(created_count=0, created_levels=[], failed=[]) + # Phase 1: Ensure collection exists (short DB scope) async with tracked_db("create_observations.collection") as db: await crud.get_or_create_collection( @@ -810,12 +824,12 @@ async def create_observations( ) # Phase 2: Compute embeddings (no DB needed) - contents = [obs.content for obs in observations] + contents = [obs.content for obs in normalized_observations] embeddings_by_index: dict[int, list[float]] | None = None try: embeddings = await embedding_client.simple_batch_embed(contents) embeddings_by_index = dict( - zip(range(len(observations)), embeddings, strict=True) + zip(range(len(normalized_observations)), embeddings, strict=True) ) except Exception as e: logger.warning( @@ -826,7 +840,7 @@ async def create_observations( # Build document objects with pre-computed embeddings documents: list[schemas.DocumentCreate] = [] failed: list[ObservationFailure] = [] - for i, obs in enumerate(observations): + for i, obs in enumerate(normalized_observations): embedding: list[float] if embeddings_by_index is not None: embedding = embeddings_by_index[i] diff --git a/tests/crud/test_representation_manager.py b/tests/crud/test_representation_manager.py index e4c468536..dfd959d76 100644 --- a/tests/crud/test_representation_manager.py +++ b/tests/crud/test_representation_manager.py @@ -1,3 +1,8 @@ +from contextlib import asynccontextmanager +from datetime import datetime, timezone +from types import SimpleNamespace +from unittest.mock import AsyncMock, patch + import pytest from nanoid import generate as generate_nanoid from sqlalchemy import func, update @@ -5,6 +10,25 @@ from src import models from src.crud.representation import RepresentationManager +from src.utils.representation import ( + DeductiveObservation, + ExplicitObservation, + Representation, +) + + +@asynccontextmanager +async def _fake_tracked_db(_name: str): + yield object() + + +def _saved_observations(mock_save: AsyncMock): + call = mock_save.await_args + if "all_observations" in call.kwargs: + return call.kwargs["all_observations"] + if len(call.args) > 1: + return call.args[1] + raise AssertionError("missing all_observations in await args") class TestRepresentationManagerSoftDelete: @@ -134,3 +158,164 @@ async def test_query_documents_most_derived_excludes_soft_deleted( result_ids = [doc.id for doc in results] assert doc_live.id in result_ids assert doc_deleted.id not in result_ids + + +class TestRepresentationManagerSave: + @pytest.mark.asyncio + async def test_save_representation_filters_blank_observations_before_embedding(self): + manager = RepresentationManager( + "workspace", + observer="observer", + observed="observed", + ) + representation = Representation( + explicit=[ + ExplicitObservation( + content=" ", + created_at=datetime.now(timezone.utc), + message_ids=[1], + session_name="session", + ), + ExplicitObservation( + content=" useful observation ", + created_at=datetime.now(timezone.utc), + message_ids=[1], + session_name="session", + ), + ] + ) + + with ( + patch("src.crud.representation.tracked_db", _fake_tracked_db), + patch( + "src.crud.representation.embedding_client.simple_batch_embed", + new=AsyncMock(return_value=[[0.1]]), + ) as mock_embed, + patch.object( 
+ manager, + "_save_representation_internal", + new=AsyncMock(return_value=1), + ) as mock_save, + ): + saved = await manager.save_representation( + representation, + message_ids=[1], + session_name="session", + message_created_at=datetime.now(timezone.utc), + message_level_configuration=SimpleNamespace( + dream=SimpleNamespace(enabled=False) + ), + ) + + assert saved == 1 + mock_embed.assert_awaited_once_with(["useful observation"]) + saved_observations = _saved_observations(mock_save) + assert len(saved_observations) == 1 + assert saved_observations[0].content == "useful observation" + + @pytest.mark.asyncio + async def test_save_representation_filters_blank_deductive_observations(self): + manager = RepresentationManager( + "workspace", + observer="observer", + observed="observed", + ) + representation = Representation( + deductive=[ + DeductiveObservation( + conclusion=" ", + premises=["premise a"], + source_ids=["doc-a"], + created_at=datetime.now(timezone.utc), + message_ids=[1], + session_name="session", + ), + DeductiveObservation( + conclusion=" inferred conclusion ", + premises=["premise b"], + source_ids=["doc-b"], + created_at=datetime.now(timezone.utc), + message_ids=[1], + session_name="session", + ), + ] + ) + + with ( + patch("src.crud.representation.tracked_db", _fake_tracked_db), + patch( + "src.crud.representation.embedding_client.simple_batch_embed", + new=AsyncMock(return_value=[[0.2]]), + ) as mock_embed, + patch.object( + manager, + "_save_representation_internal", + new=AsyncMock(return_value=1), + ) as mock_save, + ): + saved = await manager.save_representation( + representation, + message_ids=[1], + session_name="session", + message_created_at=datetime.now(timezone.utc), + message_level_configuration=SimpleNamespace( + dream=SimpleNamespace(enabled=False) + ), + ) + + assert saved == 1 + mock_embed.assert_awaited_once_with(["inferred conclusion"]) + saved_observations = _saved_observations(mock_save) + assert len(saved_observations) == 1 + assert isinstance(saved_observations[0], DeductiveObservation) + assert saved_observations[0].conclusion == "inferred conclusion" + + @pytest.mark.asyncio + async def test_save_representation_skips_all_blank_observations(self): + manager = RepresentationManager( + "workspace", + observer="observer", + observed="observed", + ) + representation = Representation( + explicit=[ + ExplicitObservation( + content="", + created_at=datetime.now(timezone.utc), + message_ids=[1], + session_name="session", + ), + ExplicitObservation( + content="\n\t ", + created_at=datetime.now(timezone.utc), + message_ids=[1], + session_name="session", + ), + ] + ) + + with ( + patch("src.crud.representation.tracked_db", _fake_tracked_db), + patch( + "src.crud.representation.embedding_client.simple_batch_embed", + new=AsyncMock(), + ) as mock_embed, + patch.object( + manager, + "_save_representation_internal", + new=AsyncMock(), + ) as mock_save, + ): + saved = await manager.save_representation( + representation, + message_ids=[1], + session_name="session", + message_created_at=datetime.now(timezone.utc), + message_level_configuration=SimpleNamespace( + dream=SimpleNamespace(enabled=False) + ), + ) + + assert saved == 0 + mock_embed.assert_not_awaited() + mock_save.assert_not_awaited() diff --git a/tests/utils/test_agent_tools.py b/tests/utils/test_agent_tools.py index 4114cfe71..954065871 100644 --- a/tests/utils/test_agent_tools.py +++ b/tests/utils/test_agent_tools.py @@ -4,6 +4,7 @@ from collections.abc import Callable from datetime import datetime, 
timedelta, timezone from typing import Any +from unittest.mock import AsyncMock import pytest from nanoid import generate as generate_nanoid @@ -377,6 +378,96 @@ async def fake_create_documents( assert len(created_documents) == 1 assert created_documents[0].content == "Embeds fine" + async def test_create_observations_filters_blank_content_before_embedding( + self, + tool_test_data: Any, + monkeypatch: pytest.MonkeyPatch, + ): + """Blank or whitespace-only observations are dropped before embedding/persistence.""" + workspace, peer1, peer2, session, _, _ = tool_test_data + created_documents: list[Any] = [] + + async def fake_batch_embed(texts: list[str]) -> list[list[float]]: + assert texts == ["trimmed observation"] + return [[0.4, 0.5, 0.6]] + + async def fake_create_documents( + _db: AsyncSession, + documents: list[Any], + workspace_name: str, + *, + observer: str, + observed: str, + deduplicate: bool = False, + ) -> list[Any]: + _ = (workspace_name, observer, observed, deduplicate) + created_documents.extend(documents) + return documents + + monkeypatch.setattr( + "src.utils.agent_tools.embedding_client.simple_batch_embed", + fake_batch_embed, + ) + monkeypatch.setattr( + "src.utils.agent_tools.crud.create_documents", fake_create_documents + ) + + result = await create_observations( + observations=[ + schemas.ObservationInput(content=" ", level="explicit"), + schemas.ObservationInput(content=" trimmed observation ", level="explicit"), + ], + observer=peer1.name, + observed=peer2.name, + session_name=session.name, + workspace_name=workspace.name, + message_ids=[], + message_created_at=str(datetime.now(timezone.utc)), + ) + + assert isinstance(result, ObservationsCreatedResult) + assert result.created_count == 1 + assert len(result.failed) == 0 + assert len(created_documents) == 1 + assert created_documents[0].content == "trimmed observation" + + async def test_create_observations_skips_all_blank_content( + self, + tool_test_data: Any, + monkeypatch: pytest.MonkeyPatch, + ): + """All-blank observations short-circuit without embedding or persistence.""" + workspace, peer1, peer2, session, _, _ = tool_test_data + batch_embed = AsyncMock() + create_documents = AsyncMock() + + monkeypatch.setattr( + "src.utils.agent_tools.embedding_client.simple_batch_embed", + batch_embed, + ) + monkeypatch.setattr( + "src.utils.agent_tools.crud.create_documents", create_documents + ) + + result = await create_observations( + observations=[ + schemas.ObservationInput(content=" ", level="explicit"), + schemas.ObservationInput(content="\n\t", level="explicit"), + ], + observer=peer1.name, + observed=peer2.name, + session_name=session.name, + workspace_name=workspace.name, + message_ids=[], + message_created_at=str(datetime.now(timezone.utc)), + ) + + assert isinstance(result, ObservationsCreatedResult) + assert result.created_count == 0 + assert len(result.failed) == 0 + batch_embed.assert_not_awaited() + create_documents.assert_not_awaited() + @pytest.mark.asyncio class TestDeleteObservations: From f37338b855d9fe1ab06e7e4b8e676e6fd01baa47 Mon Sep 17 00:00:00 2001 From: Lily Date: Thu, 30 Apr 2026 11:40:51 -0400 Subject: [PATCH 40/46] fix(dreamer): threshold and time-guard semantics (#573) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix(dreamer): threshold and time-guard semantics Finding 2: filter count_stmt on documents.level == 'explicit' in check_and_schedule_dream. 
Dreamer-created levels (deductive, inductive, contradiction) are consolidation output, not input, and would otherwise inflate the threshold count and create a feedback loop. Finding 3 (code-level): relocate last_dream_at write from enqueue_dream (enqueue.py) to process_dream (orchestrator.py), inside the 'if result is not None' block. Duplicate enqueues can no longer reset the 8-hour time guard clock. Failed/never-run dreams don't advance it. Success criteria: lenient (any non-null DreamResult counts). Pending Vineeth confirmation — will adjust to strict/middle if requested. Tests pending in follow-up commits. Co-Authored-By: Claude Opus 4.7 (1M context) * test(dreamer): threshold filter + last_dream_at relocation regression tests Tests for Finding 2 and Finding 3 (code-level): - TestThresholdFilter (tests/dreamer/test_dream_scheduler.py): * Mixed levels below explicit threshold: 30 explicit + 40 deductive + 10 inductive → no trigger (core regression, buggy count would trigger) * Explicit-only at threshold: 60 explicit → triggers * Contradiction excluded: 100 contradiction + 10 explicit → no trigger (confirms positive == "explicit" filter excludes all dreamer output) - TestEnqueueDreamMetadataShape (tests/deriver/test_enqueue_dream.py): * AsyncMock-patched update_collection_internal_metadata verifies enqueue writes last_dream_document_count but NOT last_dream_at - TestLastDreamAtCompletionWrite (tests/dreamer/test_dreamer_integration.py): * Happy path: run_dream returns DreamResult → last_dream_at written * Failure path: run_dream returns None → last_dream_at absent * Exception path: run_dream raises → last_dream_at absent, process_dream swallows exception (queue-processed semantics preserved) Docstring on check_and_schedule_dream tightened: "document threshold" -> "explicit-observation threshold" to reflect filter semantics. Co-Authored-By: Claude Opus 4.7 (1M context) * fix(dreamer): preserve last_dream_document_count in completion write CodeRabbit caught this: update_collection_internal_metadata uses a top-level JSONB `||` merge, so passing {"dream": {"last_dream_at": ...}} replaces the entire "dream" subkey and drops last_dream_document_count that was written by enqueue_dream. Symptom: after every completed dream, the baseline drops to 0. Next check_and_schedule_dream reads documents_since_last_dream as current_count - 0 = current_count, so any collection with >= 50 explicit observations can re-trigger immediately once the 8h guard expires, even with no new raw material. Fix: read-modify-write. Fetch current collection, merge last_dream_at into the existing "dream" dict, write the merged dict back. Preserves sibling keys (current: last_dream_document_count; future-proof for telemetry fields that might land in PR 4). Regression test added to tests/dreamer/test_dreamer_integration.py: pre-seeds {"dream": {"last_dream_document_count": 42}}, runs process_dream, asserts both last_dream_at is written AND last_dream_document_count == 42 is preserved. Co-Authored-By: Claude Opus 4.7 (1M context) * fix(dreamer): address CodeRabbit feedback on b89997c - enqueue.py: read-modify-write preserves last_dream_at when writing baseline - dream_scheduler.py: explicit-level filter on execute_dream count query - test fixture: pin DOCUMENT_THRESHOLD and ENABLED_TYPES for stability - integration test: timezone-aware assertion on last_dream_at Regression test added for enqueue sibling-drop (symmetric to c8fe40a). 
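The sibling-drop and its fix reduce to a few lines of plain Python; here `shallow_merge` stands in for the JSONB `||` merge and the values are invented:

```python
def shallow_merge(meta: dict, patch: dict) -> dict:
    """Top-level merge, like JSONB `||`: colliding keys are REPLACED
    wholesale, so nested siblings under "dream" are lost."""
    return {**meta, **patch}


stored = {"dream": {"last_dream_document_count": 42}}

# Buggy completion write: shallow merge drops the baseline count.
buggy = shallow_merge(stored, {"dream": {"last_dream_at": "2026-04-30T11:40:51Z"}})
assert "last_dream_document_count" not in buggy["dream"]

# Read-modify-write fix: fetch the current "dream" dict, merge the new
# field into it, write the merged dict back (under a row lock upstream).
dream = dict(stored.get("dream", {}))
dream["last_dream_at"] = "2026-04-30T11:40:51Z"
fixed = shallow_merge(stored, {"dream": dream})
assert fixed["dream"]["last_dream_document_count"] == 42
```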
Co-Authored-By: Claude Opus 4.7 (1M context) * fix(dreamer): session lookup symmetry + row lock on dream metadata RMW - dream_scheduler.py: explicit-level filter on execute_dream session lookup (baseline and session pick must agree on the same document set) - crud.collection.get_collection: optional with_for_update flag for callers that need serialized read-modify-write on internal_metadata - enqueue.py + orchestrator.py: pass with_for_update=True on the RMW reads to close the TOCTOU between concurrent enqueue and completion writes Follow-up filed for jsonb_set-based nested updates (docs/factory/backlog/). Co-Authored-By: Claude Opus 4.7 (1M context) * fix(dreamer): explicit-only count on manual schedule_dream route The third caller of enqueue_dream — POST /workspaces/{id}/schedule_dream — was passing an all-levels document count as the baseline, breaking symmetry with check_and_schedule_dream and execute_dream after Loop 2's filter fixes. Filter the manual route's count to match. Co-Authored-By: Claude Opus 4.7 (1M context) * docs(dreamer): document explicit-only invariant on enqueue_dream.document_count Loop 3 follow-up on d76627a. The parameter's semantic tightened across Loop 2 (check_and_schedule_dream, execute_dream) and Loop 3 (schedule_dream route) to "explicit-level count, used as the baseline," but the signature still read "Current document count for metadata update." The next caller would have no way to know from the function contract. Docstring now spells out: (1) the value is explicit-only, (2) it's written as last_dream_document_count, (3) it's the baseline that check_and_schedule_dream subtracts from to compute documents_since_last_dream, (4) passing a count that includes non-explicit levels (deductive, inductive, contradiction) inflates the baseline and suppresses the next scheduled dream. Co-Authored-By: Claude Opus 4.7 (1M context) * refactor(dreamer): rename current_document_count → current_explicit_count Loop 3 follow-up on d4e10e3. After Loop 2's filter landed, the local in check_and_schedule_dream held an explicit-only count but was still named current_document_count — asymmetric with execute_dream's current_explicit_count (line 201) and contradicting the filter on line 269 that produces the value. Pure rename: three occurrences (definition at 271, subtraction at 274, log extra key at 282). No test references. Naming-as-invariant alignment with d76627a (query filters), d4e10e3 (parameter docstring), and Loop 1's local rename in execute_dream. The persisted JSONB key last_dream_document_count is the one remaining drift-layer; filed as plastic-claudebook backlog item for a separate PR with an intentional migration path. Co-Authored-By: Claude Opus 4.7 (1M context) * fix(dreamer): atomic guard-pair write + in-flight stampede defense Loop 4 response to Vineeth's CHANGES_REQUESTED on PR #573. The pre-Loop-4 enqueue-time write of last_dream_document_count was serving double duty: rate limiter AND stampede latch. By arming the 8h guard the moment a dream entered the pipeline, it implicitly blocked a second dream from being scheduled during the in-flight window. Loop 3 relocated the last_dream_at write to completion without moving its sibling baseline, splitting the semantic pair and exposing the latch role that had lived only in Vineeth's head. 
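A toy model of the guard pair makes the next paragraph's invariant concrete. The names are invented, the queue check is folded into an in-memory set, and the 8h time guard is omitted, so this is an illustration rather than the dreamer's code:

```python
import time


class DreamGuard:
    """While a dream is pending or in flight for a (workspace, observer,
    observed) key, no second dream may be enqueued; the baseline count and
    timestamp advance together, and only on successful completion."""

    def __init__(self) -> None:
        self.pending: set[str] = set()   # stands in for the queue-backed latch
        self.meta: dict[str, dict] = {}  # stands in for collection "dream" metadata

    def try_schedule(self, key: str, explicit_count: int, threshold: int = 50) -> bool:
        if key in self.pending:          # a pending/in-flight dream blocks a second one
            return False
        baseline = self.meta.get(key, {}).get("last_dream_document_count", 0)
        if explicit_count - baseline < threshold:
            return False                 # not enough new explicit observations
        self.pending.add(key)
        return True

    def complete(self, key: str, explicit_count: int, succeeded: bool) -> None:
        self.pending.discard(key)
        if succeeded:                    # atomic guard-pair write
            self.meta[key] = {
                "last_dream_document_count": explicit_count,
                "last_dream_at": time.time(),
            }


g = DreamGuard()
assert g.try_schedule("ws/alice/bob", explicit_count=60)
assert not g.try_schedule("ws/alice/bob", explicit_count=61)  # stampede blocked
g.complete("ws/alice/bob", explicit_count=61, succeeded=False)
assert g.try_schedule("ws/alice/bob", explicit_count=61)      # failed run kept the baseline
```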
Invariant (now pinned to check_and_schedule_dream's docstring): from the moment a dream is scheduled until it completes or fails, no second dream may be enqueued for the same (workspace, observer, observed) — and the baseline count advances only when consolidation actually happened. Changes: - enqueue_dream: remove the last_dream_document_count write entirely and drop the document_count parameter. enqueue no longer touches dream metadata; the implicit stampede latch is replaced by an explicit queue-backed defense. - process_dream: extend the existing row-locked RMW to write both guard fields atomically. Current explicit-doc count is recomputed inside the locked block (not carried on DreamPayload) so the pair reflects the actual consolidation moment. - check_and_schedule_dream: query QueueItem for pending dreams on this collection's work_unit_keys (mirrors uq_queue_dream_pending_work_unit_key) before arming a timer. Uses queue state as source of truth rather than reflecting it into metadata. - Tests: two new coherence tests under TestGuardPairCoherence — test_pending_queue_item_blocks_second_schedule walks the stampede timeline, test_silent_failure_allows_retry_on_same_corpus verifies failed dreams don't consume the baseline. Existing tests updated to the new contract. * chore(dreamer): trim comment slop from loop-4 atomic pair work Compress three verbose comments added in d24958d — the invariant itself is captured in check_and_schedule_dream's docstring, so the inline narrative restates what the code already says. - dream_scheduler.py defense C block: 5 lines → 2 - orchestrator.py atomic pair write: 4 lines → 1 - enqueue.py docstring paragraph: 5 lines → 2 Net: +5/-14. Follows Eri's eef27be precedent on sillytavern-honcho PR #7. --------- Co-authored-by: lilyplasticlabs Co-authored-by: Claude Opus 4.7 (1M context) --- src/crud/collection.py | 22 + src/deriver/enqueue.py | 34 +- src/dreamer/dream_scheduler.py | 77 ++- src/dreamer/orchestrator.py | 32 +- src/routers/workspaces.py | 14 +- tests/deriver/test_enqueue_dream.py | 59 +++ tests/dreamer/test_dream_scheduler.py | 250 ++++----- tests/dreamer/test_dreamer_integration.py | 598 ++++++++++++++++++++++ tests/routes/test_workspaces.py | 59 +++ 9 files changed, 956 insertions(+), 189 deletions(-) create mode 100644 tests/deriver/test_enqueue_dream.py create mode 100644 tests/dreamer/test_dreamer_integration.py diff --git a/src/crud/collection.py b/src/crud/collection.py index f0496d509..2790cec1b 100644 --- a/src/crud/collection.py +++ b/src/crud/collection.py @@ -81,6 +81,7 @@ async def get_collection( *, observer: str, observed: str, + with_for_update: bool = False, ) -> models.Collection: """ Get a collection by observer/observed for a workspace. @@ -90,6 +91,11 @@ async def get_collection( workspace_name: Name of the workspace observer: Name of the observing peer (owns the collection) observed: Name of the observed peer + with_for_update: If True, acquire a row-level lock (SELECT ... FOR UPDATE) + on the collection. Bypasses the cache so the lock is actually held + by the current transaction. Callers using this flag must wrap the + read and subsequent write in the same transaction (the lock is + released on commit/rollback). Returns: The collection if found @@ -97,6 +103,22 @@ async def get_collection( Raises: ResourceNotFoundException: If the collection does not exist """ + if with_for_update: + # Row-lock path: go direct to DB (skip cache) so the FOR UPDATE lock + # is actually acquired on the row in the current transaction. 
The + # cached dict path would return without issuing SELECT ... FOR UPDATE. + stmt = ( + select(models.Collection) + .where(models.Collection.workspace_name == workspace_name) + .where(models.Collection.observer == observer) + .where(models.Collection.observed == observed) + .with_for_update() + ) + collection = await db.scalar(stmt) + if collection is None: + raise ResourceNotFoundException("Collection not found") + return collection + data = await _fetch_collection(db, workspace_name, observer, observed) if data is None: raise ResourceNotFoundException("Collection not found") diff --git a/src/deriver/enqueue.py b/src/deriver/enqueue.py index 71b00a2ba..cbd032c2b 100644 --- a/src/deriver/enqueue.py +++ b/src/deriver/enqueue.py @@ -1,5 +1,4 @@ import logging -from datetime import datetime, timezone from typing import Any, Literal from sqlalchemy import exists, insert, select @@ -436,27 +435,26 @@ async def enqueue_dream( observer: str, observed: str, dream_type: schemas.DreamType, - document_count: int, session_name: str | None = None, ) -> None: """ Enqueue a dream task for immediate processing by the deriver. + Does not touch collection.internal_metadata["dream"] — both guard fields + are written atomically in process_dream on successful completion. + Deduplication: If a dream with the same work_unit_key is already in-progress - (has an ActiveQueueSession), the enqueue is skipped to prevent running - multiple dreams concurrently for the same collection. + (has an ActiveQueueSession) or pending in the queue, the enqueue is skipped. Args: workspace_name: Name of the workspace observer: Name of the observer peer observed: Name of the observed peer dream_type: Type of dream to execute - document_count: Current document count for metadata update session_name: Name of the session to scope the dream to if specified """ async with tracked_db("dream_enqueue") as db_session: try: - # Create the dream queue record dream_record = create_dream_record( workspace_name, observer=observer, @@ -467,11 +465,6 @@ async def enqueue_dream( work_unit_key = dream_record["work_unit_key"] - # Check if a dream with this work_unit_key is currently in progress - # (has an ActiveQueueSession, meaning a worker is processing it) - # We only block on in-progress dreams, not pending ones - if there's - # a pending dream, we don't need to add another one anyway since - # the queue processor will pick it up. 
in_progress_check = select( exists( select(models.ActiveQueueSession.id).where( @@ -491,7 +484,6 @@ async def enqueue_dream( ) return - # Check if there's already a pending dream with the same work_unit_key pending_check = select( exists( select(QueueItem.id).where( @@ -512,25 +504,9 @@ async def enqueue_dream( ) return - # Insert into queue stmt = insert(QueueItem).returning(QueueItem) await db_session.execute(stmt, [dream_record]) - - # Update collection metadata (CRUD handles cache invalidation) - now_iso = datetime.now(timezone.utc).isoformat() - await crud.update_collection_internal_metadata( - db_session, - workspace_name, - observer, - observed, - update_data={ - "dream": { - "last_dream_document_count": document_count, - "last_dream_at": now_iso, - } - }, - ) - # update_collection_internal_metadata commits already + await db_session.commit() logger.info( "Enqueued dream task for %s/%s/%s (type: %s)", diff --git a/src/dreamer/dream_scheduler.py b/src/dreamer/dream_scheduler.py index 1a339d21c..eb2e2177a 100644 --- a/src/dreamer/dream_scheduler.py +++ b/src/dreamer/dream_scheduler.py @@ -4,7 +4,7 @@ from logging import getLogger import sentry_sdk -from sqlalchemy import func, select +from sqlalchemy import exists, func, select from sqlalchemy.ext.asyncio import AsyncSession from src import models @@ -160,13 +160,11 @@ async def execute_dream( observer: str, observed: str, ) -> None: - """Execute the dream by enqueueing it and updating collection metadata.""" - # Import here to avoid circular dependency + """Execute the dream by enqueueing it.""" from src import crud from src.deriver.enqueue import enqueue_dream from src.utils.config_helpers import get_configuration - # Find the most recent session and get current document count async with tracked_db("dream_session_lookup") as db: stmt = ( select(models.Document.session_name) @@ -174,6 +172,7 @@ async def execute_dream( models.Document.workspace_name == workspace_name, models.Document.observer == observer, models.Document.observed == observed, + models.Document.level == "explicit", ) .order_by(models.Document.created_at.desc()) .limit(1) @@ -186,14 +185,6 @@ async def execute_dream( ) return - # Get current document count at execution time (not stale from scheduling) - count_stmt = select(func.count(models.Document.id)).where( - models.Document.workspace_name == workspace_name, - models.Document.observer == observer, - models.Document.observed == observed, - ) - current_document_count = int(await db.scalar(count_stmt) or 0) - session = await crud.get_session( db, workspace_name=workspace_name, session_name=session_name ) @@ -212,7 +203,6 @@ async def execute_dream( observer=observer, observed=observed, dream_type=dream_type, - document_count=current_document_count, session_name=session_name, ) @@ -231,13 +221,18 @@ async def check_and_schedule_dream( collection: models.Collection, ) -> bool: """ - Check if a collection has reached the document threshold and schedule a timer-based dream. + From the moment a dream is scheduled until it completes or fails, no second + dream may be enqueued for the same (workspace, observer, observed) — and the + baseline count advances only when consolidation actually happened. + + Check if a collection has reached the explicit-observation threshold and schedule a timer-based dream. This function only schedules a timer-based dream if: 1. Dreams are enabled - 2. Document threshold is reached + 2. Explicit-observation threshold is reached (dreamer output does not count) 3. 
Minimum hours between dreams have passed - 4. No dream is already scheduled for this collection + 4. No dream is already pending in the queue for this collection (in-flight check) + 5. No dream is already scheduled for this collection Args: db: Database session @@ -249,21 +244,21 @@ async def check_and_schedule_dream( if not settings.DREAM.ENABLED: return False - # Get dream metadata from internal_metadata dream_metadata = collection.internal_metadata.get("dream", {}) last_dream_document_count = dream_metadata.get("last_dream_document_count", 0) last_dream_at = dream_metadata.get("last_dream_at") - # Count current documents in the collection + # Count explicit-level docs only: dreamer output (deductive/inductive/ + # contradiction) would inflate the threshold and create a feedback loop. count_stmt = select(func.count(models.Document.id)).where( models.Document.workspace_name == collection.workspace_name, models.Document.observer == collection.observer, models.Document.observed == collection.observed, + models.Document.level == "explicit", ) - current_document_count = int(await db.scalar(count_stmt) or 0) + current_explicit_count = int(await db.scalar(count_stmt) or 0) - # Calculate documents added since last dream - documents_since_last_dream = current_document_count - last_dream_document_count + documents_since_last_dream = current_explicit_count - last_dream_document_count logger.debug( "Dream check", @@ -271,16 +266,14 @@ async def check_and_schedule_dream( "workspace_name": collection.workspace_name, "observer": collection.observer, "observed": collection.observed, - "current_document_count": current_document_count, + "current_explicit_count": current_explicit_count, "last_dream_document_count": last_dream_document_count, "documents_since_last_dream": documents_since_last_dream, "document_threshold": settings.DREAM.DOCUMENT_THRESHOLD, }, ) - # Only schedule timer if document threshold is reached if documents_since_last_dream >= settings.DREAM.DOCUMENT_THRESHOLD: - # Check if we're within minimum hours between dreams if last_dream_at: try: last_dream_time = datetime.fromisoformat(last_dream_at) @@ -299,11 +292,43 @@ async def check_and_schedule_dream( f"Invalid last_dream_at timestamp: {last_dream_at}, error: {e}" ) + # Queue is source of truth for in-flight dreams; mirrors + # uq_queue_dream_pending_work_unit_key. 
+ enabled_dream_types = settings.DREAM.ENABLED_TYPES + pending_keys = [ + construct_work_unit_key( + collection.workspace_name, + { + "task_type": "dream", + "observer": collection.observer, + "observed": collection.observed, + "dream_type": dream_type, + }, + ) + for dream_type in enabled_dream_types + ] + pending_exists = await db.scalar( + select( + exists( + select(models.QueueItem.id).where( + models.QueueItem.task_type == "dream", + models.QueueItem.processed == False, # noqa: E712 + models.QueueItem.work_unit_key.in_(pending_keys), + ) + ) + ) + ) + if pending_exists: + logger.info( + "Skipping dream schedule for %s/%s: pending dream already in queue", + collection.observer, + collection.observed, + ) + return False + dream_scheduler = get_dream_scheduler() if dream_scheduler: - enabled_dream_types = settings.DREAM.ENABLED_TYPES for dream_type in enabled_dream_types: - # Include dream_type in key so each dream type can be tracked independently dream_work_unit_key = construct_work_unit_key( collection.workspace_name, { diff --git a/src/dreamer/orchestrator.py b/src/dreamer/orchestrator.py index 000d45a6a..d09e5b677 100644 --- a/src/dreamer/orchestrator.py +++ b/src/dreamer/orchestrator.py @@ -17,11 +17,13 @@ import time import uuid from dataclasses import dataclass +from datetime import datetime, timezone from typing import Any import sentry_sdk +from sqlalchemy import func, select -from src import crud +from src import crud, models from src.config import settings from src.dependencies import tracked_db from src.dreamer.specialists import SPECIALISTS, SpecialistResult @@ -323,6 +325,34 @@ async def process_dream( + f"duration={result.total_duration_ms:.0f}ms" ) + # Both guard fields advance together only on successful consolidation. + now_iso = datetime.now(timezone.utc).isoformat() + async with tracked_db("dream.guard_pair_write") as db: + collection = await crud.get_collection( + db, + workspace_name, + observer=payload.observer, + observed=payload.observed, + with_for_update=True, + ) + count_stmt = select(func.count(models.Document.id)).where( + models.Document.workspace_name == workspace_name, + models.Document.observer == payload.observer, + models.Document.observed == payload.observed, + models.Document.level == "explicit", + ) + current_explicit_count = int(await db.scalar(count_stmt) or 0) + dream_meta = dict(collection.internal_metadata.get("dream", {})) + dream_meta["last_dream_at"] = now_iso + dream_meta["last_dream_document_count"] = current_explicit_count + await crud.update_collection_internal_metadata( + db, + workspace_name, + payload.observer, + payload.observed, + update_data={"dream": dream_meta}, + ) + except Exception as e: logger.error( f"Error processing dream task {payload.dream_type} for {payload.observer}/{payload.observed}: {str(e)}", diff --git a/src/routers/workspaces.py b/src/routers/workspaces.py index 90530e92a..2ce7cdda4 100644 --- a/src/routers/workspaces.py +++ b/src/routers/workspaces.py @@ -3,10 +3,9 @@ from fastapi import APIRouter, Body, Depends, HTTPException, Path, Query, Response from fastapi_pagination import Page from fastapi_pagination.ext.sqlalchemy import apaginate -from sqlalchemy import func, select from sqlalchemy.ext.asyncio import AsyncSession -from src import crud, models, schemas +from src import crud, schemas from src.config import settings from src.dependencies import db from src.deriver.enqueue import enqueue_deletion, enqueue_dream @@ -201,7 +200,6 @@ async def schedule_dream( request: schemas.ScheduleDreamRequest = Body( 
..., description="Dream scheduling parameters" ), - db: AsyncSession = db, ): """ Manually schedule a dream task for a specific collection. @@ -224,21 +222,11 @@ async def schedule_dream( observed = request.observed if request.observed is not None else request.observer dream_type = request.dream_type - # Count documents in the collection - count_stmt = select(func.count(models.Document.id)).where( - models.Document.workspace_name == workspace_id, - models.Document.observer == observer, - models.Document.observed == observed, - ) - document_count = int(await db.scalar(count_stmt) or 0) - - # Enqueue the dream task for immediate processing await enqueue_dream( workspace_id, observer=observer, observed=observed, dream_type=dream_type, - document_count=document_count, session_name=request.session_id, ) diff --git a/tests/deriver/test_enqueue_dream.py b/tests/deriver/test_enqueue_dream.py new file mode 100644 index 000000000..bea5c2568 --- /dev/null +++ b/tests/deriver/test_enqueue_dream.py @@ -0,0 +1,59 @@ +"""Regression tests for `enqueue_dream` metadata write shape. + +Loop 4 (PR #573): `enqueue_dream` no longer touches collection.internal_metadata +at all. Both guard fields (last_dream_at and last_dream_document_count) are +written atomically in `process_dream` on successful completion — this preserves +the invariant that the baseline advances only when consolidation actually +happened, and prevents the in-flight stampede from false-advancing a guard. +""" + +from unittest.mock import AsyncMock, patch + +import pytest + +from src import schemas +from src.deriver.enqueue import enqueue_dream + + +class TestEnqueueDreamMetadataShape: + @pytest.mark.asyncio + async def test_enqueue_does_not_touch_collection_metadata(self): + """`enqueue_dream` must not call update_collection_internal_metadata.""" + with ( + patch( + "src.deriver.enqueue.crud.update_collection_internal_metadata", + new_callable=AsyncMock, + ) as mock_update, + patch( + "src.deriver.enqueue.crud.get_collection", + new_callable=AsyncMock, + ) as mock_get_collection, + patch( + "src.deriver.enqueue.tracked_db", + ) as mock_db_ctx, + ): + mock_session = AsyncMock() + mock_session.scalar = AsyncMock(return_value=False) + mock_session.execute = AsyncMock() + mock_session.commit = AsyncMock() + mock_db_ctx.return_value.__aenter__.return_value = mock_session + + await enqueue_dream( + workspace_name="test_workspace", + observer="alice", + observed="bob", + dream_type=schemas.DreamType.OMNI, + session_name=None, + ) + + assert not mock_update.called, ( + "enqueue_dream must not write to collection.internal_metadata; " + "guard fields advance atomically in process_dream on success." + ) + assert not mock_get_collection.called, ( + "enqueue_dream must not need to load the collection — it no " + "longer touches dream metadata." + ) + assert ( + mock_session.execute.called + ), "enqueue_dream must still insert the QueueItem row." 
diff --git a/tests/dreamer/test_dream_scheduler.py b/tests/dreamer/test_dream_scheduler.py index 06f904c1c..4db81d74d 100644 --- a/tests/dreamer/test_dream_scheduler.py +++ b/tests/dreamer/test_dream_scheduler.py @@ -4,8 +4,14 @@ from unittest.mock import AsyncMock, patch import pytest - -from src.dreamer.dream_scheduler import DreamScheduler, set_dream_scheduler +from sqlalchemy.ext.asyncio import AsyncSession + +from src import models +from src.dreamer.dream_scheduler import ( + DreamScheduler, + check_and_schedule_dream, + set_dream_scheduler, +) from src.schemas import DreamType from src.utils.work_unit import construct_work_unit_key @@ -279,136 +285,140 @@ async def test_does_not_cancel_dreams_for_different_workspace( assert key_ws2 in dream_scheduler.pending_dreams -class TestDocumentCountAtExecutionTime: - """Regression tests for Bug #2: Stale document count used in metadata update. - - Previously, the document count was captured when the dream was scheduled - (at check_and_schedule_dream time), then used 60 minutes later when the - dream actually executed. This caused incorrect metadata if documents were - added during the wait period. +class TestThresholdFilter: + """Regression tests for Finding 2: threshold must count only explicit-level docs. - Now, execute_dream queries the current document count at execution time. + Previously the threshold counted all documents in a collection, including + dreamer output (deductive/inductive/contradiction). This created a feedback + loop where each dream's output inflated the trigger for the next dream. + The fix filters the count to `level == "explicit"` only. """ - @pytest.mark.asyncio - async def test_execute_dream_queries_document_count_at_execution( - self, dream_scheduler: DreamScheduler - ): - """execute_dream should query current document count, not use a stale value. + @pytest.fixture(autouse=True) + def _pin_dream_config(self): + """Pin DOCUMENT_THRESHOLD=50 and ENABLED_TYPES=['omni'] for this class. - This test verifies that execute_dream fetches the document count fresh - from the database at execution time rather than using a pre-captured value. + These tests assume the default thresholds; a developer's local env + (e.g. DREAM_DOCUMENT_THRESHOLD=5 for faster manual testing) would + otherwise invalidate the 30/60/10 fixtures below. Scoped to this + class only — do NOT widen; other tests may have different assumptions. 
+ """ + with ( + patch("src.dreamer.dream_scheduler.settings.DREAM.DOCUMENT_THRESHOLD", 50), + patch("src.dreamer.dream_scheduler.settings.DREAM.ENABLED_TYPES", ["omni"]), + ): + yield + + async def _make_collection( + self, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + ) -> models.Collection: + """Helper: create a Collection in the test workspace with no dream metadata.""" + workspace, peer = sample_data + collection = models.Collection( + observer=peer.name, + observed=peer.name, + workspace_name=workspace.name, + internal_metadata={}, + ) + db_session.add(collection) + await db_session.commit() + return collection + + async def _insert_doc( + self, + db_session: AsyncSession, + collection: models.Collection, + level: str, + ) -> None: + """Helper: insert one Document at the given level.""" + db_session.add( + models.Document( + content="test", + level=level, + workspace_name=collection.workspace_name, + observer=collection.observer, + observed=collection.observed, + ) + ) - The key architectural change was: - - OLD: schedule_dream(document_count) -> _delayed_dream(document_count) -> execute_dream(document_count) - - NEW: schedule_dream() -> _delayed_dream() -> execute_dream() queries count internally + @pytest.mark.asyncio + async def test_mixed_levels_below_explicit_threshold( + self, + dream_scheduler: DreamScheduler, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + ): + """30 explicit + 40 deductive + 10 inductive → should NOT trigger. - We verify this by mocking the database to return a specific count and - checking that enqueue_dream receives that count. + Total doc count = 80 (would trigger under the buggy unfiltered count), + but explicit count = 30 < threshold 50, so the correct behavior is to + NOT schedule a dream. This is the core regression: the fix must reject + this scenario. """ - from contextlib import asynccontextmanager - from unittest.mock import MagicMock - - from src import models - from src.schemas import ( - ResolvedConfiguration, - ResolvedDreamConfiguration, - ResolvedPeerCardConfiguration, - ResolvedReasoningConfiguration, - ResolvedSummaryConfiguration, + collection = await self._make_collection(db_session, sample_data) + for _ in range(30): + await self._insert_doc(db_session, collection, "explicit") + for _ in range(40): + await self._insert_doc(db_session, collection, "deductive") + for _ in range(10): + await self._insert_doc(db_session, collection, "inductive") + await db_session.commit() + + with patch.object(dream_scheduler, "schedule_dream", new_callable=AsyncMock): + scheduled = await check_and_schedule_dream(db_session, collection) + + assert scheduled is False, ( + "Threshold should filter on explicit level only — dreamer output " + "(deductive/inductive) must not count toward the trigger." 
) - workspace_name = "test_workspace" - observer = "bob" - observed = "bob" - session_name = "test_session" - - # The document count that the database will return - CURRENT_DOC_COUNT = 42 - - # Track what document_count is passed to enqueue_dream - captured_document_count: int | None = None - - async def capture_enqueue_dream( - _ws_name: str, - observer: str, # pyright: ignore[reportUnusedParameter] - observed: str, # pyright: ignore[reportUnusedParameter] - dream_type: Any, # pyright: ignore[reportUnusedParameter] - document_count: int, - session_name: str, # pyright: ignore[reportUnusedParameter] - ) -> None: - nonlocal captured_document_count - captured_document_count = document_count - - # Create mock database session that returns our test data - mock_session = MagicMock() - mock_workspace = MagicMock(spec=models.Workspace) - mock_db_session = MagicMock(spec=models.Session) - - # Mock scalar to return session_name for first call, document count for second - scalar_call_count = 0 - - async def mock_scalar(_stmt: Any) -> str | int: - nonlocal scalar_call_count - scalar_call_count += 1 - if scalar_call_count == 1: - return session_name # First call gets session_name from documents - else: - return CURRENT_DOC_COUNT # Second call gets document count - - mock_session.scalar = mock_scalar - - @asynccontextmanager - async def mock_tracked_db(_: str | None = None): - yield mock_session + @pytest.mark.asyncio + async def test_explicit_only_at_threshold( + self, + dream_scheduler: DreamScheduler, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + ): + """60 explicit + 0 derived → should trigger (60 ≥ threshold 50).""" + collection = await self._make_collection(db_session, sample_data) + for _ in range(60): + await self._insert_doc(db_session, collection, "explicit") + await db_session.commit() - with ( - patch( - "src.dreamer.dream_scheduler.tracked_db", - mock_tracked_db, - ), - patch( - "src.deriver.enqueue.enqueue_dream", - side_effect=capture_enqueue_dream, - ), - patch( - "src.crud.get_session", - return_value=mock_db_session, - ), - patch( - "src.crud.get_workspace", - return_value=mock_workspace, - ), - patch( - "src.utils.config_helpers.get_configuration", - return_value=ResolvedConfiguration( - reasoning=ResolvedReasoningConfiguration(enabled=True), - peer_card=ResolvedPeerCardConfiguration(use=True, create=True), - summary=ResolvedSummaryConfiguration( - enabled=True, - messages_per_short_summary=10, - messages_per_long_summary=20, - ), - dream=ResolvedDreamConfiguration(enabled=True), - ), - ), - ): - # Execute the dream - await dream_scheduler.execute_dream( - workspace_name, - DreamType.OMNI, - observer=observer, - observed=observed, - ) + with patch.object( + dream_scheduler, "schedule_dream", new_callable=AsyncMock + ) as mock_schedule: + scheduled = await check_and_schedule_dream(db_session, collection) + + assert scheduled is True + assert mock_schedule.called, "schedule_dream should fire when threshold met" + + @pytest.mark.asyncio + async def test_contradiction_excluded_from_count( + self, + dream_scheduler: DreamScheduler, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + ): + """Contradiction-level docs are dreamer output — must not count. + + 100 contradictions + 10 explicit → explicit=10 < threshold=50, no trigger. + Confirms the positive `== "explicit"` filter excludes contradiction by + construction (same as deductive/inductive). 
+ """ + collection = await self._make_collection(db_session, sample_data) + for _ in range(100): + await self._insert_doc(db_session, collection, "contradiction") + for _ in range(10): + await self._insert_doc(db_session, collection, "explicit") + await db_session.commit() - # Verify that execute_dream queried the document count (2 scalar calls) - assert ( - scalar_call_count == 2 - ), "Should have queried session_name and document count" + with patch.object(dream_scheduler, "schedule_dream", new_callable=AsyncMock): + scheduled = await check_and_schedule_dream(db_session, collection) - # Verify that enqueue_dream received the CURRENT document count (42), - # proving that execute_dream queries the count at execution time - assert captured_document_count == CURRENT_DOC_COUNT + assert scheduled is False class TestEnqueueCancelsDreamsCorrectly: diff --git a/tests/dreamer/test_dreamer_integration.py b/tests/dreamer/test_dreamer_integration.py new file mode 100644 index 000000000..5f60196a4 --- /dev/null +++ b/tests/dreamer/test_dreamer_integration.py @@ -0,0 +1,598 @@ +"""Integration tests for the dream completion write. + +Finding 3 (code-level) relocates `last_dream_at` from enqueue time to +dream-completion time (in `process_dream`). These tests exercise the real +Postgres JSONB merge via `tracked_db` to verify the write lands in the +collection's internal_metadata on successful dreams — and critically, +does NOT land on failures or exceptions. +""" + +from datetime import datetime, timedelta +from typing import Any +from unittest.mock import AsyncMock, patch + +import pytest +import pytest_asyncio +from sqlalchemy import select +from sqlalchemy.ext.asyncio import AsyncSession + +from src import models +from src.deriver.enqueue import enqueue_dream +from src.dreamer.dream_scheduler import ( + DreamScheduler, + check_and_schedule_dream, + set_dream_scheduler, +) +from src.dreamer.orchestrator import DreamResult, process_dream +from src.schemas import ( + DreamType, + ResolvedConfiguration, + ResolvedDreamConfiguration, + ResolvedPeerCardConfiguration, + ResolvedReasoningConfiguration, + ResolvedSummaryConfiguration, +) +from src.utils.queue_payload import DreamPayload + + +@pytest_asyncio.fixture +async def seeded_collection( + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], +) -> models.Collection: + """Create a Collection with an empty dream metadata dict.""" + workspace, peer = sample_data + collection = models.Collection( + observer=peer.name, + observed=peer.name, + workspace_name=workspace.name, + internal_metadata={}, + ) + db_session.add(collection) + await db_session.commit() + await db_session.refresh(collection) + return collection + + +def _make_dream_result() -> DreamResult: + """Build a minimal non-null DreamResult for happy-path tests.""" + return DreamResult( + run_id="test_run_01", + specialists_run=["deduction", "induction"], + deduction_success=True, + induction_success=True, + surprisal_enabled=False, + surprisal_conclusion_count=0, + total_iterations=3, + total_duration_ms=1234.5, + input_tokens=100, + output_tokens=50, + ) + + +async def _get_dream_metadata( + db_session: AsyncSession, collection: models.Collection +) -> dict[str, Any]: + """Re-fetch collection and return its internal_metadata['dream'] dict (or {}).""" + await db_session.refresh(collection) + stmt = select(models.Collection).where(models.Collection.id == collection.id) + refreshed = (await db_session.execute(stmt)).scalar_one() + dream_meta: dict[str, Any] = 
refreshed.internal_metadata.get("dream", {}) + return dream_meta + + +class TestLastDreamAtCompletionWrite: + """Regression tests for Finding 3: `last_dream_at` written at completion.""" + + @pytest.mark.asyncio + async def test_happy_path_writes_last_dream_at( + self, + db_session: AsyncSession, + seeded_collection: models.Collection, + ): + """Non-null DreamResult → `last_dream_at` is set in internal_metadata.""" + payload = DreamPayload( + dream_type=DreamType.OMNI, + observer=seeded_collection.observer, + observed=seeded_collection.observed, + ) + + with patch( + "src.dreamer.orchestrator.run_dream", + new=AsyncMock(return_value=_make_dream_result()), + ): + await process_dream(payload, seeded_collection.workspace_name) + + dream_meta = await _get_dream_metadata(db_session, seeded_collection) + assert ( + "last_dream_at" in dream_meta + ), "process_dream must write last_dream_at when run_dream returns a result" + # Must be a tz-aware UTC ISO timestamp. A naive datetime.now().isoformat() + # would pass a loose "T in string" check but corrupt the 8h guard math + # against tz-aware now() comparisons downstream. + parsed = datetime.fromisoformat(dream_meta["last_dream_at"]) + assert ( + parsed.tzinfo is not None + ), f"last_dream_at must be timezone-aware, got {dream_meta['last_dream_at']!r}" + assert parsed.utcoffset() == timedelta( + 0 + ), f"last_dream_at must be UTC, got offset {parsed.utcoffset()}" + + @pytest.mark.asyncio + async def test_failure_path_leaves_last_dream_at_null( + self, + db_session: AsyncSession, + seeded_collection: models.Collection, + ): + """run_dream returns None → `last_dream_at` stays absent. + + Lenient success criteria: the guard only advances on completion of a + non-null DreamResult. Failed runs (None return) must not count. + """ + payload = DreamPayload( + dream_type=DreamType.OMNI, + observer=seeded_collection.observer, + observed=seeded_collection.observed, + ) + + with patch( + "src.dreamer.orchestrator.run_dream", + new=AsyncMock(return_value=None), + ): + await process_dream(payload, seeded_collection.workspace_name) + + dream_meta = await _get_dream_metadata(db_session, seeded_collection) + assert "last_dream_at" not in dream_meta, ( + "last_dream_at must NOT be written when run_dream returns None " + "(failed dream). The guard should not falsely advance." + ) + + @pytest.mark.asyncio + async def test_completion_writes_guard_pair_atomically( + self, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + ): + """Completion writes last_dream_at AND last_dream_document_count together. + + Both guard fields advance only on successful consolidation, recomputed + inside the row-locked RMW block so the pair stays coherent. Baseline + reflects the actual explicit-doc count at completion, not a stale + enqueue-time snapshot. 
+ """ + workspace, peer = sample_data + collection = models.Collection( + observer=peer.name, + observed=peer.name, + workspace_name=workspace.name, + internal_metadata={}, + ) + db_session.add(collection) + for i in range(7): + db_session.add( + models.Document( + content=f"explicit {i}", + level="explicit", + workspace_name=workspace.name, + observer=peer.name, + observed=peer.name, + ) + ) + await db_session.commit() + await db_session.refresh(collection) + + payload = DreamPayload( + dream_type=DreamType.OMNI, + observer=collection.observer, + observed=collection.observed, + ) + + with patch( + "src.dreamer.orchestrator.run_dream", + new=AsyncMock(return_value=_make_dream_result()), + ): + await process_dream(payload, collection.workspace_name) + + dream_meta = await _get_dream_metadata(db_session, collection) + assert "last_dream_at" in dream_meta, "last_dream_at must be written" + assert dream_meta.get("last_dream_document_count") == 7, ( + "last_dream_document_count must equal the current explicit-doc count " + "at completion time; both guard fields advance together." + ) + + @pytest.mark.asyncio + async def test_exception_path_leaves_last_dream_at_null( + self, + db_session: AsyncSession, + seeded_collection: models.Collection, + ): + """run_dream raises → `last_dream_at` stays absent. + + `process_dream` catches exceptions (logs + marks task processed without + re-raising) so the queue worker doesn't get stuck retrying. The guard + write must not happen in the exception path — it's inside the + `if result is not None` block, which never executes if an exception + bypassed the assignment. + """ + payload = DreamPayload( + dream_type=DreamType.OMNI, + observer=seeded_collection.observer, + observed=seeded_collection.observed, + ) + + with patch( + "src.dreamer.orchestrator.run_dream", + new=AsyncMock(side_effect=RuntimeError("simulated specialist crash")), + ): + # process_dream swallows exceptions internally; no re-raise expected + await process_dream(payload, seeded_collection.workspace_name) + + dream_meta = await _get_dream_metadata(db_session, seeded_collection) + assert "last_dream_at" not in dream_meta, ( + "last_dream_at must NOT be written when run_dream raises. " + "process_dream swallows the exception but the guard write must " + "not occur." + ) + + +class TestEnqueueDreamLeavesMetadataAlone: + """enqueue_dream must not touch collection.internal_metadata["dream"]. + + After the Loop 4 fix, the guard fields advance only on successful + completion in process_dream. enqueue_dream should preserve whatever + metadata is already on the collection (e.g. a prior completion's + timestamp and baseline) and add nothing of its own. 
+    """
+
+    @pytest.mark.asyncio
+    async def test_enqueue_does_not_modify_dream_metadata(
+        self,
+        db_session: AsyncSession,
+        sample_data: tuple[models.Workspace, models.Peer],
+    ):
+        workspace, peer = sample_data
+        prior_metadata = {
+            "dream": {
+                "last_dream_at": "2026-04-17T12:00:00+00:00",
+                "last_dream_document_count": 99,
+            }
+        }
+        collection = models.Collection(
+            observer=peer.name,
+            observed=peer.name,
+            workspace_name=workspace.name,
+            internal_metadata=prior_metadata,
+        )
+        db_session.add(collection)
+        await db_session.commit()
+        await db_session.refresh(collection)
+
+        await enqueue_dream(
+            workspace_name=workspace.name,
+            observer=collection.observer,
+            observed=collection.observed,
+            dream_type=DreamType.OMNI,
+            session_name=None,
+        )
+
+        dream_meta = await _get_dream_metadata(db_session, collection)
+        assert dream_meta == prior_metadata["dream"], (
+            "enqueue_dream must leave dream metadata untouched; the guard fields "
+            "advance only at completion."
+        )
+
+
+class TestExecuteDreamSessionFilter:
+    """Regression test for the session lookup asymmetry in execute_dream.
+
+    The session_name lookup filters to `level == "explicit"`, symmetric with
+    check_and_schedule_dream's count query. Otherwise a derived doc could win
+    ORDER BY created_at DESC and the dream would be scoped to a session that
+    wasn't in the triggering document cohort.
+    """
+
+    @pytest.mark.asyncio
+    async def test_session_name_picked_from_latest_explicit_doc(
+        self,
+        db_session: AsyncSession,
+        sample_data: tuple[models.Workspace, models.Peer],
+    ):
+        """Latest explicit session wins even when a newer deductive doc exists.
+
+        Seeds:
+        - Session A (older): one explicit-level Document
+        - Session B (newer): one deductive-level Document (dreamer output)
+
+        Without the explicit filter on the session lookup, the newer deductive
+        doc's session_name (B) would be returned. With the filter, A is
+        returned — matching the explicit-only count query in
+        check_and_schedule_dream.
+        """
+        workspace, peer = sample_data
+
+        # Pre-create the collection to mirror production state. After Loop 4,
+        # enqueue_dream no longer loads the collection at all; this test only
+        # asserts the kwargs passed to enqueue_dream (process_dream's baseline
+        # write is not exercised here).
+        collection = models.Collection(
+            observer=peer.name,
+            observed=peer.name,
+            workspace_name=workspace.name,
+            internal_metadata={},
+        )
+        db_session.add(collection)
+
+        # Two sessions: A (older), B (newer). Insert A first so its created_at
+        # is strictly earlier than B's.
+        session_a = models.Session(name="session_a", workspace_name=workspace.name)
+        db_session.add(session_a)
+        await db_session.commit()
+        await db_session.refresh(session_a)
+
+        session_b = models.Session(name="session_b", workspace_name=workspace.name)
+        db_session.add(session_b)
+        await db_session.commit()
+        await db_session.refresh(session_b)
+
+        # Older explicit doc in session A.
+        explicit_doc = models.Document(
+            content="explicit observation",
+            level="explicit",
+            workspace_name=workspace.name,
+            observer=peer.name,
+            observed=peer.name,
+            session_name=session_a.name,
+        )
+        db_session.add(explicit_doc)
+        await db_session.commit()
+
+        # Newer deductive doc in session B. Without the explicit filter on the
+        # session lookup, this doc's session_name (B) would win on ORDER BY
+        # created_at DESC — even though the count query ignores it.
+ deductive_doc = models.Document( + content="deductive observation", + level="deductive", + workspace_name=workspace.name, + observer=peer.name, + observed=peer.name, + session_name=session_b.name, + ) + db_session.add(deductive_doc) + await db_session.commit() + + captured_kwargs: dict[str, Any] = {} + + async def capture_enqueue_dream( + workspace_name: str, + *, + observer: str, + observed: str, + dream_type: Any, + session_name: str, + ) -> None: + captured_kwargs.update( + { + "workspace_name": workspace_name, + "observer": observer, + "observed": observed, + "dream_type": dream_type, + "session_name": session_name, + } + ) + + # Fresh scheduler instance; ENABLED patched so execute_dream runs. + DreamScheduler.reset_singleton() + scheduler = DreamScheduler() + set_dream_scheduler(scheduler) + + try: + with ( + patch("src.dreamer.dream_scheduler.settings.DREAM.ENABLED", True), + patch( + "src.deriver.enqueue.enqueue_dream", + side_effect=capture_enqueue_dream, + ), + patch( + "src.utils.config_helpers.get_configuration", + return_value=ResolvedConfiguration( + reasoning=ResolvedReasoningConfiguration(enabled=True), + peer_card=ResolvedPeerCardConfiguration(use=True, create=True), + summary=ResolvedSummaryConfiguration( + enabled=True, + messages_per_short_summary=10, + messages_per_long_summary=20, + ), + dream=ResolvedDreamConfiguration(enabled=True), + ), + ), + ): + await scheduler.execute_dream( + workspace.name, + DreamType.OMNI, + observer=peer.name, + observed=peer.name, + ) + finally: + DreamScheduler.reset_singleton() + + assert captured_kwargs, ( + "enqueue_dream must be called — execute_dream returned early, " + "likely because the session lookup returned no rows (check that " + "the explicit filter matches at least one doc in the fixture)." + ) + assert captured_kwargs["session_name"] == session_a.name, ( + f"Session lookup must filter to level=='explicit' to match the " + f"baseline count query. Got session_name=" + f"{captured_kwargs['session_name']!r}, expected {session_a.name!r} " + f"(the older session with the only explicit doc). Picking " + f"{session_b.name!r} means the session came from a derived doc " + f"that the count query ignores — the dream would be scoped to a " + f"session that wasn't in the triggering cohort." + ) + + +class TestGuardPairCoherence: + """Loop 4 coherence tests for the invariant preserved by the atomic pair + write and the in-flight stampede defense. + + Invariant: From the moment a dream is scheduled until it completes or + fails, no second dream may be enqueued for the same + (workspace, observer, observed) — and the baseline count advances only + when consolidation actually happened. + """ + + @pytest_asyncio.fixture + async def _scheduler(self): + DreamScheduler.reset_singleton() + scheduler = DreamScheduler() + set_dream_scheduler(scheduler) + with ( + patch("src.dreamer.dream_scheduler.settings.DREAM.ENABLED", True), + patch("src.dreamer.dream_scheduler.settings.DREAM.DOCUMENT_THRESHOLD", 50), + patch("src.dreamer.dream_scheduler.settings.DREAM.ENABLED_TYPES", ["omni"]), + ): + yield scheduler + DreamScheduler.reset_singleton() + + @pytest.mark.asyncio + async def test_pending_queue_item_blocks_second_schedule( + self, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + _scheduler: DreamScheduler, + ): + """In-flight window: pending QueueItem must block a second schedule. 
+ + Walks the stampede timeline: enqueue fires a dream, more explicit + docs arrive past the threshold again, but check_and_schedule_dream + sees the pending queue row and returns False — no second QueueItem. + """ + workspace, peer = sample_data + collection = models.Collection( + observer=peer.name, + observed=peer.name, + workspace_name=workspace.name, + internal_metadata={}, + ) + db_session.add(collection) + for i in range(50): + db_session.add( + models.Document( + content=f"explicit {i}", + level="explicit", + workspace_name=workspace.name, + observer=peer.name, + observed=peer.name, + ) + ) + await db_session.commit() + await db_session.refresh(collection) + + await enqueue_dream( + workspace_name=workspace.name, + observer=peer.name, + observed=peer.name, + dream_type=DreamType.OMNI, + session_name=None, + ) + + pending_q = select(models.QueueItem).where( + models.QueueItem.task_type == "dream", + models.QueueItem.processed == False, # noqa: E712 + models.QueueItem.workspace_name == workspace.name, + ) + pending_rows = (await db_session.execute(pending_q)).scalars().all() + assert len(pending_rows) == 1, ( + "enqueue_dream must insert exactly one pending dream QueueItem " + "(baseline for the stampede test)." + ) + + for i in range(50, 100): + db_session.add( + models.Document( + content=f"explicit {i}", + level="explicit", + workspace_name=workspace.name, + observer=peer.name, + observed=peer.name, + ) + ) + await db_session.commit() + await db_session.refresh(collection) + + scheduled = await check_and_schedule_dream(db_session, collection) + + assert scheduled is False, ( + "check_and_schedule_dream must return False while a dream is " + "pending in the queue — the in-flight window must not admit a " + "second schedule regardless of how many explicit docs arrive." + ) + pending_rows_after = (await db_session.execute(pending_q)).scalars().all() + assert len(pending_rows_after) == 1, ( + "No second QueueItem may be inserted while the first is pending. " + f"Found {len(pending_rows_after)} pending rows." + ) + + @pytest.mark.asyncio + async def test_silent_failure_allows_retry_on_same_corpus( + self, + db_session: AsyncSession, + sample_data: tuple[models.Workspace, models.Peer], + _scheduler: DreamScheduler, + ): + """Failed dream (run_dream returns None) leaves both guard fields + untouched, so check_and_schedule_dream re-schedules on the same + corpus instead of silently consuming the baseline. + """ + workspace, peer = sample_data + collection = models.Collection( + observer=peer.name, + observed=peer.name, + workspace_name=workspace.name, + internal_metadata={}, + ) + db_session.add(collection) + for i in range(50): + db_session.add( + models.Document( + content=f"explicit {i}", + level="explicit", + workspace_name=workspace.name, + observer=peer.name, + observed=peer.name, + ) + ) + await db_session.commit() + await db_session.refresh(collection) + + payload = DreamPayload( + dream_type=DreamType.OMNI, + observer=peer.name, + observed=peer.name, + ) + with patch( + "src.dreamer.orchestrator.run_dream", + new=AsyncMock(return_value=None), + ): + await process_dream(payload, workspace.name) + + dream_meta = await _get_dream_metadata(db_session, collection) + assert dream_meta.get("last_dream_document_count", 0) == 0, ( + "Failed dream must not advance last_dream_document_count; " + "pre-Loop-4 the baseline was consumed at enqueue time and a " + "silent failure would lock out retries on the same corpus." 
+ ) + assert ( + "last_dream_at" not in dream_meta + ), "Failed dream must not advance last_dream_at either." + + with patch.object( + _scheduler, "schedule_dream", new_callable=AsyncMock + ) as mock_schedule: + scheduled = await check_and_schedule_dream(db_session, collection) + + assert scheduled is True, ( + "After a silent failure both guards should still allow the " + "same-corpus retry — 50 explicit docs ≥ threshold, no prior " + "last_dream_at, no pending queue item." + ) + assert mock_schedule.called, "schedule_dream must be invoked on the retry path." diff --git a/tests/routes/test_workspaces.py b/tests/routes/test_workspaces.py index 36bc83128..031e5a904 100644 --- a/tests/routes/test_workspaces.py +++ b/tests/routes/test_workspaces.py @@ -1,9 +1,12 @@ from typing import Any +from unittest.mock import AsyncMock, patch import pytest from fastapi.testclient import TestClient from nanoid import generate as generate_nanoid +from sqlalchemy.ext.asyncio import AsyncSession +from src import models from src.models import Peer, Workspace @@ -569,3 +572,59 @@ def test_delete_workspace_after_session_deletion(client: TestClient): # Now workspace deletion should succeed response = client.delete(f"/v3/workspaces/{workspace_name}") assert response.status_code == 202 + + +@pytest.mark.asyncio +async def test_schedule_dream_invokes_enqueue_dream( + client: TestClient, + db_session: AsyncSession, + sample_data: tuple[Workspace, Peer], +): + """POST /schedule_dream forwards observer/observed/dream_type to enqueue_dream. + + After Loop 4, the manual schedule_dream route no longer touches the + baseline count — the orchestrator writes both guard fields atomically on + successful completion. The route's job shrinks to forwarding the dream + request. + """ + workspace, peer = sample_data + + collection = models.Collection( + observer=peer.name, + observed=peer.name, + workspace_name=workspace.name, + internal_metadata={}, + ) + db_session.add(collection) + await db_session.commit() + + captured: dict[str, Any] = {} + + async def fake_enqueue_dream(*args: Any, **kwargs: Any) -> None: + captured["args"] = args + captured["kwargs"] = kwargs + + with ( + patch("src.routers.workspaces.settings.DREAM.ENABLED", True), + patch( + "src.routers.workspaces.enqueue_dream", + new=AsyncMock(side_effect=fake_enqueue_dream), + ), + ): + response = client.post( + f"/v3/workspaces/{workspace.name}/schedule_dream", + json={ + "observer": peer.name, + "observed": peer.name, + "dream_type": "omni", + }, + ) + + assert response.status_code == 204, response.text + assert "kwargs" in captured, "enqueue_dream was not called" + assert captured["kwargs"]["observer"] == peer.name + assert captured["kwargs"]["observed"] == peer.name + assert "document_count" not in captured["kwargs"], ( + "Loop 4: enqueue_dream no longer accepts document_count; the baseline " + "is written atomically with last_dream_at in process_dream." + ) From 2e444f82d69d1c281402eae8d4cfbfeb107d8b0b Mon Sep 17 00:00:00 2001 From: Jonathan Irvin Date: Sun, 3 May 2026 21:02:29 -0500 Subject: [PATCH 41/46] feat(sync): re-apply deployment-critical adjacencies on top of upstream merge MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These edits should have been folded into the merge commit (a901f34) but were left uncommitted — pushing now to actually deliver CF Gateway support and clean up leftovers from the -X theirs auto-resolution. 
src/config.py - Add LLMSettings.CF_GATEWAY_AUTH_TOKEN (single global needed for the cf-aig-authorization header on any provider override client whose base_url targets a CF gateway URL). src/llm/registry.py - Inject cf-aig-authorization header in get_openai_override_client, get_anthropic_override_client, and get_gemini_override_client when base_url contains 'gateway.ai.cloudflare.com' AND LLM.CF_GATEWAY_AUTH_TOKEN is set. Rides on the existing openai/ anthropic/gemini transports — no parallel CF backend. src/embedding_client.py - Mirror the same header injection on the openai/gemini branches so embeddings through CF Gateway authenticate correctly. Helper is duplicated locally so the embedding client doesn't depend on the LLM runtime registry module. src/dreamer/specialists.py - Drop get_provider() / get_thinking_budget() override methods on BaseSpecialist + the per-specialist references to settings.DREAM. DEDUCTION_PROVIDER / INDUCTION_PROVIDER / *_THINKING_BUDGET_TOKENS. Those settings fields no longer exist upstream — same functionality is reachable via DREAM_DEDUCTION_MODEL_CONFIG__TRANSPORT etc. - Drop the orphan thinking_budget_tokens=llm_settings.THINKING_BUDGET_TOKENS arg on the honcho_llm_call site that survived the auto-merge — the value now lives on model_config which is already passed. src/main.py - ruff isort fix (autofixed) — uuid/time import order. Verification: ruff check src/ ✓, basedpyright src/ ✓ (0 errors). --- src/config.py | 4 ++++ src/dreamer/specialists.py | 21 ------------------- src/embedding_client.py | 43 +++++++++++++++++++++++++++++--------- src/llm/registry.py | 33 ++++++++++++++++++++++++++++- src/main.py | 2 +- 5 files changed, 70 insertions(+), 33 deletions(-) diff --git a/src/config.py b/src/config.py index b3f1b4920..2d19911b5 100644 --- a/src/config.py +++ b/src/config.py @@ -648,6 +648,10 @@ class LLMSettings(HonchoSettings): OPENAI_API_KEY: str | None = None GEMINI_API_KEY: str | None = None + # Cloudflare AI Gateway: when set, injected as cf-aig-authorization header + # on any provider override client whose base_url targets a CF gateway. + CF_GATEWAY_AUTH_TOKEN: str | None = None + # General LLM settings DEFAULT_MAX_TOKENS: Annotated[int, Field(default=1000, gt=0, le=100_000)] = 2500 diff --git a/src/dreamer/specialists.py b/src/dreamer/specialists.py index 29668a042..c7277586b 100644 --- a/src/dreamer/specialists.py +++ b/src/dreamer/specialists.py @@ -87,14 +87,6 @@ def get_model_config(self) -> ConfiguredModelSettings: """Get the configured model to use for this specialist.""" ... 
- def get_provider(self) -> str | None: - """Get the provider override for this specialist, or None to inherit from DREAM.""" - return None - - def get_thinking_budget(self) -> int | None: - """Get the thinking budget override, or None to inherit from DREAM.""" - return None - def get_max_tokens(self) -> int: """Get max output tokens for this specialist.""" return 16384 @@ -249,7 +241,6 @@ def iteration_callback(data: Any) -> None: messages=messages, track_name=f"Dreamer/{self.name}", iteration_callback=iteration_callback, - thinking_budget_tokens=llm_settings.THINKING_BUDGET_TOKENS, ) # Log metrics @@ -342,12 +333,6 @@ def get_model_config(self) -> ConfiguredModelSettings: specialist_name="DREAM DEDUCTION", ) - def get_provider(self) -> str | None: - return settings.DREAM.DEDUCTION_PROVIDER - - def get_thinking_budget(self) -> int | None: - return settings.DREAM.DEDUCTION_THINKING_BUDGET_TOKENS - def get_max_tokens(self) -> int: return 8192 @@ -496,12 +481,6 @@ def get_model_config(self) -> ConfiguredModelSettings: specialist_name="DREAM INDUCTION", ) - def get_provider(self) -> str | None: - return settings.DREAM.INDUCTION_PROVIDER - - def get_thinking_budget(self) -> int | None: - return settings.DREAM.INDUCTION_THINKING_BUDGET_TOKENS - def get_max_tokens(self) -> int: return 8192 diff --git a/src/embedding_client.py b/src/embedding_client.py index 1217187a6..56f13492b 100644 --- a/src/embedding_client.py +++ b/src/embedding_client.py @@ -2,7 +2,7 @@ import logging import threading from collections import defaultdict -from typing import Any, NamedTuple +from typing import NamedTuple import tiktoken from google import genai @@ -22,6 +22,19 @@ class BatchItem(NamedTuple): chunk_index: int +def _cf_gateway_headers(base_url: str | None) -> dict[str, str] | None: + """Cloudflare AI Gateway requires a per-account auth token in the + cf-aig-authorization header. Mirrors src/llm/registry._cf_gateway_headers + so the embedding client doesn't depend on the LLM runtime registry. + """ + if not base_url or "gateway.ai.cloudflare.com" not in base_url: + return None + token = settings.LLM.CF_GATEWAY_AUTH_TOKEN + if not token: + return None + return {"cf-aig-authorization": f"Bearer {token}"} + + class _EmbeddingClient: """ Embedding client supporting OpenAI and Gemini with chunking and batching support. 
@@ -42,11 +55,13 @@ def __init__( if self.transport == "gemini": if not config.api_key: raise ValueError("Gemini API key is required") - http_options = ( - genai_types.HttpOptions(base_url=config.base_url) - if config.base_url - else None - ) + cf_headers = _cf_gateway_headers(config.base_url) + if config.base_url or cf_headers: + http_options = genai_types.HttpOptions( + base_url=config.base_url, headers=cf_headers + ) + else: + http_options = None self.client: genai.Client | AsyncOpenAI = genai.Client( api_key=config.api_key, http_options=http_options, @@ -58,10 +73,18 @@ def __init__( else: # openai if not config.api_key: raise ValueError("OpenAI API key is required") - self.client = AsyncOpenAI( - api_key=config.api_key, - base_url=config.base_url, - ) + cf_headers = _cf_gateway_headers(config.base_url) + if cf_headers: + self.client = AsyncOpenAI( + api_key=config.api_key, + base_url=config.base_url, + default_headers=cf_headers, + ) + else: + self.client = AsyncOpenAI( + api_key=config.api_key, + base_url=config.base_url, + ) self.max_embedding_tokens = max_input_tokens self.max_batch_size = 2048 # OpenAI batch limit diff --git a/src/llm/registry.py b/src/llm/registry.py index 73cf60c8b..76e230b48 100644 --- a/src/llm/registry.py +++ b/src/llm/registry.py @@ -63,6 +63,11 @@ def get_openai_override_client( base_url: str | None, api_key: str | None ) -> AsyncOpenAI: """OpenAI client for a specific (base_url, api_key) pair. Cached by key.""" + headers = _cf_gateway_headers(base_url) + if headers: + return AsyncOpenAI( + api_key=api_key, base_url=base_url, default_headers=headers + ) return AsyncOpenAI(api_key=api_key, base_url=base_url) @@ -72,6 +77,14 @@ def get_anthropic_override_client( api_key: str | None, ) -> AsyncAnthropic: """Anthropic client for a specific (base_url, api_key) pair. Cached by key.""" + headers = _cf_gateway_headers(base_url) + if headers: + return AsyncAnthropic( + api_key=api_key, + base_url=base_url, + timeout=600.0, + default_headers=headers, + ) return AsyncAnthropic(api_key=api_key, base_url=base_url, timeout=600.0) @@ -80,10 +93,28 @@ def get_gemini_override_client( base_url: str | None, api_key: str | None ) -> genai.Client: """Gemini client for a specific (base_url, api_key) pair. Cached by key.""" - http_options = genai_types.HttpOptions(base_url=base_url) if base_url else None + headers = _cf_gateway_headers(base_url) + if base_url or headers: + http_options = genai_types.HttpOptions(base_url=base_url, headers=headers) + else: + http_options = None return genai.Client(api_key=api_key, http_options=http_options) +def _cf_gateway_headers(base_url: str | None) -> dict[str, str] | None: + """Cloudflare AI Gateway requires a per-account auth token in the + cf-aig-authorization header when account-scoped auth is enabled. Inject it + on any override client routed through a CF gateway URL when + LLM.CF_GATEWAY_AUTH_TOKEN is configured. + """ + if not base_url or "gateway.ai.cloudflare.com" not in base_url: + return None + token = settings.LLM.CF_GATEWAY_AUTH_TOKEN + if not token: + return None + return {"cf-aig-authorization": f"Bearer {token}"} + + # Module-level default-client registry, populated at import time. Tests patch # this dict via `patch.dict(CLIENTS, {...})` to inject mock provider clients. 
CLIENTS: dict[ModelTransport, ProviderClient] = {} diff --git a/src/main.py b/src/main.py index 7c1e8f8c4..ba78b8e5f 100644 --- a/src/main.py +++ b/src/main.py @@ -1,8 +1,8 @@ import logging import os import re -import uuid import time +import uuid from collections.abc import Awaitable, Callable from contextlib import asynccontextmanager from typing import TYPE_CHECKING From 34ad2ac535cdc2c59e654feaf030da0065e8252a Mon Sep 17 00:00:00 2001 From: Jonathan Irvin Date: Sun, 3 May 2026 21:30:34 -0500 Subject: [PATCH 42/46] feat(llm): add hit_max_iterations flag on tool-loop response MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Telemetry-only signal: True when the loop exited via the max-iterations synthesis path rather than the model deciding to stop. Distinguishes "model didn't converge" from natural termination so downstream observability can label the two cases differently. No emitter changes — flag is set but no consumer reads it yet. --- src/llm/tool_loop.py | 1 + src/llm/types.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/src/llm/tool_loop.py b/src/llm/tool_loop.py index 2db87e9a6..8d29aa033 100644 --- a/src/llm/tool_loop.py +++ b/src/llm/tool_loop.py @@ -470,6 +470,7 @@ async def _final_call() -> HonchoLLMCallResponse[Any]: final_response = await final_call_func() final_response.tool_calls_made = all_tool_calls final_response.iterations = iteration + 1 + final_response.hit_max_iterations = True final_response.input_tokens = total_input_tokens + final_response.input_tokens final_response.output_tokens = total_output_tokens + final_response.output_tokens final_response.cache_creation_input_tokens = ( diff --git a/src/llm/types.py b/src/llm/types.py index 7af5372d1..d40d42d07 100644 --- a/src/llm/types.py +++ b/src/llm/types.py @@ -66,6 +66,9 @@ class HonchoLLMCallResponse(BaseModel, Generic[T]): tool_calls_made: list[dict[str, Any]] = Field(default_factory=list) iterations: int = 0 """Number of LLM calls made in the tool execution loop.""" + hit_max_iterations: bool = False + """True when the tool loop exited via the max-iterations synthesis path + rather than the model deciding to stop. Telemetry-only signal.""" thinking_content: str | None = None # Full thinking blocks with signatures for multi-turn replay (Anthropic only). thinking_blocks: list[dict[str, Any]] = Field(default_factory=list) From 48b3003bba7842c0a9122c1599556d963ad2419b Mon Sep 17 00:00:00 2001 From: Jonathan Irvin Date: Sun, 3 May 2026 21:30:52 -0500 Subject: [PATCH 43/46] feat(telemetry): declare per-LLM-call Prometheus series MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds six new metrics + recorder methods on the existing PrometheusMetrics singleton; no callers yet, so this commit is purely declarative. Series: - llm_calls / llm_call_duration_seconds — counter + histogram per call, labeled by feature × provider × model × outcome. - llm_tokens — input/output/cache_read/cache_creation per feature × provider × model. - llm_tool_calls — per-tool invocation outcome inside the tool loop. - llm_iterations — histogram of iterations consumed per call/outcome. - llm_backup_used — counts failovers from primary to backup provider. Cardinality-bounded: feature × provider × model × outcome ≈ 1.7k series cap. Deliberately no workspace_name label here — these answer "is this model effective for this feature", not "is workspace X slow". 
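As a sketch of the intended call shape (hypothetical label values — the
real call sites land only in the follow-up commits):

    prometheus_metrics.record_llm_call(
        feature="dialectic",
        provider="anthropic",
        model="claude-sonnet-4-5",
        outcome=LLMCallOutcome.SUCCESS.value,
        duration_seconds=2.31,
    )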
LLMCallOutcome enum exported from src.telemetry.prometheus so callers can reference the canonical values without importing from the metrics module directly. --- src/telemetry/prometheus/__init__.py | 2 + src/telemetry/prometheus/metrics.py | 170 +++++++++++++++++++++++++++ 2 files changed, 172 insertions(+) diff --git a/src/telemetry/prometheus/__init__.py b/src/telemetry/prometheus/__init__.py index ab0a26160..a3ff86943 100644 --- a/src/telemetry/prometheus/__init__.py +++ b/src/telemetry/prometheus/__init__.py @@ -11,6 +11,7 @@ DeriverComponents, DeriverTaskTypes, DialecticComponents, + LLMCallOutcome, TokenTypes, metrics_endpoint, prometheus_metrics, @@ -20,6 +21,7 @@ "DeriverComponents", "DeriverTaskTypes", "DialecticComponents", + "LLMCallOutcome", "TokenTypes", "metrics_endpoint", "prometheus_metrics", diff --git a/src/telemetry/prometheus/metrics.py b/src/telemetry/prometheus/metrics.py index 0c9fc15b6..d70aea847 100644 --- a/src/telemetry/prometheus/metrics.py +++ b/src/telemetry/prometheus/metrics.py @@ -64,6 +64,24 @@ class DialecticComponents(Enum): TOTAL = "total" +class LLMCallOutcome(Enum): + """Terminal outcome of a single `honcho_llm_call`. + + Distinguishes "model didn't converge" (max_iterations) from "infra broke" + (timeout/validation/other) so dashboards and alerts can target each + independently. `success_via_backup` is its own bucket so silent failover + rate is observable without parsing logs. + """ + + SUCCESS = "success" + SUCCESS_AFTER_RETRY = "success_after_retry" + SUCCESS_VIA_BACKUP = "success_via_backup" + ERROR_TIMEOUT = "error_timeout" + ERROR_VALIDATION = "error_validation" + ERROR_MAX_ITERATIONS = "error_max_iterations" + ERROR_OTHER = "error_other" + + api_requests_counter = NamespacedCounter( "api_requests", "Total API requests", @@ -192,6 +210,61 @@ class DialecticComponents(Enum): ["namespace", "workspace_name", "session_name", "state"], ) +# ---- Per-LLM-call observability --------------------------------------------- +# Cardinality budget: feature ~6, provider ~4, model ~10, outcome 7 → ~1.7k +# series cap. Deliberately no workspace_name label here: the question these +# answer is "is this model effective for this feature", not "is workspace X +# slow". Per-workspace LLM behavior shows up in dialectic_calls + token +# counters which already carry workspace_name. + +llm_calls_counter = NamespacedCounter( + "llm_calls", + "Total honcho_llm_call invocations by feature, provider, model, outcome", + ["namespace", "feature", "provider", "model", "outcome"], +) + +llm_call_duration_histogram = NamespacedHistogram( + "llm_call_duration_seconds", + "End-to-end honcho_llm_call latency (includes retries and backup failover)", + ["namespace", "feature", "provider", "model", "outcome"], + buckets=(0.1, 0.5, 1, 2, 5, 10, 30, 60, 120, 300, 600, 1800, 3600), +) + +# Distinct from the existing deriver/dialectic/dreamer token counters: +# this one carries provider+model so we can answer "tokens through gemini +# vs glm-5.1" without bouncing through Langfuse. 
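+# Illustrative PromQL against the label set declared below (the exact
+# exported series name depends on how NamespacedCounter namespaces it):
+#   sum by (model) (rate(llm_tokens{feature="dialectic", token_type="input"}[5m]))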
+llm_tokens_counter = NamespacedCounter( + "llm_tokens", + "LLM tokens by feature/provider/model/token_type", + ["namespace", "feature", "provider", "model", "token_type"], +) + +llm_tool_calls_counter = NamespacedCounter( + "llm_tool_calls", + "Individual tool invocations within an LLM tool loop", + ["namespace", "feature", "tool_name", "outcome"], +) + +llm_iterations_histogram = NamespacedHistogram( + "llm_iterations", + "Tool-loop iterations consumed per call (1 = no tool calls)", + ["namespace", "feature", "outcome"], + buckets=(1, 2, 3, 4, 5, 7, 10, 15, 20, 30, 50), +) + +llm_backup_used_counter = NamespacedCounter( + "llm_backup_used", + "Counts when a call's retry chain switched from primary to backup provider", + [ + "namespace", + "feature", + "primary_provider", + "primary_model", + "backup_provider", + "backup_model", + ], +) + @final class PrometheusMetrics: @@ -525,6 +598,103 @@ def set_session_queue_oldest_age( except Exception as e: self._handle_metric_error("set_session_queue_oldest_age", e) + def record_llm_call( + self, + *, + feature: str, + provider: str, + model: str, + outcome: str, + duration_seconds: float, + ) -> None: + try: + llm_calls_counter.labels( + feature=feature, + provider=provider, + model=model, + outcome=outcome, + ).inc() + llm_call_duration_histogram.labels( + feature=feature, + provider=provider, + model=model, + outcome=outcome, + ).observe(duration_seconds) + except Exception as e: + self._handle_metric_error("record_llm_call", e) + + def record_llm_tokens( + self, + *, + feature: str, + provider: str, + model: str, + token_type: str, + count: int, + ) -> None: + if count <= 0: + return + try: + llm_tokens_counter.labels( + feature=feature, + provider=provider, + model=model, + token_type=token_type, + ).inc(count) + except Exception as e: + self._handle_metric_error("record_llm_tokens", e) + + def record_llm_tool_call( + self, + *, + feature: str, + tool_name: str, + outcome: str, + ) -> None: + try: + llm_tool_calls_counter.labels( + feature=feature, + tool_name=tool_name, + outcome=outcome, + ).inc() + except Exception as e: + self._handle_metric_error("record_llm_tool_call", e) + + def observe_llm_iterations( + self, + *, + feature: str, + outcome: str, + iterations: int, + ) -> None: + try: + llm_iterations_histogram.labels( + feature=feature, + outcome=outcome, + ).observe(iterations) + except Exception as e: + self._handle_metric_error("observe_llm_iterations", e) + + def record_llm_backup_used( + self, + *, + feature: str, + primary_provider: str, + primary_model: str, + backup_provider: str, + backup_model: str, + ) -> None: + try: + llm_backup_used_counter.labels( + feature=feature, + primary_provider=primary_provider, + primary_model=primary_model, + backup_provider=backup_provider, + backup_model=backup_model, + ).inc() + except Exception as e: + self._handle_metric_error("record_llm_backup_used", e) + prometheus_metrics = PrometheusMetrics() From 389b9d73f3f541a5c365d7a125d91a3a76c01e49 Mon Sep 17 00:00:00 2001 From: Jonathan Irvin Date: Sun, 3 May 2026 21:31:09 -0500 Subject: [PATCH 44/46] feat(telemetry): add observe_llm_call helper for per-call observability MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduces src/telemetry/llm_call_metrics.py — a context-manager-based wrapper that turns one LLM call into one set of Prometheus samples and one logfmt log line. Surface: - observe_llm_call(...) 
— context manager yielding a mutable _CallState the caller populates over the call's lifetime.
- finalize_success(...) — populate state from a successful response and pick the outcome bucket (success / success_after_retry / success_via_backup).
- mark_max_iterations(...) — flip the state to error_max_iterations when the tool loop exited via the synthesis path.
- normalize_feature_label(...) — maps caller's track_name/trace_name to a low-cardinality Prom label (e.g. "Dreamer/deduction" -> dream_deduction).

No callers wired in yet — this commit is the helper module on its own so
the diff stays reviewable. Wiring into honcho_llm_call and the tool loop
lands in subsequent commits.

Errors raised inside the wrapped call are classified into outcome buckets
(timeout / validation / other) and re-raised; the wrapper never swallows
or transforms exceptions.
---
 src/telemetry/llm_call_metrics.py | 280 ++++++++++++++++++++++++++++++
 1 file changed, 280 insertions(+)
 create mode 100644 src/telemetry/llm_call_metrics.py

diff --git a/src/telemetry/llm_call_metrics.py b/src/telemetry/llm_call_metrics.py
new file mode 100644
index 000000000..16103479a
--- /dev/null
+++ b/src/telemetry/llm_call_metrics.py
@@ -0,0 +1,280 @@
+"""Per-`honcho_llm_call` observability.
+
+Captures one structured log line + Prometheus sample set per LLM call,
+covering every feature (deriver, dialectic, dream, summary). Designed to
+be the *only* instrumentation point inside `honcho_llm_call`; subsystem-
+specific token counters in `prometheus.metrics` continue to work in parallel.
+
+Outcome classification distinguishes "the model didn't converge"
+(`error_max_iterations`) from "the infra broke" (timeout / validation /
+other) so dashboards and alerts can target each independently.
+"""
+
+from __future__ import annotations
+
+import logging
+import re
+import time
+from collections.abc import Iterator
+from contextlib import contextmanager
+from dataclasses import dataclass, field
+from typing import TYPE_CHECKING, Any
+
+from src.exceptions import ValidationException
+from src.telemetry.prometheus import LLMCallOutcome, prometheus_metrics
+
+if TYPE_CHECKING:
+    from src.config import ModelConfig
+    from src.llm.types import HonchoLLMCallResponse
+
+logger = logging.getLogger("honcho.llm.call")
+
+
+# Track names used by callers, mapped to clean Prom label values. Anything
+# not in the table falls through the snake-casing fallback in
+# `normalize_feature_label` below (lower + underscores).
+_FEATURE_LABEL_MAP: dict[str, str] = {
+    "Minimal Deriver": "deriver",
+    "Dialectic Agent": "dialectic",
+    "Dialectic Agent Stream": "dialectic_stream",
+}
+
+
+def normalize_feature_label(track_name: str | None, trace_name: str | None) -> str:
+    """Map caller's `track_name`/`trace_name` to a low-cardinality Prom label.
+
+    Prefers explicit `_FEATURE_LABEL_MAP` matches, then snake-cases the
+    track_name (e.g. ``"Dreamer/deduction"`` → ``"dream_deduction"``),
+    falling back to trace_name, then to ``"unknown"``.
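+
+    Examples (verifiable against the map above and the fallback below):
+
+        normalize_feature_label("Dialectic Agent", None)   -> "dialectic"
+        normalize_feature_label("Dreamer/Deduction", None) -> "dream_deduction"
+        normalize_feature_label(None, "summary")           -> "summary"
+        normalize_feature_label(None, None)                -> "unknown"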
+    """
+    if track_name and track_name in _FEATURE_LABEL_MAP:
+        return _FEATURE_LABEL_MAP[track_name]
+    raw = track_name or trace_name
+    if not raw:
+        return "unknown"
+    # "Dreamer/Deduction" → "dreamer_deduction" → "dream_deduction"
+    s = re.sub(r"[^A-Za-z0-9]+", "_", raw).strip("_").lower()
+    s = s.replace("dreamer_", "dream_")
+    return s or "unknown"
+
+
+@dataclass
+class _CallState:
+    """Mutable observation collected across the lifetime of one call."""
+
+    feature: str
+    primary_provider: str
+    primary_model: str
+    has_backup: bool
+    backup_provider: str | None = None
+    backup_model: str | None = None
+    started_at: float = field(default_factory=time.monotonic)
+    final_provider: str = ""
+    final_model: str = ""
+    attempts: int = 1
+    iterations: int | None = None
+    input_tokens: int = 0
+    output_tokens: int = 0
+    cache_creation_input_tokens: int = 0
+    cache_read_input_tokens: int = 0
+    tool_calls: int = 0
+    used_backup: bool = False
+    outcome: LLMCallOutcome = LLMCallOutcome.SUCCESS
+    error_class: str | None = None
+
+
+def _classify_error(exc: BaseException) -> LLMCallOutcome:
+    """Map an exception to a coarse outcome bucket."""
+    if isinstance(exc, ValidationException):
+        return LLMCallOutcome.ERROR_VALIDATION
+    if isinstance(exc, TimeoutError):
+        return LLMCallOutcome.ERROR_TIMEOUT
+    name = type(exc).__name__.lower()
+    msg = str(exc).lower()
+    if "timeout" in name or "timed out" in msg or "timeout" in msg:
+        return LLMCallOutcome.ERROR_TIMEOUT
+    # Match pydantic's ValidationError by module as well as name: the class
+    # is named plain "ValidationError", so "pydantic" never appears in the
+    # class name alone and a name-only check would be dead code.
+    module = type(exc).__module__.lower()
+    if "validation" in name and ("pydantic" in module or "pydantic" in name):
+        return LLMCallOutcome.ERROR_VALIDATION
+    return LLMCallOutcome.ERROR_OTHER
+
+
+@contextmanager
+def observe_llm_call(
+    *,
+    track_name: str | None,
+    trace_name: str | None,
+    runtime_model_config: ModelConfig,
+) -> Iterator[_CallState]:
+    """Wrap one `honcho_llm_call` invocation with metrics + structured logging.
+
+    The caller mutates the yielded `_CallState` (sets `attempts`, `iterations`,
+    accumulates tokens, etc.) over the call's lifetime; on exit we emit the
+    Prometheus samples and a single logfmt line. Metric errors are swallowed
+    inside `prometheus_metrics`; this wrapper never raises.
+    """
+    fb = runtime_model_config.fallback
+    state = _CallState(
+        feature=normalize_feature_label(track_name, trace_name),
+        primary_provider=str(runtime_model_config.transport),
+        primary_model=str(runtime_model_config.model),
+        has_backup=fb is not None,
+        backup_provider=str(fb.transport) if fb is not None else None,
+        backup_model=str(fb.model) if fb is not None else None,
+    )
+    # Default the "winning" provider/model to primary; the caller updates
+    # these post-success based on the actual AttemptPlan that returned.
+    state.final_provider = state.primary_provider
+    state.final_model = state.primary_model
+
+    try:
+        yield state
+    except BaseException as exc:
+        state.outcome = _classify_error(exc)
+        state.error_class = type(exc).__name__
+        _emit(state)
+        raise
+    else:
+        # Success — caller is responsible for setting iterations / final
+        # provider / outcome (via `finalize_success` below) before exit.
+        _emit(state)
+
+
+def finalize_success(
+    state: _CallState,
+    *,
+    response: HonchoLLMCallResponse[Any] | None,
+    final_provider: str | None,
+    final_model: str | None,
+    attempts: int,
+    iterations: int | None,
+    has_backup: bool,
+) -> None:
+    """Populate `state` from a successful response and pick the outcome bucket.
+
+    Called by `honcho_llm_call` right before the context manager exits when
+    no exception was raised. `iterations` is None for tool-less calls.
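+
+    Outcome precedence mirrors the branch order in the body below:
+    `used_backup` → success_via_backup, else `attempts > 1` →
+    success_after_retry, else plain success.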
+ """ + state.attempts = max(1, attempts) + state.iterations = iterations + if final_provider: + state.final_provider = final_provider + if final_model: + state.final_model = final_model + state.used_backup = ( + has_backup + and state.final_model != state.primary_model + and state.backup_model is not None + and state.final_model == state.backup_model + ) + if response is not None: + state.input_tokens = int(response.input_tokens or 0) + state.output_tokens = int(response.output_tokens or 0) + state.cache_creation_input_tokens = int( + response.cache_creation_input_tokens or 0 + ) + state.cache_read_input_tokens = int(response.cache_read_input_tokens or 0) + state.tool_calls = len(response.tool_calls_made or []) + + if state.used_backup: + state.outcome = LLMCallOutcome.SUCCESS_VIA_BACKUP + elif state.attempts > 1: + state.outcome = LLMCallOutcome.SUCCESS_AFTER_RETRY + else: + state.outcome = LLMCallOutcome.SUCCESS + + +def mark_max_iterations(state: _CallState, iterations: int) -> None: + """Mark a tool-loop call that hit `max_tool_iterations` without converging. + + Called when execute_tool_loop returns from the synthesis fallback path + rather than from natural convergence. The call still returned content, + but the model didn't decide to stop on its own — different reliability + signal than a clean success. + """ + state.iterations = iterations + state.outcome = LLMCallOutcome.ERROR_MAX_ITERATIONS + + +def _emit(state: _CallState) -> None: + duration = time.monotonic() - state.started_at + outcome_value = state.outcome.value + + prometheus_metrics.record_llm_call( + feature=state.feature, + provider=state.final_provider, + model=state.final_model, + outcome=outcome_value, + duration_seconds=duration, + ) + if state.input_tokens: + prometheus_metrics.record_llm_tokens( + feature=state.feature, + provider=state.final_provider, + model=state.final_model, + token_type="input", + count=state.input_tokens, + ) + if state.output_tokens: + prometheus_metrics.record_llm_tokens( + feature=state.feature, + provider=state.final_provider, + model=state.final_model, + token_type="output", + count=state.output_tokens, + ) + if state.cache_read_input_tokens: + prometheus_metrics.record_llm_tokens( + feature=state.feature, + provider=state.final_provider, + model=state.final_model, + token_type="cache_read", + count=state.cache_read_input_tokens, + ) + if state.cache_creation_input_tokens: + prometheus_metrics.record_llm_tokens( + feature=state.feature, + provider=state.final_provider, + model=state.final_model, + token_type="cache_creation", + count=state.cache_creation_input_tokens, + ) + if state.iterations is not None: + prometheus_metrics.observe_llm_iterations( + feature=state.feature, + outcome=outcome_value, + iterations=state.iterations, + ) + if state.used_backup and state.backup_provider and state.backup_model: + prometheus_metrics.record_llm_backup_used( + feature=state.feature, + primary_provider=state.primary_provider, + primary_model=state.primary_model, + backup_provider=state.backup_provider, + backup_model=state.backup_model, + ) + + # One structured logfmt line per call. Quote-free values keep `| logfmt` + # parsing in Loki/Grafana straightforward. 
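+    # Example line (illustrative values, field order as constructed below):
+    #   honcho.llm.call feature=dialectic provider=anthropic
+    #   model=claude-sonnet-4-5 outcome=success latency_ms=2310 attempts=1
+    #   used_backup=false input_tokens=1843 output_tokens=412
+    #   cache_read_tokens=0 cache_creation_tokens=0 tool_calls=2 iterations=3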
+ iter_value = state.iterations if state.iterations is not None else 0 + err_part = f" error_class={state.error_class}" if state.error_class else "" + line = ( + f"honcho.llm.call feature={state.feature}" + f" provider={state.final_provider} model={state.final_model}" + f" outcome={outcome_value} latency_ms={int(duration * 1000)}" + f" attempts={state.attempts}" + f" used_backup={'true' if state.used_backup else 'false'}" + f" input_tokens={state.input_tokens}" + f" output_tokens={state.output_tokens}" + f" cache_read_tokens={state.cache_read_input_tokens}" + f" cache_creation_tokens={state.cache_creation_input_tokens}" + f" tool_calls={state.tool_calls} iterations={iter_value}" + f"{err_part}" + ) + logger.info(line) + + +__all__ = [ + "finalize_success", + "mark_max_iterations", + "normalize_feature_label", + "observe_llm_call", +] From 873e5e34bb6513eca58398af96fbcfa4ca9311d1 Mon Sep 17 00:00:00 2001 From: Jonathan Irvin Date: Sun, 3 May 2026 21:31:51 -0500 Subject: [PATCH 45/46] feat(llm): emit per-tool-call metrics inside the tool execution loop MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds prometheus_metrics.record_llm_tool_call() calls in both the success and error branches of execute_tool_loop's per-tool dispatch. Threads track_name / trace_name through the function signature so the emitted metric carries the same feature label that the call-level metrics will use. Both new params default to None (current callers don't pass them yet), so feature label resolves to "unknown" until honcho_llm_call is wired in the next commit. Metric emission is wrapped in PrometheusMetrics' sentry-captured error handler — a metric bug can never break a real tool call. --- src/llm/tool_loop.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/llm/tool_loop.py b/src/llm/tool_loop.py index 8d29aa033..a13b553a8 100644 --- a/src/llm/tool_loop.py +++ b/src/llm/tool_loop.py @@ -20,6 +20,8 @@ from src.config import ModelTransport from src.exceptions import ValidationException +from src.telemetry.llm_call_metrics import normalize_feature_label +from src.telemetry.prometheus import prometheus_metrics from src.utils.types import set_current_iteration from .executor import honcho_llm_call_inner @@ -166,6 +168,8 @@ async def execute_tool_loop( before_retry_callback: Callable[[Any], None], stream_final: bool = False, iteration_callback: IterationCallback | None = None, + track_name: str | None = None, + trace_name: str | None = None, ) -> HonchoLLMCallResponse[Any] | StreamingResponseWithMetadata: """Run the iterative tool calling loop for agentic LLM interactions. 
@@ -188,6 +192,8 @@ async def execute_tool_loop( + f"got {max_tool_iterations}" ) + feature_label = normalize_feature_label(track_name, trace_name) + conversation_messages: list[dict[str, Any]] = ( messages.copy() if messages else [{"role": "user", "content": prompt}] ) @@ -351,6 +357,11 @@ async def _call_with_messages( "tool_result": tool_result, } ) + prometheus_metrics.record_llm_tool_call( + feature=feature_label, + tool_name=tool_name, + outcome="success", + ) except Exception as e: logger.error(f"Tool execution failed for {tool_name}: {e}") tool_results.append( @@ -361,6 +372,11 @@ async def _call_with_messages( "is_error": True, } ) + prometheus_metrics.record_llm_tool_call( + feature=feature_label, + tool_name=tool_name, + outcome="error", + ) append_tool_results(current_provider, tool_results, conversation_messages) From 11cd7c2fa9d41237b1576410bcb272fb9e65d1fe Mon Sep 17 00:00:00 2001 From: Jonathan Irvin Date: Sun, 3 May 2026 21:32:11 -0500 Subject: [PATCH 46/46] feat(llm): wire observe_llm_call into honcho_llm_call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Wraps the body of honcho_llm_call (both tool-less and tool-loop paths) in observe_llm_call(...) so every invocation produces one set of Prometheus samples and one logfmt log line. Captures the AttemptPlan that produced the most-recent (and on success, the winning) call via a `last_plan` cell updated inside _get_attempt_plan, so the recorded provider/model is the one that actually answered — primary on early attempts, backup on the final retry. This makes backup-on-final-attempt observable directly from llm_calls / llm_tokens without parsing logs. Passes track_name and trace_name through to execute_tool_loop so its per-tool counter (added in the previous commit) carries the same feature label as the call-level metrics. When the tool loop returns response.hit_max_iterations=True, the call's outcome is overridden to error_max_iterations via mark_max_iterations so dashboards can split "model didn't converge" from clean success without the tool-loop having to know about outcome semantics. Streaming responses don't carry token counts at the entry point — the recorded call still emits but token counters skip those rows (record_llm_tokens silently no-ops on count<=0). Acceptable partial signal until streaming refactor surfaces tokens earlier. ruff + basedpyright clean. End-to-end smoke verified all six series fire correctly across success, success_via_backup, error_max_iterations, error_timeout, and tool-call paths. --- src/llm/api.py | 140 ++++++++++++++++++++++++++++++++++--------------- 1 file changed, 97 insertions(+), 43 deletions(-) diff --git a/src/llm/api.py b/src/llm/api.py index 2a8f05299..782203741 100644 --- a/src/llm/api.py +++ b/src/llm/api.py @@ -21,6 +21,11 @@ from src.config import ConfiguredModelSettings, ModelConfig from src.exceptions import ValidationException +from src.telemetry.llm_call_metrics import ( + finalize_success, + mark_max_iterations, + observe_llm_call, +) from src.telemetry.logging import conditional_observe from src.telemetry.reasoning_traces import log_reasoning_trace @@ -193,6 +198,11 @@ async def honcho_llm_call( # tenacity uses 1-indexed attempts. current_attempt.set(1) + # Captures the AttemptPlan that produced the most recent (and on success, + # the winning) call so observability can label by the model that actually + # answered — primary on early attempts, backup on the final retry. 
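+    # (A one-slot dict acts as a mutable cell: the nested `_get_attempt_plan`
+    # closure writes through `last_plan["value"]` without needing `nonlocal`.)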
+ last_plan: dict[str, AttemptPlan | None] = {"value": None} + def _get_attempt_plan() -> AttemptPlan: plan = plan_attempt( runtime_model_config=runtime_model_config, @@ -201,6 +211,7 @@ def _get_attempt_plan() -> AttemptPlan: call_thinking_budget_tokens=thinking_budget_tokens, call_reasoning_effort=reasoning_effort, ) + last_plan["value"] = plan update_current_langfuse_observation( plan.provider, plan.model, @@ -304,11 +315,92 @@ def _trace_stop_seqs() -> list[str] | None: stop_seqs if stop_seqs is not None else runtime_model_config.stop_sequences ) - # Tool-less path: call once and return. - if not tools or not tool_executor: - result: ( - HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk] - ) = await decorated() + with observe_llm_call( + track_name=track_name, + trace_name=trace_name, + runtime_model_config=runtime_model_config, + ) as obs_state: + # Tool-less path: call once and return. + if not tools or not tool_executor: + result: ( + HonchoLLMCallResponse[Any] | AsyncIterator[HonchoLLMCallStreamChunk] + ) = await decorated() + response_for_metrics = ( + result if isinstance(result, HonchoLLMCallResponse) else None + ) + winning = last_plan["value"] + finalize_success( + obs_state, + response=response_for_metrics, + final_provider=str(winning.provider) if winning else None, + final_model=winning.model if winning else None, + attempts=current_attempt.get(), + iterations=None, + has_backup=runtime_model_config.fallback is not None, + ) + if trace_name and isinstance(result, HonchoLLMCallResponse): + log_reasoning_trace( + task_type=trace_name, + model_config=runtime_model_config, + prompt=prompt, + response=result, + max_tokens=max_tokens, + thinking_budget_tokens=_trace_thinking_budget(), + reasoning_effort=_trace_reasoning_effort(), + json_mode=json_mode, + stop_seqs=_trace_stop_seqs(), + messages=messages, + ) + return result + + # execute_tool_loop raises ValidationException on out-of-range + # max_tool_iterations; fail-fast is cheaper than silent clamping here. 
+ result = await execute_tool_loop( + prompt=prompt, + max_tokens=max_tokens, + messages=messages, + tools=tools, + tool_choice=tool_choice, + tool_executor=tool_executor, + max_tool_iterations=max_tool_iterations, + response_model=response_model, + json_mode=json_mode, + temperature=temperature, + stop_seqs=stop_seqs, + verbosity=verbosity, + enable_retry=enable_retry, + retry_attempts=retry_attempts, + max_input_tokens=max_input_tokens, + get_attempt_plan=_get_attempt_plan, + before_retry_callback=before_retry_callback, + stream_final=stream_final_only, + iteration_callback=iteration_callback, + track_name=track_name, + trace_name=trace_name, + ) + response_for_metrics = ( + result if isinstance(result, HonchoLLMCallResponse) else None + ) + winning = last_plan["value"] + iterations = ( + response_for_metrics.iterations + if response_for_metrics + else (getattr(result, "iterations", None)) + ) + finalize_success( + obs_state, + response=response_for_metrics, + final_provider=str(winning.provider) if winning else None, + final_model=winning.model if winning else None, + attempts=current_attempt.get(), + iterations=iterations, + has_backup=runtime_model_config.fallback is not None, + ) + if response_for_metrics is not None and getattr( + response_for_metrics, "hit_max_iterations", False + ): + mark_max_iterations(obs_state, iterations or max_tool_iterations) + if trace_name and isinstance(result, HonchoLLMCallResponse): log_reasoning_trace( task_type=trace_name, @@ -324,43 +416,5 @@ def _trace_stop_seqs() -> list[str] | None: ) return result - # execute_tool_loop raises ValidationException on out-of-range - # max_tool_iterations; fail-fast is cheaper than silent clamping here. - result = await execute_tool_loop( - prompt=prompt, - max_tokens=max_tokens, - messages=messages, - tools=tools, - tool_choice=tool_choice, - tool_executor=tool_executor, - max_tool_iterations=max_tool_iterations, - response_model=response_model, - json_mode=json_mode, - temperature=temperature, - stop_seqs=stop_seqs, - verbosity=verbosity, - enable_retry=enable_retry, - retry_attempts=retry_attempts, - max_input_tokens=max_input_tokens, - get_attempt_plan=_get_attempt_plan, - before_retry_callback=before_retry_callback, - stream_final=stream_final_only, - iteration_callback=iteration_callback, - ) - if trace_name and isinstance(result, HonchoLLMCallResponse): - log_reasoning_trace( - task_type=trace_name, - model_config=runtime_model_config, - prompt=prompt, - response=result, - max_tokens=max_tokens, - thinking_budget_tokens=_trace_thinking_budget(), - reasoning_effort=_trace_reasoning_effort(), - json_mode=json_mode, - stop_seqs=_trace_stop_seqs(), - messages=messages, - ) - return result - __all__ = ["honcho_llm_call"]