1 change: 1 addition & 0 deletions .github/workflows/docker-build.yml
@@ -1,6 +1,7 @@
name: Build and Push Docker Image

on:
workflow_dispatch:
push:
branches:
- main
2 changes: 2 additions & 0 deletions .gitignore
@@ -193,3 +193,5 @@ metrics.jsonl
AGENTS.md
lancedb_data/
grafana-data/
.codex_honcho_runtime/
.codex_honcho_setup/
38 changes: 38 additions & 0 deletions CLAUDE.md
@@ -84,6 +84,44 @@ All API routes follow the pattern: `/v1/{resource}/{id}/{action}`
- Typechecking: `uv run basedpyright`
- Format code: `uv run ruff format src/`

### LLM provider gotchas (learned 2026-04-16 in k8s deploy)

- **Structured outputs (`response_format={"type": "json_schema"}`) only work on providers whose upstream API natively honors them.** Google Gemini does (route via the `cf` provider with a base_url ending in `/openai`). Ollama Cloud (reached via the `custom` provider plus the `custom-ollama` CF gateway endpoint, or any direct Ollama endpoint) does **not** translate `response_format` into Ollama's native JSON mode: every Ollama Cloud model tested (GLM-5.1, nemotron-3-nano, qwen3.5, devstral-small-2) returns free-form text/markdown when a schema is requested, and `honcho_llm_call` bubbles a `ValidationError: Invalid JSON` out of pydantic parsing. A quick curl probe for this is sketched after this list.
- **Therefore: deriver (`src/deriver/deriver.py:126`) and summary (`src/utils/summarizer.py`) must stay on a Gemini-backed `cf` provider.** Dream, dialectic, and any free-form / tool-call path are free to use the `custom` provider.
- **Gemini `thoughtSignature` round-tripping breaks on the CF `openai`-compat route.** Any call with `maxToolIterations > 1` AND `thinkingBudgetTokens > 0` will return `400 Function call is missing a thought_signature` on iteration 2+. If you need thinking on a multi-iteration tool loop, use the native Gemini provider, not the OpenAI-compat route — or set `thinkingBudgetTokens=0`.
- **None of this is Cloudflare's fault.** CF AI Gateway is a transparent proxy in both the `openai` and `custom-ollama` routes. The limitations live at the upstream provider (Ollama Cloud's OpenAI-compat layer).
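
To check whether an endpoint honors structured outputs before wiring it into deriver or summary, a minimal probe is to request a trivial schema and inspect the reply. The URL and model name below are placeholder assumptions, not Honcho config:

```bash
# Hypothetical endpoint and model; substitute any OpenAI-compatible base URL.
curl -sS http://localhost:11434/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
    "model": "qwen3.5",
    "messages": [{"role": "user", "content": "Describe the weather."}],
    "response_format": {
      "type": "json_schema",
      "json_schema": {
        "name": "probe",
        "schema": {
          "type": "object",
          "properties": {"ok": {"type": "boolean"}},
          "required": ["ok"]
        }
      }
    }
  }'
# A provider that honors the schema returns bare JSON matching it in
# choices[0].message.content; one that ignores response_format returns prose.
```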

### Local LM Studio Setup

- Honcho can use LM Studio for generation through the `custom` provider path.
- Keep `LLM_OPENAI_API_KEY` configured for embeddings unless embedding support is added for local models.
- For Docker Compose, `LLM_OPENAI_COMPATIBLE_BASE_URL` must be `http://host.docker.internal:1234/v1`, not `http://localhost:1234/v1` (inside a container, `localhost` resolves to the container itself, not the host running LM Studio).
- `LLM_OPENAI_COMPATIBLE_API_KEY=lm-studio` is sufficient for local use.
- Current local default model is `qwen2.5-14b-instruct`.
- When overriding `DIALECTIC_LEVELS__*` via env vars, each level needs its full required settings, not just `PROVIDER` and `MODEL`. Include `THINKING_BUDGET_TOKENS` and `MAX_TOOL_ITERATIONS`, and optionally `MAX_OUTPUT_TOKENS`. A sketch of one complete level override (the level name `CORE` and the values are illustrative assumptions; the set of keys is the point):
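
```bash
# Illustrative: level name and values are assumptions; each overridden level
# needs all of these keys, not just PROVIDER and MODEL.
DIALECTIC_LEVELS__CORE__PROVIDER=custom
DIALECTIC_LEVELS__CORE__MODEL=qwen2.5-14b-instruct
DIALECTIC_LEVELS__CORE__THINKING_BUDGET_TOKENS=0
DIALECTIC_LEVELS__CORE__MAX_TOOL_ITERATIONS=1
DIALECTIC_LEVELS__CORE__MAX_OUTPUT_TOKENS=4096
```
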
- Docker should own the runtime environment completely. Do not mount the repo onto `/app` and do not mount a named volume onto `/app/.venv`, or the image-built environment can be hidden and replaced with incompatible artifacts. The anti-pattern looks like this in a service definition:
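
```yaml
# Anti-pattern for an image-owned environment: these mounts shadow the /app
# and /app/.venv built into the image with host files / a stale named volume.
volumes:
  - .:/app
  - venv:/app/.venv
```
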
- If Docker services fail with missing Python modules or incompatible native extensions, rebuild the image instead of trying to repair the environment in-place:

```bash
docker compose build --no-cache api deriver
docker compose up -d --force-recreate api deriver
```

- Verify LM Studio from the host with:

```bash
curl -sS http://localhost:1234/v1/models
```

- Verify LM Studio from Docker with:

```bash
docker compose run --rm --entrypoint sh api -lc 'python - <<"PY"
import urllib.request
print(urllib.request.urlopen("http://host.docker.internal:1234/v1/models", timeout=5).status)
PY'
```

### SDK Testing

#### TypeScript SDK
2 changes: 2 additions & 0 deletions Dockerfile
@@ -46,6 +46,8 @@ COPY --chown=app:app alembic.ini /app/alembic.ini
# Copy config files - this will copy config.toml if it exists, and config.toml.example
COPY --chown=app:app config.toml* /app/

RUN chmod +x /app/docker/entrypoint.sh

# Switch to non-root user
USER app

129 changes: 75 additions & 54 deletions docker-compose.yml.example
@@ -4,73 +4,86 @@
# cp docker-compose.yml.example docker-compose.yml
# cp .env.template .env # edit with your provider config
# docker compose up -d --build
#
# By default, ports are bound to 127.0.0.1 (localhost only).
# For development, uncomment the source mounts and monitoring services below.

services:
traefik:
image: traefik:v3.2
command:
- --api.dashboard=true
- --api.insecure=true
- --providers.file.filename=/etc/traefik/dynamic.yml
- --providers.file.watch=true
- --entrypoints.web.address=:8000
- --ping=true
ports:
- ${HONCHO_HTTP_PORT:-8000}:8000
- ${TRAEFIK_DASHBOARD_PORT:-8080}:8080
volumes:
- ./docker/traefik/dynamic.yml:/etc/traefik/dynamic.yml:ro
networks:
- honcho

api:
image: honcho:latest
build:
context: .
dockerfile: Dockerfile
entrypoint: ["sh", "docker/entrypoint.sh"]
entrypoint: ["/app/docker/entrypoint.sh"]
depends_on:
database:
condition: service_healthy
redis:
condition: service_healthy
ports:
- "127.0.0.1:8000:8000"
# -- Development: mount source for live reload --
# volumes:
# - .:/app
# - venv:/app/.venv
traefik:
condition: service_started
expose:
- 8000
environment:
- DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@database:5432/postgres
- CACHE_URL=redis://redis:6379/0?suppress=true
- CACHE_ENABLED=true
env_file:
- path: .env
required: false
restart: unless-stopped
networks:
- honcho

deriver:
build:
context: .
dockerfile: Dockerfile
entrypoint: ["/app/.venv/bin/python", "-m", "src.deriver"]
entrypoint: ["python", "-m", "src.deriver"]
depends_on:
database:
condition: service_healthy
redis:
condition: service_healthy
# -- Development: mount source for live reload --
# volumes:
# - .:/app
# - venv:/app/.venv
environment:
- DB_CONNECTION_URI=postgresql+psycopg://postgres:postgres@database:5432/postgres
- CACHE_URL=redis://redis:6379/0?suppress=true
- CACHE_ENABLED=true
- METRICS_ENABLED=true
env_file:
- path: .env
required: false
restart: unless-stopped
networks:
- honcho

database:
image: pgvector/pgvector:pg15
restart: unless-stopped
restart: always
ports:
- "127.0.0.1:5432:5432"
command: ["postgres", "-c", "max_connections=200"]
- 5432:5432
command: ["postgres", "-c", "max_connections=800"]
environment:
- POSTGRES_DB=postgres
- POSTGRES_USER=postgres
- POSTGRES_PASSWORD=postgres
- POSTGRES_HOST_AUTH_METHOD=trust
- PGDATA=/var/lib/postgresql/data/pgdata
volumes:
- ./database/init.sql:/docker-entrypoint-initdb.d/init.sql
- pgdata:/var/lib/postgresql/data/
networks:
- honcho
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres -d postgres"]
interval: 5s
@@ -79,46 +79,54 @@ services:

redis:
image: redis:8.2
restart: unless-stopped
restart: always
ports:
- "127.0.0.1:6379:6379"
- 6379:6379
volumes:
- redis-data:/data
- ./redis-data:/data
networks:
- honcho
healthcheck:
test: ["CMD-SHELL", "redis-cli ping"]
interval: 5s
timeout: 5s
retries: 5

# -- Development: monitoring stack (uncomment to enable) --
# prometheus:
# image: prom/prometheus:v3.2.1
# ports:
# - "127.0.0.1:9090:9090"
# volumes:
# - ./docker/prometheus.yml:/etc/prometheus/prometheus.yml:ro
# - prometheus-data:/prometheus
# depends_on:
# api:
# condition: service_started
# grafana:
# image: grafana/grafana:11.4.0
# ports:
# - "127.0.0.1:3000:3000"
# environment:
# - GF_SECURITY_ADMIN_USER=admin
# - GF_SECURITY_ADMIN_PASSWORD=admin
# - GF_AUTH_ANONYMOUS_ENABLED=true
# - GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
# volumes:
# - ./docker/grafana-datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml:ro
# depends_on:
# prometheus:
# condition: service_started
prometheus:
image: prom/prometheus:v3.2.1
ports:
- 9090:9090
volumes:
- ./docker/prometheus.yml:/etc/prometheus/prometheus.yml:ro
- prometheus-data:/prometheus
depends_on:
api:
condition: service_started
networks:
- honcho

grafana:
image: grafana/grafana:11.4.0
ports:
- 3000:3000
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=admin
- GF_AUTH_ANONYMOUS_ENABLED=true
- GF_AUTH_ANONYMOUS_ORG_ROLE=Viewer
volumes:
- ./grafana-data:/var/lib/grafana
- ./docker/grafana-datasource.yml:/etc/grafana/provisioning/datasources/datasource.yml:ro
depends_on:
prometheus:
condition: service_started
networks:
- honcho

networks:
honcho:
name: honcho

volumes:
pgdata:
redis-data:
# -- Development: uncomment if using source mounts --
# venv:
# prometheus-data:
prometheus-data:
4 changes: 2 additions & 2 deletions docker/entrypoint.sh
@@ -2,7 +2,7 @@
set -e

echo "Running database migrations..."
/app/.venv/bin/python scripts/provision_db.py
python scripts/provision_db.py

echo "Starting API server..."
exec /app/.venv/bin/fastapi run --host 0.0.0.0 src/main.py
exec fastapi run --host 0.0.0.0 src/main.py
11 changes: 11 additions & 0 deletions docker/grafana-dashboards.yml
@@ -0,0 +1,11 @@
apiVersion: 1

providers:
- name: Honcho
orgId: 1
folder: Honcho
type: file
disableDeletion: false
editable: true
options:
path: /etc/grafana/provisioning/dashboards/files