From 9fd4ce68f90698c3c15516d448c1baa73c9f9348 Mon Sep 17 00:00:00 2001
From: Ashish-dwi99
Date: Wed, 13 May 2026 16:47:44 +0530
Subject: [PATCH] Add developer brain context firewall
---
.github/workflows/test.yml | 17 +
CHANGELOG.md | 60 +-
README.md | 538 +++-----
SECURITY.md | 87 ++
dhee/__init__.py | 3 +
dhee/benchmarks/replay_corpus.py | 801 ++++++++++++
dhee/benchmarks/router_replay.py | 544 ++++++--
dhee/cli.py | 406 +++++-
dhee/context_kernel.py | 78 ++
dhee/context_state.py | 5 +-
dhee/demo.py | 246 ++++
dhee/doctor.py | 130 +-
dhee/fs/__init__.py | 2 +
dhee/fs/uri.py | 59 +
dhee/fs/workspace.py | 3 +-
dhee/install_cleanup.py | 169 +++
dhee/mcp_server.py | 64 +-
dhee/mcp_slim.py | 64 +-
dhee/observability.py | 2 +-
dhee/protocol/v1.py | 430 ++++++-
dhee/router/digest.py | 107 +-
dhee/router/quality_report.py | 260 +++-
dhee/runtime.py | 784 ++++++++++++
dhee/runtime_daemon.py | 13 +
install.sh | 2 +-
tests/fixtures/golden_replay/golden.jsonl | 2 +
.../redacted_real/golden_needs_review.jsonl | 4 +
.../golden_replay/redacted_real/manifest.json | 138 +++
...ed_real_claude_code_d15b0f834020060a.jsonl | 20 +
...redacted_real_codex_ac99c6113f58f19a.jsonl | 1102 +++++++++++++++++
...redacted_real_codex_c918764edfc5535b.jsonl | 778 ++++++++++++
...redacted_real_codex_e9aab5e70135fb15.jsonl | 769 ++++++++++++
.../sessions/claude_code_debug.jsonl | 2 +
.../sessions/codex_exec_debug.jsonl | 1 +
tests/test_context_state.py | 7 +
tests/test_demo.py | 30 +
tests/test_dheefs.py | 31 +
tests/test_install_cleanup.py | 106 ++
tests/test_packaging.py | 1 +
tests/test_protocol_v1.py | 288 +++++
tests/test_router.py | 145 +++
tests/test_router_replay.py | 731 +++++++++++
tests/test_runtime.py | 352 ++++++
43 files changed, 8764 insertions(+), 617 deletions(-)
create mode 100644 SECURITY.md
create mode 100644 dhee/benchmarks/replay_corpus.py
create mode 100644 dhee/context_kernel.py
create mode 100644 dhee/demo.py
create mode 100644 dhee/fs/uri.py
create mode 100644 dhee/install_cleanup.py
create mode 100644 dhee/runtime.py
create mode 100644 dhee/runtime_daemon.py
create mode 100644 tests/fixtures/golden_replay/golden.jsonl
create mode 100644 tests/fixtures/golden_replay/redacted_real/golden_needs_review.jsonl
create mode 100644 tests/fixtures/golden_replay/redacted_real/manifest.json
create mode 100644 tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_claude_code_d15b0f834020060a.jsonl
create mode 100644 tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_codex_ac99c6113f58f19a.jsonl
create mode 100644 tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_codex_c918764edfc5535b.jsonl
create mode 100644 tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_codex_e9aab5e70135fb15.jsonl
create mode 100644 tests/fixtures/golden_replay/sessions/claude_code_debug.jsonl
create mode 100644 tests/fixtures/golden_replay/sessions/codex_exec_debug.jsonl
create mode 100644 tests/test_demo.py
create mode 100644 tests/test_install_cleanup.py
create mode 100644 tests/test_router_replay.py
create mode 100644 tests/test_runtime.py
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9c00611..7b7434b 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -31,3 +31,20 @@ jobs:
- name: Run tests
run: pytest tests/ -v
+
+ - name: Run router replay gate
+ env:
+ DHEE_DATA_DIR: ${{ runner.temp }}/dhee-router-gate
+ run: |
+ dhee router gate \
+ --harness all \
+ --sessions-dir tests/fixtures/golden_replay/sessions \
+ --golden tests/fixtures/golden_replay/golden.jsonl \
+ --allow-insufficient
+
+ - name: Inspect redacted real replay corpus
+ run: |
+ dhee router corpus \
+ --harness all \
+ --sessions-dir tests/fixtures/golden_replay/redacted_real/sessions \
+ --golden tests/fixtures/golden_replay/redacted_real/golden_needs_review.jsonl
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c5d17ee..e60bced 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -8,9 +8,63 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/), and this
- Public Dhee is now positioned and packaged as **Dhee Developer Brain**:
local memory, handoff, harness setup, and git-backed repo context.
-- Removed the public web UI package and `dhee ui` command surface. The
- enterprise dashboard and commercial code now live in the private
- `dhee-enterprise` repository.
+- Rewrote the README as a concise first-read product page focused on why Dhee
+ matters, the 30-second token-router proof, install, integrations, benchmarks,
+ and the public-core/paid-team-layer boundary.
+- Added `dhee demo token-router`, a deterministic context-firewall demo that
+ shows raw tool-output tokens, digest tokens, savings, and expansion pointers
+ without requiring a live agent session.
+- Added a public `SECURITY.md` with Dhee's local-first trust boundaries,
+ threat model, `.dheemem`/repo-context/daemon controls, reporting process, and
+ public-core vs paid-governance security split.
+- Added canonical `dhee://` URI aliases over DheeFS for stable cross-tool
+ references such as `dhee://state/current` and `dhee://handoff/latest`.
+- Added `dhee runtime status|restart|stop|doctor` with a local-only runtime
+ daemon, managed-venv visibility, and doctor integration.
+- `dhee shell`, MCP `dhee_shell`, and compiled context actions now use the
+ local runtime daemon when it is healthy, with automatic fallback and
+ `DHEE_RUNTIME_DISABLE=1` escape hatch.
+- `dhee uninstall` now performs packaging-grade cleanup: stops the daemon,
+ disables native harness wiring, removes only installer-owned symlinks, strips
+ the exact managed `# dhee` shell PATH block, and deletes the managed data/venv
+ directory.
+- MCP `dhee_read` and `dhee_grep` now use the local runtime daemon when
+ healthy. MCP `dhee_bash` can also use the daemon, but only when the daemon
+ process is started with `DHEE_RUNTIME_ENABLE_BASH=1`, a cwd allowlist, and a
+ timeout cap; successful results include runtime audit metadata.
+- Source-side read routing now extracts richer language-aware digests for
+ TS/TSX components and types, Java contracts, shell scripts, SQL objects,
+ and log severity signals.
+- Router quality reports now include explicit release-facing quality gates
+ for token savings, expansion rate, projected cache-read per turn, and
+ context-governance incidents.
+- Router replay now supports Claude Code and Codex JSONL transcript streams
+ plus golden annotations for task parity scores and stale-context incidents.
+- Added `dhee router gate` for CI/release gating and wired the checked-in
+ Claude/Codex golden replay corpus into GitHub Actions. It exits non-zero on
+ failed replay quality gates and supports `--allow-insufficient` for partial
+ telemetry jobs.
+- Added `dhee router harvest` and `dhee router corpus` to grow the golden
+ replay suite from real Claude Code/Codex sessions without checking in raw
+ prompts, tool outputs, absolute paths, or secrets. Harvested annotations are
+ marked `needs_review` until a human validates task parity.
+- Golden replay reports now count `pending_review_sessions`, release gates fail
+ when included annotations are still pending, and `dhee router annotate` can
+ promote a reviewed session to `pass` or `fail` without hand-editing JSONL.
+- Hardened signed `.dheemem` v1 import/inspect validation: manifest signature
+ failures now report cleanly, required payload files and `handoff.json` are
+ enforced, and duplicate, unexpected, absolute, or traversal archive members
+ are rejected before import.
+- `.dheemem` import and dry-run results now include a compact
+ `handoff_bootstrap` summary from the signed `handoff.json`, so a receiving
+ harness can inspect continuity before or after import.
+- `.dheemem` packs now also carry signed repo-shared context payloads
+ (`repo_context/manifest.json` and `repo_context/entries.jsonl`); import
+ dry-runs report the repo-context bootstrap and merge/replace can restore
+ entries into a target repo while rejecting tampered, symlinked, or
+ likely-secret-bearing context.
+- Public Dhee exposes local CLI/MCP/data primitives so dashboard products can
+ render governance without duplicating core context logic.
- Added repo-shared context commands: `dhee link`, `dhee unlink`,
`dhee links`, `dhee promote`, `dhee demote`, and `dhee context`.
- Repo-shared context uses append-only `.dhee/context/entries.jsonl` with
diff --git a/README.md b/README.md
index 5659858..ce93d54 100644
--- a/README.md
+++ b/README.md
@@ -2,407 +2,236 @@
-Dhee — the context manager for AI coding agents
+Dhee
-Dhee decides what your coding agent should see, remember, and forget each turn, so it stays cheap, reliable, and auditable.
+The context firewall for AI coding agents.
- It runs locally under Claude Code, Codex, Cursor, Gemini CLI, Aider, Cline, Hermes, and any MCP client.
+ Dhee decides what your agent should see, remember, forget, compress, and expand each turn.
-
+
- #1 on LongMemEval retrieval — R@1 94.8% · R@5 99.4% · R@10 99.8% on the full 500-question set. Reproduce it →
-
-
-
-
-
-
-
- What is Dhee ·
- Current State ·
- Team Knowledge ·
- File Interface ·
- Quick Start ·
- Repo-Shared Context ·
- Benchmarks ·
- How It Works ·
- vs Alternatives ·
- Integrations
+ Why |
+ Try it |
+ Install |
+ How it works |
+ Integrations |
+ Benchmarks |
+ FAQ |
+ Security
---
-## What is Dhee?
-
-**Dhee decides what your coding agent should see, remember, and forget each turn, so it stays cheap, reliable, and auditable.**
+## Why Dhee
-Every serious coding agent now hits the same bottleneck: not model intelligence, but context. Transcripts grow, tool output piles up, compaction drops decisions, and useful project knowledge gets trapped in one session.
+Coding agents do not usually fail because the model is too weak. They fail
+because context gets messy:
-Dhee runs locally between your agent and your workspace. It keeps the agent focused on the goal, decisions, files, tests, and evidence that matter now, while preserving the raw history for audit and reuse.
+- They reread the same files and logs.
+- They carry stale decisions after task pivots.
+- They dump huge test output into the model.
+- They forget state after compaction or handoff.
+- Teams cannot reuse what one agent learned without copying prompt sludge.
-The buyer problem is simple:
+Dhee runs locally beside your coding agent and governs context before it becomes
+a token problem.
-- Agents waste money re-reading files, logs, and old conversation.
-- Agents lose decisions after compaction, handoff, or tool-output overload.
-- Teams cannot reuse what one agent learned in another agent without copying a pile of text.
+| Without Dhee | With Dhee |
+| --- | --- |
+| Raw logs, diffs, files, and subagent output flood context. | Large outputs become compact digests with expandable evidence pointers. |
+| The agent guesses what still matters after compaction. | Dhee keeps a current state card: goal, facts, decisions, files, tests, next step. |
+| Team knowledge lives in random transcripts and markdown files. | Promoted learnings and repo context are reusable across agents with provenance. |
+| Memory grows forever. | Dhee scores, decays, tombstones, and gates what gets injected. |
+| Switching agents means re-explaining the project. | Claude Code, Codex, Cursor, Gemini CLI, Aider, Cline, Hermes, and MCP clients share one local context layer. |
-Dhee handles the context layer:
+The promise is simple:
-1. **Keeps current state.** Goal, facts, decisions, active files, tests, and next step stay visible without replaying the whole session.
-
-2. **Shrinks noisy tool output.** Large reads, searches, logs, and test runs become compact digests with pointers back to the raw evidence.
-
-3. **Reuses team knowledge safely.** Decisions, docs, handoffs, and promoted learnings move across agents with provenance instead of becoming prompt sludge.
-
-### Who it's for
-
-- **AI-native engineering teams** whose agents are expensive, forgetful, repetitive, or hard to audit.
-- **Claude Code / Cursor / Codex / Gemini CLI / Aider / Cline users** who have hit context limits, compaction loops, or runaway tool-output bills.
-- **Teams standardizing on `AGENTS.md`, `CLAUDE.md`, Skills, MCP tools, and subagents** who need governed delivery instead of bigger prompts.
-- **Hermes users** who already have a self-evolving agent and want those learnings to make Claude Code and Codex smarter too.
-- **Founders building agentic development workflows** who need a local, inspectable context layer before they can trust agents with more of the work.
+> Your agent should not see everything. It should see the right thing, with proof.
---
-## Current State — keep the agent oriented
+## Try It
-Long coding sessions get expensive and less reliable when old tool output, repeated reads, failed attempts, and superseded plans keep influencing the next token. Dhee's answer is not to trim the transcript. Dhee keeps a canonical working state and regenerates a small state card for each turn.
+Run the built-in context-router demo. It needs no API key and no connected agent:
```bash
-dhee context status
-dhee context state --card
-dhee context provision "fix expired-token KeyError"
-dhee context checkpoint --reason "before compaction"
-dhee context rollover --reason "context debt crossed threshold"
+dhee demo token-router
```
-The state card contains only current signal:
-
-```xml
-
- Fix expired-token KeyError in login
- middleware.py line 47 raises KeyError iat
- Use python-jose validation path
- Patch middleware and run the narrow auth test.
- middleware.py
- failing pytest digest
-
-```
-
-Task pivots start a new epoch: stale facts, repeated reads, old plans, and superseded decisions are tombstoned instead of carried into the next state card. The raw evidence remains local behind pointers, and state writes are guarded so CLI, MCP, Codex sync, and Claude hooks do not trample each other.
-
-Quality is the gate. Dhee suppresses duplicate and stale context only when the pointer store, expansion SLO, and outcome signals keep the next step safe. If expansion rises, Dhee deepens that digest class instead of hiding more evidence.
-
----
-
-## Team Knowledge — reuse what agents learn
-
-Hermes can evolve its own skills and memories. Claude Code has native hooks. Codex has MCP config, `AGENTS.md`, and a persisted session stream. Dhee turns those separate agent histories into reusable context that other agents can trust.
+Example result:
```text
-Hermes MemoryProvider
- ├─ MEMORY.md / USER.md writes
- ├─ agent-created skills
- ├─ session summaries and outcomes
- └─ self-evolution traces
- │
- ▼
- Dhee Review Layer
- │
- ├─ candidate -> review / evidence / score
- ├─ promoted -> injected as Learned Playbooks
- └─ rejected -> auditable, never injected
- │
- ▼
-Claude Code · Codex · Hermes · any MCP client
+Dhee token-router demo
+ context firewall: agent sees the right thing, not everything
+ raw tokens: 25,208
+ digest tokens: 1,742
+ saved: 23,466 (93.1%)
```
-What this means in practice:
+The demo shows how Dhee handles three common agent hazards:
-- Your existing Hermes progress is not stranded inside Hermes. `dhee install` detects Hermes when present, installs Dhee as a Hermes `MemoryProvider` at `~/.hermes/plugins/memory/dhee`, and imports local Hermes memory files, session summaries, and agent-created skills into Dhee.
-- Claude Code and Codex do not need to launch Hermes to benefit. They receive promoted Hermes/Dhee learnings through normal Dhee context and MCP tools.
-- New Claude Code and Codex outcomes can become Dhee learning candidates too. After promotion, Hermes can read them back through the same provider.
-- Candidate learnings are never auto-injected. Trusted Hermes `MEMORY.md` / `USER.md` imports may be promoted during install; Hermes `SOUL.md`, session traces, and agent-created skills stay candidates until explicitly approved or promoted by policy.
+- a noisy pytest failure log
+- a large git diff
+- a long source file read
-This is the product contract: **with Dhee, a learning proven in one agent can become a promoted playbook for every connected agent.**
-
-### Reality check
-
-- **Hermes native:** Dhee integrates as a Hermes `MemoryProvider`, the first-class Hermes memory-plugin surface. Hermes allows one active external memory provider, so V1 replaces Honcho/Mem0/etc. while `memory.provider: dhee` is active.
-- **Claude Code native:** Dhee uses Claude Code hooks, MCP, and router enforcement. This is the strongest integration surface.
-- **Codex native:** Codex does not expose Claude-style pre-tool hooks here. Dhee uses the closest native Codex surfaces: `~/.codex/config.toml`, global `~/.codex/AGENTS.md`, MCP server instructions, and Codex session-stream auto-sync.
-- **Promotion gate:** Imported Hermes skills and session traces are candidates by default. Rejected or archived learnings remain auditable but are excluded from retrieval.
-- **Continuity hygiene:** Handoffs filter fixture memories, artifact chunks, and placeholder test rows by default. Shared tool results carry provenance, salience, TTL, and evidence pointers so another agent can inherit the useful state without inheriting every live mirror.
+In each case the agent receives a useful digest, while exact raw evidence stays
+behind `dhee_expand_result(ptr="...")` for explicit expansion.
---
-## File Interface — inspect agent context like local files
+## Install
-Agents already understand files and shell verbs. Dhee exposes memory, handoff, artifacts, shared tasks, and learning review as one virtual context space:
+One command:
```bash
-dhee shell "ls /learnings"
-dhee shell "cat /handoff/latest.md"
-dhee shell "grep parser /learnings/promoted"
-dhee shell "cat /router/ptr/R-abc123"
+curl -fsSL https://raw.githubusercontent.com/Sankhya-AI/Dhee/main/install.sh | sh
```
-The first version is a virtual shell, not FUSE. It intentionally supports a small approved command set: `ls`, `cat`, `grep`, `why`, `promote`, `reject`, `broadcast`, `provision`, and `snapshot`. The same surface is available through MCP as `dhee_shell(command)` and through Python:
+Or via pip:
-```python
-from dhee import ContextWorkspace
-
-result = ContextWorkspace(repo=".").execute("provision 'fix parser bug'")
-print(result.stdout)
+```bash
+pip install dhee
+dhee install
```
-External systems such as Slack, Gmail, and Notion are future **context sources** under `/sources`, not generic remote action backends. They can sync and search evidence into Dhee artifacts, learnings, and handoffs without making the core install depend on SaaS SDKs.
+Then open your coding agent in a project. Dhee auto-wires supported local
+harnesses when detected and keeps its personal state under `~/.dhee`.
-```text
-/learnings candidates, promoted, rejected, archived
-/state current compiled state, state card, decisions, epoch history
-/context debt, status, checkpoints, rollover evidence
-/handoff latest repo/session continuity
-/router/ptr raw pointer lookup when explicitly requested
-/artifacts host-parsed files and chunks
-/repo .dhee/context decisions and conventions
-/agents Hermes, Claude Code, Codex views
-/shared inbox, broadcasts, shared task results
-/sources optional future Slack/Gmail/Notion context mounts
-```
+Useful first commands:
----
-
-## Quick Start
+```bash
+dhee status
+dhee doctor
+dhee demo token-router
+dhee handoff
+dhee context state --card
+dhee runtime status
+```
-**One command. No venv. No config. No pasting into `settings.json`.**
+Clean uninstall is part of the trust contract:
```bash
-curl -fsSL https://raw.githubusercontent.com/Sankhya-AI/Dhee/main/install.sh | sh
+dhee uninstall --yes
```
-The installer creates `~/.dhee/`, installs the `dhee` package, and auto-wires Claude Code, Codex, and Hermes when detected. Open your agent in any project — cognition is on.
+It stops the daemon, removes Dhee-owned harness wiring and shell PATH blocks,
+and deletes the managed local runtime/data directory.
-
-Other install paths
+---
-```bash
-# Via pip
-pip install dhee
-dhee install # configure supported agent harnesses
+## What You Get
-# From source
-git clone https://github.com/Sankhya-AI/Dhee.git
-cd Dhee && ./scripts/bootstrap_dev_env.sh
-source .venv-dhee/bin/activate
-dhee install
-```
-
-
+**1. Current state, not transcript replay**
-After install, Dhee auto-ingests project docs (`CLAUDE.md`, `AGENTS.md`, `SKILL.md`, etc.) on the first session. Run `dhee ingest` any time to re-chunk.
+Dhee keeps a compact state card for the active task: goal, facts, decisions,
+files, tests, evidence pointers, and next step.
```bash
-dhee install # configure local agent harnesses
-dhee hermes status # see whether Hermes is detected and Dhee-backed
-dhee hermes sync --dry-run # preview Hermes memories/skills before import
-dhee learn search --include-candidates # inspect candidates and promotions
-dhee link /path/to/repo # share context with teammates through this repo
-dhee context refresh # refresh repo context after pull/checkout
-dhee handoff # compact continuity for current repo/session
-dhee key set openai # store a provider key locally (encrypted)
-dhee router report # token-savings stats + replay projection
-dhee router tune # re-tune retrieval policy from usage
+dhee context provision "fix expired-token KeyError"
+dhee context state --card
+dhee context checkpoint --reason "before compaction"
```
----
-
-## Repo-Shared Context — git is the sync layer
+**2. Source-side routing**
-Most "team memory" tools need a server. Dhee uses the one your team already trusts: **git**.
+Heavy `Read`, `Bash`, `Grep`, and agent results are digested before they flood
+the model.
-```bash
-dhee link /path/to/repo
+```text
+10 MB pytest log -> failing test, first error, summary, head/tail, pointer
+large git diff -> files changed, hunks, additions/deletions, pointer
+source file -> symbols, imports, focus lines, pointer
```
-Dhee creates a tracked folder inside your repo:
+**3. Evidence on demand**
+
+The model can expand raw data only when the digest is not enough:
```text
-/.dhee/
- config.json
- context/manifest.json
- context/entries.jsonl
+dhee_expand_result(ptr="B-demo-pytest")
```
-Commit it. Teammates who pull the repo and have Dhee installed get the **same shared context** — decisions, conventions, what-not-to-do — surfaced into their agent automatically.
+Expansion reasons are logged, so Dhee learns which digests need more depth.
-Shared context is **append-only and git-friendly**. If two developers edit overlapping context concurrently, Dhee keeps both versions and reports a conflict instead of silently dropping one developer's work. The installed `pre-push` hook blocks unresolved conflicts from leaving the laptop:
+**4. Git-shared repo context**
+
+Teams can share decisions and conventions through the repository itself:
```bash
+dhee link /path/to/repo
dhee context check --repo /path/to/repo
```
-**No hosted service. No org account. Your repo is the team brain.**
+Dhee stores shared context under `/.dhee/context`, with append-only
+entries and conflict detection. No hosted server or org account is required.
----
+**5. Portable local memory**
-## Benchmarks
+`.dheemem` packs move Dhee state between machines and harnesses:
-> **#1 on LongMemEval recall.** R@1 **94.8%**, R@5 **99.4%**, R@10 **99.8%** — full 500 questions, no held-out split, no cherry-picking.
-
-| System | R@1 | R@3 | R@5 | R@10 |
-|:-------|:----|:----|:----|:-----|
-| **Dhee** | **94.8%** | **99.0%** | **99.4%** | **99.8%** |
-| [MemPalace](https://github.com/MemPalace/mempalace#benchmarks) (raw) | — | — | 96.6% | — |
-| [MemPalace](https://github.com/MemPalace/mempalace#benchmarks) (hybrid v4, held-out 450q) | — | — | 98.4% | — |
-| [agentmemory](https://github.com/rohitg00/agentmemory#benchmarks) | — | — | 95.2% | 98.6% |
-
-Stack: NVIDIA `llama-nemotron-embed-vl-1b-v2` embedder + `llama-3.2-nv-rerankqa-1b-v2` reranker, top-k 10.
+```bash
+dhee export --format dheemem --output backup.dheemem
+dhee import backup.dheemem --format dheemem --strategy dry-run
+```
-**Proof is in-tree, not screenshots.** Exact command, metrics, and per-question output live under [`benchmarks/longmemeval/`](benchmarks/longmemeval/). Recompute R@k yourself — any mismatch is a bug you can open.
+Packs are signed and validated before import.
---
## How It Works
+```text
+Agent asks for context
+ |
+ v
+Dhee reads current task state, repo context, memories, and tool output
+ |
+ v
+Context firewall decides:
+ state -> compact current truth
+ proof -> pointer-backed evidence
+ source -> exact raw expansion only when needed
+ |
+ v
+Agent sees a small, relevant, auditable packet
```
- ┌──────────────────────────────┐
- │ Your fat context │
- │ CLAUDE.md · AGENTS.md · │
- │ SKILL.md · prompts · docs · │
- │ sessions · tool output │
- └──────────────┬─────────────────┘
- │ ingest once
- ▼
- ┌────────────────────────────────────────────────────┐
- │ Dhee · local SQLite brain │
- │ │
- │ doc chunks · short-term · long-term · insights · │
- │ beliefs · policies · intentions · episodes · edits │
- └─────────────────────┬───────────────────────────────┘
- │
- ┌──────────────┴───────────────┐
- ▼ ▼
- Session start Each user prompt
- (full assembly) (matching slice only)
- │ │
- └──────────────┬───────────────┘
- ▼
- ┌────────────────────────────┐
- │ Token-budgeted XML │
- │ │
- │ │
- │ What worked last… │
- │ │
- └────────────────────────────┘
- │
- Model sees only what it
- needs, when it needs it.
-```
-
-On the tool-use side, the **router** digests raw output **at source** — never letting raw `Read`, `Bash`, or subagent results into context unless the model asks.
-### The four-operation API
-
-Every interface — hooks, MCP, Python, CLI — exposes the same four operations.
+The core interfaces stay small:
```python
from dhee import Dhee
+
d = Dhee()
d.remember("User prefers FastAPI over Flask")
d.recall("what framework does this project use?")
d.context("fixing the auth bug")
-d.checkpoint("Fixed auth bug", what_worked="git blame first", outcome_score=1.0)
+d.checkpoint("Fixed auth bug", what_worked="checked logs", outcome_score=1.0)
```
-| Operation | LLM calls | Cost |
-|:----------|:---------:|:----:|
-| `remember` / `recall` / `context` | 0 | ~$0.0002 |
-| `checkpoint` | 1 per ~10 memories | ~$0.001 |
-| **Typical 20-turn Opus session** | **~1** | **~$0.004** |
-
-Dhee overhead: ~$0.004/session. Token savings on the same 20-turn session: **~$0.50+**. **>100× ROI.**
-
-### The router — digest at source
-
-Four MCP tools replace `Read` / `Bash` / `Agent` on heavy calls:
-
-- `dhee_read(file_path, offset?, limit?, query?, task_intent?)` — symbols, focus slices, head/tail, kind, token estimate + pointer. When no query is passed, Dhee infers one from compiled state.
-- `dhee_bash(command, preview_only?)` — preflight risk, output class, stderr/stdout landmarks, and command-specific reducers for git diffs, pytest/build failures, grep, listings, and generic logs.
-- `dhee_agent(text)` — file refs, headings, bullets, error signals from any subagent return.
-- `dhee_expand_result(ptr, range?, symbol?, reason?, expected?)` — only called when the digest genuinely isn't enough; expansion reasons feed router tuning.
-
-A 10 MB `git log --oneline -50000` becomes a ~200-token digest. This is where the serious savings live.
-
-### Learns what to show
-
-Most memory layers are static: you write rules, they retrieve. Dhee watches what happens and tunes itself.
-
-- **Intent classification.** Every `Read`/`Bash`/`Agent` call is bucketed (source, test, config, doc, data, build). Reads also inherit the live compiled-state task intent, so a debug session gets failure landmarks without the agent remembering to pass a query.
-- **Stable duplicate suppression.** Admission hashes the underlying evidence, not the fresh pointer string, so unchanged repeated reads stop adding debt.
-- **Expansion ledger.** Every `dhee_expand_result(ptr)` is logged with `(tool, intent, depth, slice mode, reason, expected signal)`.
-- **Policy tuning.** `dhee router tune` reads the ledger and atomically rewrites `~/.dhee/router_policy.json` — deeper for what gets expanded, shallower for what doesn't.
-
-Frontend-heavy teams get deeper JS/TS digests. Data teams get richer CSV/JSONL summaries. **You don't pick — Dhee picks, based on what you actually expand.**
-
----
-
-## vs alternatives
-
-| | **Dhee** | CLAUDE.md | Mem0 | Letta | MemPalace | agentmemory |
-|:--|:-:|:-:|:-:|:-:|:-:|:-:|
-| **Tokens / turn** | **~300** | 2,000+ | varies | ~1K+ | varies | ~1,900 |
-| **LongMemEval R@5** | **99.4%** | — | — | — | 96.6% | 95.2% |
-| **Adapts from expansions** | **Yes** | No | No | No | No | No |
-| **Hermes → Claude/Codex learning exchange** | **Yes** | No | No | No | No | No |
-| **Auto-digest tool output** | **Yes** | No | No | No | No | No |
-| **Git-shared team context** | **Yes** | Manual | No | No | No | No |
-| **Works across MCP agents** | **Yes** | No | Partial | No | Yes | Yes |
-| **External DB required** | No (SQLite) | No | Qdrant/pgvector | Postgres+vector | No | No |
-| **License** | MIT | — | Apache-2 | Apache-2 | MIT | MIT |
-
-Dhee is not trying to be the agent, the IDE, or the memory SaaS. It is the **context manager** those systems need underneath them: smaller prompts, reproducible recall, adaptive retrieval, git-shared team context, and auditable knowledge reuse in one local-first package.
+Every surface uses the same primitives: CLI, Python SDK, Claude Code hooks,
+Codex session sync, and MCP tools.
---
## Integrations
-### Hermes Agent — native MemoryProvider
+| Surface | Dhee support |
+| --- | --- |
+| Claude Code | Deepest integration: hooks, MCP, handoff, shared tasks, router enforcement. |
+| Codex | MCP config, global `AGENTS.md`, server instructions, and session-stream sync. |
+| Cursor / Gemini CLI / Cline / Goose | MCP-first integration through `dhee-mcp`. |
+| Hermes | Native MemoryProvider, learning import, promotion, and playbook exchange. |
+| Aider / other CLIs | CLI, MCP, repo context, and portable `.dheemem` flows. |
-```bash
-dhee install # detects Hermes and enables Dhee when present
-dhee hermes status
-dhee hermes sync --dry-run
-```
-
-Dhee installs as the Hermes memory provider, mirrors Hermes memory writes, imports local Hermes memory files, and checkpoints Hermes sessions into Dhee learning candidates. Curated `MEMORY.md` / `USER.md` imports can be promoted on install; `SOUL.md`, session traces, and agent-created skills stay gated. Promoted playbooks flow back into Hermes through the provider and out to Claude Code/Codex through Dhee context.
-
-### Claude Code — native hooks
-
-```bash
-pip install dhee && dhee install
-```
-
-Six lifecycle hooks fire at the right moments. Claude Code gets Dhee handoff, shared tasks, inbox broadcasts, learned playbooks, and router enforcement for heavy `Read`/`Bash`/`Grep` calls.
-
-### Codex — closest native surface
-
-```bash
-pip install dhee && dhee install --harness codex
-dhee harness status --harness codex
-```
-
-Dhee writes `~/.codex/config.toml`, manages a global `~/.codex/AGENTS.md` block, advertises context-first MCP instructions, and tails Codex session logs on Dhee calls. Codex does not currently expose Claude-style pre-tool hooks, so this is the strongest truthful native integration available.
-
-### MCP server — Cursor, Gemini CLI, Cline, Goose, anything MCP
+MCP config:
```json
{
@@ -412,69 +241,74 @@ Dhee writes `~/.codex/config.toml`, manages a global `~/.codex/AGENTS.md` block,
}
```
-### Python SDK / CLI / Docker
+Codex note: Codex does not expose Claude-style pre-tool hooks. Dhee uses the
+strongest truthful Codex surfaces available: MCP, `AGENTS.md`, config, server
+instructions, and session-log sync.
-```bash
-dhee remember "User prefers Python"
-dhee recall "programming language"
-dhee ingest CLAUDE.md AGENTS.md
-dhee checkpoint "Fixed auth" --what-worked "checked logs"
-```
+---
-### Provider options
+## Benchmarks
-```bash
-pip install dhee[openai,mcp] # cheapest embeddings
-pip install dhee[nvidia,mcp] # current SOTA stack
-pip install dhee[gemini,mcp]
-pip install dhee[ollama,mcp] # local, no API costs
-```
+Dhee reports LongMemEval retrieval results on the full 500-question set:
+
+| System | R@1 | R@5 | R@10 |
+| --- | ---: | ---: | ---: |
+| Dhee | 94.8% | 99.4% | 99.8% |
+| MemPalace raw | - | 96.6% | - |
+| MemPalace hybrid v4 | - | 98.4% | - |
+| agentmemory | - | 95.2% | 98.6% |
+
+Stack: NVIDIA `llama-nemotron-embed-vl-1b-v2` embedder plus
+`llama-3.2-nv-rerankqa-1b-v2` reranker, top-k 10.
+
+The proof is committed under [`benchmarks/longmemeval/`](benchmarks/longmemeval/):
+commands, metrics, and per-question output.
+
+Retrieval is only one piece. Dhee's stronger claim is context governance:
+controlling what reaches the model before memory retrieval becomes prompt
+pollution.
---
-## Public vs Enterprise
+## Public Core and Paid Layer
-| | **Public Dhee** (this repo, MIT) | **Dhee Enterprise** (private) |
-|:--|:--|:--|
-| Local memory + router | ✅ | ✅ |
-| Self-tuning retrieval | ✅ | ✅ |
-| Hermes → Claude Code/Codex learning exchange | ✅ | ✅ |
-| Git-shared repo context | ✅ | ✅ |
-| Claude Code / Codex / MCP | ✅ | ✅ |
-| Org / team management | — | ✅ |
-| Repo Brain code-intelligence | — | ✅ |
-| Owner dashboard, billing, licensing | — | ✅ |
-| Sentry-derived security telemetry | — | ✅ |
+Public Dhee is MIT and complete for local developer use: memory, router,
+handoff, DheeFS, MCP, repo context, `.dheemem`, runtime, security checks, and
+replay/report data.
-Public Dhee is the local collaboration layer — lightweight, trustworthy, and complete on its own. The commercial layer is closed-source and lives in `Sankhya-AI/dhee-enterprise`.
+A paid team layer can sit on top for company needs: org dashboards, policy,
+audit, SSO/RBAC, fleet health, billing, and governance workflows. The local
+developer brain stays useful without it.
---
## FAQ
-**What problem does Dhee solve?**
-Large agent projects accumulate a fat `CLAUDE.md`, `AGENTS.md`, skills library, and tool output that get re-injected every turn. Dhee chunks, indexes, and decays that knowledge, and digests fat tool output at the source — so only the relevant ~300 tokens reach the model.
+**Is Dhee another memory database?**
-**How is Dhee different from Mem0, Letta, MemPalace, agentmemory?**
-Dhee is built around four pieces most tools treat separately: reproducible LongMemEval results, a self-tuning retrieval/router policy, source-side digests for heavy `Read`/`Bash`/subagent output, and git-shared team context instead of a server.
+No. Memory is part of Dhee, but the wedge is context governance: deciding what
+the model sees now, what stays hidden behind proof pointers, and what should be
+forgotten or tombstoned.
-**Does Dhee work with Claude Code, Cursor, Codex, Gemini CLI, Aider?**
-Yes. Native Claude Code hooks, closest-native Codex config/AGENTS/session-stream sync, a Hermes MemoryProvider, an MCP server for every other host, plus a Python SDK and CLI. One install, every agent.
+**Does it require a server?**
-**Does Hermes make Claude Code and Codex smarter?**
-Yes, through Dhee's learning exchange after promotion. Dhee can install as Hermes' memory provider, import Hermes memory/session/skill artifacts, and expose promoted learnings to Claude Code, Codex, and any MCP client as Learned Playbooks. Claude/Codex do not have to run Hermes to benefit.
+No. Dhee is local-first and uses SQLite by default. Repo-shared context uses git.
-**Does Claude Code or Codex evolve Hermes back?**
-Yes, after promotion. Claude Code hooks, Codex session-stream sync, MCP memory tools, and learning submissions create Dhee learning candidates. Promoted personal/repo/workspace playbooks are retrieved by Hermes through the Dhee provider.
+**Does it store secrets in the repo?**
-**How does the team-context sharing actually work?**
-`dhee link /path/to/repo` writes a `.dhee/` directory inside your repo. Commit it. Teammates pull, install Dhee, and their agent surfaces the same shared decisions and conventions. Append-only with conflict detection — no overwrites, no server, no account.
+It should not. Repo-shared context is meant for decisions and conventions, not
+secrets or bulk private data. See [`SECURITY.md`](SECURITY.md).
-**Is Dhee production-ready? What storage?**
-SQLite by default. No Postgres, no Qdrant, no pgvector, no infra. The regression suite and reproducible benchmarks live in-tree. MIT, works offline with Ollama or online with OpenAI / NVIDIA NIM / Gemini.
+**Can I inspect or export my data?**
-**Where are the benchmarks and can I reproduce them?**
-[`benchmarks/longmemeval/`](benchmarks/longmemeval/) — full command, per-question JSONL, `metrics.json`. Clone, run, recompute R@k. Any mismatch is an issue you can open.
+Yes. Dhee exposes local shell/MCP surfaces and signed `.dheemem` export/import.
+Clean uninstall is supported.
+
+**Which agent should I use it with first?**
+
+Claude Code gets the deepest routing integration. Codex gets the best available
+MCP/session-sync integration. Cursor, Gemini CLI, Cline, Goose, and others work
+through MCP.
---
@@ -482,13 +316,13 @@ SQLite by default. No Postgres, no Qdrant, no pgvector, no infra. The regression
```bash
git clone https://github.com/Sankhya-AI/Dhee.git
-cd Dhee && ./scripts/bootstrap_dev_env.sh
+cd Dhee
+./scripts/bootstrap_dev_env.sh
source .venv-dhee/bin/activate
pytest
```
-For the same full-suite path CI expects, including the local Rust acceleration
-extension and async test plugin:
+Full verification:
```bash
./scripts/verify_full_suite.sh
@@ -497,16 +331,12 @@ extension and async test plugin:
---
- Your fat skills stay fat. Your token bill stays thin. Promoted learnings travel with every agent.
+ Your agent stops drowning in context.
- GitHub ·
- PyPI ·
- Issues ·
+ GitHub |
+ PyPI |
+ Issues |
Sankhya AI
-MIT License — built by Sankhya AI Labs.
-
-
-Topics: ai-agents · agent-memory · llm-memory · developer-brain · claude-code · claude-code-hooks · claudemd · agentsmd · mcp · mcp-server · model-context-protocol · context-router · context-engineering · context-compression · token-optimization · llm-tools · vector-memory · sqlite · longmemeval · retrieval-augmented-generation · rag · mem0-alternative · letta-alternative · mempalace-alternative · cursor · codex · gemini-cli · aider · cline · goose
-
+MIT License - built by Sankhya AI Labs.
diff --git a/SECURITY.md b/SECURITY.md
new file mode 100644
index 0000000..cbae3ef
--- /dev/null
+++ b/SECURITY.md
@@ -0,0 +1,87 @@
+# Security Policy
+
+Dhee is local-first infrastructure that can see prompts, tool output, repo
+context, memory, artifacts, and agent handoffs. Treat it as part of the trusted
+developer workstation.
+
+## Supported Versions
+
+Security fixes target the current public release line and `main`.
+
+## Reporting a Vulnerability
+
+Do not open a public issue for suspected vulnerabilities involving secrets,
+prompt injection payloads, private paths, private repo content, or bypasses.
+
+Report privately through GitHub Security Advisories for this repository. Include:
+
+- Dhee version or commit SHA.
+- Operating system and Python version.
+- Installed harnesses involved: Claude Code, Codex, Hermes, or MCP.
+- Reproduction steps with secrets replaced by placeholders.
+- Expected vs actual trust-boundary behavior.
+
+## Trust Boundaries
+
+Dhee has five main boundaries:
+
+- **Local data root:** personal state under `~/.dhee` stays local unless the user exports or syncs it.
+- **Repo-shared context:** `.dhee/context` (inside the linked repo) is git-tracked and must never contain secrets or bulk private data.
+- **Router pointer store:** raw `Read`, `Bash`, `Grep`, and agent outputs stay behind local pointers until explicitly expanded.
+- **`.dheemem` packs:** portable archives are signed and validated before import.
+- **Runtime daemon:** loopback-only by default; daemon-backed bash requires explicit server-side opt-in and cwd allowlisting.
+
+## Threat Model
+
+Dhee actively defends against:
+
+- Secret leakage from hook-captured tool output.
+- Prompt injection stored in repo-shared context.
+- Symlink escape from repo context and pack import paths.
+- Archive traversal and tampered `.dheemem` manifests.
+- Stale context being injected after task drift.
+- Oversized tool output flooding the model context.
+- Native bash acceleration outside approved daemon trust boundaries.
+
+Dhee does not claim to defend against:
+
+- A compromised developer account or workstation.
+- A malicious LLM provider receiving content the user intentionally sends.
+- Unsafe commands the user or agent explicitly runs outside Dhee routing.
+- Public network exposure of local services without a trusted auth proxy.
+
+## Security Controls
+
+- Secret filtering is applied before hook-derived memory and replay-corpus storage.
+- Repo-shared context import rejects symlinked files, traversal paths, and likely secrets.
+- Router digests expose summaries first and require `dhee_expand_result(ptr)` for raw evidence.
+- Context state tracks epochs, stale assertions, duplicate reads, and context debt.
+- Runtime daemon binds to loopback and refuses public hosts unless explicitly overridden.
+- `dhee uninstall` stops the daemon and removes Dhee-owned harness wiring, symlinks, shell PATH blocks, and managed runtime data.
+
+## Team Governance Controls
+
+A paid/team governance layer should be used when teams need:
+
+- Org/team dashboards.
+- Policy and approval workflows.
+- Audit export.
+- SSO/RBAC.
+- Context-manager findings.
+- Cross-team governance for promoted learnings and shared repo context.
+
+The public MIT package remains complete for local developer use; paid team
+features should add governance, not basic functionality.
+
+## Handling Secrets
+
+Do not promote secrets into memory, repo-shared context, or `.dheemem` packs.
+Use provider environment variables or Dhee's local encrypted secret store for API
+keys. If a secret is accidentally captured, rotate it first, then purge the
+affected memory/context/artifact records.
+
+## Security Philosophy
+
+Memory without governance becomes prompt pollution. Dhee's job is not to make
+agents remember everything. Dhee's job is to make context admission explicit,
+auditable, minimal, and reversible.
diff --git a/dhee/__init__.py b/dhee/__init__.py
index f7c9299..60d663c 100644
--- a/dhee/__init__.py
+++ b/dhee/__init__.py
@@ -26,6 +26,7 @@
from dhee.simple import Dhee, Engram
from dhee.plugin import DheePlugin
from dhee.fs import ContextWorkspace
+from dhee.context_kernel import DheeContextKernel, KernelScope
from dhee.core.category import CategoryProcessor, Category, CategoryType, CategoryMatch
from dhee.core.echo import EchoProcessor, EchoDepth, EchoResult
from dhee.configs.base import MemoryConfig, FadeMemConfig, EchoMemConfig, CategoryMemConfig, ScopeConfig
@@ -47,6 +48,8 @@
"DheePlugin",
# DheeFS
"ContextWorkspace",
+ "DheeContextKernel",
+ "KernelScope",
# CategoryMem
"CategoryProcessor",
"Category",
diff --git a/dhee/benchmarks/replay_corpus.py b/dhee/benchmarks/replay_corpus.py
new file mode 100644
index 0000000..ec93b3c
--- /dev/null
+++ b/dhee/benchmarks/replay_corpus.py
@@ -0,0 +1,801 @@
+"""Privacy-safe replay corpus harvesting.
+
+The router replay harness is only strategically useful when it can run on
+representative real sessions. Raw Claude/Codex transcripts are too sensitive
+to check in or share, so this module converts them into redacted replay
+fixtures that preserve the tool-call shape, output size, exit status, and
+high-level failure/success signals without storing prompts, source text,
+absolute paths, or secrets.
+"""
+
+from __future__ import annotations
+
+import hashlib
+import json
+import re
+import shlex
+import time
+from dataclasses import asdict, dataclass
+from pathlib import Path
+from typing import Any
+
+from dhee.benchmarks.router_replay import (
+ _PROJECTORS,
+ _command_text,
+ _detect_harness,
+ _flatten_result,
+ _jsonl_records,
+ _loads_tool_args,
+ _normalise_codex_tool,
+ aggregate_reports,
+ discover_transcripts,
+ load_golden_annotations,
+ replay_session,
+)
+from dhee.hooks.claude_code.privacy import filter_secrets
+
+
# Argument keys whose values look like filesystem paths; their values are
# replaced with stable placeholder paths (see sanitize_path).
_PATHISH_KEYS = {
    "path",
    "file_path",
    "filepath",
    "cwd",
    "workdir",
    "directory",
    "repo",
    "root",
}
# Keys that carry raw tool output; values are redacted line-by-line while
# roughly preserving size (see sanitize_output).
_OUTPUT_KEYS = {
    "stdout",
    "stderr",
    "output",
    "aggregated_output",
    "content",
    "result",
}
# Scalar keys safe to keep verbatim — numeric metadata only, never content.
_KEEP_SCALARS = {"limit", "offset", "exit_code", "duration_ms", "timeout_ms"}
# Filename prefix shared by every harvested (redacted) session fixture.
_GENERIC_SESSION_PREFIX = "redacted_real"
+
+
@dataclass
class HarvestedSession:
    """Summary of one transcript after redaction and replay verification."""

    session_id: str  # stem of the sanitized output file
    harness: str  # "claude_code" or "codex"
    output_path: str  # where the sanitized JSONL was written
    source_size_bytes: int  # size of the original transcript on disk
    source_path_sha256: str  # hash of the resolved source path (path itself is never stored)
    sanitized_records: int  # number of redacted JSONL records written
    total_calls: int  # tool calls the replay harness found in the sanitized file
    calls_by_tool: dict[str, int]  # per-tool call counts from the replay report
    raw_tokens: int  # tokens the raw outputs would have cost
    digest_tokens: int  # tokens after router digestion
    saved_pct: float  # percentage saved, rounded to 2 decimals
    warnings_count: int  # replay warnings (e.g. irreproducible commands)
    annotation_status: str = "needs_review"  # human parity-review state
+
+
+def _sha_text(value: str, *, chars: int = 12) -> str:
+ return hashlib.sha256(value.encode("utf-8", errors="replace")).hexdigest()[:chars]
+
+
+def _sha_file(path: Path, *, chars: int = 16) -> str:
+ h = hashlib.sha256()
+ try:
+ with path.open("rb") as handle:
+ for chunk in iter(lambda: handle.read(1024 * 1024), b""):
+ h.update(chunk)
+ except OSError:
+ h.update(str(path).encode("utf-8", errors="replace"))
+ return h.hexdigest()[:chars]
+
+
+def _write_jsonl(path: Path, records: list[dict[str, Any]]) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ with path.open("w", encoding="utf-8") as handle:
+ for record in records:
+ handle.write(json.dumps(record, separators=(",", ":"), sort_keys=True) + "\n")
+
+
+def _safe_suffix(value: str) -> str:
+ suffix = Path(value).suffix.lower()
+ if 1 <= len(suffix) <= 10 and re.fullmatch(r"\.[a-z0-9_+-]+", suffix):
+ return suffix
+ return ".txt"
+
+
def sanitize_path(value: Any) -> str:
    """Return a stable placeholder path without preserving local details."""
    text = filter_secrets(str(value or "")).strip()
    if not text:
        return "/unknown.txt"
    digest = _sha_text(text)
    suffix = _safe_suffix(text)
    return f"/file_{digest}{suffix}"
+
+
+def _command_tokens(command: str) -> list[str]:
+ try:
+ return shlex.split(command)
+ except ValueError:
+ return command.split()
+
+
+def _normalise_program(token: str) -> str:
+ name = Path(token).name
+ if name in {"python3", "python3.9", "python3.10", "python3.11", "python3.12", "python3.13", "python3.14"}:
+ return "python"
+ return name or token
+
+
def sanitize_command(command: Any) -> str:
    """Keep the command class, remove args that could reveal local work.

    Maps a raw command (string or argv list) onto a small vocabulary of
    generic equivalents so replay fixtures preserve the *kind* of command
    (test run, build, git query, file read, search) without leaking
    project-specific arguments or paths.

    Fixes relative to the original:
    - The ``uv``/``poetry`` pytest branch was unreachable because the
      generic ``"pytest" in lowered`` check ran first; it is now matched
      before the generic check. The redundant ``lowered[:3]`` pytest branch
      (already subsumed by the generic check) is removed.
    - Guards against an empty token list (previously ``tokens[0]`` could
      raise on pathological quoting).
    - NOTE(review): the ``<redacted>`` argument markers restore placeholders
      that appear to have been lost in extraction (empty / trailing-space
      string literals); confirm the exact marker text against the committed
      replay fixtures.
    """
    text = filter_secrets(_command_text(command)).strip()
    if not text:
        return ""
    tokens = _command_tokens(text)
    if not tokens:
        return ""
    lowered = [_normalise_program(t).lower() for t in tokens]

    # Runner-wrapped pytest must be checked before the generic pytest match.
    if lowered[0] in {"uv", "poetry"} and "pytest" in lowered:
        return f"{lowered[0]} run python -m pytest tests/test_redacted.py -q"
    if "pytest" in lowered:
        return "python -m pytest tests/test_redacted.py -q"
    if "npm" in lowered:
        if "test" in lowered:
            return "npm test -- --runInBand"
        if "build" in lowered:
            return "npm run build"
        return "npm <redacted>"
    if "pnpm" in lowered:
        if "test" in lowered:
            return "pnpm test"
        if "build" in lowered:
            return "pnpm build"
        return "pnpm <redacted>"
    if "yarn" in lowered:
        if "test" in lowered:
            return "yarn test"
        if "build" in lowered:
            return "yarn build"
        return "yarn <redacted>"
    if lowered[0] == "git":
        sub = lowered[1] if len(lowered) > 1 else "status"
        # Read-only git subcommands keep their name; anything else is generic.
        if sub in {"status", "diff", "show", "log", "branch", "rev-parse", "ls-files"}:
            return f"git {sub} <redacted>"
        return "git <redacted>"
    if lowered[0] in {"rg", "grep"}:
        return f"{lowered[0]} <redacted>"
    if lowered[0] in {"cat", "sed", "head", "tail", "nl"}:
        # All plain file-viewing commands collapse to one canonical form.
        return "sed -n '1,120p' <redacted>"
    if lowered[0] in {"ls", "find", "fd"}:
        return f"{lowered[0]} <redacted>"
    if lowered[0] in {"make", "cargo", "go"}:
        sub = lowered[1] if len(lowered) > 1 else ""
        return f"{lowered[0]} {sub}".strip()
    program = _normalise_program(tokens[0])
    return f"{program} <redacted>"
+
+
+def _pad_placeholder(base: str, target_len: int) -> str:
+ if target_len <= 0:
+ return base
+ if len(base) >= target_len:
+ return base
+ return base + ("." * (target_len - len(base)))
+
+
def _sanitize_line(line: str) -> str:
    """Replace one output line with a same-length generic equivalent.

    Test summaries, failures, passes, warnings, and exit-code lines keep a
    recognisable generic form; any other line becomes an anonymous redaction
    marker. Results are padded toward the original line length so the
    replayed token mass stays realistic.

    Fixes relative to the original: the exit-code literal ended at
    ``"exit code: "`` and the fallback returned an empty f-string (padding
    to a bare dot run) — both redaction markers appear to have been stripped
    during extraction and are restored here.
    NOTE(review): confirm the exact marker text against committed fixtures.
    """
    filtered = filter_secrets(line)
    # Aim for the original length, clamped to [16, 240] characters.
    target_len = max(16, min(len(filtered), 240))
    stripped = filtered.strip()
    if not stripped:
        return ""
    summary_parts = re.findall(
        r"\d+\s+(?:passed|failed|failures?|errors?|skipped|warnings?)",
        stripped,
        flags=re.IGNORECASE,
    )
    if summary_parts and len(stripped) < 220:
        return _pad_placeholder(" ".join(summary_parts), target_len)
    if re.search(r"\bFAILED\b|\bERROR\b|AssertionError|Traceback", stripped):
        return _pad_placeholder(
            "FAILED tests/test_redacted.py::test_case AssertionError: redacted",
            target_len,
        )
    if re.search(r"\bPASSED\b|\bok\b", stripped, flags=re.IGNORECASE):
        return _pad_placeholder(
            "tests/test_redacted.py::test_case PASSED",
            target_len,
        )
    if re.search(r"\bWARNING\b|\bDeprecationWarning\b", stripped):
        return _pad_placeholder("WARNING redacted warning text", target_len)
    if re.search(r"\b(exit code|return code)\b", stripped, flags=re.IGNORECASE):
        return _pad_placeholder("exit code: <redacted>", target_len)
    # Anonymous but stable marker: identical source lines redact identically.
    return _pad_placeholder(
        f"<redacted line {_sha_text(stripped)}>",
        target_len,
    )
+
+
def sanitize_output(value: Any, *, max_output_chars: int = 50_000) -> str:
    """Redact tool output while preserving rough token mass and result shape.

    Each line is rewritten by ``_sanitize_line``; once the *max_output_chars*
    budget is spent, a truncation marker summarising the omitted remainder is
    appended and processing stops. A trailing newline in the input is kept.

    Fix relative to the original: the truncation marker was an empty
    f-string, so the computed ``remaining_lines``/``remaining_chars`` were
    never emitted. NOTE(review): the original marker text was lost in
    extraction; confirm the restored wording against committed fixtures.
    """
    text = filter_secrets(str(value or ""))
    if not text:
        return ""
    max_chars = max(500, int(max_output_chars or 50_000))
    out: list[str] = []
    used = 0
    lines = text.splitlines()
    for idx, line in enumerate(lines):
        sanitized = _sanitize_line(line)
        next_len = len(sanitized) + 1  # +1 for the joining newline
        if used + next_len > max_chars:
            remaining_lines = len(lines) - idx
            # Approximate: counts sanitized chars spent, not original chars.
            remaining_chars = max(0, len(text) - used)
            out.append(
                f"<truncated: {remaining_lines} lines, ~{remaining_chars} chars omitted>"
            )
            break
        out.append(sanitized)
        used += next_len
    suffix = "\n" if text.endswith("\n") else ""
    return "\n".join(out) + suffix
+
+
def _sanitize_scalar(key: str, value: Any, *, max_output_chars: int) -> Any:
    """Sanitize one scalar argument value according to its key's role.

    Numbers/bools/None pass through; path-like, command-like, output-like,
    and prompt-like keys get dedicated redaction; any other short, plainly
    benign string is kept verbatim, and everything else becomes an anonymous
    marker.

    Fix relative to the original: the prompt-key branch and the final
    fallback both returned empty f-strings — the redaction markers appear to
    have been stripped during extraction and are restored here.
    NOTE(review): confirm the exact marker text against committed fixtures.
    """
    if isinstance(value, (int, float, bool)) or value is None:
        return value
    lower = key.lower()
    if lower in _KEEP_SCALARS:
        return value
    if lower in _PATHISH_KEYS or lower.endswith("_path"):
        return sanitize_path(value)
    if lower in {"command", "cmd", "script"}:
        return sanitize_command(value)
    if lower in _OUTPUT_KEYS or lower.endswith("_output"):
        return sanitize_output(value, max_output_chars=max_output_chars)
    if lower in {"description", "prompt", "instructions", "query", "pattern"}:
        # Free-text fields are always redacted, keyed by their role.
        return f"<redacted {lower}>"
    text = filter_secrets(str(value))
    # Short flag/identifier-like tokens are safe to keep as-is.
    if len(text) <= 20 and re.fullmatch(r"[a-zA-Z0-9_.:/ -]+", text):
        return text
    # Stable anonymous marker: identical values redact identically.
    return f"<redacted {_sha_text(text)}>"
+
+
def sanitize_tool_input(
    tool_name: str,
    data: Any,
    *,
    max_output_chars: int = 50_000,
) -> dict[str, Any]:
    """Sanitize tool input but keep fields the replay projector needs.

    Recursively walks the argument structure: nested dicts are sanitized
    with the same rules, argv-style ``command``/``cmd`` lists collapse to a
    single sanitized command string, other list items are sanitized
    per-element, and every remaining scalar goes through
    ``_sanitize_scalar`` keyed by its argument name. Finally the fields the
    replay projectors read (``command`` for Bash, ``file_path`` for Read,
    ``description``/``prompt`` for Task/Agent) are guaranteed to exist.
    """
    # Arguments may arrive as a dict or as a JSON-encoded string.
    raw = data if isinstance(data, dict) else _loads_tool_args(data)
    safe: dict[str, Any] = {}
    for key, value in raw.items():
        if isinstance(value, dict):
            safe[key] = sanitize_tool_input(
                tool_name,
                value,
                max_output_chars=max_output_chars,
            )
        elif isinstance(value, list):
            if key.lower() in {"command", "cmd"}:
                # Argv lists become one sanitized generic command string.
                safe[key] = sanitize_command(value)
            else:
                safe[key] = [
                    sanitize_tool_input(tool_name, item, max_output_chars=max_output_chars)
                    if isinstance(item, dict)
                    else _sanitize_scalar(key, item, max_output_chars=max_output_chars)
                    for item in value
                ]
        else:
            safe[key] = _sanitize_scalar(key, value, max_output_chars=max_output_chars)

    # Overwrite/ensure the projector-critical fields regardless of input shape.
    if tool_name == "Bash":
        safe["command"] = sanitize_command(raw.get("command") or raw.get("cmd") or raw.get("script") or "")
    elif tool_name == "Read":
        safe["file_path"] = sanitize_path(raw.get("file_path") or raw.get("path") or "")
    elif tool_name in {"Task", "Agent"}:
        # NOTE(review): empty-string defaults — the original placeholder text
        # may have been lost in extraction; confirm against the fixtures.
        safe.setdefault("description", "")
        safe.setdefault("prompt", "")
    return safe
+
+
+def _sanitize_usage(usage: Any) -> dict[str, int]:
+ if not isinstance(usage, dict):
+ return {}
+ out: dict[str, int] = {}
+ for key in (
+ "input_tokens",
+ "output_tokens",
+ "cache_read_input_tokens",
+ "cache_creation_input_tokens",
+ ):
+ try:
+ value = int(usage.get(key, 0) or 0)
+ except (TypeError, ValueError):
+ value = 0
+ if value:
+ out[key] = value
+ return out
+
+
def _sanitize_claude_records(path: Path, *, max_output_chars: int) -> list[dict[str, Any]]:
    """Convert one Claude Code transcript into redacted replay records.

    Keeps only ``tool_use`` blocks for tools the replay harness can project
    (``_PROJECTORS``) and the ``tool_result`` blocks that answer them. Tool
    ids are renumbered (``tool-0001`` ...) so originals never leak; inputs go
    through ``sanitize_tool_input`` and outputs through ``sanitize_output``.
    """
    records: list[dict[str, Any]] = []
    id_map: dict[str, str] = {}  # original tool_use id -> stable replacement
    next_id = 1

    for rec in _jsonl_records(path):
        rec_type = rec.get("type")
        # Some transcript variants nest under "message"; fall back to the
        # record itself when that envelope is absent.
        msg = rec.get("message") or rec
        content = msg.get("content") if isinstance(msg, dict) else None
        if not isinstance(content, list):
            continue

        tool_blocks: list[dict[str, Any]] = []
        result_blocks: list[dict[str, Any]] = []
        for block in content:
            if not isinstance(block, dict):
                continue
            btype = block.get("type")
            if btype == "tool_use":
                name = str(block.get("name") or "")
                old_id = str(block.get("id") or "")
                # Drop calls the replay projectors cannot re-run.
                if not old_id or name not in _PROJECTORS:
                    continue
                new_id = f"tool-{next_id:04d}"
                next_id += 1
                id_map[old_id] = new_id
                tool_blocks.append(
                    {
                        "type": "tool_use",
                        "id": new_id,
                        "name": name,
                        "input": sanitize_tool_input(
                            name,
                            block.get("input") or {},
                            max_output_chars=max_output_chars,
                        ),
                    }
                )
            elif btype == "tool_result":
                old_id = str(block.get("tool_use_id") or "")
                # Results for dropped/unknown calls are discarded too.
                if old_id not in id_map:
                    continue
                result_blocks.append(
                    {
                        "type": "tool_result",
                        "tool_use_id": id_map[old_id],
                        "content": sanitize_output(
                            _flatten_result(block.get("content")),
                            max_output_chars=max_output_chars,
                        ),
                    }
                )

        # Assistant records carry the surviving tool calls (plus sanitized
        # token usage); user records carry the matching tool results.
        if rec_type == "assistant" and tool_blocks:
            records.append(
                {
                    "type": "assistant",
                    "message": {
                        "usage": _sanitize_usage(msg.get("usage")),
                        "content": tool_blocks,
                    },
                }
            )
        if result_blocks:
            records.append(
                {
                    "type": "user",
                    "message": {"content": result_blocks},
                }
            )

    return records
+
+
def _sanitize_codex_records(path: Path, *, max_output_chars: int) -> list[dict[str, Any]]:
    """Convert one Codex session stream into redacted replay records.

    Function-call items are normalised onto the Claude-style tool vocabulary
    via ``_normalise_codex_tool`` for sanitisation, while the emitted record
    keeps the Codex wire shape (``response_item`` / ``event_msg``). Call ids
    are renumbered (``call-0001`` ...) so originals never leak.
    """
    records: list[dict[str, Any]] = []
    id_map: dict[str, str] = {}  # original call id -> stable replacement id
    next_id = 1

    def mapped_call_id(old: str) -> str:
        # Reuse the mapping for known ids; mint a fresh one otherwise.
        nonlocal next_id
        if old and old in id_map:
            return id_map[old]
        new = f"call-{next_id:04d}"
        next_id += 1
        if old:
            id_map[old] = new
        return new

    for rec in _jsonl_records(path):
        payload = rec.get("payload") if isinstance(rec.get("payload"), dict) else {}
        ptype = payload.get("type")
        if rec.get("type") == "response_item" and ptype == "function_call":
            original_name = str(payload.get("name") or "")
            original_args = _loads_tool_args(payload.get("arguments"))
            normalised = _normalise_codex_tool(original_name, original_args)
            if not normalised:
                # Tool has no replay projector; drop the call entirely.
                continue
            tool, tool_input = normalised
            call_id = mapped_call_id(str(payload.get("call_id") or payload.get("id") or ""))
            sanitized_args = sanitize_tool_input(
                tool,
                tool_input,
                max_output_chars=max_output_chars,
            )
            records.append(
                {
                    "type": "response_item",
                    "payload": {
                        "type": "function_call",
                        # Keep the original tool name when present so replay
                        # normalisation matches the live session shape.
                        "name": original_name or ("exec_command" if tool == "Bash" else "read_file"),
                        "call_id": call_id,
                        "arguments": json.dumps(sanitized_args, separators=(",", ":"), sort_keys=True),
                    },
                }
            )
            continue

        if rec.get("type") != "event_msg" or ptype not in {
            "exec_command_end",
            "function_call_output",
            "tool_result",
        }:
            continue
        old_call_id = str(payload.get("call_id") or payload.get("id") or payload.get("tool_call_id") or "")
        # exec_command_end events are accepted without a prior matched call;
        # other result types must answer a kept function_call.
        if old_call_id and old_call_id not in id_map and ptype != "exec_command_end":
            continue
        call_id = mapped_call_id(old_call_id)
        output = (
            payload.get("aggregated_output")
            or payload.get("output")
            or payload.get("stdout")
            or payload.get("content")
            or ""
        )
        safe_payload: dict[str, Any] = {
            "type": ptype,
            "call_id": call_id,
        }
        if payload.get("exit_code") is not None:
            safe_payload["exit_code"] = payload.get("exit_code")
        if payload.get("command") is not None:
            cmd = sanitize_command(payload.get("command"))
            # Re-wrap like Codex exec events: a shell argv triple.
            safe_payload["command"] = ["/bin/sh", "-lc", cmd]
        if ptype == "exec_command_end":
            safe_payload["aggregated_output"] = sanitize_output(
                output,
                max_output_chars=max_output_chars,
            )
        else:
            safe_payload["output"] = sanitize_output(
                output,
                max_output_chars=max_output_chars,
            )
        if payload.get("stderr"):
            safe_payload["stderr"] = sanitize_output(
                payload.get("stderr"),
                max_output_chars=max_output_chars,
            )
        records.append({"type": "event_msg", "payload": safe_payload})

    return records
+
+
def sanitize_transcript(
    path: Path,
    output_path: Path,
    *,
    harness: str = "auto",
    max_output_chars: int = 50_000,
) -> HarvestedSession | None:
    """Redact one transcript and verify it still replays.

    Writes the sanitized JSONL to *output_path*, re-runs the replay harness
    on it, and returns a ``HarvestedSession`` summary. Returns ``None`` (and
    removes the output file) when no replayable tool calls survive redaction.
    """
    selected = _detect_harness(path) if harness in {"auto", "all"} else harness
    if selected in {"codex", "codex_cli"}:
        records = _sanitize_codex_records(path, max_output_chars=max_output_chars)
        selected = "codex"
    else:
        # Anything not recognisably Codex is treated as Claude Code.
        records = _sanitize_claude_records(path, max_output_chars=max_output_chars)
        selected = "claude_code"
    if not records:
        return None

    _write_jsonl(output_path, records)
    # Replay the sanitized file to prove it is still usable as a fixture.
    report = replay_session(output_path, harness=selected)
    if report.total_calls <= 0:
        try:
            output_path.unlink()
        except OSError:
            pass
        return None
    stat = path.stat()
    return HarvestedSession(
        session_id=output_path.stem,
        harness=selected,
        output_path=str(output_path),
        source_size_bytes=stat.st_size,
        # Only a hash of the resolved source path is stored, never the path.
        source_path_sha256=_sha_text(str(path.resolve()), chars=16),
        sanitized_records=len(records),
        total_calls=report.total_calls,
        calls_by_tool=dict(report.calls_by_tool),
        raw_tokens=report.raw_tokens,
        digest_tokens=report.digest_tokens,
        saved_pct=round(report.saved_pct, 2),
        warnings_count=len(report.warnings),
    )
+
+
def _session_output_path(source: Path, output_sessions_dir: Path, harness: str) -> Path:
    """Build the content-addressed output filename for one harvested session."""
    digest = _sha_file(source)
    filename = f"{_GENERIC_SESSION_PREFIX}_{harness}_{digest}.jsonl"
    return output_sessions_dir / filename
+
+
def _write_review_annotations(path: Path, sessions: list[HarvestedSession]) -> None:
    """Seed one needs-review golden annotation per harvested session."""
    records: list[dict[str, Any]] = []
    for session in sessions:
        records.append(
            {
                "session_id": session.session_id,
                "task_parity": "needs_review",
                "stale_context_incidents": 0,
                "note": f"Harvested from redacted real {session.harness} session; requires human parity review before release gating.",
            }
        )
    _write_jsonl(path, records)
+
+
+def _manifest(
+ *,
+ sessions: list[HarvestedSession],
+ aggregate: dict[str, Any],
+ output_dir: Path,
+ golden_path: Path | None,
+) -> dict[str, Any]:
+ return {
+ "format": "dhee_replay_corpus_manifest",
+ "version": 1,
+ "generated_at": time.time(),
+ "source": "redacted_real_sessions",
+ "privacy": {
+ "raw_prompts": False,
+ "raw_tool_outputs": False,
+ "raw_paths": False,
+ "secret_filter": "dhee.hooks.claude_code.privacy.filter_secrets",
+ },
+ "output_dir": str(output_dir),
+ "golden_path": str(golden_path) if golden_path else "",
+ "aggregate": aggregate,
+ "sessions": [asdict(session) for session in sessions],
+ }
+
+
def harvest_corpus(
    *,
    sessions_dir: Path | None = None,
    output_dir: Path,
    harness: str = "all",
    limit: int = 0,
    min_calls: int = 1,
    max_output_chars: int = 50_000,
    golden_output: Path | None = None,
    manifest_output: Path | None = None,
) -> dict[str, Any]:
    """Harvest local transcripts into a redacted replay corpus.

    Discovers transcripts (optionally restricted to one *harness* and capped
    at *limit*), sanitizes each into ``output_dir/sessions``, drops sessions
    with fewer than *min_calls* replayable calls, seeds needs-review golden
    annotations, replays the kept sessions with those annotations, and
    writes a ``manifest.json``. Per-transcript errors are recorded under
    ``skipped`` (keyed only by a path hash) instead of aborting the run.
    Returns a summary dict mirroring the manifest.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    output_sessions_dir = output_dir / "sessions"
    output_sessions_dir.mkdir(parents=True, exist_ok=True)

    transcripts = discover_transcripts(
        sessions_dir=sessions_dir,
        harness=harness,
        limit=limit,
    )
    harvested: list[HarvestedSession] = []
    skipped: list[dict[str, Any]] = []
    replay_harness = "auto" if harness in {"all", "auto"} else harness

    for source in transcripts:
        try:
            selected = _detect_harness(source) if replay_harness == "auto" else replay_harness
            selected = "codex" if selected in {"codex", "codex_cli"} else "claude_code"
            output_path = _session_output_path(source, output_sessions_dir, selected)
            session = sanitize_transcript(
                source,
                output_path,
                harness=selected,
                max_output_chars=max_output_chars,
            )
            if session is None or session.total_calls < min_calls:
                # Remove the under-sized fixture so the corpus stays clean.
                if session is not None:
                    try:
                        Path(session.output_path).unlink()
                    except OSError:
                        pass
                skipped.append({"source_path_sha256": _sha_text(str(source.resolve()), chars=16), "reason": "too_few_calls"})
                continue
            harvested.append(session)
        except Exception as exc:  # noqa: BLE001
            # Never let one bad transcript abort the whole harvest; only a
            # path hash is recorded, never the path itself.
            skipped.append(
                {
                    "source_path_sha256": _sha_text(str(source.resolve()), chars=16),
                    "reason": f"{type(exc).__name__}: {exc}",
                }
            )

    if golden_output is None:
        golden_output = output_dir / "golden_needs_review.jsonl"
    if harvested:
        _write_review_annotations(golden_output, harvested)

    # Re-replay with the seeded annotations so the aggregate reflects them.
    annotations = load_golden_annotations(golden_output) if harvested else {}
    reports = [
        replay_session(
            Path(session.output_path),
            harness=session.harness,
            annotations=annotations,
        )
        for session in harvested
    ]
    aggregate = aggregate_reports(reports)

    manifest = _manifest(
        sessions=harvested,
        aggregate=aggregate,
        output_dir=output_dir,
        golden_path=golden_output if harvested else None,
    )
    manifest["transcripts_considered"] = len(transcripts)
    manifest["skipped"] = skipped
    if manifest_output is None:
        manifest_output = output_dir / "manifest.json"
    manifest_output.write_text(json.dumps(manifest, indent=2, sort_keys=True) + "\n", encoding="utf-8")

    return {
        "output_dir": str(output_dir),
        "sessions_dir": str(output_sessions_dir),
        "golden_path": str(golden_output) if harvested else "",
        "manifest_path": str(manifest_output),
        "transcripts_considered": len(transcripts),
        "harvested_sessions": len(harvested),
        "skipped": skipped,
        "sessions": [asdict(session) for session in harvested],
        "aggregate": aggregate,
        "privacy": manifest["privacy"],
    }
+
+
def inspect_corpus(
    *,
    sessions_dir: Path,
    harness: str = "all",
    golden_path: Path | None = None,
    limit: int = 0,
) -> dict[str, Any]:
    """Replay an existing corpus directory and summarise per-session metrics."""
    transcripts = discover_transcripts(
        sessions_dir=sessions_dir,
        harness=harness,
        limit=limit,
    )
    annotations = load_golden_annotations(golden_path)
    # "all"/"auto" defer per-file harness detection to the replay harness.
    effective_harness = harness if harness not in {"all", "auto"} else "auto"

    reports = []
    summaries = []
    for transcript in transcripts:
        report = replay_session(transcript, harness=effective_harness, annotations=annotations)
        reports.append(report)
        summaries.append(
            {
                "session_id": report.session_id,
                "harness": report.harness,
                "total_calls": report.total_calls,
                "calls_by_tool": dict(report.calls_by_tool),
                "raw_tokens": report.raw_tokens,
                "digest_tokens": report.digest_tokens,
                "saved_tokens": report.net_saved,
                "saved_pct": round(report.saved_pct, 2),
                "annotations_count": report.annotations_count,
                "task_parity": report.task_parity,
                "task_parity_score": report.task_parity_score,
                "stale_context_incidents": report.stale_context_incidents,
                "warnings_count": len(report.warnings),
            }
        )

    return {
        "sessions_dir": str(sessions_dir),
        "harness": harness,
        "golden_path": str(golden_path) if golden_path else "",
        "sessions": summaries,
        "aggregate": aggregate_reports(reports),
    }
+
+
def upsert_golden_annotation(
    *,
    golden_path: Path,
    session_id: str,
    task_parity: str,
    task_parity_score: float | None = None,
    stale_context_incidents: int | None = None,
    note: str | None = None,
) -> dict[str, Any]:
    """Create or replace one golden replay annotation record."""
    parity = str(task_parity or "").strip().lower()
    if parity not in {"pass", "fail", "needs_review"}:
        raise ValueError("task_parity must be pass, fail, or needs_review")
    if not session_id:
        raise ValueError("session_id is required")

    record: dict[str, Any] = {
        "session_id": session_id,
        "task_parity": parity,
        "review_status": "needs_review" if parity == "needs_review" else "reviewed",
    }
    if task_parity_score is not None:
        record["task_parity_score"] = float(task_parity_score)
    record["stale_context_incidents"] = (
        max(0, int(stale_context_incidents)) if stale_context_incidents is not None else 0
    )
    if note:
        record["note"] = str(note)

    # Replace every record matching this session id; append if none matched.
    existing_records = list(_jsonl_records(golden_path)) if golden_path.exists() else []
    matches = [
        str(existing.get("session_id") or "") == session_id
        for existing in existing_records
    ]
    updated = [
        record if matched else existing
        for existing, matched in zip(existing_records, matches)
    ]
    replaced = any(matches)
    if not replaced:
        updated.append(record)
    _write_jsonl(golden_path, updated)
    return {
        "golden_path": str(golden_path),
        "session_id": session_id,
        "action": "updated" if replaced else "created",
        "annotation": record,
    }
+
+
def format_harvest_human(result: dict[str, Any]) -> str:
    """Render a harvest_corpus() summary for terminal display."""
    report = ["Replay corpus harvest"]
    report.append(f"  considered: {result.get('transcripts_considered', 0)}")
    report.append(f"  harvested: {result.get('harvested_sessions', 0)}")
    report.append(f"  sessions: {result.get('sessions_dir') or '(none)'}")
    report.append(f"  golden: {result.get('golden_path') or '(none)'}")
    report.append(f"  manifest: {result.get('manifest_path') or '(none)'}")
    aggregate = result.get("aggregate") or {}
    report.append(f"  calls: {aggregate.get('total_calls', 0)}")
    report.append(f"  saved: {aggregate.get('saved_pct', 0.0)}%")
    report.append(f"  pending: {aggregate.get('pending_review_sessions', 0)}")
    report.append(f"  harnesses: {aggregate.get('sessions_by_harness', {})}")
    skipped = result.get("skipped")
    if skipped:
        report.append(f"  skipped: {len(skipped)}")
    report.append("  privacy: raw prompts/outputs/paths omitted; review annotations are pending")
    return "\n".join(report)
+
+
def format_inspect_human(result: dict[str, Any]) -> str:
    """Render an inspect_corpus() result for terminal display."""
    aggregate = result.get("aggregate") or {}
    parts = ["Replay corpus"]
    parts.append(f"  sessions dir: {result.get('sessions_dir')}")
    parts.append(f"  harness: {result.get('harness')}")
    parts.append(f"  sessions: {aggregate.get('sessions', 0)}")
    parts.append(f"  harnesses: {aggregate.get('sessions_by_harness', {})}")
    parts.append(f"  calls: {aggregate.get('total_calls', 0)}")
    parts.append(f"  by tool: {aggregate.get('calls_by_tool', {})}")
    parts.append(f"  saved: {aggregate.get('saved_pct', 0.0)}%")
    parts.append(f"  annotated: {aggregate.get('annotated_sessions', 0)}")
    parts.append(f"  pending: {aggregate.get('pending_review_sessions', 0)}")
    parts.append(f"  parity: {aggregate.get('task_parity', {})}")
    parts.append(f"  stale ctx: {aggregate.get('stale_context_incidents', 0)}")
    return "\n".join(parts)
diff --git a/dhee/benchmarks/router_replay.py b/dhee/benchmarks/router_replay.py
index 395f541..0b06ce3 100644
--- a/dhee/benchmarks/router_replay.py
+++ b/dhee/benchmarks/router_replay.py
@@ -1,9 +1,9 @@
"""Router replay harness — projects token savings on real session transcripts.
-Reads Claude Code session JSONL files, finds each native `Read` / `Bash`
-/ `Task` tool_use + tool_result pair, and re-runs the corresponding
-router digest function to compute what the context would have held if
-the router had been active.
+Reads Claude Code or Codex session JSONL files, finds native `Read` /
+`Bash` / `Task` tool pairs, and re-runs the corresponding router digest
+function to compute what the context would have held if the router had
+been active.
This is *counterfactual projection*, not a live A/B. It answers the
question: given the tool calls the model actually made, how many tokens
@@ -17,12 +17,16 @@
- tool_result projected tokens (digest length via our renderer)
- absolute savings, % savings
- count of tool calls split by tool
+ - stale-context incidents and task parity when golden annotations
+ are present
- warnings when source file no longer exists / command can't be
replayed (we fall back to transcript length for those)
Usage:
python -m dhee.benchmarks.router_replay
[--sessions-dir ~/.claude/projects/]
+ [--harness claude_code|codex|all|auto]
+ [--golden golden_annotations.jsonl]
[--limit 5]
[--json]
"""
@@ -79,15 +83,48 @@ def saved_pct(self) -> float:
return (self.saved_tokens / self.raw_tokens) * 100.0
+def _parse_boolish(value: Any) -> bool | None:
+ if isinstance(value, bool):
+ return value
+ if value is None:
+ return None
+ text = str(value).strip().lower()
+ if text in {"pass", "passed", "success", "succeeded", "true", "yes", "1", "parity"}:
+ return True
+ if text in {"fail", "failed", "failure", "false", "no", "0", "regression"}:
+ return False
+ return None
+
+
+def _stale_incident_count(value: Any) -> int:
+ if value is None:
+ return 0
+ if isinstance(value, list):
+ return len(value)
+ if isinstance(value, dict):
+ return len(value) if "count" not in value else _stale_incident_count(value.get("count"))
+ try:
+ return max(0, int(value))
+ except (TypeError, ValueError):
+ return 0
+
+
@dataclass
class SessionReport:
session_id: str
+ harness: str = "claude_code"
total_calls: int = 0
calls_by_tool: Counter = field(default_factory=Counter)
raw_tokens: int = 0
digest_tokens: int = 0
saved_tokens: int = 0
warnings: list[str] = field(default_factory=list)
+ annotations_count: int = 0
+ stale_context_incidents: int = 0
+ task_parity: bool | None = None
+ task_parity_score: float | None = None
+ pending_review: bool = False
+ golden_notes: list[str] = field(default_factory=list)
# Ground-truth usage, read from each assistant record's `usage` field.
assistant_turns: int = 0
cache_read_input_tokens: int = 0
@@ -108,6 +145,40 @@ def add(self, p: CallProjection) -> None:
if p.note:
self.warnings.append(p.note)
    def apply_annotation(self, annotation: dict[str, Any]) -> None:
        """Merge one golden-annotation record into this session report.

        Recognised keys (aliases in parentheses): ``task_parity``
        (``parity``), ``review_status`` (``status``),
        ``task_parity_score`` (``parity_score``),
        ``stale_context_incidents`` (``stale_incidents``), and
        ``note`` (``notes``). Unparseable values are ignored rather
        than raising.
        """
        self.annotations_count += 1
        # Prefer the explicit "task_parity" key even if falsy; only fall
        # back to the "parity" alias when the key is absent entirely.
        parity_value = (
            annotation.get("task_parity")
            if "task_parity" in annotation
            else annotation.get("parity")
        )
        parity = _parse_boolish(parity_value)
        if parity is not None:
            self.task_parity = parity
        else:
            # No usable parity verdict — check whether the record marks
            # the session as still awaiting human review instead.
            review_text = str(
                annotation.get("review_status")
                or annotation.get("status")
                or parity_value
                or ""
            ).strip().lower()
            if review_text in {"needs_review", "pending_review", "pending", "review"}:
                self.pending_review = True
        score = annotation.get("task_parity_score", annotation.get("parity_score"))
        if score is not None:
            try:
                self.task_parity_score = float(score)
            except (TypeError, ValueError):
                # Non-numeric score: keep any previously-set value.
                pass
        # Stale-context incidents accumulate across annotations.
        self.stale_context_incidents += _stale_incident_count(
            annotation.get("stale_context_incidents", annotation.get("stale_incidents"))
        )
        note = annotation.get("note") or annotation.get("notes")
        if isinstance(note, list):
            # Drop falsy entries; stringify the rest.
            self.golden_notes.extend(str(item) for item in note if item)
        elif note:
            self.golden_notes.append(str(note))
+
@property
def net_saved(self) -> int:
return self.raw_tokens - self.digest_tokens
@@ -184,10 +255,10 @@ def _project_bash(tool_input: dict[str, Any], result_text: str) -> CallProjectio
raw_tokens = _tokens(raw)
d = _bash_digest.digest_bash(
cmd=cmd,
- exit_code=0,
+ exit_code=int(tool_input.get("exit_code", 0) or 0),
duration_ms=0,
stdout=raw,
- stderr="",
+ stderr=str(tool_input.get("stderr") or ""),
)
rendered = d.render("B-replayXXXX")
digest_tokens = _tokens(rendered)
@@ -222,7 +293,103 @@ def _project_agent(_tool_input: dict[str, Any], result_text: str) -> CallProject
}
-def replay_session(path: Path) -> SessionReport:
+def _jsonl_records(path: Path):
+ with path.open("r", encoding="utf-8") as f:
+ for line in f:
+ line = line.strip()
+ if not line:
+ continue
+ try:
+ yield json.loads(line)
+ except json.JSONDecodeError:
+ continue
+
+
+def _annotation_from_record(rec: dict[str, Any], *, session_id: str) -> dict[str, Any] | None:
+ rec_type = rec.get("type") or rec.get("format")
+ payload = rec.get("payload") if isinstance(rec.get("payload"), dict) else rec
+ if rec_type not in {
+ "dhee_golden",
+ "dhee_golden_replay",
+ "golden_annotation",
+ "golden_replay",
+ "dhee_replay_annotation",
+ } and not any(k in payload for k in ("task_parity", "parity", "stale_context_incidents", "stale_incidents")):
+ return None
+ ann = dict(payload)
+ ann.setdefault("session_id", session_id)
+ return ann
+
+
def load_golden_annotations(path: Path | None) -> dict[str, list[dict[str, Any]]]:
    """Index golden annotations by session id.

    *path* may be a single JSONL file or a directory (its top-level
    ``*.jsonl`` files are read in sorted order). Missing files are
    skipped; a falsy *path* yields an empty mapping.
    """
    if not path:
        return {}
    candidates: list[Path] = sorted(path.glob("*.jsonl")) if path.is_dir() else [path]
    grouped: dict[str, list[dict[str, Any]]] = {}
    for candidate in candidates:
        if not candidate.exists():
            continue
        for rec in _jsonl_records(candidate):
            fallback = str(rec.get("session_id") or candidate.stem)
            ann = _annotation_from_record(rec, session_id=fallback)
            if ann is None:
                continue
            sid = str(ann.get("session_id") or candidate.stem)
            grouped.setdefault(sid, []).append(ann)
    return grouped
+
+
def _detect_harness(path: Path) -> str:
    """Classify a transcript as "codex" or "claude_code" by sniffing records.

    The first record bearing codex response/event markers decides
    "codex"; the first record whose message content is a list decides
    "claude_code"; an inconclusive scan defaults to "claude_code".
    """
    for rec in _jsonl_records(path):
        payload = rec.get("payload")
        if not isinstance(payload, dict):
            payload = {}
        if rec.get("type") in {"response_item", "event_msg"} and payload.get("type"):
            return "codex"
        message = rec.get("message") or rec
        body = message.get("content") if isinstance(message, dict) else None
        if isinstance(body, list):
            return "claude_code"
    return "claude_code"
+
+
+def _command_text(value: Any) -> str:
+ if isinstance(value, list):
+ if len(value) >= 3 and value[-2] == "-lc":
+ return str(value[-1])
+ return " ".join(str(part) for part in value)
+ return str(value or "")
+
+
+def _loads_tool_args(value: Any) -> dict[str, Any]:
+ if isinstance(value, dict):
+ return dict(value)
+ if not isinstance(value, str) or not value.strip():
+ return {}
+ try:
+ data = json.loads(value)
+ except json.JSONDecodeError:
+ return {"command": value}
+ return data if isinstance(data, dict) else {}
+
+
def _normalise_codex_tool(name: str, args: dict[str, Any]) -> tuple[str, dict[str, Any]] | None:
    """Map a codex tool call onto a Claude-style ``(tool, input)`` pair.

    Shell-like tools become ``Bash`` with a normalised string ``command``;
    read-like tools become ``Read`` with a ``file_path``. Unrecognised
    tool names return None so the caller skips them.

    Fix: the original spread ``**args`` *after* the normalised keys, so a
    raw ``args["command"]`` (often an argv list like
    ``["bash", "-lc", "..."]``) clobbered the normalised string — in a
    dict display, later keys override earlier ones. Spread ``**args``
    first so the normalised values win.
    """
    lower = str(name or "").lower()
    if lower in {"exec_command", "shell", "bash"}:
        cmd = args.get("cmd") or args.get("command") or args.get("script") or ""
        return "Bash", {**args, "command": _command_text(cmd)}
    if lower in {"read_file", "read"}:
        path = args.get("file_path") or args.get("path") or ""
        return "Read", {**args, "file_path": path}
    return None
+
+
+def _replay_claude_session(
+ path: Path,
+ *,
+ annotations: dict[str, list[dict[str, Any]]] | None = None,
+) -> SessionReport:
"""Walk a transcript, pairing tool_use records with their tool_result.
Also collects ground-truth usage per assistant turn (cache-read,
@@ -231,151 +398,322 @@ def replay_session(path: Path) -> SessionReport:
share).
"""
pending: dict[str, dict[str, Any]] = {}
- report = SessionReport(session_id=path.stem)
+ report = SessionReport(session_id=path.stem, harness="claude_code")
+
+ for rec in _jsonl_records(path):
+ ann = _annotation_from_record(rec, session_id=path.stem)
+ if ann is not None:
+ report.apply_annotation(ann)
+ continue
+ rec_type = rec.get("type")
+ msg = rec.get("message") or rec
+ # Assistant usage (ground-truth API cache/output counts).
+ if rec_type == "assistant" and isinstance(msg, dict):
+ usage = msg.get("usage") or {}
+ if isinstance(usage, dict):
+ report.assistant_turns += 1
+ try:
+ report.cache_read_input_tokens += int(
+ usage.get("cache_read_input_tokens", 0) or 0
+ )
+ report.cache_creation_input_tokens += int(
+ usage.get("cache_creation_input_tokens", 0) or 0
+ )
+ report.output_tokens += int(usage.get("output_tokens", 0) or 0)
+ except (TypeError, ValueError):
+ pass
+ content = msg.get("content") if isinstance(msg, dict) else None
+ if not isinstance(content, list):
+ continue
+ for block in content:
+ if not isinstance(block, dict):
+ continue
+ btype = block.get("type")
+ if btype == "tool_use":
+ tid = block.get("id")
+ name = block.get("name") or ""
+ if tid and name in _PROJECTORS:
+ pending[tid] = {
+ "tool": name,
+ "input": block.get("input") or {},
+ }
+ elif btype == "tool_result":
+ tid = block.get("tool_use_id")
+ text = _flatten_result(block.get("content"))
+ # Every tool_result contributes to cache-replay load
+ # on subsequent turns, regardless of whether we
+ # project a digest for it. Track the raw token mass.
+ report.tool_result_tokens += _tokens(text)
+ if not tid or tid not in pending:
+ continue
+ entry = pending.pop(tid)
+ projector = _PROJECTORS[entry["tool"]]
+ try:
+ p = projector(entry["input"], text)
+ except Exception as exc: # noqa: BLE001
+ report.warnings.append(
+ f"{entry['tool']} projector failed: {type(exc).__name__}: {exc}"
+ )
+ continue
+ report.add(p)
+ for ann in (annotations or {}).get(path.stem, []):
+ report.apply_annotation(ann)
+ return report
- with path.open("r", encoding="utf-8") as f:
- for line in f:
- line = line.strip()
- if not line:
+
def _replay_codex_session(
    path: Path,
    *,
    annotations: dict[str, list[dict[str, Any]]] | None = None,
) -> SessionReport:
    """Replay a Codex CLI transcript, projecting router digests per tool call.

    Pairs ``function_call`` response items with their completion events
    (``exec_command_end`` / ``function_call_output`` / ``tool_result``),
    runs the matching projector, and folds golden annotations — both
    inline records in the stream and entries from *annotations* keyed by
    the session id (``path.stem``) — into the returned SessionReport.
    """
    # call_id -> {"tool": ..., "input": ...} awaiting its output event.
    pending: dict[str, dict[str, Any]] = {}
    report = SessionReport(session_id=path.stem, harness="codex")

    for rec in _jsonl_records(path):
        # Inline golden annotations ride along in the same JSONL stream.
        ann = _annotation_from_record(rec, session_id=path.stem)
        if ann is not None:
            report.apply_annotation(ann)
            continue
        payload = rec.get("payload") if isinstance(rec.get("payload"), dict) else {}
        ptype = payload.get("type")
        if rec.get("type") == "response_item" and ptype == "function_call":
            call_id = str(payload.get("call_id") or payload.get("id") or "")
            normalised = _normalise_codex_tool(
                str(payload.get("name") or ""),
                _loads_tool_args(payload.get("arguments")),
            )
            # Only track calls we know how to project (Bash / Read).
            if call_id and normalised:
                tool, tool_input = normalised
                pending[call_id] = {"tool": tool, "input": tool_input}
            continue
        if rec.get("type") == "event_msg" and ptype in {"exec_command_end", "function_call_output", "tool_result"}:
            call_id = str(payload.get("call_id") or payload.get("id") or payload.get("tool_call_id") or "")
            entry = pending.pop(call_id, None)
            if entry is None and ptype == "exec_command_end":
                # exec_command_end can arrive without a recorded
                # function_call; synthesise a Bash entry from the event.
                entry = {"tool": "Bash", "input": {"command": _command_text(payload.get("command"))}}
            if entry is None:
                continue
            # First non-empty output field wins.
            text = str(
                payload.get("aggregated_output")
                or payload.get("output")
                or payload.get("stdout")
                or payload.get("content")
                or ""
            )
            # Enrich the projected input with event-level facts.
            if payload.get("stderr"):
                entry["input"]["stderr"] = str(payload.get("stderr") or "")
            if payload.get("exit_code") is not None:
                entry["input"]["exit_code"] = payload.get("exit_code")
            if not entry["input"].get("command") and payload.get("command"):
                entry["input"]["command"] = _command_text(payload.get("command"))
            projector = _PROJECTORS.get(entry["tool"])
            if projector is None:
                continue
            try:
                p = projector(entry["input"], text)
            except Exception as exc:  # noqa: BLE001
                report.warnings.append(
                    f"{entry['tool']} projector failed: {type(exc).__name__}: {exc}"
                )
                continue
            report.add(p)
            # NOTE(review): unlike the Claude path, only *projected* results
            # contribute to tool_result_tokens here — confirm intended.
            report.tool_result_tokens += _tokens(text)
    # External golden annotations (e.g. from --golden) are applied last.
    for ann in (annotations or {}).get(path.stem, []):
        report.apply_annotation(ann)
    return report
def replay_session(
    path: Path,
    *,
    harness: str = "auto",
    annotations: dict[str, list[dict[str, Any]]] | None = None,
) -> SessionReport:
    """Replay one transcript with the parser matching its harness.

    ``harness="auto"`` sniffs the transcript; any codex alias routes to
    the codex parser, everything else to the Claude Code parser.
    """
    kind = _detect_harness(path) if harness == "auto" else harness
    codex_aliases = {"codex", "codex_cli"}
    if kind in codex_aliases:
        return _replay_codex_session(path, annotations=annotations)
    return _replay_claude_session(path, annotations=annotations)
+
+
def _default_sessions_dir() -> Path:
# Current project's Claude Code session dir. The one the user is in.
cwd_slug = "-" + str(Path.cwd()).replace("/", "-").lstrip("-")
return Path.home() / ".claude" / "projects" / cwd_slug
+def _default_codex_sessions_dir() -> Path:
+ return Path.home() / ".codex" / "sessions"
+
+
def discover_transcripts(
    *,
    sessions_dir: Path | None = None,
    harness: str = "claude_code",
    limit: int = 0,
) -> list[Path]:
    """List transcript files newest-first, optionally capped at *limit*.

    An explicit *sessions_dir* wins (directory → recursive ``*.jsonl``
    search; file → itself). Otherwise the harness selects the default
    Claude Code and/or Codex roots; missing roots yield no transcripts.
    """

    def _mtime(p: Path) -> float:
        return p.stat().st_mtime

    if sessions_dir is not None:
        candidates = list(sessions_dir.rglob("*.jsonl")) if sessions_dir.is_dir() else [sessions_dir]
        # A nonexistent explicit path must not crash — sort it as oldest.
        found = sorted(candidates, key=lambda p: _mtime(p) if p.exists() else 0, reverse=True)
    elif harness in {"codex", "codex_cli"}:
        root = _default_codex_sessions_dir()
        found = sorted(root.rglob("*.jsonl"), key=_mtime, reverse=True) if root.exists() else []
    elif harness == "all":
        collected: list[Path] = []
        for root in (_default_sessions_dir(), _default_codex_sessions_dir()):
            if root.exists():
                collected.extend(root.rglob("*.jsonl"))
        found = sorted(collected, key=_mtime, reverse=True)
    else:
        root = _default_sessions_dir()
        found = sorted(root.glob("*.jsonl"), key=_mtime, reverse=True) if root.exists() else []
    return found[:limit] if limit else found
+
+
def aggregate_reports(reports: list[SessionReport]) -> dict[str, Any]:
    """Fold per-session replay reports into a single corpus-level summary.

    Token totals, per-tool/per-harness counts, annotation/review tallies,
    and a task-parity breakdown are accumulated across *reports*. All
    ratios guard their denominators so an empty corpus yields zeros.
    """
    tool_counts: Counter = Counter()
    harness_counts: Counter = Counter()
    raw_total = 0
    digest_total = 0
    call_total = 0
    turn_total = 0
    cache_read_total = 0
    cache_creation_total = 0
    result_token_total = 0
    warning_total = 0
    stale_total = 0
    annotated_total = 0
    pending_total = 0
    pass_count = fail_count = unknown_count = 0
    scores: list[float] = []
    for report in reports:
        call_total += report.total_calls
        tool_counts.update(report.calls_by_tool)
        harness_counts[report.harness] += 1
        raw_total += report.raw_tokens
        digest_total += report.digest_tokens
        warning_total += len(report.warnings)
        turn_total += report.assistant_turns
        cache_read_total += report.cache_read_input_tokens
        cache_creation_total += report.cache_creation_input_tokens
        result_token_total += report.tool_result_tokens
        stale_total += report.stale_context_incidents
        if report.annotations_count:
            annotated_total += 1
        if report.pending_review:
            pending_total += 1
        # Tri-state parity: True / False / None (unknown).
        if report.task_parity is True:
            pass_count += 1
        elif report.task_parity is False:
            fail_count += 1
        else:
            unknown_count += 1
        if report.task_parity_score is not None:
            scores.append(report.task_parity_score)
    net = raw_total - digest_total
    avg_score = round(sum(scores) / len(scores), 3) if scores else None
    return {
        "sessions": len(reports),
        "sessions_by_harness": dict(harness_counts),
        "annotated_sessions": annotated_total,
        "pending_review_sessions": pending_total,
        "assistant_turns": turn_total,
        "total_calls": call_total,
        "calls_by_tool": dict(tool_counts),
        "raw_tokens": raw_total,
        "digest_tokens": digest_total,
        "net_saved_tokens": net,
        "saved_pct": round(net / raw_total * 100, 2) if raw_total else 0.0,
        "cache_read_tokens_total": cache_read_total,
        "cache_creation_tokens_total": cache_creation_total,
        "cache_read_per_turn": int(cache_read_total / turn_total) if turn_total else 0,
        "projected_cache_read_per_turn": int((cache_read_total - net) / turn_total) if turn_total and cache_read_total else 0,
        "tool_result_tokens": result_token_total,
        "tool_result_share": round(result_token_total / cache_read_total, 3) if cache_read_total else 0.0,
        "warnings_count": warning_total,
        "stale_context_incidents": stale_total,
        "task_parity": {
            "pass": pass_count,
            "fail": fail_count,
            "unknown": unknown_count,
            "avg_score": avg_score,
            "score_count": len(scores),
        },
    }
+
+
def main(argv: list[str] | None = None) -> int:
    """CLI entry point: replay transcripts and print (or JSON-dump) savings.

    Returns 0 on success, 2 when an explicitly-given --sessions-dir does
    not exist.
    """
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument("--sessions-dir", type=Path, default=None)
    ap.add_argument("--harness", choices=["claude_code", "codex", "all", "auto"], default="claude_code")
    ap.add_argument("--golden", type=Path, default=None, help="JSONL file or directory with golden replay annotations")
    ap.add_argument("--limit", type=int, default=0, help="Only replay N most-recent sessions")
    ap.add_argument("--json", action="store_true")
    args = ap.parse_args(argv)
    transcripts = discover_transcripts(
        sessions_dir=args.sessions_dir,
        harness=args.harness,
        limit=args.limit,
    )
    # Only an explicitly-provided directory is an error when missing;
    # absent default roots simply produce zero transcripts.
    if args.sessions_dir and not args.sessions_dir.exists():
        print(f"sessions dir not found: {args.sessions_dir}", file=sys.stderr)
        return 2
    annotations = load_golden_annotations(args.golden)
    # "all" mixes harnesses, so per-file auto-detection is required there.
    reports = [replay_session(p, harness=args.harness if args.harness != "all" else "auto", annotations=annotations) for p in transcripts]
    aggregate = aggregate_reports(reports)
    if args.json:
        out = {
            "sessions": [
                {
                    "session_id": r.session_id,
                    "harness": r.harness,
                    "total_calls": r.total_calls,
                    "calls_by_tool": dict(r.calls_by_tool),
                    "raw_tokens": r.raw_tokens,
                    "digest_tokens": r.digest_tokens,
                    "saved_tokens": r.net_saved,
                    "saved_pct": round(r.saved_pct, 2),
                    "annotations_count": r.annotations_count,
                    "stale_context_incidents": r.stale_context_incidents,
                    "task_parity": r.task_parity,
                    "task_parity_score": r.task_parity_score,
                    "pending_review": r.pending_review,
                    "warnings_count": len(r.warnings),
                }
                for r in reports
            ],
            "aggregate": aggregate,
        }
        print(json.dumps(out, indent=2))
        return 0
    # Pretty
    print(f"Harness: {args.harness}")
    print(f"Transcripts: {len(transcripts)}")
    if args.sessions_dir:
        print(f"Sessions dir: {args.sessions_dir}")
    if args.golden:
        print(f"Golden annotations: {args.golden}")
    print(f"Sessions replayed: {len(reports)}")
    print("")
    # Fixed-width table header; widths match the row format below.
    print(f"{'session':<14} {'harness':<11} {'calls':>6} {'raw_tok':>10} {'digest_tok':>11} {'saved':>10} {'save%':>7} {'stale':>5} {'parity':>7}")
    for r in reports:
        parity = "pass" if r.task_parity is True else ("fail" if r.task_parity is False else "-")
        print(
            f"{r.session_id[:14]:<14} {r.harness:<11} {r.total_calls:>6} {r.raw_tokens:>10,} "
            f"{r.digest_tokens:>11,} {r.net_saved:>10,} {r.saved_pct:>6.1f}% "
            f"{r.stale_context_incidents:>5} {parity:>7}"
        )
    print("")
    print("Aggregate (net = raw - digest):")
    print(f" harnesses: {aggregate['sessions_by_harness']}")
    print(f" calls: {aggregate['total_calls']}")
    print(f" by tool: {aggregate['calls_by_tool']}")
    print(f" raw tokens: {aggregate['raw_tokens']:,}")
    print(f" digest: {aggregate['digest_tokens']:,}")
    print(f" net saved: {aggregate['net_saved_tokens']:,} ({aggregate['saved_pct']:.1f}%)")
    print(f" stale ctx: {aggregate['stale_context_incidents']}")
    print(f" parity: {aggregate['task_parity']}")
    return 0
diff --git a/dhee/cli.py b/dhee/cli.py
index 2ebfd8f..1b65661 100644
--- a/dhee/cli.py
+++ b/dhee/cli.py
@@ -13,6 +13,7 @@
dhee why Explain why a memory or artifact exists
dhee handoff Emit structured resume JSON for a new harness/agent
dhee harness status Show native Claude Code / Codex integration state
+ dhee demo token-router Show how Dhee keeps raw tool output behind pointers
dhee benchmark Run performance benchmarks
dhee status Version, config, DB info
"""
@@ -86,8 +87,6 @@ def cmd_setup(args: argparse.Namespace) -> None:
def cmd_shell(args: argparse.Namespace) -> None:
"""Run a DheeFS virtual shell command."""
- from dhee.fs import ContextWorkspace
-
shell_parts = list(getattr(args, "shell_command", []) or [])
# ``argparse.REMAINDER`` intentionally lets users pass commands like
# ``grep "two words" /state`` intact, but it also captures options
@@ -115,24 +114,42 @@ def cmd_shell(args: argparse.Namespace) -> None:
repo = getattr(args, "repo", None)
if repo is None:
repo = os.getcwd()
- workspace = ContextWorkspace(
+ workspace_id = getattr(args, "workspace_id", None) or os.path.abspath(os.path.expanduser(repo))
+ user_id = getattr(args, "user_id", "default")
+ agent_id = getattr(args, "agent_id", None) or "cli"
+ from dhee import runtime
+
+ result_dict = runtime.execute_shell(
+ command,
repo=repo,
- user_id=getattr(args, "user_id", "default"),
- agent_id=getattr(args, "agent_id", None) or "cli",
- db=_get_db(),
- workspace_id=getattr(args, "workspace_id", None) or os.path.abspath(os.path.expanduser(repo)),
+ user_id=user_id,
+ agent_id=agent_id,
+ workspace_id=workspace_id,
)
- result = workspace.execute(command)
+ if result_dict is None:
+ from dhee.fs import ContextWorkspace
+
+ workspace = ContextWorkspace(
+ repo=repo,
+ user_id=user_id,
+ agent_id=agent_id,
+ db=_get_db(),
+ workspace_id=workspace_id,
+ )
+ result_dict = workspace.execute(command).to_dict()
if getattr(args, "json", False):
- _json_out(result.to_dict())
+ _json_out(result_dict)
else:
- stream = sys.stderr if result.exit_code else sys.stdout
- if result.stdout:
- print(result.stdout, file=stream)
- elif result.stderr:
- print(result.stderr, file=sys.stderr)
- if result.exit_code:
- sys.exit(result.exit_code)
+ exit_code = int(result_dict.get("exit_code", 0) or 0)
+ stdout = str(result_dict.get("stdout") or "")
+ stderr = str(result_dict.get("stderr") or "")
+ stream = sys.stderr if exit_code else sys.stdout
+ if stdout:
+ print(stdout, file=stream)
+ elif stderr:
+ print(stderr, file=sys.stderr)
+ if int(result_dict.get("exit_code", 0) or 0):
+ sys.exit(int(result_dict.get("exit_code", 1) or 1))
# ---------------------------------------------------------------------------
@@ -387,17 +404,33 @@ def cmd_context(args: argparse.Namespace) -> None:
compiled_actions = {"status", "state", "checkpoint", "rollover", "provision", "debt"}
action = args.context_action or "list"
if action in compiled_actions:
+ from dhee import runtime
from dhee.context_state import ContextStateStore
repo = args.repo or os.getcwd()
- store = ContextStateStore(
- repo=repo,
- workspace_id=os.path.abspath(os.path.expanduser(repo)),
- user_id=getattr(args, "user_id", "default"),
- agent_id="cli",
- )
+ workspace_id = os.path.abspath(os.path.expanduser(repo))
+ user_id = getattr(args, "user_id", "default")
+
+ def _runtime_context(extra: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
+ return runtime.execute_context(
+ action,
+ repo=repo,
+ workspace_id=workspace_id,
+ user_id=user_id,
+ agent_id="cli",
+ args=extra or {},
+ )
+
+ def _store() -> ContextStateStore:
+ return ContextStateStore(
+ repo=repo,
+ workspace_id=workspace_id,
+ user_id=user_id,
+ agent_id="cli",
+ )
+
if action == "status":
- data = store.status()
+ data = _runtime_context() or _store().status()
if args.json:
_json_out(data)
return
@@ -409,11 +442,17 @@ def cmd_context(args: argparse.Namespace) -> None:
print(f" rollover required {'yes' if data['rollover_required'] else 'no'}")
return
if action == "state":
- text = store.render_state_card() if getattr(args, "card", False) else store.render_markdown()
- print(text)
+ fmt = "card" if getattr(args, "card", False) else "markdown"
+ data = _runtime_context({"format": fmt})
+ if data is not None:
+ print(data.get("text") or "")
+ return
+ store = _store()
+ print(store.render_state_card() if getattr(args, "card", False) else store.render_markdown())
return
if action == "debt":
- data = store.debt_summary(top=bool(getattr(args, "top", False)))
+ show_top = bool(getattr(args, "top", False))
+ data = _runtime_context({"top": show_top}) or _store().debt_summary(top=show_top)
if args.json:
_json_out(data)
return
@@ -426,7 +465,8 @@ def cmd_context(args: argparse.Namespace) -> None:
print(f" {_compact_int(row.get('tokens') or 0)} {row.get('kind') or ''} {row.get('source') or ''} - {row.get('reason') or ''}")
return
if action == "checkpoint":
- data = store.checkpoint(reason=getattr(args, "reason", None) or "manual")
+ reason = getattr(args, "reason", None) or "manual"
+ data = _runtime_context({"reason": reason}) or _store().checkpoint(reason=reason)
if args.json:
_json_out(data)
return
@@ -434,7 +474,8 @@ def cmd_context(args: argparse.Namespace) -> None:
print(data["state_card"])
return
if action == "rollover":
- data = store.rollover(reason=getattr(args, "reason", None) or "manual rollover")
+ reason = getattr(args, "reason", None) or "manual rollover"
+ data = _runtime_context({"reason": reason}) or _store().rollover(reason=reason)
if args.json:
_json_out(data)
return
@@ -444,7 +485,7 @@ def cmd_context(args: argparse.Namespace) -> None:
return
if action == "provision":
task = args.entry_id or ""
- data = store.provision(task)
+ data = _runtime_context({"task": task}) or _store().provision(task)
if args.json:
_json_out(data)
return
@@ -745,6 +786,7 @@ def cmd_export(args: argparse.Namespace) -> None:
output_path=output,
user_id=getattr(args, "user_id", "default"),
key_dir=CONFIG_DIR,
+ repo=getattr(args, "repo", None) or os.getcwd(),
)
finally:
vector_store.close()
@@ -755,7 +797,8 @@ def cmd_export(args: argparse.Namespace) -> None:
print(
f"Exported {counts.get('memories', 0)} memories, "
f"{counts.get('vectors', 0)} vector nodes, "
- f"and {counts.get('artifacts_manifest', 0)} artifacts to {output}"
+ f"{counts.get('artifacts_manifest', 0)} artifacts, "
+ f"and {counts.get('repo_context_entries', 0)} repo-context entries to {output}"
)
return
@@ -790,17 +833,20 @@ def cmd_import(args: argparse.Namespace) -> None:
input_path=args.file,
user_id=args.user_id,
strategy=args.strategy,
+ repo=getattr(args, "repo", None) or os.getcwd(),
)
finally:
vector_store.close()
if args.json:
_json_out(result)
else:
+ bootstrap = result.get("handoff_bootstrap") or {}
if args.strategy == "dry-run":
print(
f"Pack preview: {result.get('memories', 0)} memories, "
f"{result.get('vectors', 0)} vectors, "
f"{result.get('artifacts', 0)} artifacts "
+ f"and {(result.get('repo_context') or {}).get('records', 0)} repo-context entries "
f"({result.get('existing_ids', 0)} existing IDs, "
f"{result.get('existing_hashes', 0)} existing hashes)."
)
@@ -810,7 +856,14 @@ def cmd_import(args: argparse.Namespace) -> None:
print(
f"Imported {mem_stats.get('imported', 0)} memories, "
f"{result.get('vectors_imported', 0)} vector nodes, "
- f"and {art_stats.get('artifacts', 0)} artifacts."
+ f"{art_stats.get('artifacts', 0)} artifacts, "
+ f"and {(result.get('repo_context_import') or {}).get('imported', 0)} repo-context entries."
+ )
+ if bootstrap:
+ last = bootstrap.get("last_session_id") or "none"
+ print(
+ f"Handoff bootstrap: {bootstrap.get('continuity_source') or 'unknown'} "
+ f"(last session: {last}, artifacts: {bootstrap.get('recent_artifacts', 0)})."
)
return
@@ -1173,6 +1226,20 @@ def cmd_checkpoint(args: argparse.Namespace) -> None:
print(f" Intention stored: {result['intention_stored'][:60]}")
def cmd_demo(args: argparse.Namespace) -> None:
    """Run built-in demos that explain Dhee's context-governance wedge."""
    action = getattr(args, "demo_action", None) or "token-router"
    if action != "token-router":
        raise ValueError(f"Unknown demo action: {action}")
    from dhee.demo import format_token_router_demo, token_router_demo

    demo_report = token_router_demo()
    if getattr(args, "json", False):
        _json_out(demo_report)
        return
    show_digests = not getattr(args, "no_digests", False)
    print(format_token_router_demo(demo_report, show_digests=show_digests))
+
+
def cmd_status(args: argparse.Namespace) -> None:
"""Show version, config, DB size, detected agents, and brain health.
@@ -1319,18 +1386,122 @@ def cmd_doctor(args: argparse.Namespace) -> None:
sys.stdout.write(run(as_json=bool(getattr(args, "json", False))))
def cmd_runtime(args: argparse.Namespace) -> None:
    """Inspect or manage the local Dhee runtime daemon.

    Supported actions: ``status`` (default), ``restart``, ``stop``, and
    ``doctor`` (a short-timeout status probe plus diagnostic notes).
    Raises ValueError for any other action. Output is JSON when
    ``--json`` is set, otherwise a formatted status report.
    """
    from dhee import runtime

    action = getattr(args, "runtime_action", None) or "status"
    if action == "status":
        result = runtime.status()
    elif action == "restart":
        result = runtime.restart_daemon()
    elif action == "stop":
        result = runtime.stop_daemon()
    elif action == "doctor":
        result = runtime.status(timeout=0.75)
        # Robustness fix: tolerate a status payload without a "daemon"
        # key instead of raising KeyError mid-diagnosis.
        daemon_info = result.get("daemon") or {}
        result["doctor"] = {
            "daemon_health": "ok" if daemon_info.get("running") else "stopped",
            "hot_path_note": (
                "The local daemon is the stable process boundary for future MCP/CLI/router "
                "hot-path reuse. Current clients still work without it."
            ),
        }
    else:
        raise ValueError(f"Unknown runtime action: {action}")

    if getattr(args, "json", False):
        _json_out(result)
        return
    if action in {"restart", "stop"}:
        # restart/stop wrap the status payload; unwrap before formatting.
        status = result.get("status") or result.get("started", {}).get("status") or {}
        print(runtime.format_status(status))
        return
    print(runtime.format_status(result))
+
+
def cmd_uninstall(args: argparse.Namespace) -> None:
    """Cleanly stop Dhee and remove managed install artifacts."""
    from dhee.cli_config import CONFIG_DIR
    from dhee.install_cleanup import cleanup_install_artifacts
    from dhee import runtime

    # Stop the daemon first so nothing recreates state mid-removal.
    runtime_result = runtime.stop_daemon()
    if not os.path.exists(CONFIG_DIR):
        # Data dir already gone: still sweep harness wiring and installer
        # leftovers (symlinks, shell-profile edits) from earlier installs.
        harness_result = _disable_harnesses_for_uninstall()
        artifact_result = cleanup_install_artifacts(CONFIG_DIR)
        if os.path.exists(CONFIG_DIR):
            # Defensive re-check: the cleanup calls above may have
            # (re)created the directory — TODO confirm against
            # cleanup_install_artifacts / disable_harnesses behavior.
            shutil.rmtree(CONFIG_DIR)
        result = {
            "removed": False,
            "path": CONFIG_DIR,
            "reason": "missing",
            "runtime": runtime_result,
            "harnesses": harness_result,
            "install_artifacts": artifact_result,
        }
        if getattr(args, "json", False):
            _json_out(result)
        else:
            # "cleaned" is true when any sweep actually changed state.
            cleaned = bool(
                artifact_result["symlinks"]["removed"]
                or artifact_result["shell_profiles"]["changed"]
                or any((item or {}).get("changed") for item in harness_result.values())
            )
            print("Removed leftover installer artifacts." if cleaned else "Nothing to remove.")
        return

    # --yes/-y skips the interactive prompt (for scripted uninstalls).
    confirm = "yes" if getattr(args, "yes", False) else input(f"Remove {CONFIG_DIR}? [y/N]: ").strip().lower()
    if confirm in ("y", "yes"):
        harness_result = _disable_harnesses_for_uninstall()
        artifact_result = cleanup_install_artifacts(CONFIG_DIR)
        if os.path.exists(CONFIG_DIR):
            shutil.rmtree(CONFIG_DIR)
        result = {
            "removed": True,
            "path": CONFIG_DIR,
            "runtime": runtime_result,
            "harnesses": harness_result,
            "install_artifacts": artifact_result,
        }
        if getattr(args, "json", False):
            _json_out(result)
        else:
            print(f"Stopped runtime: {runtime_result.get('stopped', False)}")
            print(f"Disabled harnesses: {', '.join(sorted(harness_result))}")
            removed_links = artifact_result["symlinks"]["removed"]
            changed_profiles = artifact_result["shell_profiles"]["changed"]
            if removed_links:
                print(f"Removed managed symlinks: {len(removed_links)}")
            if changed_profiles:
                print(f"Cleaned shell profiles: {len(changed_profiles)}")
            print(f"Removed {CONFIG_DIR}")
    else:
        # NOTE(review): the daemon was already stopped before the prompt, so
        # cancelling does not restart it — confirm this is intended UX.
        if getattr(args, "json", False):
            _json_out({"removed": False, "reason": "cancelled", "runtime": runtime_result})
        else:
            print("Cancelled.")
+
+
def _disable_harnesses_for_uninstall() -> Dict[str, Any]:
    """Best-effort removal of global/native harness wiring."""
    from dataclasses import asdict

    from dhee.harness.install import disable_harnesses

    outcomes: Dict[str, Any] = {}
    for name in ("claude_code", "codex", "hermes", "gstack", "cursor"):
        try:
            for key, entry in disable_harnesses(harness=name).items():
                outcomes[key] = asdict(entry)
        except Exception as exc:
            # Uninstall keeps going even when one harness cannot be unwired;
            # record the failure so the caller can surface it.
            outcomes[name] = {
                "harness": name,
                "action": "error",
                "changed": False,
                "details": {"error": str(exc)},
            }
    return outcomes
def cmd_task(args: argparse.Namespace) -> None:
@@ -2065,6 +2236,80 @@ def cmd_router(args: argparse.Namespace) -> None:
print(router_stats.format_human(computed))
return
+ if action == "harvest":
+ from pathlib import Path as _Path
+
+ from dhee.benchmarks.replay_corpus import format_harvest_human, harvest_corpus
+
+ output_dir = getattr(args, "output_dir", None)
+ if not output_dir:
+ output_dir = str(_Path.home() / ".dhee" / "replay_corpus" / "redacted")
+ result = harvest_corpus(
+ sessions_dir=_Path(getattr(args, "sessions_dir", "")) if getattr(args, "sessions_dir", None) else None,
+ output_dir=_Path(output_dir),
+ harness=getattr(args, "harness", "all") or "all",
+ limit=getattr(args, "limit", 0) or 0,
+ min_calls=getattr(args, "min_calls", 1) or 1,
+ max_output_chars=getattr(args, "max_output_chars", 50_000) or 50_000,
+ golden_output=_Path(getattr(args, "golden_output", "")) if getattr(args, "golden_output", None) else None,
+ manifest_output=_Path(getattr(args, "manifest_output", "")) if getattr(args, "manifest_output", None) else None,
+ )
+ if args.json:
+ _json_out(result)
+ return
+ print(format_harvest_human(result))
+ return
+
+ if action == "corpus":
+ from pathlib import Path as _Path
+
+ from dhee.benchmarks.replay_corpus import format_inspect_human, inspect_corpus
+
+ sessions_dir = getattr(args, "sessions_dir", None)
+ if not sessions_dir:
+ sessions_dir = str(_Path.home() / ".dhee" / "replay_corpus" / "redacted" / "sessions")
+ result = inspect_corpus(
+ sessions_dir=_Path(sessions_dir),
+ harness=getattr(args, "harness", "all") or "all",
+ golden_path=_Path(getattr(args, "golden", "")) if getattr(args, "golden", None) else None,
+ limit=getattr(args, "limit", 0) or 0,
+ )
+ if args.json:
+ _json_out(result)
+ return
+ print(format_inspect_human(result))
+ return
+
+ if action == "annotate":
+ from pathlib import Path as _Path
+
+ from dhee.benchmarks.replay_corpus import upsert_golden_annotation
+
+ if not getattr(args, "golden", None):
+ raise ValueError("dhee router annotate requires --golden")
+ if not getattr(args, "session_id", None):
+ raise ValueError("dhee router annotate requires --session-id")
+ if not getattr(args, "task_parity", None):
+ raise ValueError("dhee router annotate requires --task-parity")
+ result = upsert_golden_annotation(
+ golden_path=_Path(getattr(args, "golden")),
+ session_id=str(getattr(args, "session_id")),
+ task_parity=str(getattr(args, "task_parity")),
+ task_parity_score=getattr(args, "task_parity_score", None),
+ stale_context_incidents=getattr(args, "stale_context_incidents", None),
+ note=getattr(args, "note", None),
+ )
+ if args.json:
+ _json_out(result)
+ return
+ annotation = result["annotation"]
+ print(
+ f" {result['action']} golden annotation for {result['session_id']} "
+ f"({annotation['task_parity']}, stale={annotation['stale_context_incidents']})"
+ )
+ print(f" golden → {result['golden_path']}")
+ return
+
if action == "tune":
from dhee.router import policy as _policy
from dhee.router import tune as _tune
@@ -2094,19 +2339,40 @@ def cmd_router(args: argparse.Namespace) -> None:
print(_tune.format_human(report))
return
- if action == "report":
+ if action in {"report", "gate"}:
from dhee.router import quality_report
+ from pathlib import Path as _Path
report = quality_report.build_report(
limit=getattr(args, "limit", 0) or 0,
+ sessions_dir=_Path(getattr(args, "sessions_dir", "")) if getattr(args, "sessions_dir", None) else None,
+ harness=getattr(args, "harness", "claude_code") or "claude_code",
+ golden_path=_Path(getattr(args, "golden", "")) if getattr(args, "golden", None) else None,
)
out_path = quality_report.save_report(report)
+ if action == "gate":
+ gate = quality_report.gate_summary(
+ report,
+ allow_insufficient=bool(getattr(args, "allow_insufficient", False)),
+ )
+ if args.json:
+ _json_out({"gate": gate, "quality_gates": report.quality_gates, "report_path": str(out_path)})
+ else:
+ print(quality_report.format_human(report))
+ print("")
+ print(f" replay gate: {'PASS' if gate['ok'] else 'FAIL'}")
+ if gate["failed_gates"]:
+ print(f" failed gates: {', '.join(gate['failed_gates'])}")
+ if gate["pending_gates"]:
+ print(f" pending gates: {', '.join(gate['pending_gates'])}")
+ print(f" report saved → {out_path}")
+ if not gate["ok"]:
+ sys.exit(1)
+ return
if args.json:
_json_out(report.to_dict())
return
if getattr(args, "share", False):
- from pathlib import Path as _Path
-
share_md = quality_report.format_share(report)
share_path = _Path.home() / ".dhee" / "session_quality_report.md"
share_path.parent.mkdir(parents=True, exist_ok=True)
@@ -2121,7 +2387,7 @@ def cmd_router(args: argparse.Namespace) -> None:
return
# default: print subcommand help
- print("Usage: dhee router {enable|disable|status|stats|enforce|report}")
+ print("Usage: dhee router {enable|disable|status|stats|enforce|report|gate|harvest|corpus|annotate}")
def cmd_benchmark(args: argparse.Namespace) -> None:
@@ -2253,6 +2519,18 @@ def build_parser() -> argparse.ArgumentParser:
p_cp.add_argument("--user-id", default="default", help="User ID")
p_cp.add_argument("--json", action="store_true", help="JSON output")
+ # demo
+ p_demo = sub.add_parser("demo", help="Run product demos that prove Dhee's context firewall")
+ p_demo.add_argument(
+ "demo_action",
+ nargs="?",
+ choices=["token-router"],
+ default="token-router",
+ help="Demo to run",
+ )
+ p_demo.add_argument("--no-digests", action="store_true", help="Hide digest previews")
+ p_demo.add_argument("--json", action="store_true", help="JSON output")
+
# list
p_list = sub.add_parser("list", help="List all memories")
p_list.add_argument("--user-id", default="default", help="User ID")
@@ -2278,6 +2556,7 @@ def build_parser() -> argparse.ArgumentParser:
p_export = sub.add_parser("export", help="Export memories to JSON or .dheemem")
p_export.add_argument("--output", "-o", help="Output file path")
p_export.add_argument("--user-id", default="default", help="User ID")
+ p_export.add_argument("--repo", help="Repo whose .dhee/context should be included (default: cwd)")
p_export.add_argument(
"--format",
choices=["json", "dheemem"],
@@ -2290,6 +2569,7 @@ def build_parser() -> argparse.ArgumentParser:
p_import = sub.add_parser("import", help="Import memories from JSON or .dheemem")
p_import.add_argument("file", help="JSON or .dheemem file to import")
p_import.add_argument("--user-id", default="default", help="User ID")
+ p_import.add_argument("--repo", help="Repo where signed repo context should be restored (default: cwd)")
p_import.add_argument(
"--format",
choices=["json", "dheemem"],
@@ -2487,6 +2767,20 @@ def build_parser() -> argparse.ArgumentParser:
)
p_doctor.add_argument("--json", action="store_true", help="JSON output")
+ # runtime — managed venv + local daemon clarity
+ p_runtime = sub.add_parser(
+ "runtime",
+ help="Inspect or manage the local Dhee runtime daemon",
+ )
+ p_runtime.add_argument(
+ "runtime_action",
+ nargs="?",
+ choices=["status", "restart", "stop", "doctor"],
+ default="status",
+ help="Subcommand",
+ )
+ p_runtime.add_argument("--json", action="store_true", help="JSON output")
+
# task
p_task = sub.add_parser("task", help="Start Claude Code with Dhee cognition")
p_task.add_argument("description", nargs="?", default="", help="Task description")
@@ -2715,7 +3009,7 @@ def build_parser() -> argparse.ArgumentParser:
p_router.add_argument(
"router_action",
nargs="?",
- choices=["enable", "disable", "status", "stats", "enforce", "report", "tune"],
+ choices=["enable", "disable", "status", "stats", "enforce", "report", "gate", "tune", "harvest", "corpus", "annotate"],
help="Subcommand",
)
p_router.add_argument(
@@ -2724,7 +3018,21 @@ def build_parser() -> argparse.ArgumentParser:
choices=["on", "off", "apply", "clear"],
help="For `router enforce`: on|off | For `router tune`: apply|clear (omit to dry-run)",
)
- p_router.add_argument("--limit", type=int, default=0, help="For `router report`: replay only N most-recent sessions (0 = all)")
+ p_router.add_argument("--limit", type=int, default=0, help="For `router report|harvest|corpus`: replay only N most-recent sessions (0 = all)")
+ p_router.add_argument("--sessions-dir", help="For `router report|harvest|corpus`: transcript directory or JSONL file")
+ p_router.add_argument("--harness", choices=["claude_code", "codex", "all", "auto"], default=None, help="For `router report|harvest|corpus`: transcript harness")
+ p_router.add_argument("--golden", help="For `router report|corpus|annotate`: JSONL file or directory with golden replay annotations")
+ p_router.add_argument("--output-dir", help="For `router harvest`: redacted corpus destination directory")
+ p_router.add_argument("--golden-output", help="For `router harvest`: write pending-review golden annotations here")
+ p_router.add_argument("--manifest-output", help="For `router harvest`: write corpus manifest here")
+ p_router.add_argument("--min-calls", type=int, default=1, help="For `router harvest`: require at least N replayable calls per session")
+ p_router.add_argument("--max-output-chars", type=int, default=50_000, help="For `router harvest`: max redacted output chars per tool result")
+ p_router.add_argument("--session-id", help="For `router annotate`: session id to annotate")
+ p_router.add_argument("--task-parity", choices=["pass", "fail", "needs_review"], help="For `router annotate`: reviewed task parity")
+ p_router.add_argument("--task-parity-score", type=float, help="For `router annotate`: parity score, normally 0.0-1.0")
+ p_router.add_argument("--stale-context-incidents", type=int, help="For `router annotate`: count of stale-context incidents")
+ p_router.add_argument("--note", help="For `router annotate`: short review note")
+ p_router.add_argument("--allow-insufficient", action="store_true", help="For `router gate`: pass when only pending gates remain")
p_router.add_argument("--share", action="store_true", help="For `router report`: emit customer-shareable redacted Markdown")
p_router.add_argument("--json", action="store_true", help="JSON output")
@@ -2734,12 +3042,17 @@ def build_parser() -> argparse.ArgumentParser:
help="Extended session-quality report: cache-read/turn, expansion rate, tool_result share, projected savings",
)
p_qr.add_argument("--limit", type=int, default=0, help="Replay only N most-recent sessions (0 = all)")
+ p_qr.add_argument("--sessions-dir", help="Transcript directory or JSONL file")
+ p_qr.add_argument("--harness", choices=["claude_code", "codex", "all", "auto"], default="claude_code", help="Transcript harness")
+ p_qr.add_argument("--golden", help="JSONL file or directory with golden replay annotations")
p_qr.add_argument("--share", action="store_true", help="Emit customer-shareable redacted Markdown")
p_qr.add_argument("--json", action="store_true", help="JSON output")
# uninstall
- sub.add_parser("uninstall", help="Remove ~/.dhee directory")
+ p_uninstall = sub.add_parser("uninstall", help="Stop Dhee and remove managed install artifacts")
+ p_uninstall.add_argument("--yes", "-y", action="store_true", help="Do not prompt")
+ p_uninstall.add_argument("--json", action="store_true", help="JSON output")
# onboard — interactive provider + key wizard (called by install.sh)
try:
@@ -2768,6 +3081,7 @@ def build_parser() -> argparse.ArgumentParser:
"recall": cmd_search, # alias
"search": cmd_search,
"checkpoint": cmd_checkpoint,
+ "demo": cmd_demo,
"list": cmd_list,
"stats": cmd_stats,
"decay": cmd_decay,
@@ -2788,6 +3102,7 @@ def build_parser() -> argparse.ArgumentParser:
"shared-task": cmd_shared_task,
"status": cmd_status,
"doctor": cmd_doctor,
+ "runtime": cmd_runtime,
"task": cmd_task,
"ingest": cmd_ingest,
"docs": cmd_docs,
@@ -2808,6 +3123,9 @@ def build_parser() -> argparse.ArgumentParser:
router_action="report",
enforce_action=None,
limit=getattr(args, "limit", 0),
+ sessions_dir=getattr(args, "sessions_dir", None),
+ harness=getattr(args, "harness", "claude_code"),
+ golden=getattr(args, "golden", None),
share=getattr(args, "share", False),
json=getattr(args, "json", False),
)
diff --git a/dhee/context_kernel.py b/dhee/context_kernel.py
new file mode 100644
index 0000000..ae263fd
--- /dev/null
+++ b/dhee/context_kernel.py
@@ -0,0 +1,78 @@
+"""Typed facade over Dhee's local context substrate.
+
+This is intentionally small. The goal is to give CLI, MCP, daemon, and future
+SDK code one stable boundary for context inspection without turning Dhee into a
+generic context database.
+"""
+
+from __future__ import annotations
+
+import os
+from dataclasses import dataclass
+from typing import Any, Dict, List, Optional
+
+from dhee.fs import ContextWorkspace, DheeFSEntry
+
+
@dataclass(frozen=True)
class KernelScope:
    """Immutable addressing scope (repo/user/agent/workspace) for kernel calls."""

    repo: Optional[str] = None
    user_id: str = "default"
    agent_id: str = "kernel"
    workspace_id: Optional[str] = None

    @property
    def resolved_repo(self) -> Optional[str]:
        """Absolute, ``~``-expanded repo path, or None when no repo was given."""
        return os.path.abspath(os.path.expanduser(self.repo)) if self.repo else None

    @property
    def resolved_workspace_id(self) -> str:
        """Explicit workspace id, else the resolved repo, else the cwd."""
        for candidate in (self.workspace_id, self.resolved_repo):
            if candidate:
                return candidate
        return os.getcwd()
+
+
class DheeContextKernel:
    """Decision-complete context boundary for local developer-agent workflows."""

    def __init__(self, scope: Optional[KernelScope] = None, *, db: Any = None):
        # Fall back to an all-defaults scope so a bare kernel "just works".
        self.scope = scope or KernelScope()
        self.db = db

    def workspace(self) -> ContextWorkspace:
        """Materialize a ContextWorkspace bound to this kernel's scope."""
        scope = self.scope
        return ContextWorkspace(
            repo=scope.resolved_repo,
            user_id=scope.user_id,
            agent_id=scope.agent_id,
            workspace_id=scope.resolved_workspace_id,
            db=self.db,
        )

    def normalize(self, uri_or_path: str) -> str:
        """Canonicalize a dhee:// URI or path via the workspace."""
        return self.workspace().normalize_path(uri_or_path)

    def list(self, uri_or_path: str = "/") -> List[DheeFSEntry]:
        """List entries under the given location (root by default)."""
        return self.workspace().list(uri_or_path)

    def read(self, uri_or_path: str) -> str:
        """Read the content at the given location."""
        return self.workspace().read(uri_or_path)

    def search(self, uri_or_path: str, query: str) -> List[Dict[str, Any]]:
        """Search under the given location for *query*."""
        return self.workspace().search(uri_or_path, query)

    def snapshot(self) -> Dict[str, Any]:
        """One-call overview: scope, context-state status, handoff, shared view."""
        ws = self.workspace()
        scope = self.scope
        return {
            "scope": {
                "repo": scope.resolved_repo,
                "user_id": scope.user_id,
                "agent_id": scope.agent_id,
                "workspace_id": scope.resolved_workspace_id,
            },
            "state": ws.context_state_store().status(),
            "handoff": ws.handoff_snapshot(),
            "shared": ws.shared_snapshot(),
        }
+
+
+__all__ = ["DheeContextKernel", "KernelScope"]
diff --git a/dhee/context_state.py b/dhee/context_state.py
index de65e48..2061478 100644
--- a/dhee/context_state.py
+++ b/dhee/context_state.py
@@ -1944,7 +1944,10 @@ def task_aware_read_schema(path: str, *, query: str = "", task_intent: str = "")
if intent == "debug_failure":
return {
"intent": intent,
- "preferred_depth": "deep" if ext in {".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs"} else "normal",
+ "preferred_depth": "deep" if ext in {
+ ".py", ".ts", ".tsx", ".js", ".jsx", ".go", ".rs", ".java",
+ ".kt", ".kts", ".sh", ".bash", ".zsh", ".sql", ".log",
+ } else "normal",
"focus": ["failing assertion", "stack frame", "related symbol", "next verification command"],
"note": "task_schema=debug_failure: preserve failure landmarks and likely next verification command",
}
diff --git a/dhee/demo.py b/dhee/demo.py
new file mode 100644
index 0000000..bfb5980
--- /dev/null
+++ b/dhee/demo.py
@@ -0,0 +1,246 @@
+"""Built-in demos that make Dhee's context-governance value concrete."""
+
+from __future__ import annotations
+
+from dataclasses import asdict, dataclass
+from typing import Any
+
+from dhee.router import bash_digest, digest as read_digest
+
+CHARS_PER_TOKEN = 3.5
+
+
+def _tokens(text: str) -> int:
+ return int(len(text) / CHARS_PER_TOKEN) if text else 0
+
+
@dataclass
class TokenRouterCase:
    """One demo scenario comparing raw tool output against its routed digest."""

    name: str  # human-readable scenario label
    surface: str  # tool surface that produced the output (e.g. "dhee_bash")
    decision: str  # one-line routing rationale shown to the user
    raw_tokens: int  # estimated tokens of the unrouted output
    digest_tokens: int  # estimated tokens of the rendered digest
    saved_tokens: int  # raw_tokens - digest_tokens, floored at 0
    saved_pct: float  # savings as a percentage of raw_tokens
    ptr: str  # evidence pointer used to expand the full output on demand
    digest: str  # rendered digest text the agent actually sees
    expand: str  # ready-to-run expand call for retrieving the full output
+
+
+def _pytest_output() -> str:
+ header = [
+ "============================= test session starts =============================",
+ "platform darwin -- Python 3.12.0, pytest-8.3.4",
+ "collected 842 items",
+ "",
+ "tests/test_checkout.py::test_retries_on_409 FAILED",
+ "tests/test_checkout.py::test_preserves_idempotency_key PASSED",
+ "",
+ "=================================== FAILURES ===================================",
+ "FAILED tests/test_checkout.py::test_retries_on_409 - AssertionError: retry budget exhausted",
+ "E AssertionError: expected status=complete",
+ "E assert 'pending' == 'complete'",
+ "",
+ ]
+ noisy_block = [
+ "Captured stdout call",
+ "checkout worker poll attempt=1 status=pending",
+ "checkout worker poll attempt=2 status=pending",
+ "checkout worker poll attempt=3 status=pending",
+ "debug payload: {'cart_id': 'demo', 'retry_after_ms': 250, 'trace': 'redacted'}",
+ ]
+ tail = [
+ "",
+ "=========================== short test summary info ===========================",
+ "FAILED tests/test_checkout.py::test_retries_on_409 - AssertionError: retry budget exhausted",
+ "1 failed, 841 passed, 12 skipped in 88.43s",
+ ]
+ return "\n".join(header + noisy_block * 180 + tail) + "\n"
+
+
+def _git_diff_output() -> str:
+ chunks: list[str] = []
+ for idx in range(1, 26):
+ chunks.extend(
+ [
+ f"diff --git a/src/service_{idx}.py b/src/service_{idx}.py",
+ f"--- a/src/service_{idx}.py",
+ f"+++ b/src/service_{idx}.py",
+ "@@ -10,7 +10,12 @@ def handle(request):",
+ "- return process(request)",
+ "+ result = process(request)",
+ "+ if result.needs_retry:",
+ "+ audit_retry(request.id)",
+ "+ return retry(result)",
+ "+ return result",
+ "",
+ ]
+ )
+ return "\n".join(chunks)
+
+
+def _source_file() -> str:
+ head = [
+ "from __future__ import annotations",
+ "",
+ "from dataclasses import dataclass",
+ "from typing import Any",
+ "",
+ "",
+ "@dataclass",
+ "class ContextDecision:",
+ " source: str",
+ " reason: str",
+ " evidence_ptr: str",
+ " tokens_saved: int",
+ "",
+ "",
+ "class ContextGovernor:",
+ " def __init__(self, policy: dict[str, Any]):",
+ " self.policy = policy",
+ "",
+ ]
+ funcs: list[str] = []
+ for idx in range(1, 90):
+ funcs.extend(
+ [
+ f" def route_signal_{idx}(self, observation: dict[str, Any]) -> ContextDecision:",
+ f" reason = observation.get('reason') or 'demo route {idx}'",
+ f" source = observation.get('source') or 'tool-output-{idx}'",
+ " if observation.get('is_stale'):",
+ " reason = 'stale signal tombstoned before injection'",
+ " return ContextDecision(source=source, reason=reason, evidence_ptr='R-demo', tokens_saved=1024)",
+ "",
+ ]
+ )
+ return "\n".join(head + funcs)
+
+
def _case_from_bash(name: str, command: str, stdout: str, ptr: str, decision: str) -> TokenRouterCase:
    """Build a demo case by routing captured bash output through the digest."""
    # The pytest scenario simulates a failing command; everything else exits 0.
    routed = bash_digest.digest_bash(
        cmd=command,
        exit_code=1 if "pytest" in command else 0,
        duration_ms=4312,
        stdout=stdout,
        stderr="",
    )
    shown = routed.render(ptr)
    before = _tokens(stdout)
    after = _tokens(shown)
    spared = max(0, before - after)
    pct = round(spared / before * 100, 2) if before else 0.0
    return TokenRouterCase(
        name=name,
        surface="dhee_bash",
        decision=decision,
        raw_tokens=before,
        digest_tokens=after,
        saved_tokens=spared,
        saved_pct=pct,
        ptr=ptr,
        digest=shown,
        expand=f'dhee_expand_result(ptr="{ptr}")',
    )
+
+
def _case_from_read() -> TokenRouterCase:
    """Build the source-file demo case via the read-digest surface."""
    pointer = "R-demo-source"
    source_text = _source_file()
    routed = read_digest.digest_read(
        "src/context_governor.py",
        source_text,
        depth="normal",
        query="what decides whether context enters the model",
        task_intent="explain context governance",
    )
    shown = routed.render(pointer)
    before = _tokens(source_text)
    after = _tokens(shown)
    spared = max(0, before - after)
    pct = round(spared / before * 100, 2) if before else 0.0
    return TokenRouterCase(
        name="source file read",
        surface="dhee_read",
        decision="Expose structure, symbols, and task-relevant focus; keep exact source behind a pointer.",
        raw_tokens=before,
        digest_tokens=after,
        saved_tokens=spared,
        saved_pct=pct,
        ptr=pointer,
        digest=shown,
        expand=f'dhee_expand_result(ptr="{pointer}")',
    )
+
+
def token_router_demo() -> dict[str, Any]:
    """Return a deterministic token-router demo report."""
    cases: list[TokenRouterCase] = [
        _case_from_bash(
            "pytest failure log",
            "pytest tests/test_checkout.py -q",
            _pytest_output(),
            "B-demo-pytest",
            "Show pass/fail counts and first failure; hide repetitive debug logs.",
        ),
        _case_from_bash(
            "large git diff",
            "git diff src tests",
            _git_diff_output(),
            "B-demo-diff",
            "Show changed-file and hunk totals; keep raw patch available only on demand.",
        ),
        _case_from_read(),
    ]
    raw_total = sum(item.raw_tokens for item in cases)
    digest_total = sum(item.digest_tokens for item in cases)
    saved_total = max(0, raw_total - digest_total)
    saved_pct = round(saved_total / raw_total * 100, 2) if raw_total else 0.0
    return {
        "format": "dhee_token_router_demo",
        "version": 1,
        "positioning": "Dhee is the context firewall for AI coding agents: the agent sees the right thing, not everything.",
        "aggregate": {
            "cases": len(cases),
            "raw_tokens": raw_total,
            "digest_tokens": digest_total,
            "saved_tokens": saved_total,
            "saved_pct": saved_pct,
        },
        "cases": [asdict(item) for item in cases],
        "next_step": "Run real reports with `dhee router report` or harvest replay fixtures with `dhee router harvest`.",
    }
+
+
+def _preview(text: str, *, lines: int = 12) -> str:
+ parts = text.splitlines()
+ shown = parts[:lines]
+ if len(parts) > lines:
+ shown.append(f"... ({len(parts) - lines} more digest lines)")
+ return "\n".join(shown)
+
+
def format_token_router_demo(report: dict[str, Any], *, show_digests: bool = True) -> str:
    """Render a token-router demo report as human-readable terminal text."""
    totals = report.get("aggregate") or {}
    out: list[str] = [
        "Dhee token-router demo",
        " context firewall: agent sees the right thing, not everything",
        f" raw tokens: {totals.get('raw_tokens', 0):,}",
        f" digest tokens: {totals.get('digest_tokens', 0):,}",
        f" saved: {totals.get('saved_tokens', 0):,} ({totals.get('saved_pct', 0.0):.1f}%)",
        "",
    ]
    for item in report.get("cases", []):
        out.append(f"[{item['surface']}] {item['name']}")
        out.append(f" decision: {item['decision']}")
        out.append(f" raw -> digest: {item['raw_tokens']:,} -> {item['digest_tokens']:,} tokens")
        out.append(f" saved: {item['saved_tokens']:,} ({item['saved_pct']:.1f}%)")
        out.append(f" evidence: {item['expand']}")
        if show_digests:
            out.append(" what the agent sees:")
            out.extend(f" {preview_line}" for preview_line in _preview(item["digest"]).splitlines())
        out.append("")
    out.append(str(report.get("next_step") or ""))
    return "\n".join(out).rstrip()
diff --git a/dhee/doctor.py b/dhee/doctor.py
index 6164f9d..7e731e2 100644
--- a/dhee/doctor.py
+++ b/dhee/doctor.py
@@ -37,7 +37,7 @@
"M2": "Propositional substrate + supersede chains (tier, superseded_by, preferences, retrieval integration)",
"M3": "Years-of-memory (tier promotion, background consolidator, lineage UI, Epistemic Control Loop)",
"M4": "Honest self-evolution (real MetaBuddhi loop, Nididhyasana scheduler, group-relative confidence, step-level utility)",
- "M5": ".dheemem protocol v1 (portable core + optional extensions, export/import/migrate CLI, signed manifests)",
+ "M5": ".dheemem protocol v1 (portable core, export/import/migrate CLI, signed manifests)",
"M6": "Harness adapters (base + ClaudeCode + Codex, canonical event vocabulary)",
"M7": "Public proof (replay corpus, decades synthetic corpus, portability eval, README rewritten to measured numbers)",
}
@@ -48,9 +48,12 @@ class DoctorReport:
dhee_version: str = ""
generated_at: float = 0.0
core: dict[str, Any] = field(default_factory=dict)
+ runtime: dict[str, Any] = field(default_factory=dict)
router: dict[str, Any] = field(default_factory=dict)
+ context: dict[str, Any] = field(default_factory=dict)
cognition: dict[str, Any] = field(default_factory=dict)
memory: dict[str, Any] = field(default_factory=dict)
+ portability: dict[str, Any] = field(default_factory=dict)
capabilities: dict[str, Any] = field(default_factory=dict)
def to_dict(self) -> dict[str, Any]:
@@ -58,9 +61,12 @@ def to_dict(self) -> dict[str, Any]:
"dhee_version": self.dhee_version,
"generated_at": self.generated_at,
"core": self.core,
+ "runtime": self.runtime,
"router": self.router,
+ "context": self.context,
"cognition": self.cognition,
"memory": self.memory,
+ "portability": self.portability,
"capabilities": self.capabilities,
}
@@ -108,6 +114,65 @@ def _core_section() -> dict[str, Any]:
}
def _runtime_section() -> dict[str, Any]:
    """Snapshot the local runtime daemon status for doctor output; never raise."""
    try:
        from dhee.runtime import status as _runtime_status

        return _runtime_status()
    except Exception as exc:
        # Doctor must always render, even when the runtime module is broken.
        return {"error": f"{type(exc).__name__}: {exc}"}
+
+
def _context_section() -> dict[str, Any]:
    """Summarize local context state and repo-context linkage for doctor output.

    Both probes are best-effort: any failure is captured as an
    ``{"error": ...}`` entry instead of propagating.
    """
    out: dict[str, Any] = {}
    try:
        from dhee.context_state import ContextStateStore

        # Scope both repo and workspace to the current working directory.
        store = ContextStateStore(repo=os.getcwd(), workspace_id=os.getcwd())
        out["state"] = store.status()
        out["debt"] = store.debt_summary(top=True)
    except Exception as exc:
        out["state"] = {"error": f"{type(exc).__name__}: {exc}"}

    try:
        from dhee import repo_link

        repo = repo_link.repo_for_path(os.getcwd())
        if repo is None:
            out["repo_context"] = {"linked": False}
        else:
            # NOTE(review): reaches into the private helper repo_link._read_json;
            # consider exposing a public manifest reader instead.
            manifest = repo_link._read_json(repo_link.repo_manifest_path(repo), {})
            conflicts = repo_link.detect_conflicts(repo)
            out["repo_context"] = {
                "linked": True,
                "repo_root": str(repo),
                "entry_count": manifest.get("entry_count", 0),
                "conflict_count": len(conflicts),
                # Healthy only when no conflicts were detected.
                "ok": not conflicts,
            }
    except Exception as exc:
        out["repo_context"] = {"error": f"{type(exc).__name__}: {exc}"}
    return out
+
+
def _portability_section() -> dict[str, Any]:
    """Describe the .dheemem portability contract for doctor output; never raise."""
    try:
        from dhee.protocol import PACK_EXTENSION
    except Exception as exc:
        return {"error": f"{type(exc).__name__}: {exc}"}
    # Only the import above can fail; the payload itself is literal data.
    return {
        "format": PACK_EXTENSION,
        "contract": "signed manifest + per-file hashes + memories/history/vectors/artifacts/repo-context/handoff",
        "commands": [
            "dhee export --output pack.dheemem --repo /path/to/repo",
            "dhee import pack.dheemem --strategy merge|replace|dry-run --repo /path/to/repo",
        ],
        "latest_pack_checked": None,
        "note": "Run `dhee portability-eval` for a destructive-safe round-trip scorecard.",
    }
+
+
def _router_section() -> dict[str, Any]:
try:
from dhee.router.install import ENFORCE_FLAG, status as router_status
@@ -454,7 +519,7 @@ def _capabilities_section(router: dict[str, Any]) -> dict[str, Any]:
"run_portability_eval() round-trips a user's Dhee state "
"through a signed .dheemem pack and scores per-substrate "
"retention (memories / memory_history / distillation_provenance "
- "/ artifacts / vectors) + handoff survival. CLI: "
+ "/ artifacts / vectors / repo context) + handoff survival. CLI: "
"`dhee portability-eval [--user-id X] [--threshold F] [--json]`. "
"M7.6b fix: intra-pack content_hash + history-signature "
"collisions no longer collapse distinct source rows on merge — "
@@ -531,18 +596,24 @@ def build_report() -> DoctorReport:
__version__ = "unknown"
core = _core_section()
+ runtime = _runtime_section()
router = _router_section()
+ context = _context_section()
cognition = _cognition_section()
memory = _memory_section()
+ portability = _portability_section()
capabilities = _capabilities_section(router)
return DoctorReport(
dhee_version=__version__,
generated_at=time.time(),
core=core,
+ runtime=runtime,
router=router,
+ context=context,
cognition=cognition,
memory=memory,
+ portability=portability,
capabilities=capabilities,
)
@@ -555,9 +626,12 @@ def build_report() -> DoctorReport:
def format_human(report: DoctorReport) -> str:
lines: list[str] = []
core = report.core
+ runtime = report.runtime
router = report.router
+ context = report.context
cog = report.cognition
mem = report.memory
+ portability = report.portability
cap = report.capabilities
ts = time.strftime("%Y-%m-%d %H:%M", time.localtime(report.generated_at))
@@ -578,6 +652,28 @@ def format_human(report: DoctorReport) -> str:
lines.append(f" {label:<13} {info.get('path')} ({size_h})")
lines.append("")
+ # Runtime
+ lines.append("[ runtime ]")
+ if "error" in runtime:
+ lines.append(f" error: {runtime['error']}")
+ else:
+ daemon = runtime.get("daemon") or {}
+ venv = runtime.get("venv") or {}
+ paths = runtime.get("paths") or {}
+ lines.append(f" daemon: {'running' if daemon.get('running') else 'stopped'}")
+ if daemon.get("pid"):
+ lines.append(f" pid: {daemon.get('pid')}")
+ if daemon.get("endpoint"):
+ lines.append(f" endpoint: {daemon.get('endpoint')}")
+ health = daemon.get("health") or {}
+ if health.get("error"):
+ lines.append(f" health: {health.get('error')}")
+ elif daemon.get("running"):
+ lines.append(" health: ok")
+ lines.append(f" managed venv:{' present' if venv.get('exists') else ' missing'} ({venv.get('path')})")
+ lines.append(f" runtime dir: {paths.get('runtime_dir')}")
+ lines.append("")
+
# Router
lines.append("[ router ]")
if "error" in router:
@@ -600,6 +696,26 @@ def format_human(report: DoctorReport) -> str:
)
lines.append("")
+ # Context
+ lines.append("[ context ]")
+ state = context.get("state") or {}
+ if "error" in state:
+ lines.append(f" state: error: {state['error']}")
+ else:
+ revision = state.get("state_revision", state.get("revision", 0))
+ epoch = state.get("task_epoch", state.get("epoch", 0))
+ debt_level = state.get("level") or state.get("debt_level") or "unknown"
+ lines.append(f" state: revision={revision} epoch={epoch} debt={debt_level}")
+ repo_ctx = context.get("repo_context") or {}
+ if repo_ctx.get("linked"):
+ lines.append(
+ f" repo: linked entries={repo_ctx.get('entry_count', 0)} "
+ f"conflicts={repo_ctx.get('conflict_count', 0)}"
+ )
+ else:
+ lines.append(" repo: not linked for current cwd")
+ lines.append("")
+
# Cognition
lines.append("[ cognition ]")
mb = cog.get("meta_buddhi", {})
@@ -647,6 +763,16 @@ def format_human(report: DoctorReport) -> str:
lines.append(f" supersede_chain {sup.get('column')}")
lines.append("")
+ # Portability
+ lines.append("[ portability ]")
+ if "error" in portability:
+ lines.append(f" error: {portability['error']}")
+ else:
+ lines.append(f" format: {portability.get('format')}")
+ lines.append(f" integrity: {portability.get('contract')}")
+ lines.append(f" latest pack: {portability.get('latest_pack_checked') or 'not checked'}")
+ lines.append("")
+
# Capabilities
lines.append("[ wired today ]")
for item in cap.get("closed_today", []):
diff --git a/dhee/fs/__init__.py b/dhee/fs/__init__.py
index 75812fb..b0f84fd 100644
--- a/dhee/fs/__init__.py
+++ b/dhee/fs/__init__.py
@@ -1,6 +1,7 @@
"""DheeFS virtual learning/context shell."""
from dhee.fs.types import DheeFSEntry, DheeFSError, DheeFSResult, DheeMount
+from dhee.fs.uri import normalize_dhee_uri
from dhee.fs.workspace import CommandRegistry, ContextWorkspace
__all__ = [
@@ -10,4 +11,5 @@
"DheeFSError",
"DheeFSResult",
"DheeMount",
+ "normalize_dhee_uri",
]
diff --git a/dhee/fs/uri.py b/dhee/fs/uri.py
new file mode 100644
index 0000000..5053fd0
--- /dev/null
+++ b/dhee/fs/uri.py
@@ -0,0 +1,59 @@
+"""Canonical DheeFS URI helpers.
+
+The virtual shell remains path-first because agents and developers already
+understand ``/state`` and ``/handoff``. ``dhee://`` is the stable address form
+for cross-tool references, docs, and future SDK boundaries.
+"""
+
+from __future__ import annotations
+
+from urllib.parse import urlparse
+
+
+_ALIASES = {
+ "/state/current": "/state/current.md",
+ "/state/card": "/state/card.xml",
+ "/state/decisions": "/state/decisions.md",
+ "/state/superseded": "/state/superseded.md",
+ "/state/history": "/state/history.md",
+ "/handoff/latest": "/handoff/latest.md",
+ "/handoff/snapshot": "/handoff/snapshot.json",
+ "/sessions/latest": "/sessions/latest.md",
+ "/shared/task-results": "/shared/task-results",
+}
+
+
+def normalize_dhee_uri(value: str) -> str:
+ """Return a DheeFS path for either a path or a ``dhee://`` URI.
+
+ Examples:
+ ``dhee://state/current`` -> ``/state/current.md``
+ ``dhee://router/ptr/R-abc`` -> ``/router/ptr/R-abc``
+        ``/dhee/state/current.md`` -> ``/state/current.md`` (the ``/dhee``
+        prefix is stripped later, in ``ContextWorkspace.normalize_path``).
+ """
+ raw = str(value or "").strip()
+ if not raw.startswith("dhee://") and not raw.startswith("dhee:/"):
+ return raw
+
+ parsed = urlparse(raw)
+ if parsed.scheme != "dhee":
+ return raw
+
+ # urlparse("dhee://state/current") stores "state" as netloc and
+ # "/current" as path. urlparse("dhee:/state/current") stores the whole
+ # virtual path in parsed.path. Support both forms.
+ if parsed.netloc:
+ path = f"/{parsed.netloc}{parsed.path or ''}"
+ else:
+ path = parsed.path or "/"
+ if not path.startswith("/"):
+ path = "/" + path
+ path = path.rstrip("/") if len(path) > 1 else path
+
+ if path.startswith("/agents/") and path.endswith("/memory"):
+ return f"{path}.md"
+ return _ALIASES.get(path, path)
+
+
+__all__ = ["normalize_dhee_uri"]
diff --git a/dhee/fs/workspace.py b/dhee/fs/workspace.py
index 286da90..db8f694 100644
--- a/dhee/fs/workspace.py
+++ b/dhee/fs/workspace.py
@@ -32,6 +32,7 @@
DheeMount,
entries_to_text,
)
+from dhee.fs.uri import normalize_dhee_uri
_PTR_PATTERN = re.compile(r"\b[A-Z]-[0-9a-f]{10}\b")
@@ -236,7 +237,7 @@ def execute(self, command: str) -> DheeFSResult:
)
def normalize_path(self, path: str) -> str:
- value = str(path or "/").strip()
+ value = normalize_dhee_uri(str(path or "/").strip())
if not value:
value = "/"
if not value.startswith("/"):
diff --git a/dhee/install_cleanup.py b/dhee/install_cleanup.py
new file mode 100644
index 0000000..a024ca7
--- /dev/null
+++ b/dhee/install_cleanup.py
@@ -0,0 +1,169 @@
+"""Installer artifact cleanup for ``dhee uninstall``.
+
+The curl installer intentionally touches a tiny set of user-owned files:
+managed symlinks in ``~/.local/bin`` and a marked ``# dhee`` PATH block in a
+shell profile. Uninstall must be equally precise: remove Dhee-owned artifacts
+without guessing at, or rewriting, user-managed shell configuration.
+"""
+
+from __future__ import annotations
+
+import os
+from pathlib import Path
+from typing import Any, Iterable
+
+MANAGED_BINARIES = ("dhee", "dhee-mcp", "dhee-mcp-full", "engram-bus")
+PROFILE_RELATIVE_PATHS = (
+ ".zshrc",
+ ".bashrc",
+ ".profile",
+ ".config/fish/config.fish",
+)
+
+
+def installer_bin_dir(*, home: Path | None = None) -> Path:
+ """Return the bin directory used by the curl installer."""
+ root = Path.home() if home is None else Path(home)
+ return root / ".local" / "bin"
+
+
+def managed_venv_dir(config_dir: str | os.PathLike[str]) -> Path:
+ """Return the managed virtualenv directory under Dhee's data dir."""
+ return Path(config_dir).expanduser() / ".venv"
+
+
+def cleanup_install_artifacts(
+ config_dir: str | os.PathLike[str],
+ *,
+ home: Path | None = None,
+ bin_dir: Path | None = None,
+) -> dict[str, Any]:
+ """Remove installer-owned shell/profile artifacts.
+
+ The function is safe to call repeatedly and when ``config_dir`` no longer
+ exists. It never removes real executables, user PATH edits, or symlinks that
+ target anything outside Dhee's managed venv.
+ """
+ resolved_bin_dir = installer_bin_dir(home=home) if bin_dir is None else Path(bin_dir)
+ return {
+ "symlinks": cleanup_installer_symlinks(
+ config_dir,
+ home=home,
+ bin_dir=resolved_bin_dir,
+ ),
+ "shell_profiles": cleanup_shell_profiles(
+ home=home,
+ bin_dir=resolved_bin_dir,
+ ),
+ }
+
+
+def cleanup_installer_symlinks(
+ config_dir: str | os.PathLike[str],
+ *,
+ home: Path | None = None,
+ bin_dir: Path | None = None,
+ names: Iterable[str] = MANAGED_BINARIES,
+) -> dict[str, Any]:
+    """Remove managed symlinks that point into ``<config_dir>/.venv/bin``."""
+ resolved_bin_dir = installer_bin_dir(home=home) if bin_dir is None else Path(bin_dir)
+ managed_bin = (managed_venv_dir(config_dir) / "bin").resolve(strict=False)
+ removed: list[dict[str, str]] = []
+ skipped: list[dict[str, str]] = []
+
+ for name in names:
+ path = resolved_bin_dir / name
+ if not path.exists() and not path.is_symlink():
+ continue
+ if not path.is_symlink():
+ skipped.append({"name": name, "path": str(path), "reason": "not_symlink"})
+ continue
+
+ target = _symlink_target(path)
+ if _is_relative_to(target, managed_bin):
+ path.unlink()
+ removed.append({"name": name, "path": str(path), "target": str(target)})
+ else:
+ skipped.append(
+ {
+ "name": name,
+ "path": str(path),
+ "target": str(target),
+ "reason": "outside_managed_venv",
+ }
+ )
+
+ return {
+ "bin_dir": str(resolved_bin_dir),
+ "managed_bin": str(managed_bin),
+ "removed": removed,
+ "skipped": skipped,
+ }
+
+
+def cleanup_shell_profiles(
+ *,
+ home: Path | None = None,
+ bin_dir: Path | None = None,
+) -> dict[str, Any]:
+    """Remove exactly-matching Dhee installer PATH blocks from common shell profiles."""
+ root = Path.home() if home is None else Path(home)
+ resolved_bin_dir = installer_bin_dir(home=root) if bin_dir is None else Path(bin_dir)
+ expected_path_lines = {
+ f'export PATH="{resolved_bin_dir}:$PATH"',
+ f"fish_add_path {resolved_bin_dir}",
+ }
+ changed: list[dict[str, Any]] = []
+ scanned: list[str] = []
+
+ for rel_path in PROFILE_RELATIVE_PATHS:
+ profile = root / rel_path
+ scanned.append(str(profile))
+ if not profile.exists() or not profile.is_file():
+ continue
+ original = profile.read_text(encoding="utf-8")
+ updated, removed_blocks = _remove_dhee_profile_blocks(original, expected_path_lines)
+ if removed_blocks:
+ profile.write_text(updated, encoding="utf-8")
+ changed.append({"path": str(profile), "removed_blocks": removed_blocks})
+
+ return {"bin_dir": str(resolved_bin_dir), "changed": changed, "scanned": scanned}
+
+
+def _remove_dhee_profile_blocks(content: str, expected_path_lines: set[str]) -> tuple[str, int]:
+ lines = content.splitlines(keepends=True)
+ updated: list[str] = []
+ removed_blocks = 0
+ i = 0
+ while i < len(lines):
+ current = _strip_newline(lines[i])
+ next_line = _strip_newline(lines[i + 1]) if i + 1 < len(lines) else None
+ if current == "# dhee" and next_line in expected_path_lines:
+ if updated and not updated[-1].strip():
+ updated.pop()
+ removed_blocks += 1
+ i += 2
+ continue
+ updated.append(lines[i])
+ i += 1
+ return "".join(updated), removed_blocks
+
+
+def _strip_newline(value: str) -> str:
+ return value.rstrip("\r\n")
+
+
+def _symlink_target(path: Path) -> Path:
+ raw_target = os.readlink(path)
+ target = Path(raw_target)
+ if not target.is_absolute():
+ target = path.parent / target
+ return target.resolve(strict=False)
+
+
+def _is_relative_to(path: Path, parent: Path) -> bool:
+ try:
+ path.relative_to(parent)
+ return True
+ except ValueError:
+ return False
diff --git a/dhee/mcp_server.py b/dhee/mcp_server.py
index c440a8e..c3dd805 100644
--- a/dhee/mcp_server.py
+++ b/dhee/mcp_server.py
@@ -1641,11 +1641,23 @@ def _handle_dhee_promote_learning(_memory, arguments: Dict[str, Any]) -> Dict[st
def _handle_dhee_shell(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
- from dhee.fs import ContextWorkspace
-
repo = arguments.get("repo")
if repo:
repo = os.path.abspath(str(repo))
+ from dhee import runtime
+
+ runtime_result = runtime.execute_shell(
+ str(arguments.get("command") or ""),
+ repo=repo,
+ user_id=_default_user_id(arguments),
+ agent_id=_default_agent_id(arguments),
+ workspace_id=arguments.get("workspace_id") or repo,
+ )
+ if runtime_result is not None:
+ return runtime_result
+
+ from dhee.fs import ContextWorkspace
+
workspace = ContextWorkspace(
repo=repo,
user_id=_default_user_id(arguments),
@@ -1670,13 +1682,32 @@ def _context_store(arguments: Dict[str, Any]):
)
+def _runtime_context(arguments: Dict[str, Any], action: str, extra: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
+ from dhee import runtime
+
+ repo = arguments.get("repo")
+ if repo:
+ repo = os.path.abspath(str(repo))
+ return runtime.execute_context(
+ action,
+ repo=repo,
+ workspace_id=arguments.get("workspace_id") or repo,
+ user_id=_default_user_id(arguments),
+ agent_id=_default_agent_id(arguments),
+ args=extra or {},
+ )
+
+
def _handle_dhee_context_status(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
_maybe_sync_codex_runtime(arguments)
- return _context_store(arguments).status()
+ return _runtime_context(arguments, "status") or _context_store(arguments).status()
def _handle_dhee_context_state(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
_maybe_sync_codex_runtime(arguments)
+ runtime_result = _runtime_context(arguments, "state", {"format": str(arguments.get("format") or "card").lower()})
+ if runtime_result is not None:
+ return runtime_result
store = _context_store(arguments)
fmt = str(arguments.get("format") or "card").lower()
if fmt == "json":
@@ -1688,17 +1719,20 @@ def _handle_dhee_context_state(_memory, arguments: Dict[str, Any]) -> Dict[str,
def _handle_dhee_context_checkpoint(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
_maybe_sync_codex_runtime(arguments)
- return _context_store(arguments).checkpoint(reason=str(arguments.get("reason") or "mcp checkpoint"))
+ reason = str(arguments.get("reason") or "mcp checkpoint")
+ return _runtime_context(arguments, "checkpoint", {"reason": reason}) or _context_store(arguments).checkpoint(reason=reason)
def _handle_dhee_context_rollover(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
_maybe_sync_codex_runtime(arguments)
- return _context_store(arguments).rollover(reason=str(arguments.get("reason") or "mcp rollover"))
+ reason = str(arguments.get("reason") or "mcp rollover")
+ return _runtime_context(arguments, "rollover", {"reason": reason}) or _context_store(arguments).rollover(reason=reason)
def _handle_dhee_context_provision(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
_maybe_sync_codex_runtime(arguments)
- return _context_store(arguments).provision(str(arguments.get("task") or arguments.get("query") or ""))
+ task = str(arguments.get("task") or arguments.get("query") or "")
+ return _runtime_context(arguments, "provision", {"task": task}) or _context_store(arguments).provision(task)
def _handle_dhee_tools_list(_memory, _arguments: Dict[str, Any]) -> Dict[str, Any]:
@@ -2165,11 +2199,23 @@ def _handle_dhee_broadcast(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]
def _handle_dhee_read(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
+ from dhee import runtime
+
+ runtime_result = runtime.execute_router("read", arguments)
+ if runtime_result is not None:
+ return runtime_result
+
from dhee.router.handlers import handle_dhee_read
return handle_dhee_read(arguments)
def _handle_dhee_bash(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
+ from dhee import runtime
+
+ runtime_result = runtime.execute_router("bash", arguments)
+ if runtime_result is not None:
+ return runtime_result
+
from dhee.router.handlers import handle_dhee_bash
return handle_dhee_bash(arguments)
@@ -2180,6 +2226,12 @@ def _handle_dhee_agent(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
def _handle_dhee_grep(_memory, arguments: Dict[str, Any]) -> Dict[str, Any]:
+ from dhee import runtime
+
+ runtime_result = runtime.execute_router("grep", arguments)
+ if runtime_result is not None:
+ return runtime_result
+
from dhee.router.handlers import handle_dhee_grep
return handle_dhee_grep(arguments)
diff --git a/dhee/mcp_slim.py b/dhee/mcp_slim.py
index 17860cf..2be825a 100644
--- a/dhee/mcp_slim.py
+++ b/dhee/mcp_slim.py
@@ -927,11 +927,23 @@ def _handle_dhee_promote_learning(args: Dict[str, Any]) -> Dict[str, Any]:
def _handle_dhee_shell(args: Dict[str, Any]) -> Dict[str, Any]:
- from dhee.fs import ContextWorkspace
-
repo = args.get("repo")
if repo:
repo = os.path.abspath(str(repo))
+ from dhee import runtime
+
+ runtime_result = runtime.execute_shell(
+ str(args.get("command") or ""),
+ repo=repo,
+ user_id=str(args.get("user_id") or "default"),
+ agent_id=_default_agent_id(args),
+ workspace_id=args.get("workspace_id") or repo,
+ )
+ if runtime_result is not None:
+ return runtime_result
+
+ from dhee.fs import ContextWorkspace
+
workspace = ContextWorkspace(
repo=repo,
user_id=str(args.get("user_id") or "default"),
@@ -956,11 +968,30 @@ def _context_store(args: Dict[str, Any]):
)
+def _runtime_context(args: Dict[str, Any], action: str, extra: Optional[Dict[str, Any]] = None) -> Optional[Dict[str, Any]]:
+ from dhee import runtime
+
+ repo = args.get("repo")
+ if repo:
+ repo = os.path.abspath(str(repo))
+ return runtime.execute_context(
+ action,
+ repo=repo,
+ workspace_id=args.get("workspace_id") or repo,
+ user_id=str(args.get("user_id") or "default"),
+ agent_id=_default_agent_id(args),
+ args=extra or {},
+ )
+
+
def _handle_dhee_context_status(args: Dict[str, Any]) -> Dict[str, Any]:
- return _context_store(args).status()
+ return _runtime_context(args, "status") or _context_store(args).status()
def _handle_dhee_context_state(args: Dict[str, Any]) -> Dict[str, Any]:
+ runtime_result = _runtime_context(args, "state", {"format": str(args.get("format") or "card").lower()})
+ if runtime_result is not None:
+ return runtime_result
store = _context_store(args)
fmt = str(args.get("format") or "card").lower()
if fmt == "json":
@@ -971,15 +1002,18 @@ def _handle_dhee_context_state(args: Dict[str, Any]) -> Dict[str, Any]:
def _handle_dhee_context_checkpoint(args: Dict[str, Any]) -> Dict[str, Any]:
- return _context_store(args).checkpoint(reason=str(args.get("reason") or "mcp checkpoint"))
+ reason = str(args.get("reason") or "mcp checkpoint")
+ return _runtime_context(args, "checkpoint", {"reason": reason}) or _context_store(args).checkpoint(reason=reason)
def _handle_dhee_context_rollover(args: Dict[str, Any]) -> Dict[str, Any]:
- return _context_store(args).rollover(reason=str(args.get("reason") or "mcp rollover"))
+ reason = str(args.get("reason") or "mcp rollover")
+ return _runtime_context(args, "rollover", {"reason": reason}) or _context_store(args).rollover(reason=reason)
def _handle_dhee_context_provision(args: Dict[str, Any]) -> Dict[str, Any]:
- return _context_store(args).provision(str(args.get("task") or args.get("query") or ""))
+ task = str(args.get("task") or args.get("query") or "")
+ return _runtime_context(args, "provision", {"task": task}) or _context_store(args).provision(task)
def _handle_dhee_tools_list(_args: Dict[str, Any]) -> Dict[str, Any]:
@@ -1104,16 +1138,34 @@ def _handle_dhee_broadcast(args: Dict[str, Any]) -> Dict[str, Any]:
def _handle_dhee_read(args: Dict[str, Any]) -> Dict[str, Any]:
+ from dhee import runtime
+
+ runtime_result = runtime.execute_router("read", args)
+ if runtime_result is not None:
+ return runtime_result
+
from dhee.router.handlers import handle_dhee_read
return handle_dhee_read(args)
def _handle_dhee_bash(args: Dict[str, Any]) -> Dict[str, Any]:
+ from dhee import runtime
+
+ runtime_result = runtime.execute_router("bash", args)
+ if runtime_result is not None:
+ return runtime_result
+
from dhee.router.handlers import handle_dhee_bash
return handle_dhee_bash(args)
def _handle_dhee_grep(args: Dict[str, Any]) -> Dict[str, Any]:
+ from dhee import runtime
+
+ runtime_result = runtime.execute_router("grep", args)
+ if runtime_result is not None:
+ return runtime_result
+
from dhee.router.handlers import handle_dhee_grep
return handle_dhee_grep(args)
diff --git a/dhee/observability.py b/dhee/observability.py
index 76b6054..4dccb25 100644
--- a/dhee/observability.py
+++ b/dhee/observability.py
@@ -1,6 +1,6 @@
"""Engram Observability — compatibility-safe no-op implementation.
-Core Engram does not require metrics infrastructure at runtime, but enterprise
+Core Engram does not require metrics infrastructure at runtime, but managed-team
and API layers import symbols from this module. Keep this interface stable and
side-effect free so those imports always succeed.
"""
diff --git a/dhee/protocol/v1.py b/dhee/protocol/v1.py
index 6ca138f..37e21da 100644
--- a/dhee/protocol/v1.py
+++ b/dhee/protocol/v1.py
@@ -1,8 +1,8 @@
"""Signed `.dheemem` v1 portable archive format.
The pack is a zip archive containing newline-delimited JSON payloads plus a
-signed manifest. Import restores the durable DB rows, artifact substrate, and
-vector index without requiring fresh model calls.
+signed manifest. Import restores the durable DB rows, artifact substrate,
+repo-shared context, and vector index without requiring fresh model calls.
"""
from __future__ import annotations
@@ -11,11 +11,14 @@
import hashlib
import json
import os
+import posixpath
+import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional
from zipfile import ZIP_DEFLATED, ZipFile
+from cryptography.exceptions import InvalidSignature
from cryptography.hazmat.primitives import serialization
from cryptography.hazmat.primitives.asymmetric.ed25519 import (
Ed25519PrivateKey,
@@ -29,6 +32,8 @@
PACK_VERSION = "1"
MANIFEST_NAME = "manifest.json"
HANDOFF_NAME = "handoff.json"
+REPO_CONTEXT_MANIFEST_NAME = "repo_context/manifest.json"
+REPO_CONTEXT_ENTRIES_NAME = "repo_context/entries.jsonl"
PRIVATE_KEY_NAME = "protocol_ed25519_private.pem"
PUBLIC_KEY_NAME = "protocol_ed25519_public.pem"
@@ -42,6 +47,14 @@
("artifact_extractions", "artifact_extractions.jsonl"),
("artifact_chunks", "artifact_chunks.jsonl"),
]
+_REPO_CONTEXT_ARCHIVE_NAMES = {REPO_CONTEXT_MANIFEST_NAME, REPO_CONTEXT_ENTRIES_NAME}
+_PAYLOAD_ARCHIVE_NAMES = (
+ {archive_name for _, archive_name in _FILE_ORDER}
+ | {HANDOFF_NAME}
+ | _REPO_CONTEXT_ARCHIVE_NAMES
+)
+_ALLOWED_ARCHIVE_NAMES = _PAYLOAD_ARCHIVE_NAMES | {MANIFEST_NAME}
+_SHA256_RE = re.compile(r"^[0-9a-f]{64}$")
def _utcnow() -> str:
@@ -85,6 +98,14 @@ def _sha256(raw: bytes) -> str:
return hashlib.sha256(raw).hexdigest()
+def _path_within(child: Path, parent: Path) -> bool:
+ try:
+ child.resolve(strict=False).relative_to(parent.resolve(strict=False))
+ return True
+ except ValueError:
+ return False
+
+
def _key_id(public_pem: bytes) -> str:
return hashlib.sha256(public_pem).hexdigest()[:16]
@@ -151,13 +172,116 @@ def _verify_manifest(manifest: Dict[str, Any]) -> Dict[str, Any]:
raise ValueError("Manifest signature is incomplete")
public_key = serialization.load_pem_public_key(public_pem)
assert isinstance(public_key, Ed25519PublicKey)
- public_key.verify(
- base64.b64decode(signature_b64.encode("ascii")),
- _canonical_json(manifest_core),
- )
+ try:
+ public_key.verify(
+ base64.b64decode(signature_b64.encode("ascii")),
+ _canonical_json(manifest_core),
+ )
+ except InvalidSignature as exc:
+ raise ValueError("Manifest signature verification failed") from exc
return manifest_core
+def _safe_archive_name(name: str) -> bool:
+ if not isinstance(name, str) or not name:
+ return False
+ if name.startswith(("/", "\\")) or "\\" in name:
+ return False
+ if posixpath.normpath(name) != name:
+ return False
+ parts = name.split("/")
+ return all(part and part not in {".", ".."} for part in parts)
+
+
+def _validate_manifest_core(manifest_core: Dict[str, Any]) -> None:
+ if manifest_core.get("format") != "dheemem":
+ raise ValueError("Unsupported pack format")
+ if str(manifest_core.get("version") or "") != PACK_VERSION:
+ raise ValueError(f"Unsupported pack version: {manifest_core.get('version')}")
+ files = manifest_core.get("files")
+ if not isinstance(files, dict):
+ raise ValueError("Manifest files metadata is missing")
+
+ declared = set(files)
+ unsafe = sorted(name for name in declared if not _safe_archive_name(str(name)))
+ if unsafe:
+ raise ValueError(f"Unsafe archive path in manifest: {unsafe[0]}")
+
+ missing = sorted(_PAYLOAD_ARCHIVE_NAMES - declared)
+ if missing:
+ raise ValueError(f"Pack manifest missing required files: {', '.join(missing)}")
+ unexpected = sorted(declared - _PAYLOAD_ARCHIVE_NAMES)
+ if unexpected:
+ raise ValueError(f"Pack manifest declares unexpected files: {', '.join(unexpected)}")
+
+ for archive_name in sorted(_PAYLOAD_ARCHIVE_NAMES):
+ meta = files.get(archive_name)
+ if not isinstance(meta, dict):
+ raise ValueError(f"Manifest file metadata is invalid for {archive_name}")
+ sha = str(meta.get("sha256") or "")
+ if not _SHA256_RE.match(sha):
+ raise ValueError(f"Manifest sha256 metadata is invalid for {archive_name}")
+ records = meta.get("records")
+ if records is not None:
+ try:
+ if int(records) < 0:
+ raise ValueError
+ except (TypeError, ValueError) as exc:
+ raise ValueError(f"Manifest record count is invalid for {archive_name}") from exc
+
+
+def _validate_zip_members(zf: ZipFile, manifest_core: Dict[str, Any]) -> None:
+ names = zf.namelist()
+ if len(names) != len(set(names)):
+ raise ValueError("Pack contains duplicate archive members")
+
+ unsafe = sorted(name for name in names if not _safe_archive_name(name))
+ if unsafe:
+ raise ValueError(f"Unsafe archive path in pack: {unsafe[0]}")
+
+ actual = set(names)
+ missing = sorted(_ALLOWED_ARCHIVE_NAMES - actual)
+ if missing:
+ raise ValueError(f"Pack archive missing required files: {', '.join(missing)}")
+ unexpected = sorted(actual - _ALLOWED_ARCHIVE_NAMES)
+ if unexpected:
+ raise ValueError(f"Pack archive contains unexpected files: {', '.join(unexpected)}")
+
+ declared = set((manifest_core.get("files") or {}).keys()) | {MANIFEST_NAME}
+ if actual != declared:
+ raise ValueError("Pack archive members do not match signed manifest")
+
+
+def _read_manifest_and_validate_pack(zf: ZipFile) -> tuple[Dict[str, Any], Dict[str, Any]]:
+ names = zf.namelist()
+ if len(names) != len(set(names)):
+ raise ValueError("Pack contains duplicate archive members")
+ if MANIFEST_NAME not in names:
+ raise ValueError("Pack manifest is missing")
+ unsafe = sorted(name for name in names if not _safe_archive_name(name))
+ if unsafe:
+ raise ValueError(f"Unsafe archive path in pack: {unsafe[0]}")
+ try:
+ manifest = json.loads(zf.read(MANIFEST_NAME).decode("utf-8"))
+ except json.JSONDecodeError as exc:
+ raise ValueError("Pack manifest is not valid JSON") from exc
+ if not isinstance(manifest, dict):
+ raise ValueError("Pack manifest must be a JSON object")
+ manifest_core = _verify_manifest(manifest)
+ _validate_manifest_core(manifest_core)
+ _validate_zip_members(zf, manifest_core)
+ return manifest, manifest_core
+
+
+def _verified_read(zf: ZipFile, manifest_core: Dict[str, Any], archive_name: str) -> bytes:
+ meta = (manifest_core.get("files") or {}).get(archive_name) or {}
+ raw = zf.read(archive_name)
+ actual = _sha256(raw)
+ if actual != meta.get("sha256"):
+ raise ValueError(f"Hash mismatch for {archive_name}")
+ return raw
+
+
def _export_rows(db: Any, *, user_id: str) -> Dict[str, List[Dict[str, Any]]]:
artifact_payload = ArtifactManager(db).export_payload(user_id=user_id)
memories = db.get_all_memories(user_id=user_id, limit=100000)
@@ -202,6 +326,89 @@ def _export_rows(db: Any, *, user_id: str) -> Dict[str, List[Dict[str, Any]]]:
}
+def _default_repo_context_manifest(repo: Path, *, included: bool) -> Dict[str, Any]:
+ return {
+ "format": "dhee_repo_context",
+ "version": PACK_VERSION,
+ "included": included,
+ "source_repo": str(repo),
+ "exported_at": _utcnow(),
+ "schema_version": 1,
+ "repo_id": "",
+ "entry_count": 0,
+ "records": 0,
+ "source_manifest": {},
+ }
+
+
+def _read_repo_context_json(path: Path) -> Dict[str, Any]:
+ if not path.exists():
+ return {}
+ try:
+ data = json.loads(path.read_text(encoding="utf-8"))
+ except json.JSONDecodeError as exc:
+ raise ValueError(f"Repo context manifest is not valid JSON: {path}") from exc
+ if not isinstance(data, dict):
+ raise ValueError(f"Repo context manifest must be a JSON object: {path}")
+ return data
+
+
+def _read_repo_context_entries(path: Path, context_dir: Path) -> List[Dict[str, Any]]:
+ if not path.exists():
+ return []
+ if path.is_symlink():
+ raise ValueError(f"Repo context file is a symlink: {path}")
+ if not _path_within(path, context_dir):
+ raise ValueError(f"Repo context file escapes context directory: {path}")
+ try:
+ return _parse_jsonl(path.read_bytes())
+ except (json.JSONDecodeError, UnicodeDecodeError) as exc:
+ raise ValueError(f"Repo context entries are not valid JSONL: {path}") from exc
+
+
+def _assert_repo_context_safe(entries: List[Dict[str, Any]]) -> None:
+ from dhee.hooks.claude_code.privacy import filter_secrets
+
+ for row in entries:
+ if not isinstance(row, dict):
+ raise ValueError("Repo context entries must be JSON objects")
+ text = json.dumps(row, sort_keys=True, ensure_ascii=False, default=str)
+ if filter_secrets(text) != text:
+ raise ValueError("Repo context contains a likely secret and cannot be packed")
+
+
+def _repo_context_payload(repo: os.PathLike[str] | str | None) -> tuple[Dict[str, Any], List[Dict[str, Any]]]:
+ repo_root = Path(repo or os.getcwd()).expanduser().resolve()
+ context_dir = repo_root / ".dhee" / "context"
+ manifest_path = context_dir / "manifest.json"
+ entries_path = context_dir / "entries.jsonl"
+ if not context_dir.exists():
+ return _default_repo_context_manifest(repo_root, included=False), []
+ if context_dir.is_symlink():
+ raise ValueError(f"Repo context directory is a symlink: {context_dir}")
+
+ context_root = context_dir.resolve(strict=False)
+ for path in (manifest_path, entries_path):
+ if path.exists() and (path.is_symlink() or not _path_within(path, context_root)):
+ raise ValueError(f"Repo context file is unsafe: {path}")
+
+ source_manifest = _read_repo_context_json(manifest_path)
+ entries = _read_repo_context_entries(entries_path, context_root)
+ _assert_repo_context_safe(entries)
+
+ payload_manifest = _default_repo_context_manifest(repo_root, included=True)
+ payload_manifest.update(
+ {
+ "schema_version": source_manifest.get("schema_version") or 1,
+ "repo_id": str(source_manifest.get("repo_id") or ""),
+ "entry_count": int(source_manifest.get("entry_count") or len(entries)),
+ "records": len(entries),
+ "source_manifest": source_manifest,
+ }
+ )
+ return payload_manifest, entries
+
+
def export_pack(
*,
db: Any,
@@ -209,9 +416,12 @@ def export_pack(
output_path: os.PathLike[str] | str,
user_id: str = "default",
key_dir: os.PathLike[str] | str,
+ repo: os.PathLike[str] | str | None = None,
) -> Dict[str, Any]:
rows = _export_rows(db, user_id=user_id)
- handoff = build_handoff_snapshot(db, user_id=user_id, repo=os.getcwd())
+ repo_for_handoff = str(Path(repo or os.getcwd()).expanduser().resolve())
+ handoff = build_handoff_snapshot(db, user_id=user_id, repo=repo_for_handoff)
+ repo_context_manifest, repo_context_entries = _repo_context_payload(repo)
try:
rows["vectors"] = vector_store.export_entries(filters={"user_id": user_id}, limit=200000)
except NotImplementedError:
@@ -232,6 +442,23 @@ def export_pack(
"sha256": _sha256(handoff_raw),
"records": 1,
}
+ repo_context_manifest_raw = json.dumps(
+ repo_context_manifest,
+ indent=2,
+ sort_keys=True,
+ ensure_ascii=False,
+ ).encode("utf-8")
+ repo_context_entries_raw = _jsonl_bytes(repo_context_entries)
+ blobs[REPO_CONTEXT_MANIFEST_NAME] = repo_context_manifest_raw
+ blobs[REPO_CONTEXT_ENTRIES_NAME] = repo_context_entries_raw
+ file_meta[REPO_CONTEXT_MANIFEST_NAME] = {
+ "sha256": _sha256(repo_context_manifest_raw),
+ "records": 1,
+ }
+ file_meta[REPO_CONTEXT_ENTRIES_NAME] = {
+ "sha256": _sha256(repo_context_entries_raw),
+ "records": len(repo_context_entries),
+ }
manifest_core = {
"format": "dheemem",
@@ -254,41 +481,80 @@ def export_pack(
"path": str(output),
"version": PACK_VERSION,
"user_id": user_id,
- "counts": {name: len(rows.get(name, [])) for name, _ in _FILE_ORDER},
+ "counts": {
+ **{name: len(rows.get(name, [])) for name, _ in _FILE_ORDER},
+ "repo_context_entries": len(repo_context_entries),
+ },
"handoff": handoff,
+ "repo_context": repo_context_manifest,
}
def inspect_pack(path: os.PathLike[str] | str) -> Dict[str, Any]:
    """Validate a pack archive and return its manifest plus bootstrap payloads.

    Every file listed in the verified manifest is hash-checked via
    ``_verified_read`` so a corrupted archive fails here rather than during a
    later import. The handoff and repo-context documents are additionally
    parsed so callers can display them without reopening the archive.
    """
    with ZipFile(path, "r") as zf:
        manifest, manifest_core = _read_manifest_and_validate_pack(zf)
        # Touch every listed member; _verified_read raises on hash mismatch.
        for archive_name in sorted(manifest_core.get("files") or {}):
            _verified_read(zf, manifest_core, archive_name)
        handoff = json.loads(_verified_read(zf, manifest_core, HANDOFF_NAME).decode("utf-8"))
        repo_context = json.loads(
            _verified_read(zf, manifest_core, REPO_CONTEXT_MANIFEST_NAME).decode("utf-8")
        )
    # Copy so callers mutating the result do not alias the parsed manifest.
    result = dict(manifest)
    result["handoff"] = handoff
    result["repo_context"] = repo_context
    return result
-def _load_pack_rows(path: os.PathLike[str] | str) -> Dict[str, List[Dict[str, Any]]]:
+def _handoff_bootstrap(handoff: Dict[str, Any]) -> Dict[str, Any]:
+ last_session = handoff.get("last_session")
+ if not isinstance(last_session, dict):
+ last_session = {}
+ return {
+ "format": handoff.get("format"),
+ "generated_at": handoff.get("generated_at"),
+ "continuity_source": handoff.get("continuity_source"),
+ "last_session_id": last_session.get("id"),
+ "last_session_status": last_session.get("status"),
+ "has_thread_state": bool(handoff.get("thread_state")),
+ "recent_artifacts": len(handoff.get("recent_artifacts") or []),
+ "shared_task_results": len(handoff.get("shared_task_results") or []),
+ }
+
+
+def _repo_context_bootstrap(repo_context: Dict[str, Any], entries: List[Dict[str, Any]]) -> Dict[str, Any]:
+ return {
+ "format": repo_context.get("format"),
+ "included": bool(repo_context.get("included")),
+ "source_repo": repo_context.get("source_repo"),
+ "repo_id": repo_context.get("repo_id"),
+ "entry_count": int(repo_context.get("entry_count") or 0),
+ "records": len(entries),
+ }
+
+
def _load_pack_payload(
    path: os.PathLike[str] | str,
) -> tuple[Dict[str, List[Dict[str, Any]]], Dict[str, Any], Dict[str, Any], List[Dict[str, Any]]]:
    """Read and verify a pack, returning (rows, handoff, repo_context, repo_context_entries).

    Raises ValueError when any member fails its manifest hash check (via
    ``_verified_read``) or when the handoff / repo-context documents do not
    carry their expected ``format`` markers.
    """
    with ZipFile(path, "r") as zf:
        _manifest, manifest_core = _read_manifest_and_validate_pack(zf)
        rows: Dict[str, List[Dict[str, Any]]] = {}
        for logical_name, archive_name in _FILE_ORDER:
            rows[logical_name] = _parse_jsonl(_verified_read(zf, manifest_core, archive_name))
        handoff = json.loads(_verified_read(zf, manifest_core, HANDOFF_NAME).decode("utf-8"))
        # Format markers guard against a foreign/renamed JSON file being
        # smuggled in under the expected archive name.
        if not isinstance(handoff, dict) or handoff.get("format") != "dhee_handoff":
            raise ValueError("Pack handoff bootstrap is invalid")
        repo_context = json.loads(
            _verified_read(zf, manifest_core, REPO_CONTEXT_MANIFEST_NAME).decode("utf-8")
        )
        if not isinstance(repo_context, dict) or repo_context.get("format") != "dhee_repo_context":
            raise ValueError("Pack repo context manifest is invalid")
        repo_context_entries = _parse_jsonl(_verified_read(zf, manifest_core, REPO_CONTEXT_ENTRIES_NAME))
    # Reject unsafe entries before anything downstream can act on them.
    _assert_repo_context_safe(repo_context_entries)
    return rows, handoff, repo_context, repo_context_entries
+
+
def _load_pack_rows(path: os.PathLike[str] | str) -> Dict[str, List[Dict[str, Any]]]:
    """Load only the row tables from a pack, discarding the bootstrap payloads."""
    payload = _load_pack_payload(path)
    return payload[0]
@@ -520,6 +786,99 @@ def _merge_distillation_provenance(
return inserted
+def _canonical_repo_context_row(row: Dict[str, Any]) -> str:
+ return json.dumps(row, sort_keys=True, separators=(",", ":"), ensure_ascii=False, default=str)
+
+
+def _merge_repo_context(
+ *,
+ repo: os.PathLike[str] | str | None,
+ repo_context: Dict[str, Any],
+ entries: List[Dict[str, Any]],
+ strategy: str,
+) -> Dict[str, Any]:
+ if not entries:
+ return {
+ "status": "empty",
+ "entries": 0,
+ "imported": 0,
+ "skipped_existing": 0,
+ "repo": str(repo) if repo is not None else None,
+ }
+ if repo is None:
+ return {
+ "status": "skipped",
+ "reason": "repo_not_provided",
+ "entries": len(entries),
+ "imported": 0,
+ "skipped_existing": 0,
+ "repo": None,
+ }
+
+ from dhee import repo_link
+
+ repo_root = Path(repo).expanduser().resolve()
+ preexisting_config = repo_link.repo_config_path(repo_root).exists()
+ repo_link._ensure_repo_skeleton(repo_root)
+ context_dir = repo_link.repo_context_dir(repo_root)
+ entries_path = repo_link.repo_entries_path(repo_root)
+ config_path = repo_link.repo_config_path(repo_root)
+ if context_dir.is_symlink() or not _path_within(context_dir, repo_root):
+ raise ValueError(f"Target repo context directory is unsafe: {context_dir}")
+ if entries_path.exists() and entries_path.is_symlink():
+ raise ValueError(f"Target repo context entries file is unsafe: {entries_path}")
+
+ source_repo_id = str(repo_context.get("repo_id") or "").strip()
+ if source_repo_id:
+ cfg = _read_repo_context_json(config_path)
+ if not preexisting_config or not cfg.get("repo_id"):
+ cfg["repo_id"] = source_repo_id
+ cfg["schema_version"] = cfg.get("schema_version") or 1
+ cfg["linked_at"] = cfg.get("linked_at") or _utcnow()
+ config_path.write_text(json.dumps(cfg, indent=2, sort_keys=True) + "\n", encoding="utf-8")
+
+ _assert_repo_context_safe(entries)
+ context_dir.mkdir(parents=True, exist_ok=True)
+ if strategy == "replace":
+ raw = _jsonl_bytes(entries)
+ entries_path.write_bytes(raw)
+ imported = len(entries)
+ skipped = 0
+ else:
+ existing_rows = _read_repo_context_entries(entries_path, context_dir.resolve(strict=False))
+ existing_signatures = {
+ _canonical_repo_context_row(row)
+ for row in existing_rows
+ if isinstance(row, dict)
+ }
+ imported_rows: List[Dict[str, Any]] = []
+ skipped = 0
+ for row in entries:
+ signature = _canonical_repo_context_row(row)
+ if signature in existing_signatures:
+ skipped += 1
+ continue
+ imported_rows.append(row)
+ existing_signatures.add(signature)
+ if imported_rows:
+ with entries_path.open("ab") as fh:
+ raw = _jsonl_bytes(imported_rows)
+ if entries_path.stat().st_size and not entries_path.read_bytes().endswith(b"\n"):
+ fh.write(b"\n")
+ fh.write(raw)
+ imported = len(imported_rows)
+
+ manifest = repo_link.refresh_manifest(repo_root)
+ return {
+ "status": "imported",
+ "repo": str(repo_root),
+ "entries": len(entries),
+ "imported": imported,
+ "skipped_existing": skipped,
+ "manifest": manifest,
+ }
+
+
def import_pack(
*,
db: Any,
@@ -527,12 +886,13 @@ def import_pack(
input_path: os.PathLike[str] | str,
user_id: str = "default",
strategy: str = "merge",
+ repo: os.PathLike[str] | str | None = None,
) -> Dict[str, Any]:
strategy = str(strategy or "merge").strip().lower()
if strategy not in {"merge", "replace", "dry-run"}:
raise ValueError("strategy must be one of: merge, replace, dry-run")
- rows = _load_pack_rows(input_path)
+ rows, handoff, repo_context, repo_context_entries = _load_pack_payload(input_path)
memories = rows.get("memories", [])
history_rows = rows.get("memory_history", [])
provenance_rows = rows.get("distillation_provenance", [])
@@ -564,6 +924,8 @@ def import_pack(
"artifacts": len(artifact_payload["artifacts_manifest"]),
"existing_ids": existing_ids,
"existing_hashes": existing_hashes,
+ "handoff_bootstrap": _handoff_bootstrap(handoff),
+ "repo_context": _repo_context_bootstrap(repo_context, repo_context_entries),
}
if strategy == "dry-run":
return preview
@@ -608,6 +970,13 @@ def import_pack(
filtered_vectors.append(entry)
imported_vectors = vector_store.import_entries(filtered_vectors)
+ repo_context_import = _merge_repo_context(
+ repo=repo,
+ repo_context=repo_context,
+ entries=repo_context_entries,
+ strategy=strategy,
+ )
+
return {
**preview,
"cleared": cleared,
@@ -616,4 +985,5 @@ def import_pack(
"distillation_provenance_imported": provenance_inserted,
"artifact_import": artifact_stats,
"vectors_imported": imported_vectors,
+ "repo_context_import": repo_context_import,
}
diff --git a/dhee/router/digest.py b/dhee/router/digest.py
index 45cda85..1eb5989 100644
--- a/dhee/router/digest.py
+++ b/dhee/router/digest.py
@@ -96,6 +96,8 @@ def _detect_kind(path: str, text: str) -> str:
".go": "go",
".rs": "rust",
".java": "java",
+ ".kt": "kotlin", ".kts": "kotlin",
+ ".c": "c", ".h": "c", ".cc": "cpp", ".cpp": "cpp", ".hpp": "cpp",
".rb": "ruby",
".md": "markdown", ".mdx": "markdown",
".json": "json",
@@ -202,10 +204,13 @@ def _describe(v: Any) -> str:
)
# Class declarations, optionally exported.
_JS_CLASS_RE = re.compile(r"^\s*(?:export\s+)?class\s+([A-Za-z_$][\w$]*)", re.MULTILINE)
# Arrow functions bound via const/let/var, optionally exported, optionally
# type-annotated ("const f: Fn = ..."), optionally async. Captures either a
# parenthesised parameter list (group 2) or a single bare parameter (group 3).
_JS_CONST_FN_RE = re.compile(
    r"^\s*(?:export\s+)?(?:const|let|var)\s+([A-Za-z_$][\w$]*)\s*(?::[^=]+)?=\s*(?:async\s*)?(?:\(([^)]*)\)|([A-Za-z_$][\w$]*))\s*=>",
    re.MULTILINE,
)
# ES module import statements; captures the module specifier string.
_JS_IMPORT_RE = re.compile(r"""^\s*import\s+[^;'"]*['"]([^'"]+)['"]""", re.MULTILINE)
# TypeScript-only declarations: interfaces and type aliases.
_TS_INTERFACE_RE = re.compile(r"^\s*(?:export\s+)?interface\s+([A-Za-z_$][\w$]*)", re.MULTILINE)
_TS_TYPE_RE = re.compile(r"^\s*(?:export\s+)?type\s+([A-Za-z_$][\w$]*)\s*=", re.MULTILINE)
# Named export lists, e.g. "export { a, b as c }"; captures the brace body.
_JS_NAMED_EXPORT_RE = re.compile(r"^\s*export\s+\{([^}]+)\}", re.MULTILINE)
def _js_ts_symbols(text: str) -> dict[str, list[str]]:
@@ -213,10 +218,31 @@ def _js_ts_symbols(text: str) -> dict[str, list[str]]:
for m in _JS_FN_RE.finditer(text):
functions.append(f"{m.group(1)}({m.group(2).strip()})")
for m in _JS_CONST_FN_RE.finditer(text):
- functions.append(f"{m.group(1)}({m.group(2).strip()})")
+ args = (m.group(2) if m.group(2) is not None else m.group(3) or "").strip()
+ functions.append(f"{m.group(1)}({args})")
classes = [m.group(1) for m in _JS_CLASS_RE.finditer(text)]
imports = [m.group(1) for m in _JS_IMPORT_RE.finditer(text)]
- return {"classes": classes, "functions": functions, "imports": imports}
+ types = [f"{m.group(1)} (interface)" for m in _TS_INTERFACE_RE.finditer(text)]
+ types.extend(f"{m.group(1)} (type)" for m in _TS_TYPE_RE.finditer(text))
+ exports: list[str] = []
+ for m in _JS_NAMED_EXPORT_RE.finditer(text):
+ for item in m.group(1).split(","):
+ name = item.strip().split(" as ", 1)[0].strip()
+ if name and name not in exports:
+ exports.append(name)
+ component_names = [
+ item.split("(", 1)[0]
+ for item in [*classes, *functions]
+ if item and item.split("(", 1)[0][:1].isupper()
+ ]
+ return {
+ "classes": classes,
+ "functions": functions,
+ "types": types,
+ "components": component_names,
+ "imports": imports,
+ "exports": exports,
+ }
_GO_FN_RE = re.compile(
@@ -250,6 +276,73 @@ def _rust_symbols(text: str) -> dict[str, list[str]]:
return {"types": types, "functions": functions, "imports": imports}
+_JAVA_TYPE_RE = re.compile(
+ r"^\s*(?:@\w+(?:\([^)]*\))?\s*)*(?:(?:public|protected|private|abstract|final|static|sealed|non-sealed)\s+)*(class|interface|enum|record)\s+([A-Za-z_][\w]*)",
+ re.MULTILINE,
+)
+_JAVA_METHOD_RE = re.compile(
+ r"^\s*(?:@\w+(?:\([^)]*\))?\s*)*(?:(?:public|protected|private|static|final|abstract|synchronized|native|default)\s+)+(?:<[^>]+>\s+)?[\w<>\[\], ?]+\s+([A-Za-z_][\w]*)\s*\(([^;{}]*)\)",
+ re.MULTILINE,
+)
+_JAVA_IMPORT_RE = re.compile(r"^\s*import\s+(?:static\s+)?([^;]+);", re.MULTILINE)
+
+
+def _java_symbols(text: str) -> dict[str, list[str]]:
+ types = [f"{m.group(2)} ({m.group(1)})" for m in _JAVA_TYPE_RE.finditer(text)]
+ methods = [f"{m.group(1)}({m.group(2).strip()})" for m in _JAVA_METHOD_RE.finditer(text)]
+ imports = [m.group(1).strip() for m in _JAVA_IMPORT_RE.finditer(text)]
+ return {"types": types, "methods": methods, "imports": imports}
+
+
+_SHELL_FN_RE = re.compile(
+ r"^\s*(?:function\s+)?([A-Za-z_][\w-]*)\s*(?:\(\))?\s*\{",
+ re.MULTILINE,
+)
+_SHELL_EXPORT_RE = re.compile(r"^\s*(?:export\s+)?([A-Z_][A-Z0-9_]*)=", re.MULTILINE)
+
+
+def _shell_symbols(text: str) -> dict[str, list[str]]:
+ functions = [f"{m.group(1)}()" for m in _SHELL_FN_RE.finditer(text)]
+ variables = [m.group(1) for m in _SHELL_EXPORT_RE.finditer(text)]
+ return {"functions": functions, "variables": variables}
+
+
+_SQL_CREATE_RE = re.compile(
+ r"\bcreate\s+(table|view|index|function|procedure)\s+(?:if\s+not\s+exists\s+)?[`\"[]?([A-Za-z_][\w.$]*)",
+ re.IGNORECASE,
+)
+
+
+def _sql_symbols(text: str) -> dict[str, list[str]]:
+ objects: list[str] = []
+ for m in _SQL_CREATE_RE.finditer(text):
+ objects.append(f"{m.group(2)} ({m.group(1).lower()})")
+ return {"objects": objects}
+
+
+_LOG_LEVEL_RE = re.compile(r"\b(TRACE|DEBUG|INFO|WARN|WARNING|ERROR|FATAL|CRITICAL)\b", re.IGNORECASE)
+
+
+def _log_symbols(text: str) -> dict[str, list[str]]:
+ counts: dict[str, int] = {}
+ signals: list[str] = []
+ for i, line in enumerate(text.splitlines(), start=1):
+ m = _LOG_LEVEL_RE.search(line)
+ if not m:
+ continue
+ level = m.group(1).upper()
+ if level == "WARNING":
+ level = "WARN"
+ counts[level] = counts.get(level, 0) + 1
+ if level in {"WARN", "ERROR", "FATAL", "CRITICAL"} and len(signals) < 10:
+ snippet = line.strip()
+ if len(snippet) > 180:
+ snippet = snippet[:177] + "..."
+ signals.append(f"{i}: {level} {snippet}")
+ levels = [f"{level}={count}" for level, count in sorted(counts.items())]
+ return {"levels": levels, "signals": signals}
+
+
def _generic_symbols(_text: str) -> dict[str, list[str]]:
    # Fallback for file kinds without a dedicated extractor: no symbols.
    return {}
@@ -452,6 +545,14 @@ def digest_read(
symbols = _go_symbols(text)
elif kind == "rust":
symbols = _rust_symbols(text)
+ elif kind == "java":
+ symbols = _java_symbols(text)
+ elif kind == "shell":
+ symbols = _shell_symbols(text)
+ elif kind == "sql":
+ symbols = _sql_symbols(text)
+ elif kind == "log":
+ symbols = _log_symbols(text)
else:
symbols = _generic_symbols(text)
diff --git a/dhee/router/quality_report.py b/dhee/router/quality_report.py
index d70ee96..77b7d53 100644
--- a/dhee/router/quality_report.py
+++ b/dhee/router/quality_report.py
@@ -43,6 +43,7 @@ class QualityReport:
context_governance: dict[str, Any] = field(default_factory=dict)
tool_schema: dict[str, Any] = field(default_factory=dict)
replay: dict[str, Any] = field(default_factory=dict)
+ quality_gates: dict[str, Any] = field(default_factory=dict)
edits: dict[str, Any] = field(default_factory=dict)
hooks: dict[str, Any] = field(default_factory=dict)
settings: dict[str, Any] = field(default_factory=dict)
@@ -134,80 +135,171 @@ def _tool_schema_section() -> dict[str, Any]:
return {"error": f"{type(exc).__name__}: {exc}"}
-def _replay_section(sessions_dir: Path | None = None, limit: int = 0) -> dict[str, Any]:
+def _replay_section(
+ sessions_dir: Path | None = None,
+ limit: int = 0,
+ *,
+ harness: str = "claude_code",
+ golden_path: Path | None = None,
+) -> dict[str, Any]:
"""Run the replay harness in-process and collect aggregate numbers."""
try:
from dhee.benchmarks.router_replay import (
- _default_sessions_dir,
+ aggregate_reports,
+ discover_transcripts,
+ load_golden_annotations,
replay_session,
)
- sdir = sessions_dir or _default_sessions_dir()
- if not sdir.exists():
- return {"error": f"sessions dir missing: {sdir}"}
-
- transcripts = sorted(
- sdir.glob("*.jsonl"), key=lambda p: p.stat().st_mtime, reverse=True
- )
- if limit:
- transcripts = transcripts[:limit]
-
- raw = digest = calls = 0
- by_tool: dict[str, int] = {}
- warnings = 0
- turns = 0
- cache_read = 0
- cache_creation = 0
- tool_result_tokens = 0
- for p in transcripts:
- r = replay_session(p)
- raw += r.raw_tokens
- digest += r.digest_tokens
- calls += r.total_calls
- for t, n in r.calls_by_tool.items():
- by_tool[t] = by_tool.get(t, 0) + n
- warnings += len(r.warnings)
- turns += r.assistant_turns
- cache_read += r.cache_read_input_tokens
- cache_creation += r.cache_creation_input_tokens
- tool_result_tokens += r.tool_result_tokens
-
- net_saved = raw - digest
- saved_pct = round(net_saved / raw * 100, 2) if raw else 0.0
- cache_read_per_turn = int(cache_read / turns) if turns else 0
- projected_cache_read_per_turn = (
- int((cache_read - net_saved) / turns) if turns and cache_read else 0
+ transcripts = discover_transcripts(
+ sessions_dir=sessions_dir,
+ harness=harness,
+ limit=limit,
)
- tool_result_share = round(tool_result_tokens / cache_read, 3) if cache_read else 0.0
+ if sessions_dir and not sessions_dir.exists():
+ return {"error": f"sessions dir missing: {sessions_dir}"}
+ annotations = load_golden_annotations(golden_path)
+ replay_harness = "auto" if harness == "all" else harness
+ reports = [
+ replay_session(p, harness=replay_harness, annotations=annotations)
+ for p in transcripts
+ ]
+ aggregate = aggregate_reports(reports)
# Promise #1 gate: target < 30K avg cache-read tokens per turn.
cache_read_target = 30_000
- promise1_met = projected_cache_read_per_turn < cache_read_target if turns else None
+ promise1_met = (
+ aggregate["projected_cache_read_per_turn"] < cache_read_target
+ if aggregate["assistant_turns"]
+ else None
+ )
return {
- "sessions_dir": str(sdir),
- "sessions": len(transcripts),
- "assistant_turns": turns,
- "total_calls": calls,
- "calls_by_tool": by_tool,
- "raw_tokens": raw,
- "digest_tokens": digest,
- "net_saved_tokens": net_saved,
- "saved_pct": saved_pct,
- "cache_read_tokens_total": cache_read,
- "cache_creation_tokens_total": cache_creation,
- "cache_read_per_turn": cache_read_per_turn,
- "projected_cache_read_per_turn": projected_cache_read_per_turn,
+ "sessions_dir": str(sessions_dir) if sessions_dir else "",
+ "harness": harness,
+ "golden_path": str(golden_path) if golden_path else "",
+ **aggregate,
"cache_read_target_per_turn": cache_read_target,
"promise1_met": promise1_met,
- "tool_result_tokens": tool_result_tokens,
- "tool_result_share": tool_result_share,
- "warnings_count": warnings,
}
except Exception as exc:
return {"error": f"{type(exc).__name__}: {exc}"}
+def _gate_status(value: Any, *, op: str, target: float, samples: int = 1) -> dict[str, Any]:
+ try:
+ actual = float(value)
+ except (TypeError, ValueError):
+ return {"passed": None, "actual": value, "target": target, "reason": "missing_value"}
+ if samples <= 0:
+ return {"passed": None, "actual": actual, "target": target, "reason": "insufficient_samples"}
+ if op == ">=":
+ passed = actual >= target
+ elif op == "<=":
+ passed = actual <= target
+ else:
+ passed = False
+ return {"passed": passed, "actual": actual, "target": target}
+
+
def _quality_gates_section(
    *,
    router: dict[str, Any],
    replay: dict[str, Any],
    context_governance: dict[str, Any],
) -> dict[str, Any]:
    """Release-facing gates for the Developer Brain north-star metrics.

    Each gate is evaluated with ``_gate_status``; the sample counts below make
    a gate read "insufficient data" (passed=None) rather than pass/fail when
    a source section reported an error or produced no observations.
    """
    # Sections that errored contribute zero samples so their gates stay pending.
    replay_calls = int(replay.get("total_calls", 0) or 0) if not replay.get("error") else 0
    router_calls = int(router.get("total_calls", 0) or 0) if not router.get("error") else 0
    receipt_count = int(context_governance.get("receipt_count", 0) or 0) if not context_governance.get("error") else 0
    parity = replay.get("task_parity") if isinstance(replay.get("task_parity"), dict) else {}
    parity_failures = int(parity.get("fail", 0) or 0)
    parity_avg_score = parity.get("avg_score")
    parity_score_count = int(parity.get("score_count", 0) or 0)
    annotated_sessions = int(replay.get("annotated_sessions", 0) or 0)
    pending_reviews = int(replay.get("pending_review_sessions", 0) or 0)

    gates = {
        "router_token_savings": {
            **_gate_status(replay.get("saved_pct"), op=">=", target=50.0, samples=replay_calls),
            "unit": "percent",
            "source": "router replay projection",
        },
        # expansion_rate arrives as a fraction; converted to percent here.
        "expansion_rate": {
            **_gate_status((float(router.get("expansion_rate", 0) or 0) * 100.0), op="<=", target=15.0, samples=router_calls),
            "unit": "percent",
            "source": "ptr-store expansion telemetry",
        },
        "cache_read_per_turn": {
            **_gate_status(replay.get("projected_cache_read_per_turn"), op="<=", target=30_000.0, samples=int(replay.get("assistant_turns", 0) or 0)),
            "unit": "tokens",
            "source": "assistant usage + replay projection",
        },
        # max(1, ...) so zero incidents with zero receipts still reads as pass.
        "context_governance": {
            **_gate_status(context_governance.get("assertion_mismatch_count", 0), op="<=", target=0.0, samples=max(1, receipt_count)),
            "unit": "incidents",
            "source": "compiled context admission receipts",
        },
        "stale_context_incidents": {
            **_gate_status(replay.get("stale_context_incidents", 0), op="<=", target=0.0, samples=int(replay.get("annotated_sessions", 0) or 0)),
            "unit": "incidents",
            "source": "golden replay annotations",
        },
        "task_parity_failures": {
            **_gate_status(parity_failures, op="<=", target=0.0, samples=annotated_sessions),
            "unit": "sessions",
            "source": "golden replay annotations",
        },
        "task_parity_pending_review": {
            **_gate_status(pending_reviews, op="<=", target=0.0, samples=annotated_sessions),
            "unit": "sessions",
            "source": "golden replay annotations",
        },
        "task_parity_score": {
            **_gate_status(parity_avg_score, op=">=", target=0.95, samples=parity_score_count),
            "unit": "score",
            "source": "golden replay annotations",
        },
    }
    # Any hard failure wins; "pass" requires every gate to have evaluated True.
    statuses = [gate.get("passed") for gate in gates.values()]
    if any(status is False for status in statuses):
        verdict = "attention"
    elif statuses and all(status is True for status in statuses):
        verdict = "pass"
    else:
        verdict = "insufficient_data"
    return {
        "verdict": verdict,
        # Targets duplicated here so report consumers need not parse the gates.
        "targets": {
            "router_token_savings_pct": 50.0,
            "expansion_rate_pct_max": 15.0,
            "cache_read_per_turn_max": 30_000,
            "context_governance_incidents_max": 0,
            "stale_context_incidents_max": 0,
            "task_parity_failures_max": 0,
            "task_parity_pending_review_max": 0,
            "task_parity_score_min": 0.95,
        },
        "gates": gates,
        "note": "These gates are release-quality signals. None alone proves live task parity; replay and expansion data must be read together.",
    }
+
+
def gate_summary(report: "QualityReport", *, allow_insufficient: bool = False) -> dict[str, Any]:
    """Condense a report's quality gates into a pass/fail summary.

    ``ok`` is True when no gate failed and (unless *allow_insufficient*) no
    gate is still pending. With *allow_insufficient*, a run that passes only
    because pending gates were tolerated is labelled
    ``pass_with_insufficient_data`` instead of the raw verdict.
    """
    quality_gates = report.quality_gates or {}
    gates = quality_gates.get("gates") or {}
    failed = sorted(name for name, gate in gates.items() if gate.get("passed") is False)
    pending = sorted(name for name, gate in gates.items() if gate.get("passed") is None)
    verdict = quality_gates.get("verdict", "unknown")
    ok = not failed and (allow_insufficient or not pending)
    # Bug fix: the original one-line conditional could never produce
    # "pass_with_insufficient_data" — its else branch required `not ok and
    # allow_insufficient`, which forces `failed` to be non-empty, so the
    # expression always collapsed to `verdict`. Surface the tolerated-pending
    # case explicitly instead.
    if ok and pending and allow_insufficient:
        verdict = "pass_with_insufficient_data"
    return {
        "ok": ok,
        "verdict": verdict,
        "failed_gates": failed,
        "pending_gates": pending,
        "allow_insufficient": bool(allow_insufficient),
    }
+
+
def _edits_section() -> dict[str, Any]:
try:
from dhee.router.edit_ledger import summarise
@@ -272,20 +364,41 @@ def _settings_section() -> dict[str, Any]:
return {"error": f"{type(exc).__name__}: {exc}"}
-def build_report(sessions_dir: Path | None = None, limit: int = 0) -> QualityReport:
+def build_report(
+ sessions_dir: Path | None = None,
+ limit: int = 0,
+ *,
+ harness: str = "claude_code",
+ golden_path: Path | None = None,
+) -> QualityReport:
try:
from dhee import __version__
except Exception:
__version__ = "unknown"
+ router = _router_section()
+ critical_surface = _critical_surface_section()
+ context_governance = _context_governance_section()
+ tool_schema = _tool_schema_section()
+ replay = _replay_section(
+ sessions_dir=sessions_dir,
+ limit=limit,
+ harness=harness,
+ golden_path=golden_path,
+ )
return QualityReport(
dhee_version=__version__,
generated_at=time.time(),
- router=_router_section(),
- critical_surface=_critical_surface_section(),
- context_governance=_context_governance_section(),
- tool_schema=_tool_schema_section(),
- replay=_replay_section(sessions_dir=sessions_dir, limit=limit),
+ router=router,
+ critical_surface=critical_surface,
+ context_governance=context_governance,
+ tool_schema=tool_schema,
+ replay=replay,
+ quality_gates=_quality_gates_section(
+ router=router,
+ replay=replay,
+ context_governance=context_governance,
+ ),
edits=_edits_section(),
hooks=_hooks_section(),
settings=_settings_section(),
@@ -305,6 +418,7 @@ def format_human(report: QualityReport) -> str:
cg = report.context_governance
ts = report.tool_schema
rep = report.replay
+ qg = report.quality_gates
e = report.edits
s = report.settings
h = report.hooks
@@ -342,6 +456,19 @@ def format_human(report: QualityReport) -> str:
f" structural={cs.get('avg_structural_fit', 0):.2f}"
f" confidence={cs.get('avg_confidence', 0):.2f}",
]
+ if qg:
+ lines += [
+ "",
+ "[ quality gates ]",
+ f" verdict: {qg.get('verdict', 'unknown')}",
+ ]
+ for name, gate in (qg.get("gates") or {}).items():
+ status = gate.get("passed")
+ marker = "pass" if status is True else ("attention" if status is False else "pending")
+ actual = gate.get("actual")
+ target = gate.get("target")
+ unit = gate.get("unit") or ""
+ lines.append(f" {name}: {marker} actual={actual} target={target} {unit}".rstrip())
if cg and not cg.get("error"):
lines += [
"",
@@ -366,12 +493,14 @@ def format_human(report: QualityReport) -> str:
lines += [
"",
"[ replay projection (counterfactual) ]",
- f" sessions: {rep.get('sessions', 0)}",
+ f" harness: {rep.get('harness', 'claude_code')}",
+ f" sessions: {rep.get('sessions', 0)} by harness: {rep.get('sessions_by_harness', {})}",
f" assistant turns: {rep.get('assistant_turns', 0)}",
f" tool calls: {rep.get('total_calls', 0)} by tool: {rep.get('calls_by_tool', {})}",
f" raw tokens: {rep.get('raw_tokens', 0):,}",
f" digest tokens: {rep.get('digest_tokens', 0):,}",
f" net saved: {rep.get('net_saved_tokens', 0):,} ({rep.get('saved_pct', 0):.1f}%)",
+ f" golden: annotated={rep.get('annotated_sessions', 0)} pending={rep.get('pending_review_sessions', 0)} stale={rep.get('stale_context_incidents', 0)} parity={rep.get('task_parity', {})}",
"",
"[ promise 1 — token savings (target < 30K cache-read / turn) ]",
f" cache-read / turn today: {rep.get('cache_read_per_turn', 0):,}",
@@ -404,6 +533,7 @@ def format_share(report: QualityReport) -> str:
cg = report.context_governance or {}
ts = report.tool_schema or {}
rep = report.replay or {}
+ qg = report.quality_gates or {}
s = report.settings or {}
h = report.hooks or {}
@@ -424,10 +554,16 @@ def format_share(report: QualityReport) -> str:
f"- dhee version: `{report.dhee_version}`",
f"- router enabled: **{enabled}**, enforce: **{enforce}**",
f"- hooks installed: {hooks}",
+ f"- quality gate verdict: **{qg.get('verdict', 'insufficient_data')}**",
"",
"## Projected savings (counterfactual replay of real sessions)",
"",
f"- sessions replayed: **{rep.get('sessions', 0)}**",
+ f"- sessions by harness: `{rep.get('sessions_by_harness', {})}`",
+ f"- golden annotations: **{rep.get('annotated_sessions', 0)}** sessions, "
+ f"pending review: **{rep.get('pending_review_sessions', 0)}**, "
+ f"stale-context incidents: **{rep.get('stale_context_incidents', 0)}**, "
+ f"task parity: `{rep.get('task_parity', {})}`",
f"- assistant turns: **{rep.get('assistant_turns', 0)}**",
f"- tool calls replayed: **{calls:,}**",
f"- raw tokens (native flow): **{raw:,}**",
diff --git a/dhee/runtime.py b/dhee/runtime.py
new file mode 100644
index 0000000..2ec1d2d
--- /dev/null
+++ b/dhee/runtime.py
@@ -0,0 +1,784 @@
+"""Local Dhee runtime manager.
+
+The runtime daemon is deliberately small and local-only. It gives users a
+clear answer to "is Dhee running and what venv/process is it using?" while
+leaving hot-path acceleration hooks available for later integration.
+"""
+
+from __future__ import annotations
+
+import atexit
+import json
+import os
+import signal
+import socket
+import subprocess
+import sys
+import threading
+import time
+import urllib.error
+import urllib.request
+from dataclasses import dataclass
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+from pathlib import Path
+from typing import Any, Dict, Optional
+
+
+def _home() -> Path:
+ return Path(os.path.expanduser("~"))
+
+
+def data_dir() -> Path:
+ return Path(os.environ.get("DHEE_DATA_DIR") or (_home() / ".dhee")).expanduser()
+
+
+def runtime_dir(*, create: bool = True) -> Path:
+ root = data_dir() / "runtime"
+ if create:
+ root.mkdir(parents=True, exist_ok=True)
+ try:
+ os.chmod(root, 0o700)
+ except OSError:
+ pass
+ return root
+
+
+def state_path(*, create: bool = True) -> Path:
+ return runtime_dir(create=create) / "daemon.json"
+
+
+def log_path(*, create: bool = True) -> Path:
+ return runtime_dir(create=create) / "daemon.log"
+
+
+def _read_json(path: Path) -> Dict[str, Any]:
+ try:
+ return json.loads(path.read_text(encoding="utf-8"))
+ except Exception:
+ return {}
+
+
+def _write_json(path: Path, data: Dict[str, Any]) -> None:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ tmp = path.with_name(f".{path.name}.{os.getpid()}.tmp")
+ tmp.write_text(json.dumps(data, indent=2, sort_keys=True, default=str) + "\n", encoding="utf-8")
+ try:
+ os.chmod(tmp, 0o600)
+ except OSError:
+ pass
+ os.replace(tmp, path)
+
+
+def _pid_running(pid: Any) -> bool:
+ try:
+ value = int(pid)
+ except (TypeError, ValueError):
+ return False
+ if value <= 0:
+ return False
+ try:
+ os.kill(value, 0)
+ return True
+ except ProcessLookupError:
+ return False
+ except PermissionError:
+ return True
+ except OSError:
+ return False
+
+
def _managed_venv() -> Dict[str, Any]:
    """Describe the managed ~/.dhee/.venv and whether we are running inside it."""
    venv = _home() / ".dhee" / ".venv"
    prefix = Path(sys.prefix).resolve()
    present = venv.exists()
    return {
        "path": str(venv),
        "exists": present,
        "current_prefix": str(prefix),
        "current_is_managed": present and prefix == venv.resolve(),
        "python": sys.executable,
    }
+
+
def _fetch_json(url: str, *, timeout: float = 0.25) -> Dict[str, Any]:
    """GET *url* and decode up to 64 KiB of the response body as JSON."""
    with urllib.request.urlopen(url, timeout=timeout) as response:
        body = response.read(64 * 1024)
    return json.loads(body.decode("utf-8"))


def _post_json(url: str, payload: Dict[str, Any], *, timeout: float = 2.0) -> Dict[str, Any]:
    """POST *payload* as JSON to *url*; decode up to 1 MiB of the reply."""
    encoded = json.dumps(payload, default=str).encode("utf-8")
    request = urllib.request.Request(
        url,
        data=encoded,
        headers={"Content-Type": "application/json"},
        method="POST",
    )
    with urllib.request.urlopen(request, timeout=timeout) as response:
        body = response.read(1024 * 1024)
    return json.loads(body.decode("utf-8"))
+
+
def _active_endpoint() -> Optional[str]:
    """Return the daemon's loopback endpoint, or None when it is unusable.

    None is returned when DHEE_RUNTIME_DISABLE is set truthy, the recorded
    pid is no longer alive, or the endpoint is not a loopback URL.
    """
    disabled = str(os.environ.get("DHEE_RUNTIME_DISABLE") or "").strip().lower()
    if disabled in {"1", "true", "yes", "on"}:
        return None
    state = _read_json(state_path(create=False))
    endpoint = str(state.get("endpoint") or "").strip()
    if not endpoint:
        return None
    if not _pid_running(state.get("pid")):
        return None
    if endpoint.startswith(("http://127.0.0.1:", "http://localhost:")):
        return endpoint
    return None
+
+
def execute_shell(
    command: str,
    *,
    repo: Optional[str] = None,
    user_id: str = "default",
    agent_id: str = "client",
    workspace_id: Optional[str] = None,
    timeout: float = 3.0,
) -> Optional[Dict[str, Any]]:
    """Run a DheeFS shell command via the daemon, or return None if unavailable.

    A ``None`` result tells the caller to fall back to the existing
    in-process path — the runtime stays an accelerator, never a hard
    dependency.
    """
    endpoint = _active_endpoint()
    if endpoint is None:
        return None
    request = {
        "command": command,
        "repo": repo,
        "user_id": user_id,
        "agent_id": agent_id,
        "workspace_id": workspace_id,
    }
    try:
        return _post_json(f"{endpoint}/dheefs/execute", request, timeout=timeout)
    except (OSError, urllib.error.URLError, json.JSONDecodeError, TimeoutError):
        return None
+
+
def execute_context(
    action: str,
    *,
    repo: Optional[str] = None,
    user_id: str = "default",
    agent_id: str = "client",
    workspace_id: Optional[str] = None,
    args: Optional[Dict[str, Any]] = None,
    timeout: float = 3.0,
) -> Optional[Dict[str, Any]]:
    """Run a compiled context action via the daemon, or None if unavailable.

    A ``None`` result tells the caller to keep using the in-process
    ContextStateStore path.
    """
    endpoint = _active_endpoint()
    if endpoint is None:
        return None
    request: Dict[str, Any] = {
        "action": action,
        "repo": repo,
        "user_id": user_id,
        "agent_id": agent_id,
        "workspace_id": workspace_id,
    }
    if args:
        request.update(args)
    try:
        reply = _post_json(f"{endpoint}/context/execute", request, timeout=timeout)
    except (OSError, urllib.error.URLError, json.JSONDecodeError, TimeoutError):
        return None
    # Treat daemon-side context errors as "unavailable" so callers fall back.
    if reply.get("format") == "dhee_context_error":
        return None
    return reply
+
+
def execute_router(
    action: str,
    arguments: Dict[str, Any],
    *,
    timeout: float = 3.0,
) -> Optional[Dict[str, Any]]:
    """Run a pointer-router action via the daemon, or None if unavailable.

    Read and grep are always safe to accelerate. Bash is only daemonized when
    the daemon process was started with the server-side bash opt-in and cwd
    allowlist environment variables.
    """
    endpoint = _active_endpoint()
    if not endpoint:
        return None
    if str(action or "").strip().lower() in {"bash", "dhee_bash"}:
        try:
            wanted = float(arguments.get("timeout", 30.0))
        except (TypeError, ValueError):
            wanted = 30.0
        # Give the HTTP call headroom beyond the clamped bash timeout.
        timeout = max(timeout, min(max(1.0, wanted), 600.0) + 2.0)
    try:
        reply = _post_json(
            f"{endpoint}/router/execute",
            {"action": action, "arguments": arguments},
            timeout=timeout,
        )
    except (OSError, urllib.error.URLError, json.JSONDecodeError, TimeoutError):
        return None
    # Treat daemon-side router errors as "unavailable" so callers fall back.
    if reply.get("format") == "dhee_router_error":
        return None
    return reply
+
+
def status(*, timeout: float = 0.25) -> Dict[str, Any]:
    """Collect a point-in-time report on the daemon, its paths, and the venv.

    Reads the daemon state file, probes the recorded pid, and — when the pid
    looks alive — fetches ``/healthz`` with a short *timeout*. Never raises;
    failures are folded into the returned ``health`` mapping.
    """
    started = time.perf_counter()
    state = _read_json(state_path(create=False))
    pid = state.get("pid")
    endpoint = state.get("endpoint")
    pid_alive = _pid_running(pid)
    health: Dict[str, Any] = {"ok": False}
    if pid_alive and endpoint:
        try:
            health = _fetch_json(f"{endpoint}/healthz", timeout=timeout)
            # Reaching the endpoint at all counts as healthy.
            health["ok"] = True
        except (OSError, urllib.error.URLError, json.JSONDecodeError, TimeoutError) as exc:
            health = {"ok": False, "error": f"{type(exc).__name__}: {exc}"}
    elif state:
        health = {"ok": False, "error": "stale pidfile or daemon not running"}

    running = bool(pid_alive and health.get("ok"))
    if state and not pid_alive:
        # Mark the on-disk record as stale so callers can surface it.
        state["stale"] = True

    return {
        "daemon": {
            "running": running,
            "pid": pid,
            "pid_alive": pid_alive,
            "endpoint": endpoint,
            "started_at": state.get("started_at"),
            "uptime_seconds": max(0.0, time.time() - float(state.get("started_at") or time.time())) if running else 0.0,
            "health": health,
            "state": state,
        },
        "paths": {
            "data_dir": str(data_dir()),
            "runtime_dir": str(runtime_dir(create=False)),
            "state": str(state_path(create=False)),
            "log": str(log_path(create=False)),
        },
        "venv": _managed_venv(),
        "client": {
            "python": sys.executable,
            # How long this status query itself took, in milliseconds.
            "query_ms": round((time.perf_counter() - started) * 1000, 2),
        },
    }
+
+
def start_daemon(*, wait: bool = True, timeout: float = 5.0) -> Dict[str, Any]:
    """Spawn the runtime daemon as a detached child process.

    When *wait* is true, poll ``status()`` until the daemon reports healthy
    or *timeout* seconds elapse. Returns a dict describing whether a start
    happened plus the latest status snapshot.
    """
    current = status()
    if current["daemon"]["running"]:
        return {"started": False, "reason": "already_running", "status": current}

    # Remove any stale state file left behind by a dead daemon.
    stale = state_path(create=False)
    if stale.exists():
        try:
            stale.unlink()
        except OSError:
            pass

    env = os.environ.copy()
    env.setdefault("DHEE_DATA_DIR", str(data_dir()))
    log = log_path()
    log.parent.mkdir(parents=True, exist_ok=True)
    log_handle = log.open("ab")
    try:
        proc = subprocess.Popen(
            [sys.executable, "-m", "dhee.runtime_daemon"],
            stdout=log_handle,
            stderr=log_handle,
            stdin=subprocess.DEVNULL,
            env=env,
            cwd=os.getcwd(),
            # Detach into its own session so it outlives this process.
            start_new_session=True,
        )
    finally:
        # The child holds its own copy of the log descriptor.
        log_handle.close()

    if not wait:
        return {"started": True, "pid": proc.pid, "status": status()}

    deadline = time.time() + timeout
    last = status()
    while time.time() < deadline:
        last = status(timeout=0.5)
        if last["daemon"]["running"]:
            return {"started": True, "pid": proc.pid, "status": last}
        if proc.poll() is not None:
            # The child already exited; no point waiting further.
            break
        time.sleep(0.05)
    return {
        "started": False,
        "pid": proc.pid,
        "error": "daemon did not become healthy before timeout",
        "status": last,
    }
+
+
def stop_daemon(*, timeout: float = 3.0) -> Dict[str, Any]:
    """Terminate the daemon: SIGTERM first, SIGKILL after *timeout* seconds.

    Cleans up the state file on every exit path and refuses to signal the
    current process. Returns a dict describing what happened plus a fresh
    status snapshot.
    """
    before = status()
    pid = before["daemon"].get("pid")
    if not before["daemon"].get("pid_alive"):
        # Nothing to kill; just clear any stale state file.
        try:
            state_path(create=False).unlink()
        except OSError:
            pass
        return {"stopped": False, "reason": "not_running", "status": status()}

    try:
        pid_int = int(pid)
    except (TypeError, ValueError):
        return {"stopped": False, "reason": "invalid_pid", "status": before}
    if pid_int == os.getpid():
        return {"stopped": False, "reason": "refusing_to_stop_current_process", "status": before}

    os.kill(pid_int, signal.SIGTERM)
    deadline = time.time() + timeout
    while time.time() < deadline:
        if not _pid_running(pid_int):
            try:
                state_path(create=False).unlink()
            except OSError:
                pass
            return {"stopped": True, "status": status()}
        time.sleep(0.05)

    # Graceful shutdown timed out; force-kill and clean up regardless.
    try:
        os.kill(pid_int, signal.SIGKILL)
    except OSError:
        pass
    try:
        state_path(create=False).unlink()
    except OSError:
        pass
    return {"stopped": True, "forced": True, "status": status()}
+
+
def restart_daemon(*, timeout: float = 5.0) -> Dict[str, Any]:
    """Stop then start the daemon, reporting both outcomes and the final status."""
    stop_result = stop_daemon()
    start_result = start_daemon(timeout=timeout)
    return {"stopped": stop_result, "started": start_result, "status": status()}
+
+
def format_status(data: Dict[str, Any]) -> str:
    """Render a ``status()`` payload as a human-readable multi-line string.

    Only reads from the mapping with ``.get``, so partial data is safe;
    missing values render as ``None``.
    """
    daemon = data.get("daemon") or {}
    paths = data.get("paths") or {}
    venv = data.get("venv") or {}
    lines = ["Dhee runtime"]
    lines.append(f" daemon: {'running' if daemon.get('running') else 'stopped'}")
    if daemon.get("pid"):
        lines.append(f" pid: {daemon.get('pid')}")
    if daemon.get("endpoint"):
        lines.append(f" endpoint: {daemon.get('endpoint')}")
    health = daemon.get("health") or {}
    if health.get("error"):
        lines.append(f" health: {health.get('error')}")
    elif daemon.get("running"):
        # Fix: plain literal, not an f-string with no placeholders (F541).
        lines.append(" health: ok")
    lines.append(f" data: {paths.get('data_dir')}")
    lines.append(f" runtime: {paths.get('runtime_dir')}")
    lines.append(f" log: {paths.get('log')}")
    lines.append(f" managed venv: {'present' if venv.get('exists') else 'missing'} ({venv.get('path')})")
    lines.append(f" python: {venv.get('python')}")
    return "\n".join(lines)
+
+
+@dataclass
+class _DaemonState:
+ started_at: float
+ host: str
+ port: int
+
+ @property
+ def endpoint(self) -> str:
+ return f"http://{self.host}:{self.port}"
+
+
def serve_forever(host: str = "127.0.0.1", port: Optional[int] = None) -> None:
    """Run the loopback HTTP daemon until SIGTERM/SIGINT shuts it down.

    Binds to *port* (falling back to DHEE_RUNTIME_PORT, then to an
    OS-assigned ephemeral port), records the resulting endpoint in the state
    file, and removes that file on exit while it still records this process.
    """
    bind_port = int(port if port is not None else os.environ.get("DHEE_RUNTIME_PORT") or 0)
    httpd = ThreadingHTTPServer((host, bind_port), _Handler)
    # Port 0 means "pick any free port"; read back the one we actually got.
    actual_port = int(httpd.server_address[1])
    state = _DaemonState(started_at=time.time(), host=host, port=actual_port)
    _Handler.daemon_state = state
    payload = {
        "pid": os.getpid(),
        "host": host,
        "port": actual_port,
        "endpoint": state.endpoint,
        "started_at": state.started_at,
        "python": sys.executable,
        "cwd": os.getcwd(),
    }
    _write_json(state_path(), payload)

    def _cleanup() -> None:
        # Only delete the state file if it still records this process;
        # a newer daemon may already have overwritten it.
        current = _read_json(state_path(create=False))
        if current.get("pid") == os.getpid():
            try:
                state_path(create=False).unlink()
            except OSError:
                pass

    def _shutdown(_signum: int, _frame: Any) -> None:
        # shutdown() blocks until serve_forever returns, so run it on a
        # helper thread instead of inside the signal handler itself.
        threading.Thread(target=httpd.shutdown, daemon=True).start()

    atexit.register(_cleanup)
    signal.signal(signal.SIGTERM, _shutdown)
    signal.signal(signal.SIGINT, _shutdown)
    try:
        httpd.serve_forever(poll_interval=0.25)
    finally:
        httpd.server_close()
        _cleanup()
+
+
class _Handler(BaseHTTPRequestHandler):
    """Loopback HTTP handler exposing health/status plus execute endpoints."""

    # Set by serve_forever() once the server has bound its port.
    daemon_state: Optional[_DaemonState] = None

    def log_message(self, _format: str, *_args: Any) -> None:
        # Silence the base class's per-request stderr logging.
        return

    def _send(self, code: int, payload: Dict[str, Any]) -> None:
        """Serialize *payload* as JSON and send it with HTTP status *code*."""
        raw = json.dumps(payload, indent=2, sort_keys=True, default=str).encode("utf-8")
        self.send_response(code)
        self.send_header("Content-Type", "application/json")
        self.send_header("Content-Length", str(len(raw)))
        self.end_headers()
        self.wfile.write(raw)

    def do_GET(self) -> None:  # noqa: N802 - stdlib handler API
        """Serve /healthz (liveness) and /status (detail); 404 otherwise."""
        state = self.daemon_state
        if self.path in {"/healthz", "/health"}:
            self._send(
                200,
                {
                    "status": "ok",
                    "pid": os.getpid(),
                    "uptime_seconds": max(0.0, time.time() - (state.started_at if state else time.time())),
                    "bash": _bash_runtime_status(),
                },
            )
            return
        if self.path == "/status":
            self._send(
                200,
                {
                    "status": "ok",
                    "pid": os.getpid(),
                    "endpoint": state.endpoint if state else None,
                    "started_at": state.started_at if state else None,
                    "data_dir": str(data_dir()),
                    "runtime_dir": str(runtime_dir()),
                    "python": sys.executable,
                    "bash": _bash_runtime_status(),
                },
            )
            return
        self._send(404, {"error": "not_found"})

    def do_POST(self) -> None:  # noqa: N802 - stdlib handler API
        """Dispatch execute requests after loopback and size validation."""
        # Defence in depth: the server binds loopback, but reject anything
        # that somehow arrives from a non-local address anyway.
        if self.client_address and self.client_address[0] not in {"127.0.0.1", "::1"}:
            self._send(403, {"error": "forbidden"})
            return
        if self.path not in {"/dheefs/execute", "/context/execute", "/router/execute"}:
            self._send(404, {"error": "not_found"})
            return
        try:
            length = int(self.headers.get("Content-Length") or "0")
        except ValueError:
            self._send(400, {"error": "invalid_content_length"})
            return
        # Request bodies are capped at 1 MiB.
        if length <= 0 or length > 1024 * 1024:
            self._send(400, {"error": "invalid_request_size"})
            return
        try:
            payload = json.loads(self.rfile.read(length).decode("utf-8"))
        except Exception as exc:
            self._send(400, {"error": f"invalid_json: {type(exc).__name__}: {exc}"})
            return
        if self.path == "/dheefs/execute":
            result = _execute_dheefs_payload(payload)
        elif self.path == "/context/execute":
            result = _execute_context_payload(payload)
        else:
            result = _execute_router_payload(payload)
        self._send(200, result)
+
+
def _runtime_db() -> Any:
    """Open the shared history database under the daemon's data directory."""
    # Imported lazily so module import stays cheap for pure clients.
    from dhee.db.sqlite import SQLiteManager

    db_file = data_dir() / "history.db"
    return SQLiteManager(str(db_file))
+
+
def _execute_dheefs_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Run a DheeFS shell command in-process on behalf of the daemon.

    Never raises: any failure is converted into a shell-result-shaped dict
    with ``ok: False`` so the HTTP layer can always reply 200.
    """
    try:
        # Imported lazily so the daemon can start even if DheeFS is broken.
        from dhee.fs import ContextWorkspace

        repo = payload.get("repo")
        if repo:
            repo = os.path.abspath(os.path.expanduser(str(repo)))
        workspace = ContextWorkspace(
            repo=repo,
            user_id=str(payload.get("user_id") or "default"),
            agent_id=str(payload.get("agent_id") or "runtime"),
            db=_runtime_db(),
            workspace_id=payload.get("workspace_id") or repo,
        )
        result = workspace.execute(str(payload.get("command") or "")).to_dict()
        # Stamp the response so clients can tell it was daemon-served.
        result["runtime"] = {
            "daemon": True,
            "pid": os.getpid(),
            "transport": "http-loopback",
        }
        return result
    except Exception as exc:
        return {
            "ok": False,
            "exit_code": 1,
            "command": str(payload.get("command") or ""),
            "stdout": f"{type(exc).__name__}: {exc}",
            "stderr": f"{type(exc).__name__}: {exc}",
            "data": {"error": str(exc), "error_type": type(exc).__name__},
            "runtime": {
                "daemon": True,
                "pid": os.getpid(),
                "transport": "http-loopback",
            },
        }
+
+
+def _runtime_metadata() -> Dict[str, Any]:
+ return {
+ "daemon": True,
+ "pid": os.getpid(),
+ "transport": "http-loopback",
+ }
+
+
+def _truthy_env(name: str) -> bool:
+ return str(os.environ.get(name) or "").strip().lower() in {"1", "true", "yes", "on"}
+
+
+def _bash_allowlist_raw() -> str:
+ return str(
+ os.environ.get("DHEE_RUNTIME_BASH_ALLOWLIST")
+ or os.environ.get("DHEE_RUNTIME_BASH_CWD_ALLOWLIST")
+ or ""
+ )
+
+
+def _bash_allowlist_roots() -> list[Path]:
+ raw = _bash_allowlist_raw()
+ roots: list[Path] = []
+ for item in raw.replace(",", os.pathsep).split(os.pathsep):
+ item = item.strip()
+ if not item:
+ continue
+ try:
+ root = Path(item).expanduser().resolve()
+ except OSError:
+ continue
+ if root.is_dir():
+ roots.append(root)
+ return roots
+
+
+def _bash_timeout_cap_seconds() -> float:
+ try:
+ value = float(os.environ.get("DHEE_RUNTIME_BASH_MAX_TIMEOUT") or 30.0)
+ except (TypeError, ValueError):
+ value = 30.0
+ return max(1.0, min(value, 600.0))
+
+
+def _bash_requested_timeout(arguments: Dict[str, Any]) -> float:
+ try:
+ value = float(arguments.get("timeout", 120.0))
+ except (TypeError, ValueError):
+ value = 120.0
+ return max(1.0, min(value, 600.0))
+
+
+def _path_within(child: Path, root: Path) -> bool:
+ try:
+ child.relative_to(root)
+ return True
+ except ValueError:
+ return False
+
+
def _bash_runtime_status() -> Dict[str, Any]:
    """Snapshot of the server-side bash opt-in configuration."""
    roots = _bash_allowlist_roots()
    return {
        "enabled": _truthy_env("DHEE_RUNTIME_ENABLE_BASH"),
        "allowlist": [str(root) for root in roots],
        "timeout_cap_seconds": _bash_timeout_cap_seconds(),
        "trust_boundary": "server_env_enable_and_cwd_allowlist",
    }
+
+
def _bash_router_error(error: str, *, action: str, **extra: Any) -> Dict[str, Any]:
    """Build a router-error payload annotated with the bash runtime status."""
    runtime = _runtime_metadata()
    runtime["bash"] = _bash_runtime_status()
    result: Dict[str, Any] = {
        "format": "dhee_router_error",
        "error": error,
        "action": action,
        "runtime": runtime,
    }
    result.update(extra)
    return result
+
+
def _execute_context_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Run a context-state action in-process on behalf of the daemon.

    Supported actions: status, state, debt, checkpoint, rollover, provision.
    Unknown actions and raised exceptions are returned as
    ``dhee_context_error`` payloads rather than propagated.
    """
    try:
        # Imported lazily so the daemon can start even if the store is broken.
        from dhee.context_state import ContextStateStore

        repo = payload.get("repo")
        if repo:
            repo = os.path.abspath(os.path.expanduser(str(repo)))
        store = ContextStateStore(
            repo=repo,
            workspace_id=payload.get("workspace_id") or repo,
            user_id=str(payload.get("user_id") or "default"),
            agent_id=str(payload.get("agent_id") or "runtime"),
        )
        action = str(payload.get("action") or "").strip().lower()
        if action == "status":
            result = store.status()
        elif action == "state":
            # "state" supports three render formats; card is the default.
            fmt = str(payload.get("format") or "card").lower()
            if fmt == "json":
                result = {"format": "dhee_context_state", "state": store.load(), "status": store.status()}
            elif fmt == "markdown":
                result = {"format": "markdown", "text": store.render_markdown()}
            else:
                result = {"format": "card", "text": store.render_state_card(), "status": store.status()}
        elif action == "debt":
            result = store.debt_summary(top=bool(payload.get("top", False)))
        elif action == "checkpoint":
            result = store.checkpoint(reason=str(payload.get("reason") or "runtime checkpoint"))
        elif action == "rollover":
            result = store.rollover(reason=str(payload.get("reason") or "runtime rollover"))
        elif action == "provision":
            result = store.provision(str(payload.get("task") or payload.get("query") or ""))
        else:
            result = {
                "format": "dhee_context_error",
                "error": "unknown_context_action",
                "action": action,
            }
        result["runtime"] = _runtime_metadata()
        return result
    except Exception as exc:
        return {
            "format": "dhee_context_error",
            "error": str(exc),
            "error_type": type(exc).__name__,
            "action": str(payload.get("action") or ""),
            "runtime": _runtime_metadata(),
        }
+
+
def _execute_bash_router_payload(arguments: Dict[str, Any], *, action: str) -> Dict[str, Any]:
    """Run a bash router action after enforcing the server-side trust boundary.

    Requires the DHEE_RUNTIME_ENABLE_BASH opt-in, a non-empty cwd allowlist,
    and a resolvable cwd inside one of the allowlisted roots; the requested
    timeout is clamped to the server-configured cap. Violations return a
    ``dhee_router_error`` payload instead of executing anything.
    """
    if not _truthy_env("DHEE_RUNTIME_ENABLE_BASH"):
        return _bash_router_error("bash_runtime_not_enabled", action=action)

    roots = _bash_allowlist_roots()
    if not roots:
        return _bash_router_error("bash_runtime_allowlist_empty", action=action)

    # Resolve the working directory before checking it against the allowlist
    # so symlink tricks cannot escape an allowlisted root.
    cwd_arg = arguments.get("cwd") or os.getcwd()
    cwd_path = Path(str(cwd_arg)).expanduser()
    if not cwd_path.is_absolute():
        cwd_path = Path(os.getcwd()) / cwd_path
    try:
        cwd = cwd_path.resolve()
    except OSError as exc:
        return _bash_router_error(
            "bash_runtime_cwd_unresolvable",
            action=action,
            cwd=str(cwd_path),
            detail=f"{type(exc).__name__}: {exc}",
        )
    if not cwd.is_dir():
        return _bash_router_error("bash_runtime_cwd_missing", action=action, cwd=str(cwd))

    matches = [root for root in roots if _path_within(cwd, root)]
    if not matches:
        return _bash_router_error(
            "bash_runtime_cwd_not_allowlisted",
            action=action,
            cwd=str(cwd),
            allowlist=[str(root) for root in roots],
        )

    # The effective timeout is the lower of the client request and the cap.
    requested_timeout = _bash_requested_timeout(arguments)
    timeout_cap = _bash_timeout_cap_seconds()
    effective_timeout = min(requested_timeout, timeout_cap)
    runtime_arguments = dict(arguments)
    runtime_arguments["cwd"] = str(cwd)
    runtime_arguments["timeout"] = effective_timeout

    from dhee.router.handlers import handle_dhee_bash

    result = handle_dhee_bash(runtime_arguments)
    result["runtime"] = _runtime_metadata()
    # Record exactly which policy inputs allowed this execution.
    result["runtime"]["bash"] = {
        "enabled": True,
        "enabled_by": "DHEE_RUNTIME_ENABLE_BASH",
        "cwd": str(cwd),
        "allowlist_match": str(matches[0]),
        "allowlist": [str(root) for root in roots],
        "requested_timeout_seconds": requested_timeout,
        "effective_timeout_seconds": effective_timeout,
        "timeout_cap_seconds": timeout_cap,
        "trust_boundary": "server_env_enable_and_cwd_allowlist",
        "environment": {
            "shell": os.environ.get("SHELL") or "/bin/sh",
            "python": sys.executable,
        },
    }
    return result
+
+
def _execute_router_payload(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Dispatch a router action (read/grep/bash) in-process for the daemon.

    Unknown actions and raised exceptions are returned as
    ``dhee_router_error`` payloads rather than propagated.
    """
    try:
        # Imported lazily so the daemon starts even if handlers are broken.
        from dhee.router.handlers import handle_dhee_grep, handle_dhee_read

        action = str(payload.get("action") or "").strip().lower()
        arguments = payload.get("arguments") if isinstance(payload.get("arguments"), dict) else {}
        if action in {"read", "dhee_read"}:
            result = handle_dhee_read(arguments)
        elif action in {"grep", "dhee_grep"}:
            result = handle_dhee_grep(arguments)
        elif action in {"bash", "dhee_bash"}:
            # Bash goes through the opt-in/allowlist gate; it stamps its own
            # runtime metadata, so return directly.
            return _execute_bash_router_payload(arguments, action=action)
        else:
            return {
                "format": "dhee_router_error",
                "error": "unsupported_router_action",
                "action": action,
                "runtime": _runtime_metadata(),
            }
        result["runtime"] = _runtime_metadata()
        return result
    except Exception as exc:
        return {
            "format": "dhee_router_error",
            "error": str(exc),
            "error_type": type(exc).__name__,
            "action": str(payload.get("action") or ""),
            "runtime": _runtime_metadata(),
        }
diff --git a/dhee/runtime_daemon.py b/dhee/runtime_daemon.py
new file mode 100644
index 0000000..f96e156
--- /dev/null
+++ b/dhee/runtime_daemon.py
@@ -0,0 +1,13 @@
+"""Entry point for the local Dhee runtime daemon."""
+
+from __future__ import annotations
+
+from dhee.runtime import serve_forever
+
+
def main() -> None:
    """Run the runtime daemon's HTTP serve loop until it is shut down."""
    serve_forever()


if __name__ == "__main__":
    main()
diff --git a/install.sh b/install.sh
index f745947..173284f 100755
--- a/install.sh
+++ b/install.sh
@@ -177,4 +177,4 @@ printf " Update later: ${BOLD}dhee update${RESET}\n\n"
printf "${DIM} Status: dhee status (savings + brain health)${RESET}\n"
printf "${DIM} Recall: dhee recall \"\" (your personal cross-repo brain)${RESET}\n"
printf "${DIM} Inbox: dhee inbox (live broadcasts from your other agents)${RESET}\n"
-printf "${DIM} Remove: dhee uninstall-hooks && rm -rf ~/.dhee${RESET}\n\n"
+printf "${DIM} Remove: dhee uninstall --yes (stops daemon, removes managed venv + shell hooks)${RESET}\n\n"
diff --git a/tests/fixtures/golden_replay/golden.jsonl b/tests/fixtures/golden_replay/golden.jsonl
new file mode 100644
index 0000000..8341b21
--- /dev/null
+++ b/tests/fixtures/golden_replay/golden.jsonl
@@ -0,0 +1,2 @@
+{"session_id":"claude_code_debug","task_parity":"pass","task_parity_score":0.97,"stale_context_incidents":0,"note":"Claude Code debug replay preserved the failing target and avoided stale plan injection."}
+{"session_id":"codex_exec_debug","task_parity":"pass","task_parity_score":1.0,"stale_context_incidents":0,"note":"Codex exec replay preserved successful verification state."}
diff --git a/tests/fixtures/golden_replay/redacted_real/golden_needs_review.jsonl b/tests/fixtures/golden_replay/redacted_real/golden_needs_review.jsonl
new file mode 100644
index 0000000..453a798
--- /dev/null
+++ b/tests/fixtures/golden_replay/redacted_real/golden_needs_review.jsonl
@@ -0,0 +1,4 @@
+{"note":"Harvested from redacted real codex session; requires human parity review before release gating.","session_id":"redacted_real_codex_ac99c6113f58f19a","stale_context_incidents":0,"task_parity":"needs_review"}
+{"note":"Harvested from redacted real codex session; requires human parity review before release gating.","session_id":"redacted_real_codex_e9aab5e70135fb15","stale_context_incidents":0,"task_parity":"needs_review"}
+{"note":"Harvested from redacted real codex session; requires human parity review before release gating.","session_id":"redacted_real_codex_c918764edfc5535b","stale_context_incidents":0,"task_parity":"needs_review"}
+{"note":"Harvested from redacted real claude_code session; requires human parity review before release gating.","session_id":"redacted_real_claude_code_d15b0f834020060a","stale_context_incidents":0,"task_parity":"needs_review"}
diff --git a/tests/fixtures/golden_replay/redacted_real/manifest.json b/tests/fixtures/golden_replay/redacted_real/manifest.json
new file mode 100644
index 0000000..30716a2
--- /dev/null
+++ b/tests/fixtures/golden_replay/redacted_real/manifest.json
@@ -0,0 +1,138 @@
+{
+ "aggregate": {
+ "annotated_sessions": 4,
+ "assistant_turns": 10,
+ "cache_creation_tokens_total": 30797,
+ "cache_read_per_turn": 38774,
+ "cache_read_tokens_total": 387743,
+ "calls_by_tool": {
+ "Bash": 723,
+ "Read": 3
+ },
+ "digest_tokens": 155884,
+ "net_saved_tokens": 543702,
+ "pending_review_sessions": 4,
+ "projected_cache_read_per_turn": -15595,
+ "raw_tokens": 699586,
+ "saved_pct": 77.72,
+ "sessions": 4,
+ "sessions_by_harness": {
+ "claude_code": 1,
+ "codex": 3
+ },
+ "stale_context_incidents": 0,
+ "task_parity": {
+ "avg_score": null,
+ "fail": 0,
+ "pass": 0,
+ "score_count": 0,
+ "unknown": 4
+ },
+ "tool_result_share": 1.804,
+ "tool_result_tokens": 699586,
+ "total_calls": 726,
+ "warnings_count": 726
+ },
+ "format": "dhee_replay_corpus_manifest",
+ "generated_at": 1778669028.4784482,
+ "golden_path": "tests/fixtures/golden_replay/redacted_real/golden_needs_review.jsonl",
+ "output_dir": "tests/fixtures/golden_replay/redacted_real",
+ "privacy": {
+ "raw_paths": false,
+ "raw_prompts": false,
+ "raw_tool_outputs": false,
+ "secret_filter": "dhee.hooks.claude_code.privacy.filter_secrets"
+ },
+ "sessions": [
+ {
+ "annotation_status": "needs_review",
+ "calls_by_tool": {
+ "Bash": 320
+ },
+ "digest_tokens": 68293,
+ "harness": "codex",
+ "output_path": "tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_codex_ac99c6113f58f19a.jsonl",
+ "raw_tokens": 348538,
+ "sanitized_records": 1102,
+ "saved_pct": 80.41,
+ "session_id": "redacted_real_codex_ac99c6113f58f19a",
+ "source_path_sha256": "37cf3073bd86f55e",
+ "source_size_bytes": 68669775,
+ "total_calls": 320,
+ "warnings_count": 320
+ },
+ {
+ "annotation_status": "needs_review",
+ "calls_by_tool": {
+ "Bash": 171
+ },
+ "digest_tokens": 39200,
+ "harness": "codex",
+ "output_path": "tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_codex_e9aab5e70135fb15.jsonl",
+ "raw_tokens": 160095,
+ "sanitized_records": 769,
+ "saved_pct": 75.51,
+ "session_id": "redacted_real_codex_e9aab5e70135fb15",
+ "source_path_sha256": "4d2220f97262ac25",
+ "source_size_bytes": 1008621842,
+ "total_calls": 171,
+ "warnings_count": 171
+ },
+ {
+ "annotation_status": "needs_review",
+ "calls_by_tool": {
+ "Bash": 225
+ },
+ "digest_tokens": 46456,
+ "harness": "codex",
+ "output_path": "tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_codex_c918764edfc5535b.jsonl",
+ "raw_tokens": 180087,
+ "sanitized_records": 778,
+ "saved_pct": 74.2,
+ "session_id": "redacted_real_codex_c918764edfc5535b",
+ "source_path_sha256": "4db261a7c93167d9",
+ "source_size_bytes": 9281592,
+ "total_calls": 225,
+ "warnings_count": 225
+ },
+ {
+ "annotation_status": "needs_review",
+ "calls_by_tool": {
+ "Bash": 7,
+ "Read": 3
+ },
+ "digest_tokens": 1935,
+ "harness": "claude_code",
+ "output_path": "tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_claude_code_d15b0f834020060a.jsonl",
+ "raw_tokens": 10866,
+ "sanitized_records": 20,
+ "saved_pct": 82.19,
+ "session_id": "redacted_real_claude_code_d15b0f834020060a",
+ "source_path_sha256": "f4eb7d0ecca2ef20",
+ "source_size_bytes": 462780,
+ "total_calls": 10,
+ "warnings_count": 10
+ }
+ ],
+ "skipped": [
+ {
+ "reason": "too_few_calls",
+ "source_path_sha256": "9e852a36ebbf2e86"
+ },
+ {
+ "reason": "too_few_calls",
+ "source_path_sha256": "3a157dd4676c1c23"
+ },
+ {
+ "reason": "too_few_calls",
+ "source_path_sha256": "b7ce3efbf6c55a59"
+ },
+ {
+ "reason": "too_few_calls",
+ "source_path_sha256": "f7683fe005e57e5f"
+ }
+ ],
+ "source": "redacted_real_sessions",
+ "transcripts_considered": 8,
+ "version": 1
+}
diff --git a/tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_claude_code_d15b0f834020060a.jsonl b/tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_claude_code_d15b0f834020060a.jsonl
new file mode 100644
index 0000000..60fb1a4
--- /dev/null
+++ b/tests/fixtures/golden_replay/redacted_real/sessions/redacted_real_claude_code_d15b0f834020060a.jsonl
@@ -0,0 +1,20 @@
+{"message":{"content":[{"id":"tool-0001","input":{"file_path":"/file_ba15ed7714ae.py"},"name":"Read","type":"tool_use"}],"usage":{"cache_creation_input_tokens":15851,"cache_read_input_tokens":16508,"input_tokens":6,"output_tokens":212}},"type":"assistant"}
+{"message":{"content":[{"content":"......................\n\n.........................\n.............\n\n\n\n.........................\n..........................\n.....................\n\n...................\n..............................\n................................\n...............................\n..........................\n\n\n\n\n\n\n............\n\n\n\n\n\n\n\n\n..\n.....\n\n\n\n\n\n........\n......................\n...............\n................\n\n................................\n\n..........\n\n.......\n\n............\n.....\n...........\n..\n..............................\n.................\n.......................\n...............................\n.....................................\n...............\n.................\n...............................\n.................\n.................\n\n\n........................\n\n\n\n..............\n\n.......................................\n.....................................\n.........................\n\n\n\n\n.......\n........\n\n......................\n\n\n\n\n\n.......\n\n\n...................\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n.\n.\n......\n\n\n........................................\n.....................\n........................................\n\n\n\n.......................................\n.......................................\n....................................\n...................................\n........................\n....................................\n......\n\n\n\n\n.............................\n.......\n\n........................\n\n\n..................\n\n\n\n\n....................\n............\n...\n\n\n\n.....\n...........\n.....\n..\n.......................\n.....\n\n\n\n...........................................\n........................................\n\n.................................\n................................\n....................\n\n...............................\n...........................\n..........................\n........................
......\n.........................\n\n\n.................................\n.............................\n............................\n................................\n.............................\n...............\n.................................\n.........................\n\n...............................\n..............................\n............................\n\n\n\n\n\n..............\n\n\n\n..................\n................................\n.......\n........................................\n\n\n..........\n.......................\n\n\n\n\n\n...........................................................................................\n\n.................\n................................\n.....................\n........\n\n\n\n............................................\n...............\n\n.....................................\n.....................................\n\n\n\n\n.....................................\n\n