From 4373923b024a3eefb83571e3b81499fbdf067d2c Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 15:29:18 +0800 Subject: [PATCH 01/13] docs(agent): document auto-compaction design --- docs/design/README.md | 6 ++ docs/design/agent/auto-compaction.md | 103 ++++++++++++++++++++++++ docs/research/README.md | 6 ++ docs/research/agent/auto-compaction.md | 106 +++++++++++++++++++++++++ 4 files changed, 221 insertions(+) create mode 100644 docs/design/agent/auto-compaction.md create mode 100644 docs/research/agent/auto-compaction.md diff --git a/docs/design/README.md b/docs/design/README.md index 1bf8c36a..90e9db0e 100644 --- a/docs/design/README.md +++ b/docs/design/README.md @@ -4,6 +4,12 @@ Architecture decisions and implementation specs for oxide-code. Organized by topic. Each subdirectory mirrors the corresponding directory in [`docs/research/`](../research/), where the underlying research lives. +## Agent Loop + +| Document | Description | +| ------------------------------------------- | ----------------------------------------------------- | +| [Auto-Compaction](agent/auto-compaction.md) | Automatic compaction thresholds, triggers, fail-safes | + ## Session | Document | Description | diff --git a/docs/design/agent/auto-compaction.md b/docs/design/agent/auto-compaction.md new file mode 100644 index 00000000..23b5d291 --- /dev/null +++ b/docs/design/agent/auto-compaction.md @@ -0,0 +1,103 @@ +# Auto-Compaction + +Automatic context compression builds on manual `/compact`: when the latest observed token usage approaches the active model's context window, oxide-code summarizes the current transcript, persists the normal compact boundary, resets the file tracker, and continues from the synthetic summary. + +Companion docs: [research/agent/auto-compaction.md](../../research/agent/auto-compaction.md), [slash/compact.md](../slash/compact.md), [session/persistence.md](../session/persistence.md). 
+ +## Scope + +Auto-compaction is **default on** and can be disabled independently from manual `/compact`. The first implementation runs at safe boundaries: + +- after a complete text-only assistant turn; +- after a tool round is persisted, before the next sampling request; +- before starting a new user prompt if the previous turn left usage over threshold. + +It does not interrupt an in-flight stream or tool call. If a queued prompt exists, the prompt remains queued during compaction and drains afterward through the existing prompt-queue path. + +## Token Signal + +The agent loop records the maximum observed token usage from each stream: + +- `message_start.message.usage.input_tokens + output_tokens`; +- `message_delta.usage.input_tokens + output_tokens`. + +Anthropic's delta usage often carries only output tokens, so stream processing keeps the latest non-zero input and output values separately and computes `total = input + output`. This is a trigger signal, not billing telemetry. Missing usage means "do not auto-compact". + +## Threshold + +Each model has a known context window in `model.rs`: + +- normal Claude context: `200_000`; +- `[1m]` models with the 1M beta: `1_000_000`; +- unknown models: no window, so auto-compaction is disabled. + +The threshold is: + +```text +effective_window = context_window - min(max_tokens, 20_000) +threshold = effective_window - 13_000 +``` + +The 20k summary reserve mirrors Claude Code's p99 summary-output headroom and keeps a compact request from firing at the hard limit. The 13k buffer leaves room for the next prompt, dynamic instructions, and small tool-schema drift. If the subtraction would underflow, auto-compaction stays disabled. 
+ +## Configuration + +Config surface: + +```toml +[client.auto_compact] +enabled = true +``` + +Environment: + +| Variable | Effect | +| ------------------------- | -------------------------------------------- | +| `OX_AUTO_COMPACT` | Overrides `client.auto_compact.enabled` | +| `OX_DISABLE_AUTO_COMPACT` | Disables automatic compaction only | +| `OX_DISABLE_COMPACT` | Disables automatic compaction and `/compact` | + +`OX_DISABLE_COMPACT` is reserved for parity with Claude Code's "all compaction off" switch. It should not remove `/compact` from help; the command should return an actionable error when invoked. + +## Trigger Flow + +`agent_turn` owns the automatic trigger because it has the live transcript, token usage, session handle, file tracker, sink, and user-action receiver. + +1. Stream a model response and update the latest token usage in `StreamOutcome`. +2. Persist the assistant message and any tool-result message for the round. +3. If auto-compaction is enabled and the latest total crosses the threshold, call the same compact driver used by `/compact`. +4. On success, replace `messages` with the synthetic post-compact message and emit `SessionCompacted`. +5. On failure, increment the auto-compaction failure counter and continue without changing the transcript. + +The failure counter is per agent-loop task. Three consecutive automatic failures disable further automatic attempts for the current session. Manual `/compact` does not consult this counter and resets it on success. + +## User Experience + +Manual and automatic compaction use the same visible `CompactedBlock`. Automatic compaction does not need a separate chat error on failure; repeated automatic failure is a background recovery problem, and the user's next regular request should proceed. The error still lands in logs. + +During TUI auto-compaction, the status bar uses the existing `Compacting` state. 
In bare REPL / headless mode, `StdioSink` already renders `SessionCompacted` as a stderr boundary line. + +## Design Decisions + +1. **Default-on.** Running out of context is worse than a well-marked summary boundary. A separate opt-out preserves user control. + +2. **Response usage over preflight counting.** The stream already carries usage. A count-tokens request would add latency and still be approximate once dynamic system context and tool definitions are included. + +3. **Boundary-only compaction.** The first version compacts only after a coherent transcript unit is persisted. This avoids partial tool loops and makes session replay identical to manual `/compact`. + +4. **Same summarizer as `/compact`.** No separate compaction model knob yet. The current `Client::stream_message` path already handles auth, model, effort, betas, prompt caching, and first-party gateway constraints. + +5. **Same persistence boundary as `/compact`.** Auto-compaction should not create a second session format. `Entry::Compact` can later gain a trigger field if the UI needs to distinguish manual from automatic in history. + +6. **Failure circuit breaker.** A too-large or malformed compact request can be unrecoverable. After 3 consecutive automatic failures, the loop stops trying until the session changes through manual compaction, `/clear`, or `/resume`. + +7. **No automatic continue prompt.** If the user queued input, it drains after compaction. Otherwise the assistant waits. Synthetic "continue" prompts make the agent act without fresh user intent. + +## Deferred + +- Mid-turn compaction while a model response still needs tool follow-up. +- Microcompact / prune for old tool-result bodies. +- Anchored re-compaction that updates a previous summary in place. +- Configurable auto-compaction threshold or compaction model. +- Token / cost status-bar redesign. +- Hook integration. 
diff --git a/docs/research/README.md b/docs/research/README.md index 7f859f70..2187786e 100644 --- a/docs/research/README.md +++ b/docs/research/README.md @@ -12,6 +12,12 @@ Organized by topic. Each subdirectory mirrors the corresponding directory in [`d | [Extended Thinking](api/extended-thinking.md) | Content block types, signatures, round-tripping | | [System Prompt](api/system-prompt.md) | Section assembly, CLAUDE.md, caching, block layout | +## Agent Loop + +| Document | Description | +| ------------------------------------------- | ----------------------------------------------------- | +| [Auto-Compaction](agent/auto-compaction.md) | Automatic compaction thresholds, triggers, fail-safes | + ## Session | Document | Description | diff --git a/docs/research/agent/auto-compaction.md b/docs/research/agent/auto-compaction.md new file mode 100644 index 00000000..7fe368b6 --- /dev/null +++ b/docs/research/agent/auto-compaction.md @@ -0,0 +1,106 @@ +# Auto-Compaction (Reference) + +Research on automatic context compaction across Claude Code, OpenAI Codex, and opencode. Companion to [slash/compact.md](../slash/compact.md), which covers manual compaction and replacement strategy. + +## Claude Code + +Claude Code runs automatic compaction proactively before the model call. The query loop applies snip / microcompact / context-collapse transforms first, then calls `autoCompactIfNeeded` with the transformed messages. A successful compact replaces the message set for the rest of the same turn. + +Threshold math is token-buffer based: + +- `effectiveWindow = contextWindow - min(modelMaxOutputTokens, 20_000)`. +- `autoCompactThreshold = effectiveWindow - 13_000`. +- Warning and error indicators use `threshold - 20_000`. +- Manual blocking limit uses `effectiveWindow - 3_000`. + +The token signal is `tokenCountWithEstimation(messages)`, which uses the last API usage plus estimates for unsampled tail content. 
Auto-compaction defaults on, can be disabled by global `autoCompactEnabled`, and is also gated by `DISABLE_COMPACT` / `DISABLE_AUTO_COMPACT`. `DISABLE_COMPACT` disables manual and automatic compaction; `DISABLE_AUTO_COMPACT` leaves `/compact` available. + +Failures are deliberately quiet. Auto-compaction first tries session-memory compaction, falls back to the full summarizer, and stops retrying after 3 consecutive failures. The circuit breaker is important because an over-limit session can otherwise retry a doomed compact request every turn. + +Claude Code also has pre-stages that oxide-code should not copy yet: + +- **Microcompact** clears old tool-result bodies before a full summary pass. +- **Session-memory compaction** prunes memory-specific slices. +- **Context-collapse** can own the headroom problem in feature-gated builds, so proactive auto-compact is suppressed when it is active. + +User-facing behavior is minimal: token warnings mention "until auto-compact" when enabled, and a compact boundary renders after success. Automatic failures are logged rather than surfaced in chat. + +Key files: + +- `claude-code/src/services/compact/autoCompact.ts`: threshold math, opt-out flags, circuit breaker. +- `claude-code/src/query.ts`: pre-query placement. +- `claude-code/src/components/Settings/Config.tsx`: `autoCompactEnabled` setting. +- `claude-code/src/utils/context.ts`: context-window detection. + +## OpenAI Codex + +Codex drives auto-compaction from model metadata. `ModelInfo::auto_compact_token_limit()` defaults to 90% of the resolved context window, or to a configured limit clamped to that 90% ceiling. If no context window or explicit limit is known, the runtime uses `i64::MAX`, effectively disabling auto-compact. + +Triggers: + +- **Pre-turn**: before recording the new user input, if current total usage is already over the limit. 
+- **Mid-turn**: after a sampling response, only when usage is over the limit and the model needs a follow-up or pending input exists. +- **Model downshift**: when switching to a smaller context-window model and the current token use exceeds the new model's limit. + +The token signal is `Session::get_total_token_usage()`, which combines cached last API token usage with estimates after the last model-generated item. Local compaction streams a normal model request. OpenAI / Azure providers use a remote compaction path, and a newer feature-gated path expects a `context_compaction` response item. + +Codex exposes configuration for `model_context_window`, `model_auto_compact_token_limit`, and `compact_prompt`. The auto limit is absolute, not a percentage, then clamped by model metadata. Hooks can run before and after manual or automatic compaction. + +Key files: + +- `codex-rs/protocol/src/openai_models.rs`: 90% default and configured-limit clamp. +- `codex-rs/core/src/session/turn.rs`: pre-turn, mid-turn, and model-downshift triggers. +- `codex-rs/core/src/compact.rs`: inline summarization and history replacement. +- `codex-rs/config/src/config_toml.rs`: config surface. + +## opencode + +opencode performs local app-level compaction through a hidden `compaction` agent. The compaction agent is tool-denied and receives prior context plus a strict Markdown summary template. It does not rely on provider-side automatic summarization. + +Threshold math is based on usable input context: + +- Default reserved buffer is `20_000`. +- If the provider exposes `model.limit.input`, usable tokens are `input - reserved`. +- Otherwise usable tokens are `context - maxOutputTokens(model)`. +- Auto-overflow is disabled when `compaction.auto === false` or model context is `0`. + +The overflow count prefers provider `tokens.total`; when absent, it falls back to `input + output + cache.read + cache.write`. opencode also reacts to provider context overflow errors by scheduling compaction. 
+ +Compaction preserves a recent tail. Defaults are 2 user turns and a recent-token budget of 25% of usable context, clamped to 2,000-8,000 tokens unless configured. Old tool-output pruning is a separate pass: it can wipe older completed tool outputs once enough tokens are reclaimable after protecting recent results. + +Config supports `compaction.auto`, `compaction.prune`, `compaction.tail_turns`, `compaction.preserve_recent_tokens`, and `compaction.reserved`. Env flags `OPENCODE_DISABLE_AUTOCOMPACT` and `OPENCODE_DISABLE_PRUNE` override config. + +Key files: + +- `packages/opencode/src/session/overflow.ts`: usable-context threshold. +- `packages/opencode/src/session/prompt.ts`: post-assistant and overflow-triggered compaction scheduling. +- `packages/opencode/src/session/compaction.ts`: prompt, tail preservation, pruning. +- `packages/opencode/src/agent/agent.ts`: hidden tool-denied compaction agent. + +## Patterns Worth Borrowing + +1. **Default-on with explicit opt-out.** All three systems treat auto-compaction as normal context hygiene, while still giving users an escape hatch. + +2. **Use observed response usage.** Response usage is already available on the hot path. Pre-flight token-count calls add latency and still need estimates for dynamic system / tool content. + +3. **Reserve output headroom.** Claude Code and opencode both avoid compacting exactly at the model's advertised context limit. The compact request itself needs room to produce the summary. + +4. **Run at turn boundaries first.** Pre-turn or post-round compaction is much simpler than interrupting an in-flight response. Mid-turn compaction is useful only once the loop can resume safely after history replacement. + +5. **Circuit-break automatic failures.** Automatic failures should not spam chat or repeatedly hit the API when the session is too large to summarize. + +6. 
**Keep manual `/compact` independent.** Auto opt-out should not disable manual compaction unless the user explicitly disables all compaction. + +## Patterns to Defer + +1. **Mid-turn compaction.** Requires pausing a tool loop or assistant continuation, replacing history, and resuming the same logical turn. The first oxide-code version should compact after a complete round and before the next user-visible continuation. + +2. **Microcompact / prune.** Clearing old tool outputs can save tokens, but it is a separate retention policy with its own UI and persistence implications. + +3. **Anchored summary rewrites.** opencode's anchored previous-summary pattern helps repeated compactions, but repeated lossy rewrite quality needs real usage data before adding complexity. + +4. **Provider-specific remote compaction.** oxide-code talks to Anthropic Messages today, and the current manual compaction path already works through the normal stream. + +5. **Automatic continue prompts.** opencode can synthesize a "Continue..." prompt after auto-compaction. oxide-code should wait for the user unless a queued prompt already exists. + +6. **Hooks.** PreCompact / PostCompact hooks belong with a broader hook or workflow-skill system. 
From e0bae14ffc12d4f8a1aed280f299f8809bef6f8f Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 16:13:20 +0800 Subject: [PATCH 02/13] feat(agent): trigger automatic compaction --- crates/oxide-code/src/agent.rs | 313 +++++++++++++++++- crates/oxide-code/src/agent/compaction.rs | 33 ++ crates/oxide-code/src/client/anthropic.rs | 6 +- .../src/client/anthropic/testing.rs | 3 +- crates/oxide-code/src/config.rs | 244 ++++++++++++++ crates/oxide-code/src/config/file.rs | 44 +++ crates/oxide-code/src/main.rs | 160 +++++++-- crates/oxide-code/src/model.rs | 33 ++ crates/oxide-code/src/slash.rs | 10 +- crates/oxide-code/src/slash/config.rs | 12 +- crates/oxide-code/src/slash/status.rs | 10 +- crates/oxide-code/src/tui/app.rs | 10 +- .../oxide-code/src/tui/components/welcome.rs | 10 +- docs/design/agent/auto-compaction.md | 23 +- docs/research/agent/auto-compaction.md | 4 +- 15 files changed, 861 insertions(+), 54 deletions(-) diff --git a/crates/oxide-code/src/agent.rs b/crates/oxide-code/src/agent.rs index f10e45f4..e711a7ff 100644 --- a/crates/oxide-code/src/agent.rs +++ b/crates/oxide-code/src/agent.rs @@ -6,6 +6,7 @@ pub(crate) mod event; use std::collections::HashMap; use std::future::Future; +use std::pin::Pin; use anyhow::{Context, Result, anyhow, bail}; use tokio::sync::mpsc; @@ -13,13 +14,16 @@ use tracing::{debug, warn}; use crate::agent::event::{AgentEvent, AgentSink, UserAction}; use crate::client::anthropic::Client; -use crate::client::anthropic::wire::{ContentBlockInfo, Delta, StreamEvent}; +use crate::client::anthropic::wire::{ContentBlockInfo, Delta, StreamEvent, Usage}; +use crate::config::AutoCompactionConfig; +use crate::file_tracker::FileTracker; use crate::message::{ContentBlock, Message, Role, strip_trailing_thinking}; use crate::prompt::PromptParts; use crate::session::handle::{RecordOutcome, SessionHandle}; use crate::tool::{ToolDefinition, ToolMetadata, ToolOutput, ToolRegistry}; const MAX_TOOL_ROUNDS: usize = 25; +const 
MAX_AUTO_COMPACT_FAILURES: u8 = 3; // ── Turn Abort ── @@ -51,6 +55,12 @@ pub(crate) trait AgentClient: Send + Sync { user_context: Option<&str>, tools: &[ToolDefinition], ) -> Result>>; + + fn compact_session<'a>( + &'a self, + transcript: &'a [Message], + instructions: Option<&'a str>, + ) -> Pin> + Send + 'a>>; } impl AgentClient for Client { @@ -63,10 +73,50 @@ impl AgentClient for Client { ) -> Result>> { Client::stream_message(self, messages, system_sections, user_context, tools) } + + fn compact_session<'a>( + &'a self, + transcript: &'a [Message], + instructions: Option<&'a str>, + ) -> Pin> + Send + 'a>> { + Box::pin(compaction::compact_session(self, transcript, instructions)) + } } // ── Agent Turn ── +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub(crate) struct TokenUsage { + input_tokens: u32, + output_tokens: u32, +} + +impl TokenUsage { + pub(crate) const fn total_tokens(self) -> u32 { + self.input_tokens.saturating_add(self.output_tokens) + } + + fn observe(&mut self, usage: &Usage) { + if usage.input_tokens > 0 { + self.input_tokens = usage.input_tokens; + } + if usage.output_tokens > 0 { + self.output_tokens = usage.output_tokens; + } + } +} + +#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)] +pub(crate) struct TurnReport { + pub(crate) usage: Option, +} + +pub(crate) struct AutoCompact<'a> { + pub(crate) config: AutoCompactionConfig, + pub(crate) failures: &'a mut u8, + pub(crate) file_tracker: &'a FileTracker, +} + /// Drives one user prompt to a final assistant text reply. 
/// /// Each round streams a model response, dispatches any tool calls, and appends both the @@ -88,21 +138,25 @@ pub(crate) async fn agent_turn( sink: &dyn AgentSink, session: &SessionHandle, user_rx: &mut mpsc::Receiver, -) -> AbortResult<()> { + mut auto_compact: Option>, +) -> AbortResult { let tool_defs = tools.definitions(); let mut pending_prompts: Vec = Vec::new(); + let mut latest_usage = None; for _ in 0..MAX_TOOL_ROUNDS { strip_trailing_thinking(messages); let StreamOutcome { blocks, parse_errors, + usage, } = await_unless_aborted( stream_response(client, messages, &tool_defs, prompt, sink), user_rx, &mut pending_prompts, ) .await??; + latest_usage = usage.or(latest_usage); let tool_uses = collect_tool_uses(&blocks); let assistant_msg = Message { @@ -114,7 +168,9 @@ pub(crate) async fn agent_turn( // Queued prompts drain on the TUI side at idle. record_message(session, assistant_msg.clone(), sink).await; messages.push(assistant_msg); - return Ok(()); + return Ok(TurnReport { + usage: latest_usage, + }); } let (results, sidecars) = run_tool_round( @@ -134,6 +190,17 @@ pub(crate) async fn agent_turn( commit_round_writes(session, sink, &assistant_msg, &tool_result_msg, sidecars).await; messages.push(assistant_msg); messages.push(tool_result_msg); + auto_compact_if_needed( + client, + session, + messages, + sink, + user_rx, + &mut pending_prompts, + auto_compact.as_mut(), + usage, + ) + .await?; record_drained_prompts(pending_prompts.drain(..), messages, session, sink).await; } @@ -143,6 +210,59 @@ pub(crate) async fn agent_turn( ))) } +#[expect( + clippy::too_many_arguments, + reason = "auto-compaction needs the same live turn state as manual compaction plus the latest usage signal" +)] +pub(crate) async fn auto_compact_if_needed( + client: &dyn AgentClient, + session: &SessionHandle, + messages: &mut Vec, + sink: &dyn AgentSink, + user_rx: &mut mpsc::Receiver, + pending: &mut Vec, + auto: Option<&mut AutoCompact<'_>>, + usage: Option, +) -> AbortResult { + 
let Some(auto) = auto else { + return Ok(false); + }; + let Some(usage) = usage else { + return Ok(false); + }; + if *auto.failures >= MAX_AUTO_COMPACT_FAILURES + || !auto.config.should_trigger(usage.total_tokens()) + { + return Ok(false); + } + + let summary = + match await_unless_aborted(client.compact_session(messages, None), user_rx, pending).await? + { + Ok(summary) => summary, + Err(e) => { + *auto.failures += 1; + warn!("auto-compaction failed: {e:#}"); + return Ok(false); + } + }; + let compacted = compaction::replace_session_with_summary( + session, + auto.file_tracker, + messages, + sink, + summary, + None, + ) + .await; + if compacted { + *auto.failures = 0; + } else { + *auto.failures += 1; + } + Ok(compacted) +} + fn collect_tool_uses(blocks: &[ContentBlock]) -> Vec<(String, String, serde_json::Value)> { blocks .iter() @@ -228,7 +348,7 @@ async fn dispatch_tool_call( await_unless_aborted(tools.run(name, input), user_rx, pending).await } -async fn record_drained_prompts( +pub(crate) async fn record_drained_prompts( texts: impl IntoIterator, messages: &mut Vec, session: &SessionHandle, @@ -363,6 +483,7 @@ fn parse_tool_json(json_buf: &str) -> (serde_json::Value, Option) { struct StreamOutcome { blocks: Vec, parse_errors: HashMap, + usage: Option, } async fn stream_response( @@ -381,11 +502,19 @@ async fn stream_response( )?; let mut blocks: Vec> = Vec::new(); + let mut usage = TokenUsage::default(); + let mut saw_usage = false; while let Some(event) = rx.recv().await { let event = event.context("stream error")?; match event { + StreamEvent::MessageStart { message } => { + if let Some(observed) = message.usage { + usage.observe(&observed); + saw_usage = true; + } + } StreamEvent::ContentBlockStart { index, content_block, @@ -409,11 +538,19 @@ async fn stream_response( StreamEvent::Error { error } => { bail!("API error ({}): {}", error.error_type, error.message); } + StreamEvent::MessageDelta { + usage: Some(observed), + .. 
+ } => { + usage.observe(&observed); + saw_usage = true; + } _ => {} } } let mut outcome = StreamOutcome::default(); + outcome.usage = saw_usage.then_some(usage); for acc in blocks.into_iter().flatten() { let (block, parse_error) = acc.into_content_block(); outcome.parse_errors.extend(parse_error); @@ -503,9 +640,10 @@ mod tests { use crate::agent::event::CapturingSink; use crate::client::anthropic::testing::test_client; use crate::client::anthropic::wire::{ - ApiError, ContentBlockInfo, MessageResponse, StreamEvent, Usage, + ApiError, ContentBlockInfo, MessageDeltaBody, MessageResponse, StreamEvent, Usage, }; - use crate::config::{Auth, Effort}; + use crate::config::{Auth, AutoCompactionConfig, Effort}; + use crate::file_tracker::FileTracker; use crate::message::Role; use crate::model::ResolvedModelId; use crate::session::handle::{self, SessionHandle}; @@ -568,6 +706,14 @@ mod tests { } Ok(rx) } + + fn compact_session<'a>( + &'a self, + _transcript: &'a [Message], + _instructions: Option<&'a str>, + ) -> Pin> + Send + 'a>> { + Box::pin(async { Ok("auto summary".to_owned()) }) + } } fn text_turn(text: &str) -> Vec { @@ -618,6 +764,41 @@ mod tests { ] } + fn text_turn_with_usage(text: &str, input_tokens: u32, output_tokens: u32) -> Vec { + vec![ + StreamEvent::MessageStart { + message: MessageResponse { + id: "msg_1".into(), + model: "claude-sonnet-4-6".into(), + usage: Some(Usage { + input_tokens, + output_tokens: 0, + }), + }, + }, + StreamEvent::ContentBlockStart { + index: 0, + content_block: ContentBlockInfo::Text { + text: String::new(), + }, + }, + StreamEvent::ContentBlockDelta { + index: 0, + delta: Delta::TextDelta { text: text.into() }, + }, + StreamEvent::MessageDelta { + delta: MessageDeltaBody { + stop_reason: Some("end_turn".into()), + }, + usage: Some(Usage { + input_tokens: 0, + output_tokens, + }), + }, + StreamEvent::MessageStop, + ] + } + fn tool_use_turn(id: &str, name: &str, input_json: &str) -> Vec { vec![ StreamEvent::ContentBlockStart { @@ 
-638,6 +819,30 @@ mod tests { ] } + fn tool_use_turn_with_usage( + id: &str, + name: &str, + input_json: &str, + input_tokens: u32, + output_tokens: u32, + ) -> Vec { + let mut events = tool_use_turn(id, name, input_json); + events.insert( + 0, + StreamEvent::MessageStart { + message: MessageResponse { + id: "msg_1".into(), + model: "claude-sonnet-4-6".into(), + usage: Some(Usage { + input_tokens, + output_tokens, + }), + }, + }, + ); + events + } + /// Echoes its input; exercises the tool-dispatch path without subprocess machinery. struct EchoTool; @@ -744,6 +949,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .unwrap(); @@ -781,6 +987,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .unwrap(); @@ -814,6 +1021,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .unwrap(); @@ -852,6 +1060,7 @@ mod tests { &sink, &session, &mut user_rx, + None, ) .await .unwrap(); @@ -871,6 +1080,31 @@ mod tests { assert_eq!(streamed, ["Hello immediately"]); } + #[tokio::test] + async fn agent_turn_reports_latest_stream_usage() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let client = FakeClient::new(vec![text_turn_with_usage("Hello!", 100, 7)]); + let tools = ToolRegistry::new(Vec::new()); + let sink = CapturingSink::new(); + let mut messages = vec![Message::user("hi")]; + + let report = agent_turn( + &client, + &tools, + &mut messages, + &empty_prompt(), + &sink, + &session, + &mut inert_user_rx(), + None, + ) + .await + .unwrap(); + + assert_eq!(report.usage.map(TokenUsage::total_tokens), Some(107)); + } + #[tokio::test] async fn agent_turn_single_tool_call_dispatches_and_completes_on_follow_up() { let dir = tempfile::tempdir().unwrap(); @@ -891,6 +1125,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .unwrap(); @@ -929,6 +1164,59 @@ mod tests { ))); } + #[tokio::test] + async fn agent_turn_auto_compacts_after_tool_round_crosses_threshold() { + 
let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let client = FakeClient::new(vec![ + tool_use_turn_with_usage("tool_1", "echo", r#"{"v":1}"#, 9, 2), + text_turn_with_usage("Done", 1, 2), + ]); + let tools = ToolRegistry::new(vec![Box::new(EchoTool)]); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut failures = 0; + let mut messages = vec![ + Message::user("run echo"), + Message::assistant("earlier"), + Message::user("continue"), + ]; + + let report = agent_turn( + &client, + &tools, + &mut messages, + &empty_prompt(), + &sink, + &session, + &mut inert_user_rx(), + Some(AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + ) + .await + .unwrap(); + + assert_eq!(report.usage.map(TokenUsage::total_tokens), Some(3)); + assert_eq!(failures, 0); + assert_eq!( + sink.events() + .iter() + .filter(|event| matches!(event, AgentEvent::SessionCompacted { .. 
})) + .count(), + 1 + ); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("auto summary")) + ); + assert!(matches!(&messages[1].content[0], ContentBlock::Text { text } if text == "Done")); + } + #[tokio::test] async fn agent_turn_drains_mid_round_submit_into_messages_at_round_boundary() { // Pre-loaded SubmitPrompt is consumed during the round; at the boundary the agent splices @@ -956,6 +1244,7 @@ mod tests { &sink, &session, &mut rx, + None, ) .await .expect("turn must complete"); @@ -1017,6 +1306,7 @@ mod tests { &sink, &session, &mut rx, + None, ) .await .expect("turn must complete"); @@ -1066,6 +1356,7 @@ mod tests { &sink, &session, &mut rx, + None, ) .await .expect_err("cancel must surface as Err(Cancelled)"); @@ -1095,6 +1386,7 @@ mod tests { &sink, &session, &mut rx, + None, ) .await .expect_err("quit must surface as Err(Quit)"); @@ -1123,6 +1415,7 @@ mod tests { &sink, &session, &mut rx, + None, ) .await .expect_err("dead channel must surface as Err(Quit)"); @@ -1163,6 +1456,7 @@ mod tests { &sink, &session, &mut rx, + None, ) .await .unwrap_or_else(|_| panic!("turn must complete despite {action:?}")); @@ -1201,6 +1495,7 @@ mod tests { &sink, &session, &mut rx, + None, ), async { started.notified().await; @@ -1251,6 +1546,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .unwrap(); @@ -1290,6 +1586,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .unwrap(); @@ -1343,6 +1640,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .expect_err("cap must trip"); @@ -1376,6 +1674,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .expect_err("api error must propagate"); @@ -1419,6 +1718,7 @@ mod tests { &sink, &session, &mut inert_user_rx(), + None, ) .await .unwrap(); @@ -1478,6 +1778,7 @@ data: {"type":"message_stop"} &sink, &session, &mut inert_user_rx(), + None, ) .await .unwrap(); diff --git 
a/crates/oxide-code/src/agent/compaction.rs b/crates/oxide-code/src/agent/compaction.rs index 88495030..bcff8b94 100644 --- a/crates/oxide-code/src/agent/compaction.rs +++ b/crates/oxide-code/src/agent/compaction.rs @@ -6,9 +6,12 @@ use anyhow::{Result, bail}; use indoc::{formatdoc, indoc}; +use crate::agent::event::{AgentEvent, AgentSink}; use crate::client::anthropic::Client; use crate::client::anthropic::wire::{ContentBlockInfo, Delta, StreamEvent}; +use crate::file_tracker::FileTracker; use crate::message::{ContentBlock, Message, Role}; +use crate::session::handle::SessionHandle; /// Minimum messages required for compaction to be worthwhile. Below this, the summary is /// usually longer than the transcript itself. @@ -144,6 +147,36 @@ pub(crate) fn synthesize_post_compact_message(summary: &str) -> Message { ", prefix = SUMMARY_PREFIX.trim(), summary = summary.trim()}) } +/// Persists a compact boundary and swaps the live transcript to the synthetic summary root. +pub(crate) async fn replace_session_with_summary( + session: &SessionHandle, + file_tracker: &FileTracker, + messages: &mut Vec, + sink: &dyn AgentSink, + summary: String, + instructions: Option, +) -> bool { + let synthetic = synthesize_post_compact_message(&summary); + let outcome = session + .compact(summary.clone(), instructions.clone(), synthetic.clone()) + .await; + sink.session_write_error(outcome.failure.as_deref()); + if outcome.failure.is_some() { + return false; + } + + file_tracker.clear(); + *messages = vec![synthetic]; + if let Err(e) = sink.send(AgentEvent::SessionCompacted { + summary, + pre_count: outcome.pre_count, + instructions, + }) { + tracing::error!("session-compacted event dropped: {e}"); + } + true +} + /// Removes the synthetic summary prefix from a resumed post-compact root message. 
pub(crate) fn strip_synthetic_post_compact_prefix(message: &mut Message) -> bool { if message.role != Role::User { diff --git a/crates/oxide-code/src/client/anthropic.rs b/crates/oxide-code/src/client/anthropic.rs index 43566bd5..10c6837d 100644 --- a/crates/oxide-code/src/client/anthropic.rs +++ b/crates/oxide-code/src/client/anthropic.rs @@ -23,7 +23,7 @@ use tokio::sync::mpsc; use tracing::debug; use uuid::Uuid; -use crate::config::{Auth, Config, Effort}; +use crate::config::{Auth, CompactionConfig, Config, Effort}; use crate::message::{ContentBlock, Message, Role}; use crate::prompt::SYSTEM_PROMPT_DYNAMIC_BOUNDARY; use crate::tool::ToolDefinition; @@ -147,6 +147,10 @@ impl Client { self.config.effort } + pub(crate) fn compaction(&self) -> CompactionConfig { + self.config.compaction + } + #[cfg(test)] pub(crate) fn session_id(&self) -> &str { &self.session_id diff --git a/crates/oxide-code/src/client/anthropic/testing.rs b/crates/oxide-code/src/client/anthropic/testing.rs index d85733d8..4007a98e 100644 --- a/crates/oxide-code/src/client/anthropic/testing.rs +++ b/crates/oxide-code/src/client/anthropic/testing.rs @@ -3,7 +3,7 @@ use std::sync::{Arc, Mutex}; use super::Client; -use crate::config::{Auth, Config, PromptCacheTtl}; +use crate::config::{Auth, CompactionConfig, Config, PromptCacheTtl}; use crate::tui::theme::Theme; /// Minimal [`Config`] for unit / wiremock tests. 
@@ -15,6 +15,7 @@ pub(crate) fn test_config(base_url: impl Into, auth: Auth, model: &str) effort: None, max_tokens: 128, prompt_cache_ttl: PromptCacheTtl::OneHour, + compaction: CompactionConfig::disabled(), thinking: None, show_thinking: false, show_welcome: true, diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index cbf314ed..c63de8f4 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -17,6 +17,8 @@ use crate::util::env; const DEFAULT_MODEL: &str = "claude-opus-4-7[1m]"; const DEFAULT_BASE_URL: &str = "https://api.anthropic.com"; +const AUTO_COMPACTION_OUTPUT_RESERVE_CAP: u32 = 20_000; +const AUTO_COMPACTION_BUFFER_TOKENS: u32 = 13_000; /// Mirrors the fallback `loader::resolve_theme` applies when no `[tui.theme] base` is set. pub(crate) const DEFAULT_THEME: &str = "mocha"; @@ -50,6 +52,7 @@ pub(crate) struct ConfigSnapshot { pub(crate) base_url: String, pub(crate) max_tokens: u32, pub(crate) prompt_cache_ttl: PromptCacheTtl, + pub(crate) compaction: CompactionConfig, pub(crate) show_thinking: bool, pub(crate) show_welcome: bool, /// Resolved theme base name — built-in catalogue key or filesystem path. 
`/theme` reads this @@ -185,6 +188,43 @@ impl FromStr for PromptCacheTtl { } } +// ── CompactionConfig ── + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct CompactionConfig { + pub(crate) auto: AutoCompactionConfig, +} + +impl CompactionConfig { + pub(crate) const fn disabled() -> Self { + Self { + auto: AutoCompactionConfig::disabled(), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub(crate) struct AutoCompactionConfig { + pub(crate) enabled: bool, + pub(crate) threshold_tokens: Option, +} + +impl AutoCompactionConfig { + pub(crate) const fn disabled() -> Self { + Self { + enabled: false, + threshold_tokens: None, + } + } + + pub(crate) const fn should_trigger(self, total_tokens: u32) -> bool { + match (self.enabled, self.threshold_tokens) { + (true, Some(threshold)) => total_tokens >= threshold, + _ => false, + } + } +} + // ── Config ── /// Resolved configuration. @@ -197,6 +237,7 @@ pub(crate) struct Config { pub(crate) base_url: String, pub(crate) max_tokens: u32, pub(crate) prompt_cache_ttl: PromptCacheTtl, + pub(crate) compaction: CompactionConfig, pub(crate) thinking: Option, pub(crate) show_thinking: bool, pub(crate) show_welcome: bool, @@ -275,6 +316,8 @@ impl Config { None => client.prompt_cache_ttl.unwrap_or(PromptCacheTtl::OneHour), }; + let compaction = resolve_compaction(client.compaction, &model, max_tokens)?; + let theme_name = theme_config .base .clone() @@ -291,6 +334,7 @@ impl Config { base_url, max_tokens, prompt_cache_ttl, + compaction, thinking, show_thinking, show_welcome, @@ -308,6 +352,7 @@ impl Config { base_url: self.base_url.clone(), max_tokens: self.max_tokens, prompt_cache_ttl: self.prompt_cache_ttl, + compaction: self.compaction, show_thinking: self.show_thinking, show_welcome: self.show_welcome, theme_name: self.theme_name.clone(), @@ -325,6 +370,13 @@ pub(crate) fn display_bool(flag: bool) -> &'static str { if flag { "on" } else { "off" } } +pub(crate) fn display_auto_compaction(auto: 
AutoCompactionConfig) -> String { + match (auto.enabled, auto.threshold_tokens) { + (true, Some(threshold)) => format!("on at {threshold} tokens"), + _ => "off".to_owned(), + } +} + fn default_max_tokens(effort: Option) -> u32 { match effort { Some(Effort::Xhigh | Effort::Max) => 64_000, @@ -333,6 +385,103 @@ fn default_max_tokens(effort: Option) -> u32 { } } +fn resolve_compaction( + file: Option, + model: &str, + max_tokens: u32, +) -> Result { + let auto_requested = env::bool("OX_COMPACTION_AUTO_ENABLED") + .or_else(|| file.as_ref().and_then(|c| c.auto_enabled)) + .unwrap_or(true); + let auto = if auto_requested { + resolve_auto_compaction(file.as_ref(), model, max_tokens)? + } else { + AutoCompactionConfig::disabled() + }; + + Ok(CompactionConfig { auto }) +} + +fn resolve_auto_compaction( + file: Option<&file::CompactionConfig>, + model: &str, + max_tokens: u32, +) -> Result { + let threshold = resolve_auto_threshold(file, model, max_tokens)?; + Ok(AutoCompactionConfig { + enabled: threshold.is_some(), + threshold_tokens: threshold, + }) +} + +fn resolve_auto_threshold( + file: Option<&file::CompactionConfig>, + model: &str, + max_tokens: u32, +) -> Result> { + let env_tokens = env_u32("OX_COMPACTION_AUTO_THRESHOLD_TOKENS")?; + let env_percent = env_u8("OX_COMPACTION_AUTO_THRESHOLD_PERCENT")?; + let env_threshold_set = env_tokens.is_some() || env_percent.is_some(); + let file_tokens = file.and_then(|c| c.auto_threshold_tokens); + let file_percent = file.and_then(|c| c.auto_threshold_percent); + let (tokens, percent) = if env_threshold_set { + (env_tokens, env_percent) + } else { + (file_tokens, file_percent) + }; + + match (tokens, percent) { + (Some(_), Some(_)) => { + bail!("set only one of auto_threshold_tokens or auto_threshold_percent for compaction") + } + (Some(tokens), None) => validate_positive_tokens(tokens).map(Some), + (None, Some(percent)) => threshold_from_percent(percent, model, max_tokens), + (None, None) => default_auto_threshold(model, 
max_tokens), + } +} + +fn validate_positive_tokens(tokens: u32) -> Result { + if tokens == 0 { + bail!("auto compaction threshold must be greater than zero"); + } + Ok(tokens) +} + +fn threshold_from_percent(percent: u8, model: &str, max_tokens: u32) -> Result> { + if !(1..=100).contains(&percent) { + bail!("auto compaction threshold percent must be between 1 and 100"); + } + let Some(context_window) = crate::model::context_window_for(model) else { + return Ok(None); + }; + let threshold = context_window.saturating_mul(u32::from(percent)) / 100; + Ok(default_auto_threshold_for_window(context_window, max_tokens).map(|max| threshold.min(max))) +} + +fn default_auto_threshold(model: &str, max_tokens: u32) -> Result> { + Ok(crate::model::context_window_for(model) + .and_then(|window| default_auto_threshold_for_window(window, max_tokens))) +} + +fn default_auto_threshold_for_window(context_window: u32, max_tokens: u32) -> Option { + let reserve = max_tokens.min(AUTO_COMPACTION_OUTPUT_RESERVE_CAP); + context_window + .checked_sub(reserve)? 
+ .checked_sub(AUTO_COMPACTION_BUFFER_TOKENS) +} + +fn env_u32(key: &'static str) -> Result> { + env::string(key) + .map(|raw| raw.parse::().with_context(|| format!("{key}={raw:?}"))) + .transpose() +} + +fn env_u8(key: &'static str) -> Result> { + env::string(key) + .map(|raw| raw.parse::().with_context(|| format!("{key}={raw:?}"))) + .transpose() +} + fn validate_base_url(raw: &str) -> Result<()> { let url = reqwest::Url::parse(raw).with_context(|| format!("invalid base URL {raw:?}"))?; match url.scheme() { @@ -460,6 +609,9 @@ mod tests { "ANTHROPIC_BASE_URL", "ANTHROPIC_MAX_TOKENS", "ANTHROPIC_EFFORT", + "OX_COMPACTION_AUTO_ENABLED", + "OX_COMPACTION_AUTO_THRESHOLD_PERCENT", + "OX_COMPACTION_AUTO_THRESHOLD_TOKENS", "OX_SHOW_THINKING", "OX_SHOW_WELCOME", "OX_PROMPT_CACHE_TTL", @@ -519,6 +671,8 @@ mod tests { assert_eq!(config.max_tokens, 64_000); assert_eq!(config.effort, Some(Effort::Xhigh)); assert_eq!(config.prompt_cache_ttl, PromptCacheTtl::OneHour); + assert!(config.compaction.auto.enabled); + assert_eq!(config.compaction.auto.threshold_tokens, Some(967_000)); assert!(!config.show_thinking); assert!( config.show_welcome, @@ -561,6 +715,7 @@ mod tests { assert_eq!(config.model, "claude-opus-4-7"); assert_eq!(config.base_url, "https://example.invalid"); assert_eq!(config.max_tokens, 64); + assert!(config.compaction.auto.enabled); assert!(config.show_thinking); assert!( !config.show_welcome, @@ -590,6 +745,7 @@ mod tests { assert_eq!(config.model, "claude-sonnet-4-6"); assert_eq!(config.base_url, "https://config-file.invalid"); assert_eq!(config.max_tokens, 128); + assert!(config.compaction.auto.enabled); assert!(config.show_thinking); assert!( !config.show_welcome, @@ -671,6 +827,87 @@ mod tests { )); } + #[tokio::test] + async fn load_compaction_file_can_disable_default_on_auto_behavior() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! 
{r#" + [client.compaction] + auto_enabled = false + "#}, + ); + let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .unwrap(); + assert!(!config.compaction.auto.enabled); + } + + #[tokio::test] + async fn load_compaction_auto_env_beats_file() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! {r#" + [client.compaction] + auto_enabled = false + "#}, + ); + let vars = env_vars(vec![xdg(&dir), env("OX_COMPACTION_AUTO_ENABLED", "1")]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert!(config.compaction.auto.enabled); + } + + #[tokio::test] + async fn load_compaction_auto_threshold_tokens_sets_absolute_trigger() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! {r#" + [client.compaction] + auto_threshold_tokens = 400000 + "#}, + ); + let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .unwrap(); + assert_eq!(config.compaction.auto.threshold_tokens, Some(400_000)); + } + + #[tokio::test] + async fn load_compaction_auto_threshold_percent_uses_context_window() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "claude-opus-4-7[1m]"), + env("OX_COMPACTION_AUTO_THRESHOLD_PERCENT", "40"), + ]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert_eq!(config.compaction.auto.threshold_tokens, Some(400_000)); + } + + #[tokio::test] + async fn load_compaction_rejects_ambiguous_auto_thresholds() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! 
{r#" + [client.compaction] + auto_threshold_tokens = 400000 + auto_threshold_percent = 40 + "#}, + ); + let err = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .expect_err("ambiguous thresholds must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("only one"), "{msg}"); + } + #[tokio::test] async fn load_invalid_max_tokens_env_errors() { let dir = tempfile::tempdir().unwrap(); @@ -955,6 +1192,12 @@ mod tests { effort: Some(Effort::Xhigh), max_tokens: 64_000, prompt_cache_ttl: PromptCacheTtl::FiveMin, + compaction: CompactionConfig { + auto: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(42), + }, + }, thinking: None, show_thinking: true, show_welcome: false, @@ -968,6 +1211,7 @@ mod tests { assert_eq!(snap.effort, Some(Effort::Xhigh)); assert_eq!(snap.max_tokens, 64_000); assert_eq!(snap.prompt_cache_ttl, PromptCacheTtl::FiveMin); + assert_eq!(snap.compaction.auto.threshold_tokens, Some(42)); assert!(snap.show_thinking); assert!(!snap.show_welcome); assert_eq!(snap.theme_name, "macchiato"); diff --git a/crates/oxide-code/src/config/file.rs b/crates/oxide-code/src/config/file.rs index 11a8ac30..c3baf773 100644 --- a/crates/oxide-code/src/config/file.rs +++ b/crates/oxide-code/src/config/file.rs @@ -33,6 +33,15 @@ pub(super) struct ClientConfig { pub(super) effort: Option, pub(super) max_tokens: Option, pub(super) prompt_cache_ttl: Option, + pub(super) compaction: Option, +} + +#[derive(Debug, Default, Deserialize)] +#[serde(deny_unknown_fields)] +pub(super) struct CompactionConfig { + pub(super) auto_enabled: Option, + pub(super) auto_threshold_tokens: Option, + pub(super) auto_threshold_percent: Option, } #[derive(Debug, Default, Deserialize)] @@ -71,6 +80,17 @@ impl ClientConfig { effort: other.effort.or(self.effort), max_tokens: other.max_tokens.or(self.max_tokens), prompt_cache_ttl: other.prompt_cache_ttl.or(self.prompt_cache_ttl), + compaction: merge_section(self.compaction, 
other.compaction, CompactionConfig::merge), + } + } +} + +impl CompactionConfig { + fn merge(self, other: Self) -> Self { + Self { + auto_enabled: other.auto_enabled.or(self.auto_enabled), + auto_threshold_tokens: other.auto_threshold_tokens.or(self.auto_threshold_tokens), + auto_threshold_percent: other.auto_threshold_percent.or(self.auto_threshold_percent), } } } @@ -222,6 +242,11 @@ mod tests { effort: Some(super::super::Effort::Low), max_tokens: Some(1000), prompt_cache_ttl: Some(super::super::PromptCacheTtl::FiveMin), + compaction: Some(CompactionConfig { + auto_enabled: Some(false), + auto_threshold_tokens: Some(400_000), + auto_threshold_percent: None, + }), }), tui: Some(TuiConfig { show_thinking: Some(false), @@ -237,6 +262,11 @@ mod tests { effort: Some(super::super::Effort::Max), max_tokens: Some(2000), prompt_cache_ttl: Some(super::super::PromptCacheTtl::OneHour), + compaction: Some(CompactionConfig { + auto_enabled: Some(true), + auto_threshold_tokens: None, + auto_threshold_percent: Some(40), + }), }), tui: Some(TuiConfig { show_thinking: Some(true), @@ -259,6 +289,10 @@ mod tests { client.prompt_cache_ttl, Some(super::super::PromptCacheTtl::OneHour) ); + let compaction = client.compaction.expect("compaction section should merge"); + assert_eq!(compaction.auto_enabled, Some(true)); + assert_eq!(compaction.auto_threshold_tokens, Some(400_000)); + assert_eq!(compaction.auto_threshold_percent, Some(40)); let tui = merged.tui.expect("tui section should be present"); assert_eq!(tui.show_thinking, Some(true)); @@ -274,6 +308,11 @@ mod tests { effort: Some(super::super::Effort::High), max_tokens: Some(4096), prompt_cache_ttl: Some(super::super::PromptCacheTtl::FiveMin), + compaction: Some(CompactionConfig { + auto_enabled: Some(false), + auto_threshold_tokens: Some(400_000), + auto_threshold_percent: None, + }), }), tui: Some(TuiConfig { show_thinking: Some(true), @@ -293,6 +332,11 @@ mod tests { client.prompt_cache_ttl, 
Some(super::super::PromptCacheTtl::FiveMin) ); + let compaction = client + .compaction + .expect("compaction section should survive"); + assert_eq!(compaction.auto_enabled, Some(false)); + assert_eq!(compaction.auto_threshold_tokens, Some(400_000)); let tui = merged.tui.expect("tui section should survive"); assert_eq!(tui.show_thinking, Some(true)); diff --git a/crates/oxide-code/src/main.rs b/crates/oxide-code/src/main.rs index 5116e7ed..7ad9e97b 100644 --- a/crates/oxide-code/src/main.rs +++ b/crates/oxide-code/src/main.rs @@ -23,7 +23,7 @@ use tokio::sync::mpsc; use tracing::{debug, warn}; use agent::event::{AgentEvent, AgentSink, StdioSink, UserAction, inert_user_action_channel}; -use agent::{TurnAbort, agent_turn}; +use agent::{AutoCompact, TokenUsage, TurnAbort, agent_turn}; use client::anthropic::Client; use config::{Config, Effort}; use file_tracker::FileTracker; @@ -342,14 +342,50 @@ async fn agent_loop_task( file_tracker: Arc, ) -> Result<()> { let mut messages: Vec = resumed_messages; + let mut auto_compaction_failures = 0_u8; + let mut last_usage = None; while let Some(action) = user_rx.recv().await { match action { UserAction::SubmitPrompt(text) => { + let mut pre_prompt_pending = Vec::new(); + let pre_prompt_compact = auto_compact_before_prompt( + &client, + &session, + &file_tracker, + &mut messages, + &sink, + &mut user_rx, + &mut pre_prompt_pending, + &mut auto_compaction_failures, + last_usage, + ) + .await; + match pre_prompt_compact { + Ok(true) => last_usage = None, + Ok(false) => {} + Err(TurnAbort::Cancelled) => { + _ = sink.send(AgentEvent::Cancelled); + continue; + } + Err(TurnAbort::Quit) => break, + Err(TurnAbort::Failed(e)) => { + _ = sink.send(AgentEvent::Error(format!("{e:#}"))); + continue; + } + } + let user_msg = Message::user(&text); let outcome = session.record_message(user_msg.clone()).await; sink.session_write_error(outcome.failure.as_deref()); messages.push(user_msg); + agent::record_drained_prompts( + 
pre_prompt_pending.drain(..), + &mut messages, + &session, + &sink, + ) + .await; if let Some(seed) = outcome.ai_title_seed { session::title_generator::spawn( @@ -369,10 +405,16 @@ async fn agent_loop_task( &sink, &session, &mut user_rx, + Some(AutoCompact { + config: client.compaction().auto, + failures: &mut auto_compaction_failures, + file_tracker: &file_tracker, + }), ) .await; match outcome { - Ok(()) => { + Ok(report) => { + last_usage = report.usage; _ = sink.send(AgentEvent::TurnComplete); } Err(TurnAbort::Cancelled) => { @@ -396,6 +438,8 @@ async fn agent_loop_task( sink.session_write_error(outcome.finalize_failure.as_deref()); client.set_session_id(outcome.new_id.clone()); messages.clear(); + auto_compaction_failures = 0; + last_usage = None; if let Err(e) = sink.send(AgentEvent::SessionRolled { id: outcome.new_id }) { // /clear succeeded server-side but the TUI never sees the new id — surfaces as // a stuck "old session" header. Error-level so the log makes it findable. @@ -413,6 +457,8 @@ async fn agent_loop_task( &session_id, ) .await; + auto_compaction_failures = 0; + last_usage = None; } UserAction::Compact { instructions } => { let outcome = apply_compact( @@ -426,7 +472,11 @@ async fn agent_loop_task( ) .await; match outcome { - Ok(()) => {} + Ok(true) => { + auto_compaction_failures = 0; + last_usage = None; + } + Ok(false) => {} Err(TurnAbort::Cancelled) => { _ = sink.send(AgentEvent::Cancelled); } @@ -517,6 +567,38 @@ fn format_drift_warning(drifted: &[std::path::PathBuf]) -> String { ) } +#[expect( + clippy::too_many_arguments, + reason = "pre-prompt auto-compaction needs the live session state and the shared failure counter" +)] +async fn auto_compact_before_prompt( + client: &Client, + session: &SessionHandle, + file_tracker: &FileTracker, + messages: &mut Vec, + sink: &dyn AgentSink, + user_rx: &mut mpsc::Receiver, + pending: &mut Vec, + failures: &mut u8, + usage: Option, +) -> std::result::Result { + agent::auto_compact_if_needed( + 
client, + session, + messages, + sink, + user_rx, + pending, + Some(&mut AutoCompact { + config: client.compaction().auto, + failures, + file_tracker, + }), + usage, + ) + .await +} + /// Drives `/compact`: stream the summarization, replace the in-memory transcript with the /// synthetic continuation, persist the boundary + synthetic message, surface the post-compact /// system event so the TUI can repaint. Errors leave the session untouched. @@ -528,7 +610,7 @@ async fn apply_compact( sink: &dyn AgentSink, user_rx: &mut mpsc::Receiver, instructions: Option, -) -> std::result::Result<(), TurnAbort> { +) -> std::result::Result { let mut pending_prompts = Vec::new(); let summary = agent::await_unless_aborted( agent::compaction::compact_session(client, messages, instructions.as_deref()), @@ -537,26 +619,15 @@ async fn apply_compact( ) .await? .map_err(|e| TurnAbort::Failed(anyhow!("Compaction failed: {e:#}")))?; - let synthetic = agent::compaction::synthesize_post_compact_message(&summary); - let outcome = session - .compact(summary.clone(), instructions.clone(), synthetic.clone()) - .await; - sink.session_write_error(outcome.failure.as_deref()); - if outcome.failure.is_some() { - return Ok(()); - } - // Reset the file tracker so post-compact Edits require a fresh Read — pre-compact Reads - // are no longer in the visible transcript and the safety contract has to follow. 
- file_tracker.clear(); - *messages = vec![synthetic]; - if let Err(e) = sink.send(AgentEvent::SessionCompacted { + Ok(agent::compaction::replace_session_with_summary( + session, + file_tracker, + messages, + sink, summary, - pre_count: outcome.pre_count, instructions, - }) { - tracing::error!("session-compacted event dropped: {e}"); - } - Ok(()) + ) + .await) } async fn apply_rename(session: &SessionHandle, sink: &dyn AgentSink, title: String) { @@ -616,6 +687,8 @@ async fn bare_repl( let mut messages: Vec = resumed_messages; let mut shutdown_fired = false; let (_user_tx, mut user_rx) = inert_user_action_channel(); + let mut auto_compaction_failures = 0_u8; + let mut last_usage = None; let result: Result<()> = async { loop { @@ -639,10 +712,37 @@ async fn bare_repl( continue; } + let mut pre_prompt_pending = Vec::new(); + match auto_compact_before_prompt( + client, + &session, + &file_tracker, + &mut messages, + &sink, + &mut user_rx, + &mut pre_prompt_pending, + &mut auto_compaction_failures, + last_usage, + ) + .await + { + Ok(true) => last_usage = None, + Ok(false) => {} + Err(TurnAbort::Cancelled | TurnAbort::Quit) => continue, + Err(TurnAbort::Failed(e)) => return Err(e), + } + let user_msg = Message::user(&input); let outcome = session.record_message(user_msg.clone()).await; sink.session_write_error(outcome.failure.as_deref()); messages.push(user_msg); + agent::record_drained_prompts( + pre_prompt_pending.drain(..), + &mut messages, + &session, + &sink, + ) + .await; let prompt = prompt::build_prompt(model).await; let turn = agent_turn( client, @@ -652,6 +752,11 @@ async fn bare_repl( &sink, &session, &mut user_rx, + Some(AutoCompact { + config: client.compaction().auto, + failures: &mut auto_compaction_failures, + file_tracker: &file_tracker, + }), ); let turn_result = tokio::select! 
{ r = turn => r, @@ -662,7 +767,8 @@ async fn bare_repl( } }; match turn_result { - Ok(()) | Err(TurnAbort::Cancelled | TurnAbort::Quit) => {} + Ok(report) => last_usage = report.usage, + Err(TurnAbort::Cancelled | TurnAbort::Quit) => {} Err(TurnAbort::Failed(e)) => return Err(e), } _ = sink.send(AgentEvent::TurnComplete); @@ -700,6 +806,7 @@ async fn headless( let prompt = prompt::build_prompt(model).await; let mut shutdown_fired = false; let (_user_tx, mut user_rx) = inert_user_action_channel(); + let mut auto_compaction_failures = 0_u8; let turn = agent_turn( client, &tools, @@ -708,10 +815,15 @@ async fn headless( &sink, &session, &mut user_rx, + Some(AutoCompact { + config: client.compaction().auto, + failures: &mut auto_compaction_failures, + file_tracker: &file_tracker, + }), ); let result: Result<()> = tokio::select! { r = turn => match r { - Ok(()) | Err(TurnAbort::Cancelled | TurnAbort::Quit) => Ok(()), + Ok(_) | Err(TurnAbort::Cancelled | TurnAbort::Quit) => Ok(()), Err(TurnAbort::Failed(e)) => Err(e), }, () = shutdown_signal() => { diff --git a/crates/oxide-code/src/model.rs b/crates/oxide-code/src/model.rs index c9f5e274..51ee3d1b 100644 --- a/crates/oxide-code/src/model.rs +++ b/crates/oxide-code/src/model.rs @@ -16,6 +16,9 @@ pub(crate) struct ModelInfo { pub(crate) capabilities: Capabilities, } +const STANDARD_CONTEXT_WINDOW: u32 = 200_000; +const CONTEXT_1M_WINDOW: u32 = 1_000_000; + // ── Capabilities ── /// Per-model gate set consumed by the wire-builder (header + body fields), the slash commands @@ -208,6 +211,17 @@ pub(crate) fn capabilities_for(model: &str) -> Capabilities { .unwrap_or_default() } +/// Effective context window for known Claude models. `[1m]` opts into the 1M beta only on +/// models that advertise that capability; unknown raw ids stay disabled for auto-compaction. 
+pub(crate) fn context_window_for(model: &str) -> Option { + let info = lookup(model)?; + if model.ends_with("[1m]") && info.capabilities.context_1m { + Some(CONTEXT_1M_WINDOW) + } else { + Some(STANDARD_CONTEXT_WINDOW) + } +} + /// Human-facing label: the row's [`ModelInfo::display_name`] plus a ` (1M context)` suffix on /// `[1m]` ids; the raw id when the model is unknown. pub(crate) fn display_name(model: &str) -> Cow<'_, str> { @@ -498,6 +512,25 @@ mod tests { } } + // ── context_window_for ── + + #[test] + fn context_window_for_known_models_defaults_to_standard_window() { + assert_eq!(context_window_for("claude-opus-4-7"), Some(200_000)); + assert_eq!(context_window_for("claude-haiku-4-5"), Some(200_000)); + } + + #[test] + fn context_window_for_1m_suffix_requires_model_capability() { + assert_eq!(context_window_for("claude-opus-4-7[1m]"), Some(1_000_000)); + assert_eq!(context_window_for("claude-haiku-4-5[1m]"), Some(200_000)); + } + + #[test] + fn context_window_for_unknown_model_is_none() { + assert_eq!(context_window_for("claude-future-9"), None); + } + // ── display_name ── #[test] diff --git a/crates/oxide-code/src/slash.rs b/crates/oxide-code/src/slash.rs index 6913bc26..3b90ce5e 100644 --- a/crates/oxide-code/src/slash.rs +++ b/crates/oxide-code/src/slash.rs @@ -120,7 +120,9 @@ fn classify_in(commands: &[&dyn registry::SlashCommand], parsed: &Parsed) -> Sla /// Fully-populated `LiveSessionInfo` for per-command tests. #[cfg(test)] pub(crate) fn test_session_info() -> LiveSessionInfo { - use crate::config::{ConfigSnapshot, Effort, PromptCacheTtl}; + use crate::config::{ + AutoCompactionConfig, CompactionConfig, ConfigSnapshot, Effort, PromptCacheTtl, + }; // Real MODELS row so `display_name()` resolves to a known label. 
LiveSessionInfo { @@ -134,6 +136,12 @@ pub(crate) fn test_session_info() -> LiveSessionInfo { effort: Some(Effort::High), max_tokens: 32_000, prompt_cache_ttl: PromptCacheTtl::OneHour, + compaction: CompactionConfig { + auto: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(155_000), + }, + }, show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), diff --git a/crates/oxide-code/src/slash/config.rs b/crates/oxide-code/src/slash/config.rs index 0f6aa3d7..cc9582a7 100644 --- a/crates/oxide-code/src/slash/config.rs +++ b/crates/oxide-code/src/slash/config.rs @@ -6,7 +6,7 @@ use std::path::Path; use super::context::{LiveSessionInfo, SlashContext}; use super::registry::{SlashCommand, SlashOutcome}; -use crate::config::{display_bool, display_effort, file}; +use crate::config::{display_auto_compaction, display_bool, display_effort, file}; use crate::tui::modal::kv_overview::{KvOverview, KvSection}; use crate::util::path::tildify; @@ -54,6 +54,10 @@ fn build_modal( "Prompt Cache TTL".to_owned(), cfg.prompt_cache_ttl.to_string(), ), + ( + "Auto Compaction".to_owned(), + display_auto_compaction(cfg.compaction.auto), + ), ( "Show Thinking".to_owned(), display_bool(cfg.show_thinking).to_owned(), @@ -123,11 +127,11 @@ mod tests { #[test] fn build_modal_height_accounts_for_both_sections() { - // title + blank + (heading + blank + 8 rows) + blank + (heading + blank + 2 rows) - // + blank + footer = 2 + 10 + 1 + 4 + 2 = 19. + // title + blank + (heading + blank + 9 rows) + blank + (heading + blank + 2 rows) + // + blank + footer = 2 + 11 + 1 + 4 + 2 = 20. 
let info = test_session_info(); let m = build_modal(&info, None, None); - assert_eq!(m.height(80), 19); + assert_eq!(m.height(80), 20); } // ── display_path ── diff --git a/crates/oxide-code/src/slash/status.rs b/crates/oxide-code/src/slash/status.rs index 67d67ced..4e11fbb0 100644 --- a/crates/oxide-code/src/slash/status.rs +++ b/crates/oxide-code/src/slash/status.rs @@ -3,7 +3,7 @@ use super::context::{LiveSessionInfo, SlashContext}; use super::registry::{SlashCommand, SlashOutcome}; -use crate::config::{display_bool, display_effort}; +use crate::config::{display_auto_compaction, display_bool, display_effort}; use crate::tui::modal::kv_overview::{KvOverview, KvSection}; pub(super) struct StatusCmd; @@ -40,6 +40,10 @@ fn build_modal(info: &LiveSessionInfo) -> KvOverview { "Context Cache".to_owned(), info.config.prompt_cache_ttl.to_string(), ), + ( + "Auto Compaction".to_owned(), + display_auto_compaction(info.config.compaction.auto), + ), ( "Show Thinking".to_owned(), display_bool(info.config.show_thinking).to_owned(), @@ -91,7 +95,7 @@ mod tests { fn build_modal_renders_one_row_per_session_descriptor() { let info = test_session_info(); let m = build_modal(&info); - // Title + blank + 9 rows + blank + footer = 13. - assert_eq!(m.height(80), 13); + // Title + blank + 10 rows + blank + footer = 14. + assert_eq!(m.height(80), 14); } } diff --git a/crates/oxide-code/src/tui/app.rs b/crates/oxide-code/src/tui/app.rs index dc336cc6..bbacab69 100644 --- a/crates/oxide-code/src/tui/app.rs +++ b/crates/oxide-code/src/tui/app.rs @@ -807,7 +807,9 @@ mod tests { fn test_session_info() -> LiveSessionInfo { // `test-model` is intentionally unknown so `display_name` falls back to the literal // id, keeping insta snapshots stable. 
- use crate::config::{ConfigSnapshot, Effort, PromptCacheTtl}; + use crate::config::{ + AutoCompactionConfig, CompactionConfig, ConfigSnapshot, Effort, PromptCacheTtl, + }; LiveSessionInfo { cwd: "~/test".to_owned(), @@ -820,6 +822,12 @@ mod tests { effort: Some(Effort::High), max_tokens: 32_000, prompt_cache_ttl: PromptCacheTtl::OneHour, + compaction: CompactionConfig { + auto: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(155_000), + }, + }, show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), diff --git a/crates/oxide-code/src/tui/components/welcome.rs b/crates/oxide-code/src/tui/components/welcome.rs index a3fa0567..984575f7 100644 --- a/crates/oxide-code/src/tui/components/welcome.rs +++ b/crates/oxide-code/src/tui/components/welcome.rs @@ -326,7 +326,9 @@ mod tests { use ratatui::backend::TestBackend; use super::*; - use crate::config::{ConfigSnapshot, Effort, PromptCacheTtl}; + use crate::config::{ + AutoCompactionConfig, CompactionConfig, ConfigSnapshot, Effort, PromptCacheTtl, + }; use crate::slash::LiveSessionInfo; const TEST_SEED: u64 = 0x00C0_FFEE; @@ -343,6 +345,12 @@ mod tests { effort: Some(Effort::Xhigh), max_tokens: 64_000, prompt_cache_ttl: PromptCacheTtl::OneHour, + compaction: CompactionConfig { + auto: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(967_000), + }, + }, show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), diff --git a/docs/design/agent/auto-compaction.md b/docs/design/agent/auto-compaction.md index 23b5d291..580c2e04 100644 --- a/docs/design/agent/auto-compaction.md +++ b/docs/design/agent/auto-compaction.md @@ -21,7 +21,7 @@ The agent loop records the maximum observed token usage from each stream: - `message_start.message.usage.input_tokens + output_tokens`; - `message_delta.usage.input_tokens + output_tokens`. 
-Anthropic's delta usage often carries only output tokens, so stream processing keeps the latest non-zero input and output values separately and computes `total = input + output`. This is a trigger signal, not billing telemetry. Missing usage means "do not auto-compact". +Anthropic's delta usage often carries only output tokens, so stream processing keeps the latest non-zero input and output values separately and computes `total = input + output`. Treat this value only as the auto-compaction trigger signal; it is unsuitable for billing telemetry. Missing usage means "do not auto-compact". ## Threshold @@ -45,19 +45,22 @@ The 20k summary reserve mirrors Claude Code's p99 summary-output headroom and ke Config surface: ```toml -[client.auto_compact] -enabled = true +[client.compaction] +auto_enabled = true +auto_threshold_tokens = 400000 +# or: +auto_threshold_percent = 40 ``` Environment: -| Variable | Effect | -| ------------------------- | -------------------------------------------- | -| `OX_AUTO_COMPACT` | Overrides `client.auto_compact.enabled` | -| `OX_DISABLE_AUTO_COMPACT` | Disables automatic compaction only | -| `OX_DISABLE_COMPACT` | Disables automatic compaction and `/compact` | +| Variable | Effect | +| ---------------------------------------- | ------------------------------------------- | +| `OX_COMPACTION_AUTO_ENABLED` | Overrides `client.compaction.auto_enabled` | +| `OX_COMPACTION_AUTO_THRESHOLD_TOKENS` | Absolute automatic trigger threshold | +| `OX_COMPACTION_AUTO_THRESHOLD_PERCENT` | Percent of the model context window | -`OX_DISABLE_COMPACT` is reserved for parity with Claude Code's "all compaction off" switch. It should not remove `/compact` from help; the command should return an actionable error when invoked. +Manual `/compact` remains available. The config controls only whether automatic compaction triggers and where that trigger fires. Token and percent thresholds are mutually exclusive so the resolved trigger stays obvious. 
## Trigger Flow @@ -98,6 +101,6 @@ During TUI auto-compaction, the status bar uses the existing `Compacting` state. - Mid-turn compaction while a model response still needs tool follow-up. - Microcompact / prune for old tool-result bodies. - Anchored re-compaction that updates a previous summary in place. -- Configurable auto-compaction threshold or compaction model. +- Separate compaction model. - Token / cost status-bar redesign. - Hook integration. diff --git a/docs/research/agent/auto-compaction.md b/docs/research/agent/auto-compaction.md index 7fe368b6..480abf44 100644 --- a/docs/research/agent/auto-compaction.md +++ b/docs/research/agent/auto-compaction.md @@ -44,7 +44,7 @@ Triggers: The token signal is `Session::get_total_token_usage()`, which combines cached last API token usage with estimates after the last model-generated item. Local compaction streams a normal model request. OpenAI / Azure providers use a remote compaction path, and a newer feature-gated path expects a `context_compaction` response item. -Codex exposes configuration for `model_context_window`, `model_auto_compact_token_limit`, and `compact_prompt`. The auto limit is absolute, not a percentage, then clamped by model metadata. Hooks can run before and after manual or automatic compaction. +Codex exposes configuration for `model_context_window`, `model_auto_compact_token_limit`, and `compact_prompt`. The auto limit is absolute; percentage values are not part of that surface. Hooks can run before and after manual or automatic compaction. Key files: @@ -89,7 +89,7 @@ Key files: 5. **Circuit-break automatic failures.** Automatic failures should not spam chat or repeatedly hit the API when the session is too large to summarize. -6. **Keep manual `/compact` independent.** Auto opt-out should not disable manual compaction unless the user explicitly disables all compaction. +6. **Keep manual `/compact` independent.** Auto opt-out should not disable manual compaction. 
## Patterns to Defer From 180fb6bca95b2a8c41b260bfa72af2ebc3fc9752 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 16:19:33 +0800 Subject: [PATCH 03/13] feat(tui): show jump-to-bottom overlay --- crates/oxide-code/src/tui/app.rs | 103 ++++++++++++++++++- crates/oxide-code/src/tui/components/chat.rs | 73 +++++++++++++ docs/roadmap.md | 5 +- 3 files changed, 175 insertions(+), 6 deletions(-) diff --git a/crates/oxide-code/src/tui/app.rs b/crates/oxide-code/src/tui/app.rs index bbacab69..70a1a4b8 100644 --- a/crates/oxide-code/src/tui/app.rs +++ b/crates/oxide-code/src/tui/app.rs @@ -11,8 +11,9 @@ use std::time::{Duration, Instant}; use anyhow::Result; use crossterm::event::{Event, EventStream, KeyCode, KeyEvent}; use futures::{Stream, StreamExt}; -use ratatui::layout::{Constraint, Layout}; +use ratatui::layout::{Alignment, Constraint, Layout, Rect}; use ratatui::text::{Line, Span}; +use ratatui::widgets::Paragraph; use tokio::sync::mpsc; use super::components::chat::ChatView; @@ -29,7 +30,7 @@ use crate::message::Message; use crate::session::entry::CompactInfo; use crate::slash::{self, LiveSessionInfo, SlashContext, SlashKind}; use crate::tool::{ToolMetadata, ToolRegistry, ToolResultView}; -use crate::util::text::truncate_to_width; +use crate::util::text::{center_truncate_to_width, truncate_to_width}; /// Tick interval for animation frames and render coalescing (~60 FPS). 
const TICK_INTERVAL: Duration = Duration::from_millis(16); @@ -643,6 +644,7 @@ impl App { welcome::paint(frame, chunks[1], &self.theme, &snap); } else { self.chat.render(frame, chunks[1]); + self.render_jump_overlay(frame, chunks[1]); } if preview_height > 0 { self.render_preview(frame, chunks[2]); @@ -689,6 +691,43 @@ impl App { area, ); } + + fn render_jump_overlay(&self, frame: &mut ratatui::Frame<'_>, area: Rect) { + if !self.chat.is_scrolled_up() || area.width < 25 || area.height == 0 { + return; + } + + let new_count = self.chat.new_content_since_pause(); + let label = jump_overlay_label(new_count, usize::from(area.width)); + let style = if new_count == 0 { + self.theme.dim() + } else { + self.theme.accent() + }; + let band = Rect { + y: area.y + area.height.saturating_sub(1), + height: 1, + ..area + }; + frame.render_widget( + Paragraph::new(Line::from(Span::styled(label, style))) + .style(self.theme.surface()) + .alignment(Alignment::Right), + band, + ); + } +} + +fn jump_overlay_label(new_count: u32, width: usize) -> String { + if width < 40 { + return "↓ (ctrl+End)".to_owned(); + } + let label = match new_count { + 0 => "Jump to bottom (ctrl+End) ↓".to_owned(), + 1 => "1 new message (ctrl+End) ↓".to_owned(), + n => format!("{n} new messages (ctrl+End) ↓"), + }; + center_truncate_to_width(&label, width.saturating_sub(2)) } /// Renders a queued prompt as a dim ghost, capped at `body_width` columns. 
@@ -2803,6 +2842,30 @@ mod tests { .join("\n") } + fn long_chat_block() -> String { + use std::fmt::Write as _; + + let mut body = String::new(); + for i in 0..30 { + _ = writeln!(body, "line {i:02} of a long chat block"); + } + body + } + + // ── jump_overlay_label ── + + #[test] + fn jump_overlay_label_renders_idle_and_new_content_variants() { + assert_eq!(jump_overlay_label(0, 60), "Jump to bottom (ctrl+End) ↓"); + assert_eq!(jump_overlay_label(1, 60), "1 new message (ctrl+End) ↓"); + assert_eq!(jump_overlay_label(3, 60), "3 new messages (ctrl+End) ↓"); + } + + #[test] + fn jump_overlay_label_uses_short_form_below_full_width() { + assert_eq!(jump_overlay_label(3, 30), "↓ (ctrl+End)"); + } + #[test] fn draw_frame_lays_out_status_chat_and_input_in_order() { let (mut app, _rx, _agent_tx) = test_app(Some("Session title")); @@ -2832,6 +2895,42 @@ mod tests { insta::assert_snapshot!(render_app(&mut app, 60, 8)); } + #[test] + fn draw_frame_auto_scroll_on_hides_jump_overlay() { + let (mut app, _rx, _agent_tx) = test_app(None); + app.chat.push_system_message(long_chat_block()); + + let text = rendered_text(&mut app, 60, 10); + assert!(!text.contains("Jump to bottom"), "{text}"); + } + + #[test] + fn draw_frame_scrolled_up_shows_jump_overlay() { + let (mut app, _rx, _agent_tx) = test_app(None); + app.chat.push_system_message(long_chat_block()); + _ = render_app(&mut app, 60, 10); + + app.chat + .handle_event(&key_event(KeyCode::PageUp, KeyModifiers::NONE)); + let text = rendered_text(&mut app, 60, 10); + + assert!(text.contains("Jump to bottom"), "{text}"); + } + + #[test] + fn draw_frame_scrolled_up_counts_new_content() { + let (mut app, _rx, _agent_tx) = test_app(None); + app.chat.push_system_message(long_chat_block()); + _ = render_app(&mut app, 60, 10); + app.chat + .handle_event(&key_event(KeyCode::PageUp, KeyModifiers::NONE)); + + app.chat.push_error("background update"); + let text = rendered_text(&mut app, 60, 10); + + assert!(text.contains("1 new message"), 
"{text}"); + } + #[test] fn draw_frame_narrow_width_still_renders_all_three_panels() { let (mut app, _rx, _agent_tx) = test_app(Some("narrow")); diff --git a/crates/oxide-code/src/tui/components/chat.rs b/crates/oxide-code/src/tui/components/chat.rs index e3a421f4..0addc3b6 100644 --- a/crates/oxide-code/src/tui/components/chat.rs +++ b/crates/oxide-code/src/tui/components/chat.rs @@ -40,6 +40,7 @@ pub(crate) struct ChatView { viewport_height: u16, viewport_width: u16, auto_scroll: bool, + new_content_since_pause: u32, } impl ChatView { @@ -55,6 +56,7 @@ impl ChatView { viewport_height: 0, viewport_width: 0, auto_scroll: true, + new_content_since_pause: 0, } } @@ -150,6 +152,7 @@ impl ChatView { self.commit_streaming(); self.blocks.push(Box::new(UserMessage::new(text))); self.auto_scroll = true; + self.new_content_since_pause = 0; } /// Appends a streamed token to the current assistant response. @@ -158,6 +161,9 @@ impl ChatView { /// block split. Auto-scroll only follows the tail when the user hasn't manually scrolled up. pub(crate) fn append_stream_token(&mut self, token: &str) { self.commit_thinking_buffer(); + if self.streaming.is_none() { + self.bump_paused_counter(); + } self.streaming .get_or_insert_with(StreamingAssistant::new) .append(token); @@ -169,6 +175,9 @@ impl ChatView { /// Appends a thinking token to the live thinking display buffer. pub(crate) fn append_thinking_token(&mut self, token: &str) { + if self.thinking_buffer.is_empty() { + self.bump_paused_counter(); + } self.thinking_buffer.push_str(token); if self.auto_scroll { self.scroll_to_bottom(); @@ -200,6 +209,7 @@ impl ChatView { /// Appends a tool call, flushing any in-flight streaming buffer. 
pub(crate) fn push_tool_call(&mut self, icon: &'static str, label: &str) { self.commit_streaming(); + self.bump_paused_counter(); self.blocks.push(Box::new(ToolCallBlock::new(icon, label))); } @@ -210,6 +220,7 @@ impl ChatView { view: ToolResultView, is_error: bool, ) { + self.bump_paused_counter(); self.blocks .push(Box::new(ToolResultBlock::new(label, view, is_error))); } @@ -225,11 +236,13 @@ impl ChatView { /// Appends an error message. pub(crate) fn push_error(&mut self, msg: &str) { + self.bump_paused_counter(); self.blocks.push(Box::new(ErrorBlock::new(msg))); } /// Appends informational output from a slash command. pub(crate) fn push_system_message(&mut self, body: impl Into) { + self.bump_paused_counter(); self.blocks.push(Box::new(SystemMessageBlock::new(body))); } @@ -240,6 +253,7 @@ impl ChatView { instructions: Option<&str>, summary: impl Into, ) { + self.bump_paused_counter(); self.blocks.push(Box::new(CompactedBlock::new( pre_count, instructions, @@ -249,12 +263,14 @@ impl ChatView { /// Appends a unified diff body for display. pub(crate) fn push_git_diff(&mut self, text: impl Into) { + self.bump_paused_counter(); self.blocks.push(Box::new(GitDiffBlock::new(text))); } /// Appends an interrupted marker. Flushes any in-flight streaming buffer first. pub(crate) fn push_interrupted_marker(&mut self) { self.commit_streaming(); + self.bump_paused_counter(); self.blocks.push(Box::new(InterruptedMarker)); } @@ -266,6 +282,15 @@ impl ChatView { self.scroll_offset = 0; self.content_height.set(0); self.auto_scroll = true; + self.new_content_since_pause = 0; + } + + pub(crate) const fn is_scrolled_up(&self) -> bool { + !self.auto_scroll + } + + pub(crate) const fn new_content_since_pause(&self) -> u32 { + self.new_content_since_pause } /// Number of committed chat blocks. 
@@ -374,6 +399,7 @@ impl ChatView { .content_height .get() .saturating_sub(self.viewport_height); + self.new_content_since_pause = 0; } fn scroll_up(&mut self, lines: u16) { @@ -389,6 +415,13 @@ impl ChatView { self.scroll_offset = self.scroll_offset.saturating_add(lines).min(max); if self.scroll_offset >= max { self.auto_scroll = true; + self.new_content_since_pause = 0; + } + } + + fn bump_paused_counter(&mut self) { + if !self.auto_scroll { + self.new_content_since_pause = self.new_content_since_pause.saturating_add(1); } } @@ -2142,6 +2175,42 @@ mod tests { assert_eq!(chat.viewport_height, 20); } + #[test] + fn paused_counter_bumps_only_while_scrolled_up() { + let mut chat = test_chat(); + chat.push_system_message("one"); + assert_eq!(chat.new_content_since_pause(), 0); + + chat.auto_scroll = false; + chat.push_system_message("two"); + chat.push_error("three"); + assert_eq!(chat.new_content_since_pause(), 2); + } + + #[test] + fn paused_counter_streaming_bumps_once_per_block() { + let mut chat = test_chat(); + chat.auto_scroll = false; + + chat.append_stream_token("a"); + chat.append_stream_token("b"); + assert_eq!(chat.new_content_since_pause(), 1); + + chat.commit_streaming(); + chat.append_stream_token("c"); + assert_eq!(chat.new_content_since_pause(), 2); + } + + #[test] + fn paused_counter_saturates() { + let mut chat = test_chat(); + chat.auto_scroll = false; + chat.new_content_since_pause = u32::MAX; + + chat.push_error("overflow"); + assert_eq!(chat.new_content_since_pause(), u32::MAX); + } + #[test] fn update_layout_invalidates_streaming_cache_on_width_change() { let mut chat = test_chat(); @@ -2249,10 +2318,12 @@ mod tests { chat.viewport_height = 20; chat.scroll_offset = 10; chat.auto_scroll = false; + chat.new_content_since_pause = 3; chat.handle_event(&ctrl_key_event(KeyCode::End)); assert_eq!(chat.scroll_offset, 80); assert!(chat.auto_scroll); + assert_eq!(chat.new_content_since_pause(), 0); } #[test] @@ -2507,10 +2578,12 @@ mod tests { 
chat.content_height.set(100); chat.viewport_height = 20; chat.scroll_offset = 75; + chat.new_content_since_pause = 3; chat.scroll_down(10); assert_eq!(chat.scroll_offset, 80); assert!(chat.auto_scroll); + assert_eq!(chat.new_content_since_pause(), 0); } // ── build_text ── diff --git a/docs/roadmap.md b/docs/roadmap.md index a70635d7..d068f94e 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -72,6 +72,7 @@ The direction is simple: ### Context Compression - Manual `/compact [instructions]` streams a one-shot summarization through the live model and replaces the in-memory transcript with a synthetic continuation. Optional trailing instructions steer the focus. +- Auto-compaction is enabled by default and triggers when observed response usage crosses the resolved threshold. `[client.compaction]` and `OX_COMPACTION_AUTO_*` can disable or tune the automatic trigger. - Persisted as a dedicated `compact` JSONL boundary plus the synthetic post-compact message. Resume sees only the post-compact tail. - File tracker resets on compact. Edits after `/compact` require a fresh Read. @@ -89,10 +90,6 @@ The direction is simple: - Project-level allowlists to auto-approve trusted commands. - Plan mode: read-only review of the agent's proposed changes before any tool runs. -### Auto-Compaction - -- Fire `/compact` automatically when the running token usage approaches the model's context window. Threshold math (effective context window minus reserved-output buffer), per-turn check at sampling boundaries, single-turn circuit breaker, and a config knob for opt-out. 
- ### Slash Commands (continuation) Remaining surface beyond Working Today: From 0a874cdf44132385ddc82e9d4206861ff1c3fd9f Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 16:25:27 +0800 Subject: [PATCH 04/13] docs(roadmap): tighten product status --- docs/roadmap.md | 101 +++++++++++++++++++++--------------------------- 1 file changed, 44 insertions(+), 57 deletions(-) diff --git a/docs/roadmap.md b/docs/roadmap.md index d068f94e..4d04fa3c 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -1,29 +1,26 @@ # Roadmap -oxide-code is still early. This roadmap is the high-level product view: what works, what's being built next, and what is intentionally out of scope for now. +oxide-code is still early. This roadmap is the high-level product view: what works, what's being built next, and what is out of scope for now. The direction is simple: -- Build a useful terminal-based AI coding assistant in Rust. -- Follow the agent-harness architecture: the model is the agent, everything else is harness (tools, context, permissions, coordination). -- Keep the architecture understandable. New features should fit the current model instead of forcing large abstractions too early. +- Build a fast terminal AI coding assistant for real project work. +- Keep the assistant transparent about context, tools, sessions, and state. +- Grow the product carefully so daily workflows stay understandable. ## Working Today ### Terminal UI -- Streaming output with markdown rendering and syntax-highlighted code blocks. -- Multi-line input with a prompt marker and a status bar showing model, working directory, and run state. -- Welcome surface on empty chat with identity, environment, and a few starter commands. -- Rich per-tool views: edit diffs with line gutters, line-numbered read excerpts, grouped grep matches, and structured glob lists. Bash output rides the default truncated-text view. 
-- Themable via runtime-loaded TOML, with 5 built-in palettes (Catppuccin Mocha, Macchiato, Frappe, Latte, Material) and per-slot overrides. -- Three modes: full TUI, bare REPL (`--no-tui`), and headless (`-p`). +- Streaming chat with markdown, syntax-highlighted code, and clear tool output. +- Multi-line input, a live status bar, and a focused welcome screen for new sessions. +- Theme support with built-in palettes and user-defined TOML themes. +- Full TUI, bare REPL (`--no-tui`), and headless (`-p`) modes. ### Agent Loop -- Async streaming from the Anthropic Messages API. -- Tool-use round-trip: the model calls tools, results feed back, and the loop continues until a text-only response. -- Extended thinking with optional dimmed display. +- Anthropic-powered streaming turns with tool use and multi-step continuation. +- Optional extended-thinking display for models that support it. ### Tools @@ -38,99 +35,89 @@ The direction is simple: ### Turn Interruption & Queueing -- Esc / Ctrl+C while busy interrupts the in-flight turn. Partial output is preserved with a clear `(interrupted)` marker. -- Type during a busy turn to queue prompts. Queued prompts splice into the same multi-step turn at the next round boundary (between tool calls), so follow-ups land without aborting in-flight work. Tool-less turns drain queued prompts at the turn boundary instead. -- Esc on idle pops the most recent queued prompt back into the input for editing. -- Idle Ctrl+C arms a 1-second exit confirmation. A second press confirms. +- Interrupt busy turns without losing partial output. +- Queue follow-up prompts while the assistant is working, then edit or cancel them from idle. +- Exit intentionally with a guarded Ctrl+C confirmation. ### System Prompt -- Runtime environment (cwd, platform, shell, git status, date, model) injected every turn. -- `CLAUDE.md` / `AGENTS.md` discovered from user-global and project scopes (root-to-CWD walk, root-level and `.claude/` at each level). 
+- Project environment and model context are injected every turn. +- `CLAUDE.md` / `AGENTS.md` instructions are loaded from user and project scopes. ### Session Persistence -- Every conversation saved as JSONL under `$XDG_DATA_HOME/ox/sessions/{project}/`. -- `ox --list` browses recent sessions, capped at 30 by default with `--limit N` / `--limit 0` overrides. `ox -c` resumes by recency, prefix, or path. -- Mid-session `/resume` (alias `/continue`) opens an in-place picker (substring search, Tab toggles current-project ↔ all projects). `/resume ` jumps directly. -- AI-generated 3-7-word titles land shortly after the first prompt. +- Conversations are saved per project and can be listed or resumed later. +- Mid-session `/resume` switches chats without restarting the app. +- Short AI-generated titles make session history easier to scan. ### File-Change Tracking -- Per-session tracker remembers each Read. Unchanged re-reads return a cache-hit stub instead of the full body. -- Edit and Write require a prior full Read and refuse if the on-disk bytes have drifted (xxh64 fallback for cloud-sync mtime touches). -- Tracker state persists into the session JSONL on clean exit and restores on resume. +- Tracks reads so edits are made against files the assistant has actually seen. +- Refuses stale writes when files changed on disk. +- Restores edit-safety state when a session resumes. ### Slash Commands -- Built-in: `/clear` (aliases `/new`, `/reset`), `/compact`, `/config`, `/delete`, `/diff`, `/effort`, `/help`, `/init`, `/model`, `/rename`, `/resume` (alias `/continue`), `/status`, `/theme`. See the [user guide](guide/slash-commands.md). -- Autocomplete popup on typing `/`, with ranked filter, Tab completion, and arg-mode completion for commands with curated rosters (`/model`, `/effort`, `/theme`). -- Mid-session swaps (`/model`, `/effort`, `/rename`, `/resume`, `/theme`) are session-only, and no slash command writes user config files. 
-- Destructive ops (`/delete `, or Ctrl+D / Delete inside the `/resume` picker) gate behind a Y/N confirm modal. The live session is excluded; any saved non-live session can be deleted. -- Modal UI primitive: focus-grabbing overlays above the input for picker, slider, editor, and read-only kv-overview forms. Nested modals layer cleanly, and Esc / Ctrl+C cancels any modal. +- Built-in commands cover session control, config/status, model and theme changes, diffs, compaction, and help. See the [user guide](guide/slash-commands.md). +- Autocomplete, typed shortcuts, and modal pickers keep common actions quick. +- Destructive session actions require confirmation. ### Context Compression -- Manual `/compact [instructions]` streams a one-shot summarization through the live model and replaces the in-memory transcript with a synthetic continuation. Optional trailing instructions steer the focus. -- Auto-compaction is enabled by default and triggers when observed response usage crosses the resolved threshold. `[client.compaction]` and `OX_COMPACTION_AUTO_*` can disable or tune the automatic trigger. -- Persisted as a dedicated `compact` JSONL boundary plus the synthetic post-compact message. Resume sees only the post-compact tail. -- File tracker resets on compact. Edits after `/compact` require a fresh Read. +- Manual `/compact [instructions]` and default auto-compaction keep long sessions usable. +- Compaction keeps a visible history boundary and makes future edits require fresh reads. ### Authentication & Configuration -- Anthropic API key via `ANTHROPIC_API_KEY` or config file. -- Claude Code OAuth credentials picked up automatically (macOS Keychain, Linux file). -- TOML config with layered precedence: defaults → user (`~/.config/ox/config.toml`) → project (`ox.toml`) → environment. +- Supports Anthropic API keys and Claude Code OAuth pickup. +- Layered TOML configuration supports user, project, and environment overrides. 
## Current Focus ### Permission & Approval -- Per-tool approval prompts before destructive actions (bash, write, edit). -- Project-level allowlists to auto-approve trusted commands. -- Plan mode: read-only review of the agent's proposed changes before any tool runs. +- Approval prompts for destructive tool actions. +- Project allowlists for trusted commands. +- Plan mode for reviewing the assistant's proposed work before tools run. ### Slash Commands (continuation) Remaining surface beyond Working Today: -- Deferred: `/cost`, `/login` / `/logout`, custom user commands, `/init` multi-phase flow. +- Cost visibility, login/logout, custom commands, and a guided `/init` flow. -Persistence stance: `/model`, `/effort`, and `/theme` mutate session state only, and restart returns to user-declared config. Cross-session persistence will land as an **explicit subcommand** writing to a user-opted-in path, never as a silent merge into a `~/.claude.json`-style mega-file. +Persistence stance: session commands should feel reversible. Cross-session writes will require an explicit user action. ### Viewport Virtualization -- Render only the visible chat region for sessions with thousands of blocks. +- Keep very long sessions responsive by rendering only the visible chat region. ## Later ### MCP Integration -- MCP client to call external tool servers (Atlassian, GitHub, custom). -- MCP server mode to expose oxide-code as a tool to other agents. +- MCP client support for external tool servers. +- MCP server mode so other agents can call oxide-code. ### Agent Infrastructure -- Task management for multi-step work (TodoWrite-style tracking). -- Subagent spawning to delegate self-contained sub-tasks. -- Background tasks for long-running shell processes. -- Agent-team coordination across multiple subagents. -- Git-worktree isolation for parallel implementation attempts. +- Task tracking for multi-step work. +- Subagents for self-contained delegation. 
+- Background shell processes and stronger parallel-work support. ### Sandboxing -- Sandboxed execution for `bash` / `write` / `edit` so the agent runs without trusting the host shell. +- Sandboxed `bash`, `write`, and `edit` execution. ### Workflow Skills -- User-extensible templates that can override built-ins or add new ones (e.g. project-local `~/.claude/commands/review.md`). Built-ins like `/init` ship under Working Today. -- Auth slash commands: `/login`, `/logout`. -- Configurable instruction directories beyond `.claude/`. +- User-extensible workflow templates. +- Auth slash commands. +- Configurable instruction directories. ### Status Bar Redesign -- Current bar packs model + status + (optional) title + cwd into a single line. Layout collapses to model + status under width pressure but reads as cluttered above ~80 cols. -- Direction: a richer, possibly multi-segment surface with token / cost meter, queued-prompt indicator, session id glance, and theme indicator. Likely needs a layout rethink rather than incremental slot additions. +- A clearer status surface for model, cost, queue state, session identity, and theme. ## Not the Goal Right Now From c752c6841d055ba57aa96a8bad1e61d615c88fe2 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 16:37:03 +0800 Subject: [PATCH 05/13] refactor(agent): tighten auto-compaction loop state --- crates/oxide-code/src/agent.rs | 10 +- .../oxide-code/src/client/anthropic/wire.rs | 7 - crates/oxide-code/src/config.rs | 31 +- crates/oxide-code/src/config/file.rs | 45 +-- crates/oxide-code/src/main.rs | 299 +++++++++++------- 5 files changed, 229 insertions(+), 163 deletions(-) diff --git a/crates/oxide-code/src/agent.rs b/crates/oxide-code/src/agent.rs index e711a7ff..60f3d6bb 100644 --- a/crates/oxide-code/src/agent.rs +++ b/crates/oxide-code/src/agent.rs @@ -130,6 +130,10 @@ pub(crate) struct AutoCompact<'a> { /// - [`TurnAbort::Cancelled`] / [`TurnAbort::Quit`] on the matching [`UserAction`]. 
/// - [`TurnAbort::Failed`] for stream errors, tool-dispatch failures, or hitting /// [`MAX_TOOL_ROUNDS`] without a final response. +#[expect( + clippy::too_many_arguments, + reason = "the turn driver keeps the live mutable state explicit at the call site" +)] pub(crate) async fn agent_turn( client: &dyn AgentClient, tools: &ToolRegistry, @@ -549,8 +553,10 @@ async fn stream_response( } } - let mut outcome = StreamOutcome::default(); - outcome.usage = saw_usage.then_some(usage); + let mut outcome = StreamOutcome { + usage: saw_usage.then_some(usage), + ..StreamOutcome::default() + }; for acc in blocks.into_iter().flatten() { let (block, parse_error) = acc.into_content_block(); outcome.parse_errors.extend(parse_error); diff --git a/crates/oxide-code/src/client/anthropic/wire.rs b/crates/oxide-code/src/client/anthropic/wire.rs index 9c79df6c..0a5d3e99 100644 --- a/crates/oxide-code/src/client/anthropic/wire.rs +++ b/crates/oxide-code/src/client/anthropic/wire.rs @@ -231,13 +231,6 @@ pub(crate) struct MessageDeltaBody { pub(crate) stop_reason: Option, } -#[cfg_attr( - not(test), - expect( - dead_code, - reason = "fields populated by serde, defined for full SSE protocol coverage" - ) -)] #[derive(Debug, Clone, Deserialize)] pub(crate) struct Usage { #[serde(default)] diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index c63de8f4..32a5555a 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -196,6 +196,7 @@ pub(crate) struct CompactionConfig { } impl CompactionConfig { + #[cfg(test)] pub(crate) const fn disabled() -> Self { Self { auto: AutoCompactionConfig::disabled(), @@ -391,7 +392,7 @@ fn resolve_compaction( max_tokens: u32, ) -> Result { let auto_requested = env::bool("OX_COMPACTION_AUTO_ENABLED") - .or_else(|| file.as_ref().and_then(|c| c.auto_enabled)) + .or_else(|| file.as_ref().and_then(|c| c.enabled)) .unwrap_or(true); let auto = if auto_requested { resolve_auto_compaction(file.as_ref(), model, 
max_tokens)? @@ -422,8 +423,8 @@ fn resolve_auto_threshold( let env_tokens = env_u32("OX_COMPACTION_AUTO_THRESHOLD_TOKENS")?; let env_percent = env_u8("OX_COMPACTION_AUTO_THRESHOLD_PERCENT")?; let env_threshold_set = env_tokens.is_some() || env_percent.is_some(); - let file_tokens = file.and_then(|c| c.auto_threshold_tokens); - let file_percent = file.and_then(|c| c.auto_threshold_percent); + let file_tokens = file.and_then(|c| c.threshold_tokens); + let file_percent = file.and_then(|c| c.threshold_percent); let (tokens, percent) = if env_threshold_set { (env_tokens, env_percent) } else { @@ -436,7 +437,7 @@ fn resolve_auto_threshold( } (Some(tokens), None) => validate_positive_tokens(tokens).map(Some), (None, Some(percent)) => threshold_from_percent(percent, model, max_tokens), - (None, None) => default_auto_threshold(model, max_tokens), + (None, None) => Ok(default_auto_threshold(model, max_tokens)), } } @@ -458,9 +459,9 @@ fn threshold_from_percent(percent: u8, model: &str, max_tokens: u32) -> Result Result> { - Ok(crate::model::context_window_for(model) - .and_then(|window| default_auto_threshold_for_window(window, max_tokens))) +fn default_auto_threshold(model: &str, max_tokens: u32) -> Option { + crate::model::context_window_for(model) + .and_then(|window| default_auto_threshold_for_window(window, max_tokens)) } fn default_auto_threshold_for_window(context_window: u32, max_tokens: u32) -> Option { @@ -832,10 +833,10 @@ mod tests { let dir = tempfile::tempdir().unwrap(); write_user_config( dir.path(), - indoc::indoc! {r#" + indoc::indoc! {r" [client.compaction] auto_enabled = false - "#}, + "}, ); let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) .await @@ -848,10 +849,10 @@ mod tests { let dir = tempfile::tempdir().unwrap(); write_user_config( dir.path(), - indoc::indoc! {r#" + indoc::indoc! 
{r" [client.compaction] auto_enabled = false - "#}, + "}, ); let vars = env_vars(vec![xdg(&dir), env("OX_COMPACTION_AUTO_ENABLED", "1")]); let config = temp_env::async_with_vars(vars, Config::load()) @@ -865,10 +866,10 @@ mod tests { let dir = tempfile::tempdir().unwrap(); write_user_config( dir.path(), - indoc::indoc! {r#" + indoc::indoc! {r" [client.compaction] auto_threshold_tokens = 400000 - "#}, + "}, ); let config = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) .await @@ -895,11 +896,11 @@ mod tests { let dir = tempfile::tempdir().unwrap(); write_user_config( dir.path(), - indoc::indoc! {r#" + indoc::indoc! {r" [client.compaction] auto_threshold_tokens = 400000 auto_threshold_percent = 40 - "#}, + "}, ); let err = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) .await diff --git a/crates/oxide-code/src/config/file.rs b/crates/oxide-code/src/config/file.rs index c3baf773..2b82d0cf 100644 --- a/crates/oxide-code/src/config/file.rs +++ b/crates/oxide-code/src/config/file.rs @@ -36,12 +36,15 @@ pub(super) struct ClientConfig { pub(super) compaction: Option, } -#[derive(Debug, Default, Deserialize)] +#[derive(Debug, Default, Clone, Copy, Deserialize)] #[serde(deny_unknown_fields)] pub(super) struct CompactionConfig { - pub(super) auto_enabled: Option, - pub(super) auto_threshold_tokens: Option, - pub(super) auto_threshold_percent: Option, + #[serde(rename = "auto_enabled")] + pub(super) enabled: Option, + #[serde(rename = "auto_threshold_tokens")] + pub(super) threshold_tokens: Option, + #[serde(rename = "auto_threshold_percent")] + pub(super) threshold_percent: Option, } #[derive(Debug, Default, Deserialize)] @@ -88,9 +91,9 @@ impl ClientConfig { impl CompactionConfig { fn merge(self, other: Self) -> Self { Self { - auto_enabled: other.auto_enabled.or(self.auto_enabled), - auto_threshold_tokens: other.auto_threshold_tokens.or(self.auto_threshold_tokens), - auto_threshold_percent: 
other.auto_threshold_percent.or(self.auto_threshold_percent), + enabled: other.enabled.or(self.enabled), + threshold_tokens: other.threshold_tokens.or(self.threshold_tokens), + threshold_percent: other.threshold_percent.or(self.threshold_percent), } } } @@ -243,9 +246,9 @@ mod tests { max_tokens: Some(1000), prompt_cache_ttl: Some(super::super::PromptCacheTtl::FiveMin), compaction: Some(CompactionConfig { - auto_enabled: Some(false), - auto_threshold_tokens: Some(400_000), - auto_threshold_percent: None, + enabled: Some(false), + threshold_tokens: Some(400_000), + threshold_percent: None, }), }), tui: Some(TuiConfig { @@ -263,9 +266,9 @@ mod tests { max_tokens: Some(2000), prompt_cache_ttl: Some(super::super::PromptCacheTtl::OneHour), compaction: Some(CompactionConfig { - auto_enabled: Some(true), - auto_threshold_tokens: None, - auto_threshold_percent: Some(40), + enabled: Some(true), + threshold_tokens: None, + threshold_percent: Some(40), }), }), tui: Some(TuiConfig { @@ -290,9 +293,9 @@ mod tests { Some(super::super::PromptCacheTtl::OneHour) ); let compaction = client.compaction.expect("compaction section should merge"); - assert_eq!(compaction.auto_enabled, Some(true)); - assert_eq!(compaction.auto_threshold_tokens, Some(400_000)); - assert_eq!(compaction.auto_threshold_percent, Some(40)); + assert_eq!(compaction.enabled, Some(true)); + assert_eq!(compaction.threshold_tokens, Some(400_000)); + assert_eq!(compaction.threshold_percent, Some(40)); let tui = merged.tui.expect("tui section should be present"); assert_eq!(tui.show_thinking, Some(true)); @@ -309,9 +312,9 @@ mod tests { max_tokens: Some(4096), prompt_cache_ttl: Some(super::super::PromptCacheTtl::FiveMin), compaction: Some(CompactionConfig { - auto_enabled: Some(false), - auto_threshold_tokens: Some(400_000), - auto_threshold_percent: None, + enabled: Some(false), + threshold_tokens: Some(400_000), + threshold_percent: None, }), }), tui: Some(TuiConfig { @@ -335,8 +338,8 @@ mod tests { let compaction = 
client .compaction .expect("compaction section should survive"); - assert_eq!(compaction.auto_enabled, Some(false)); - assert_eq!(compaction.auto_threshold_tokens, Some(400_000)); + assert_eq!(compaction.enabled, Some(false)); + assert_eq!(compaction.threshold_tokens, Some(400_000)); let tui = merged.tui.expect("tui section should survive"); assert_eq!(tui.show_thinking, Some(true)); diff --git a/crates/oxide-code/src/main.rs b/crates/oxide-code/src/main.rs index 7ad9e97b..508ad316 100644 --- a/crates/oxide-code/src/main.rs +++ b/crates/oxide-code/src/main.rs @@ -329,174 +329,237 @@ async fn run_tui( /// Each `TurnAbort` arm emits exactly one terminal event (`Error` xor `TurnComplete`). #[expect( clippy::too_many_arguments, - reason = "session lifecycle (store, handle, file tracker) lives here for /clear; bundling into a struct would just rename the dependencies" + reason = "the task entry point receives the spawned loop dependencies before AgentLoopTask owns them" )] async fn agent_loop_task( - mut client: Client, + client: Client, tools: Arc, sink: tui::event::ChannelSink, - mut user_rx: mpsc::Receiver, - mut session: SessionHandle, + user_rx: mpsc::Receiver, + session: SessionHandle, resumed_messages: Vec, store: SessionStore, file_tracker: Arc, ) -> Result<()> { - let mut messages: Vec = resumed_messages; - let mut auto_compaction_failures = 0_u8; - let mut last_usage = None; - - while let Some(action) = user_rx.recv().await { - match action { - UserAction::SubmitPrompt(text) => { - let mut pre_prompt_pending = Vec::new(); - let pre_prompt_compact = auto_compact_before_prompt( - &client, - &session, - &file_tracker, - &mut messages, - &sink, - &mut user_rx, - &mut pre_prompt_pending, - &mut auto_compaction_failures, - last_usage, - ) - .await; - match pre_prompt_compact { - Ok(true) => last_usage = None, - Ok(false) => {} - Err(TurnAbort::Cancelled) => { - _ = sink.send(AgentEvent::Cancelled); - continue; - } - Err(TurnAbort::Quit) => break, - 
Err(TurnAbort::Failed(e)) => { - _ = sink.send(AgentEvent::Error(format!("{e:#}"))); - continue; - } - } + AgentLoopTask { + client, + tools, + sink, + user_rx, + session, + messages: resumed_messages, + store, + file_tracker, + auto_compaction_failures: 0, + last_usage: None, + } + .run() + .await +} - let user_msg = Message::user(&text); - let outcome = session.record_message(user_msg.clone()).await; - sink.session_write_error(outcome.failure.as_deref()); - messages.push(user_msg); - agent::record_drained_prompts( - pre_prompt_pending.drain(..), - &mut messages, - &session, - &sink, - ) - .await; +struct AgentLoopTask { + client: Client, + tools: Arc, + sink: tui::event::ChannelSink, + user_rx: mpsc::Receiver, + session: SessionHandle, + messages: Vec, + store: SessionStore, + file_tracker: Arc, + auto_compaction_failures: u8, + last_usage: Option, +} - if let Some(seed) = outcome.ai_title_seed { - session::title_generator::spawn( - client.clone(), - session.clone(), - sink.clone(), - seed, - ); - } +enum LoopControl { + Continue, + Stop, +} - let prompt = prompt::build_prompt(client.model()).await; - let outcome = agent_turn( - &client, - &tools, - &mut messages, - &prompt, - &sink, - &session, - &mut user_rx, - Some(AutoCompact { - config: client.compaction().auto, - failures: &mut auto_compaction_failures, - file_tracker: &file_tracker, - }), - ) - .await; - match outcome { - Ok(report) => { - last_usage = report.usage; - _ = sink.send(AgentEvent::TurnComplete); - } - Err(TurnAbort::Cancelled) => { - _ = sink.send(AgentEvent::Cancelled); - } - Err(TurnAbort::Quit) => break, - Err(TurnAbort::Failed(e)) => { - _ = sink.send(AgentEvent::Error(format!("{e:#}"))); - } - } +impl AgentLoopTask { + async fn run(&mut self) -> Result<()> { + while let Some(action) = self.user_rx.recv().await { + if matches!(self.handle_action(action).await, LoopControl::Stop) { + break; } + } + + Ok(()) + } + + async fn handle_action(&mut self, action: UserAction) -> LoopControl { + 
match action { + UserAction::SubmitPrompt(text) => self.handle_submit_prompt(text).await, // Cancel / ConfirmExit are no-ops here; PreviewTheme / SwapTheme are TUI-only and // applied client-side in `App::apply_action_locally`. UserAction::Cancel | UserAction::ConfirmExit | UserAction::PreviewTheme { .. } - | UserAction::SwapTheme { .. } => {} + | UserAction::SwapTheme { .. } => LoopControl::Continue, UserAction::Clear => { - let outcome = - roll_session(&mut session, &store, &file_tracker, client.model()).await; - sink.session_write_error(outcome.finalize_failure.as_deref()); - client.set_session_id(outcome.new_id.clone()); - messages.clear(); - auto_compaction_failures = 0; - last_usage = None; - if let Err(e) = sink.send(AgentEvent::SessionRolled { id: outcome.new_id }) { + let outcome = roll_session( + &mut self.session, + &self.store, + &self.file_tracker, + self.client.model(), + ) + .await; + self.sink + .session_write_error(outcome.finalize_failure.as_deref()); + self.client.set_session_id(outcome.new_id.clone()); + self.messages.clear(); + self.reset_auto_compaction(); + if let Err(e) = self + .sink + .send(AgentEvent::SessionRolled { id: outcome.new_id }) + { // /clear succeeded server-side but the TUI never sees the new id — surfaces as // a stuck "old session" header. Error-level so the log makes it findable. 
tracing::error!("session-rolled event dropped: {e}"); } + LoopControl::Continue } UserAction::Resume { session_id } => { apply_resume( - &mut session, - &mut client, - &mut messages, - &store, - &file_tracker, - &sink, + &mut self.session, + &mut self.client, + &mut self.messages, + &self.store, + &self.file_tracker, + &self.sink, &session_id, ) .await; - auto_compaction_failures = 0; - last_usage = None; + self.reset_auto_compaction(); + LoopControl::Continue } UserAction::Compact { instructions } => { let outcome = apply_compact( - &client, - &session, - &file_tracker, - &mut messages, - &sink, - &mut user_rx, + &self.client, + &self.session, + &self.file_tracker, + &mut self.messages, + &self.sink, + &mut self.user_rx, instructions, ) .await; match outcome { Ok(true) => { - auto_compaction_failures = 0; - last_usage = None; + self.reset_auto_compaction(); + LoopControl::Continue } - Ok(false) => {} + Ok(false) => LoopControl::Continue, Err(TurnAbort::Cancelled) => { - _ = sink.send(AgentEvent::Cancelled); + _ = self.sink.send(AgentEvent::Cancelled); + LoopControl::Continue } - Err(TurnAbort::Quit) => break, + Err(TurnAbort::Quit) => LoopControl::Stop, Err(TurnAbort::Failed(e)) => { - _ = sink.send(AgentEvent::Error(format!("{e:#}"))); + _ = self.sink.send(AgentEvent::Error(format!("{e:#}"))); + LoopControl::Continue } } } UserAction::Rename { title } => { - apply_rename(&session, &sink, title).await; + apply_rename(&self.session, &self.sink, title).await; + LoopControl::Continue } UserAction::SwapConfig { model, effort } => { - apply_swap_config(&mut client, &sink, model, effort); + apply_swap_config(&mut self.client, &self.sink, model, effort); + LoopControl::Continue } - UserAction::Quit => break, + UserAction::Quit => LoopControl::Stop, } } - Ok(()) + async fn handle_submit_prompt(&mut self, text: String) -> LoopControl { + let mut pre_prompt_pending = Vec::new(); + let pre_prompt_compact = auto_compact_before_prompt( + &self.client, + &self.session, + 
&self.file_tracker, + &mut self.messages, + &self.sink, + &mut self.user_rx, + &mut pre_prompt_pending, + &mut self.auto_compaction_failures, + self.last_usage, + ) + .await; + match pre_prompt_compact { + Ok(true) => self.last_usage = None, + Ok(false) => {} + Err(TurnAbort::Cancelled) => { + _ = self.sink.send(AgentEvent::Cancelled); + return LoopControl::Continue; + } + Err(TurnAbort::Quit) => return LoopControl::Stop, + Err(TurnAbort::Failed(e)) => { + _ = self.sink.send(AgentEvent::Error(format!("{e:#}"))); + return LoopControl::Continue; + } + } + + let user_msg = Message::user(&text); + let outcome = self.session.record_message(user_msg.clone()).await; + self.sink.session_write_error(outcome.failure.as_deref()); + self.messages.push(user_msg); + agent::record_drained_prompts( + pre_prompt_pending.drain(..), + &mut self.messages, + &self.session, + &self.sink, + ) + .await; + + if let Some(seed) = outcome.ai_title_seed { + session::title_generator::spawn( + self.client.clone(), + self.session.clone(), + self.sink.clone(), + seed, + ); + } + + let prompt = prompt::build_prompt(self.client.model()).await; + let auto_config = self.client.compaction().auto; + let outcome = agent_turn( + &self.client, + &self.tools, + &mut self.messages, + &prompt, + &self.sink, + &self.session, + &mut self.user_rx, + Some(AutoCompact { + config: auto_config, + failures: &mut self.auto_compaction_failures, + file_tracker: &self.file_tracker, + }), + ) + .await; + match outcome { + Ok(report) => { + self.last_usage = report.usage; + _ = self.sink.send(AgentEvent::TurnComplete); + LoopControl::Continue + } + Err(TurnAbort::Cancelled) => { + _ = self.sink.send(AgentEvent::Cancelled); + LoopControl::Continue + } + Err(TurnAbort::Quit) => LoopControl::Stop, + Err(TurnAbort::Failed(e)) => { + _ = self.sink.send(AgentEvent::Error(format!("{e:#}"))); + LoopControl::Continue + } + } + } + + fn reset_auto_compaction(&mut self) { + self.auto_compaction_failures = 0; + self.last_usage = None; 
+ } } /// Drives the mid-session resume: swap the handle, repaint the chat, surface previous-session From d2011ad7d546e3700a98c201143f5cefac514116 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 16:50:45 +0800 Subject: [PATCH 06/13] test(agent): cover auto-compaction edge cases --- crates/oxide-code/src/agent.rs | 181 ++++++++++++++++++++++ crates/oxide-code/src/agent/compaction.rs | 40 ++++- crates/oxide-code/src/client/anthropic.rs | 16 +- crates/oxide-code/src/config.rs | 83 ++++++++++ 4 files changed, 318 insertions(+), 2 deletions(-) diff --git a/crates/oxide-code/src/agent.rs b/crates/oxide-code/src/agent.rs index 60f3d6bb..1356535e 100644 --- a/crates/oxide-code/src/agent.rs +++ b/crates/oxide-code/src/agent.rs @@ -697,6 +697,28 @@ mod tests { } } + struct FailingCompactClient; + + impl AgentClient for FailingCompactClient { + fn stream_message( + &self, + _messages: &[Message], + _system_sections: &[&str], + _user_context: Option<&str>, + _tools: &[ToolDefinition], + ) -> Result>> { + unreachable!("auto-compaction tests do not stream turns") + } + + fn compact_session<'a>( + &'a self, + _transcript: &'a [Message], + _instructions: Option<&'a str>, + ) -> Pin> + Send + 'a>> { + Box::pin(async { Err(anyhow!("summarizer unavailable")) }) + } + } + impl AgentClient for FakeClient { fn stream_message( &self, @@ -938,6 +960,165 @@ mod tests { crate::session::handle::testing::dead("dead-test-session") } + // ── auto_compact_if_needed ── + + #[tokio::test] + async fn auto_compact_if_needed_skips_without_auto_state_usage_or_threshold() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let client = FakeClient::new(Vec::new()); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![Message::user("hi"), Message::assistant("there")]; + let mut pending = Vec::new(); + let mut failures = 0; + + let absent = auto_compact_if_needed( + &client, + &session, + &mut messages, + 
&sink, + &mut inert_user_rx(), + &mut pending, + None, + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ) + .await + .unwrap(); + assert!(!absent); + + let missing_usage = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + None, + ) + .await + .unwrap(); + assert!(!missing_usage); + + let below_threshold = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(100), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ) + .await + .unwrap(); + assert!(!below_threshold); + assert_eq!(messages.len(), 2); + assert_eq!(failures, 0); + } + + #[tokio::test] + async fn auto_compact_if_needed_counts_summarizer_failure_without_replacing_messages() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![Message::user("hi"), Message::assistant("there")]; + let mut pending = Vec::new(); + let mut failures = 0; + + let compacted = auto_compact_if_needed( + &FailingCompactClient, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ) + .await + .unwrap(); + + assert!(!compacted); + assert_eq!(failures, 1); + assert!(matches!(&messages[0].content[0], ContentBlock::Text { text } if text == "hi")); + } + + 
#[tokio::test] + async fn auto_compact_if_needed_counts_persist_failure_without_replacing_messages() { + let session = dead_test_session(); + let client = FakeClient::new(Vec::new()); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![Message::user("hi"), Message::assistant("there")]; + let mut pending = Vec::new(); + let mut failures = 0; + + let compacted = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ) + .await + .unwrap(); + + assert!(!compacted); + assert_eq!(failures, 1); + assert!(matches!(&messages[0].content[0], ContentBlock::Text { text } if text == "hi")); + assert!( + sink.events() + .iter() + .any(|event| matches!(event, AgentEvent::Error(message) if message.contains("Session write failed"))) + ); + } + #[tokio::test] async fn agent_turn_dead_session_surfaces_write_failure_on_first_call() { // Write errors must not abort the turn; one Error event surfaces and the turn returns Ok. 
diff --git a/crates/oxide-code/src/agent/compaction.rs b/crates/oxide-code/src/agent/compaction.rs index bcff8b94..b42ff02e 100644 --- a/crates/oxide-code/src/agent/compaction.rs +++ b/crates/oxide-code/src/agent/compaction.rs @@ -195,13 +195,24 @@ pub(crate) fn strip_synthetic_post_compact_prefix(message: &mut Message) -> bool #[cfg(test)] mod tests { + use anyhow::anyhow; use serde_json::json; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, Request, ResponseTemplate}; use super::*; + use crate::agent::AgentClient; use crate::client::anthropic::testing::{Captured, api_key, captured, test_client}; use crate::message::Role; + use crate::session::store::test_store; + + struct FailingSink; + + impl AgentSink for FailingSink { + fn send(&self, _event: AgentEvent) -> anyhow::Result<()> { + Err(anyhow!("sink closed")) + } + } // ── compact_session ── @@ -291,12 +302,39 @@ mod tests { .await; let client = test_client(server.uri(), api_key(), "claude-haiku-4-5"); - let summary = compact_session(&client, &fake_transcript(), None) + let summary = AgentClient::compact_session(&client, &fake_transcript(), None) .await .unwrap(); assert_eq!(summary, "fixed login bug"); } + #[tokio::test] + async fn replace_session_with_summary_still_replaces_messages_when_event_send_fails() { + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = crate::session::handle::start(&store, "claude-sonnet-4-6"); + let outcome = session.record_message(Message::user("fix the bug")).await; + assert!(outcome.failure.is_none(), "{:?}", outcome.failure); + let tracker = FileTracker::default(); + let mut messages = fake_transcript(); + + let compacted = replace_session_with_summary( + &session, + &tracker, + &mut messages, + &FailingSink, + "fixed login bug".to_owned(), + None, + ) + .await; + + assert!(compacted); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if 
text.contains("fixed login bug")) + ); + } + #[tokio::test] async fn compact_session_collects_initial_text_from_content_block_start() { let server = MockServer::start().await; diff --git a/crates/oxide-code/src/client/anthropic.rs b/crates/oxide-code/src/client/anthropic.rs index 10c6837d..71313cdf 100644 --- a/crates/oxide-code/src/client/anthropic.rs +++ b/crates/oxide-code/src/client/anthropic.rs @@ -396,7 +396,7 @@ mod tests { use super::testing::{Captured, api_key, captured, oauth, test_config}; use super::wire::{ContentBlockInfo, Delta}; use super::*; - use crate::config::{Effort, ThinkingConfig}; + use crate::config::{AutoCompactionConfig, CompactionConfig, Effort, ThinkingConfig}; // ── Fixtures ── @@ -478,6 +478,20 @@ mod tests { assert_eq!(client.model(), "claude-sonnet-4-6"); } + #[test] + fn new_exposes_compaction_config() { + let mut config = test_config(OFFLINE_URL, Auth::ApiKey("sk-test".to_owned()), TEST_MODEL); + config.compaction = CompactionConfig { + auto: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(123_456), + }, + }; + let client = Client::new(config, None).unwrap(); + + assert_eq!(client.compaction().auto.threshold_tokens, Some(123_456)); + } + #[test] fn new_none_session_id_generates_uuid_v4() { let client = Client::new( diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index 32a5555a..e3642cf7 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -909,6 +909,49 @@ mod tests { assert!(msg.contains("only one"), "{msg}"); } + #[tokio::test] + async fn load_compaction_rejects_zero_auto_threshold_tokens() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("OX_COMPACTION_AUTO_THRESHOLD_TOKENS", "0"), + ]); + let err = temp_env::async_with_vars(vars, Config::load()) + .await + .expect_err("zero threshold must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("greater than zero"), "{msg}"); + } + + 
#[tokio::test] + async fn load_compaction_rejects_out_of_range_auto_threshold_percent() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("OX_COMPACTION_AUTO_THRESHOLD_PERCENT", "101"), + ]); + let err = temp_env::async_with_vars(vars, Config::load()) + .await + .expect_err("out-of-range threshold percent must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("between 1 and 100"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_percent_for_unknown_model_disables_auto_trigger() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "custom-model"), + env("OX_COMPACTION_AUTO_THRESHOLD_PERCENT", "40"), + ]); + let config = temp_env::async_with_vars(vars, Config::load()) + .await + .unwrap(); + assert!(!config.compaction.auto.enabled); + assert_eq!(config.compaction.auto.threshold_tokens, None); + } + #[tokio::test] async fn load_invalid_max_tokens_env_errors() { let dir = tempfile::tempdir().unwrap(); @@ -1234,6 +1277,46 @@ mod tests { assert_eq!(display_bool(false), "off"); } + // ── display_auto_compaction ── + + #[test] + fn display_auto_compaction_names_enabled_threshold_or_off() { + assert_eq!( + display_auto_compaction(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(400_000), + }), + "on at 400000 tokens", + ); + assert_eq!( + display_auto_compaction(AutoCompactionConfig { + enabled: false, + threshold_tokens: Some(400_000), + }), + "off", + ); + } + + // ── AutoCompactionConfig::should_trigger ── + + #[test] + fn should_trigger_requires_enabled_threshold_and_enough_tokens() { + let enabled = AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }; + assert!(enabled.should_trigger(10)); + assert!(!enabled.should_trigger(9)); + assert!(!AutoCompactionConfig::disabled().should_trigger(100)); + assert!( + !AutoCompactionConfig { + enabled: true, + threshold_tokens: None, + } + .should_trigger(100) + 
); + } + // ── default_max_tokens ── #[test] From cf0b372a7d601a3041e1608bfaf39730925f2d4a Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 17:34:47 +0800 Subject: [PATCH 07/13] fix(config): reject unsafe auto-compaction thresholds --- crates/oxide-code/src/config.rs | 50 ++++++++++++++++++++++++---- docs/design/agent/auto-compaction.md | 2 ++ docs/guide/configuration.md | 38 +++++++++++++++------ 3 files changed, 74 insertions(+), 16 deletions(-) diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index e3642cf7..25dc90c4 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -19,6 +19,7 @@ const DEFAULT_MODEL: &str = "claude-opus-4-7[1m]"; const DEFAULT_BASE_URL: &str = "https://api.anthropic.com"; const AUTO_COMPACTION_OUTPUT_RESERVE_CAP: u32 = 20_000; const AUTO_COMPACTION_BUFFER_TOKENS: u32 = 13_000; +const MIN_AUTO_COMPACTION_THRESHOLD_TOKENS: u32 = 50_000; /// Mirrors the fallback `loader::resolve_theme` applies when no `[tui.theme] base` is set. 
pub(crate) const DEFAULT_THEME: &str = "mocha"; @@ -435,15 +436,18 @@ fn resolve_auto_threshold( (Some(_), Some(_)) => { bail!("set only one of auto_threshold_tokens or auto_threshold_percent for compaction") } - (Some(tokens), None) => validate_positive_tokens(tokens).map(Some), + (Some(tokens), None) => validate_auto_threshold_tokens(tokens).map(Some), (None, Some(percent)) => threshold_from_percent(percent, model, max_tokens), (None, None) => Ok(default_auto_threshold(model, max_tokens)), } } -fn validate_positive_tokens(tokens: u32) -> Result { - if tokens == 0 { - bail!("auto compaction threshold must be greater than zero"); +fn validate_auto_threshold_tokens(tokens: u32) -> Result { + if tokens < MIN_AUTO_COMPACTION_THRESHOLD_TOKENS { + bail!( + "auto compaction threshold must be at least \ + {MIN_AUTO_COMPACTION_THRESHOLD_TOKENS} tokens" + ); } Ok(tokens) } @@ -456,7 +460,9 @@ fn threshold_from_percent(percent: u8, model: &str, max_tokens: u32) -> Result Option { @@ -920,7 +926,39 @@ mod tests { .await .expect_err("zero threshold must fail config load"); let msg = format!("{err:#}"); - assert!(msg.contains("greater than zero"), "{msg}"); + assert!(msg.contains("at least 50000 tokens"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_rejects_too_low_auto_threshold_tokens() { + let dir = tempfile::tempdir().unwrap(); + write_user_config( + dir.path(), + indoc::indoc! 
{r" + [client.compaction] + auto_threshold_tokens = 49999 + "}, + ); + let err = temp_env::async_with_vars(env_vars(vec![xdg(&dir)]), Config::load()) + .await + .expect_err("low threshold must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("at least 50000 tokens"), "{msg}"); + } + + #[tokio::test] + async fn load_compaction_rejects_too_low_auto_threshold_percent() { + let dir = tempfile::tempdir().unwrap(); + let vars = env_vars(vec![ + xdg(&dir), + env("ANTHROPIC_MODEL", "claude-opus-4-7[1m]"), + env("OX_COMPACTION_AUTO_THRESHOLD_PERCENT", "4"), + ]); + let err = temp_env::async_with_vars(vars, Config::load()) + .await + .expect_err("resolved low threshold must fail config load"); + let msg = format!("{err:#}"); + assert!(msg.contains("at least 50000 tokens"), "{msg}"); } #[tokio::test] diff --git a/docs/design/agent/auto-compaction.md b/docs/design/agent/auto-compaction.md index 580c2e04..61eabc27 100644 --- a/docs/design/agent/auto-compaction.md +++ b/docs/design/agent/auto-compaction.md @@ -62,6 +62,8 @@ Environment: Manual `/compact` remains available. The config controls only whether automatic compaction triggers and where that trigger fires. Token and percent thresholds are mutually exclusive so the resolved trigger stays obvious. +Explicit thresholds must resolve to at least `50_000` tokens. Lower values create frequent summarization loops, extra latency, and avoidable summary loss long before context pressure exists. + ## Trigger Flow `agent_turn` owns the automatic trigger because it has the live transcript, token usage, session handle, file tracker, sink, and user-action receiver. 
diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index 9a32e890..b732d3f3 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -21,6 +21,9 @@ effort = "high" max_tokens = 32000 prompt_cache_ttl = "1h" +[client.compaction] +auto_threshold_tokens = 400000 + [tui] show_thinking = true ``` @@ -69,6 +72,18 @@ Use `base_url` only in `~/.config/ox/config.toml` or `ANTHROPIC_BASE_URL`. Proje Accepted values: `"5m"` (matches the server default as of 2026-03-06) and `"1h"` (higher write premium, bigger hit-rate win on long sessions). oxide-code defaults to `"1h"` because Anthropic's silent 2026-03 TTL drop cut typical prompt-caching savings from 80 %+ to 40-55 %. See [Agentic Request Body Fields](../research/api/anthropic.md#agentic-request-body-fields) for the wire shape and cost analysis. +### `[client.compaction]`: context compression + +Auto-compaction is enabled by default for known model context windows. The default trigger leaves room for the next response and a safety buffer. Set one threshold override when you want compaction to happen earlier: + +| Key | Type | Default | Description | +| ------------------------ | ------- | ------------------- | ------------------------------------------ | +| `auto_enabled` | boolean | `true` | Enable automatic context compaction | +| `auto_threshold_tokens` | integer | model-derived | Absolute trigger, minimum `50000` tokens | +| `auto_threshold_percent` | integer | model-derived | Percent of context window, capped safely | + +`auto_threshold_tokens` and `auto_threshold_percent` are mutually exclusive. Percent thresholds resolve to tokens before validation, so the effective trigger must still be at least `50000` tokens. + #### 1M Context Window: `[1m]` Tag Append `[1m]` to `model` to opt into the 1M-token context window on models that support it (any Sonnet 4.x, plus Opus 4.6 and newer): @@ -127,16 +142,19 @@ Prefer the environment variable (or OAuth) over `api_key` in a config file. 
`ox. Environment variables override all config file values. -| Variable | Config key | Default | Description | -| ---------------------- | ------------------------- | --------------------------- | ---------------------------- | -| `ANTHROPIC_API_KEY` | `client.api_key` | - | Anthropic API key | -| `ANTHROPIC_BASE_URL` | `client.base_url` | `https://api.anthropic.com` | API base URL | -| `ANTHROPIC_MODEL` | `client.model` | `claude-opus-4-7[1m]` | Model to use | -| `ANTHROPIC_EFFORT` | `client.effort` | per-model | Intelligence-vs-latency tier | -| `ANTHROPIC_MAX_TOKENS` | `client.max_tokens` | effort-derived | Max tokens per response | -| `OX_PROMPT_CACHE_TTL` | `client.prompt_cache_ttl` | `1h` | Prompt-cache TTL | -| `OX_SHOW_THINKING` | `tui.show_thinking` | `false` | Show extended thinking | -| `OX_SHOW_WELCOME` | `tui.show_welcome` | `true` | Paint the welcome splash | +| Variable | Config key | Default | Description | +| --------------------------------------- | ------------------------------------------ | --------------------------- | ---------------------------- | +| `ANTHROPIC_API_KEY` | `client.api_key` | - | Anthropic API key | +| `ANTHROPIC_BASE_URL` | `client.base_url` | `https://api.anthropic.com` | API base URL | +| `ANTHROPIC_MODEL` | `client.model` | `claude-opus-4-7[1m]` | Model to use | +| `ANTHROPIC_EFFORT` | `client.effort` | per-model | Intelligence-vs-latency tier | +| `ANTHROPIC_MAX_TOKENS` | `client.max_tokens` | effort-derived | Max tokens per response | +| `OX_PROMPT_CACHE_TTL` | `client.prompt_cache_ttl` | `1h` | Prompt-cache TTL | +| `OX_COMPACTION_AUTO_ENABLED` | `client.compaction.auto_enabled` | `true` | Enable auto-compaction | +| `OX_COMPACTION_AUTO_THRESHOLD_TOKENS` | `client.compaction.auto_threshold_tokens` | model-derived | Absolute compaction trigger | +| `OX_COMPACTION_AUTO_THRESHOLD_PERCENT` | `client.compaction.auto_threshold_percent` | model-derived | Percent compaction trigger | +| `OX_SHOW_THINKING` | `tui.show_thinking` 
| `false` | Show extended thinking | +| `OX_SHOW_WELCOME` | `tui.show_welcome` | `true` | Paint the welcome splash | Set `OX_SHOW_THINKING=1` to display the model's thinking process (dimmed text) when extended thinking is enabled for the model. From 23fe55b11f85e1abe33630554e01f8778d7c0b58 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 18:18:46 +0800 Subject: [PATCH 08/13] fix(compaction): harden automatic compact boundaries --- crates/oxide-code/src/agent.rs | 340 +++++++++++++----- .../oxide-code/src/agent/compact_boundary.rs | 176 +++++++++ crates/oxide-code/src/agent/compaction.rs | 77 +--- crates/oxide-code/src/agent/event.rs | 7 +- crates/oxide-code/src/client/anthropic.rs | 55 ++- crates/oxide-code/src/config.rs | 117 ++++-- crates/oxide-code/src/config/file.rs | 33 +- crates/oxide-code/src/main.rs | 124 ++++++- crates/oxide-code/src/slash.rs | 10 +- crates/oxide-code/src/slash/config.rs | 32 ++ crates/oxide-code/src/slash/status.rs | 32 ++ crates/oxide-code/src/tui/app.rs | 64 +++- .../oxide-code/src/tui/components/welcome.rs | 10 +- 13 files changed, 833 insertions(+), 244 deletions(-) create mode 100644 crates/oxide-code/src/agent/compact_boundary.rs diff --git a/crates/oxide-code/src/agent.rs b/crates/oxide-code/src/agent.rs index 1356535e..93a7843c 100644 --- a/crates/oxide-code/src/agent.rs +++ b/crates/oxide-code/src/agent.rs @@ -1,12 +1,12 @@ //! Agent turn loop. Streams the model response, dispatches tool calls, records to the session, //! and stops on text-only response or [`MAX_TOOL_ROUNDS`]. 
+pub(crate) mod compact_boundary; pub(crate) mod compaction; pub(crate) mod event; use std::collections::HashMap; use std::future::Future; -use std::pin::Pin; use anyhow::{Context, Result, anyhow, bail}; use tokio::sync::mpsc; @@ -55,12 +55,6 @@ pub(crate) trait AgentClient: Send + Sync { user_context: Option<&str>, tools: &[ToolDefinition], ) -> Result>>; - - fn compact_session<'a>( - &'a self, - transcript: &'a [Message], - instructions: Option<&'a str>, - ) -> Pin> + Send + 'a>>; } impl AgentClient for Client { @@ -73,14 +67,6 @@ impl AgentClient for Client { ) -> Result>> { Client::stream_message(self, messages, system_sections, user_context, tools) } - - fn compact_session<'a>( - &'a self, - transcript: &'a [Message], - instructions: Option<&'a str>, - ) -> Pin> + Send + 'a>> { - Box::pin(compaction::compact_session(self, transcript, instructions)) - } } // ── Agent Turn ── @@ -92,6 +78,14 @@ pub(crate) struct TokenUsage { } impl TokenUsage { + #[cfg(test)] + pub(crate) const fn new(input_tokens: u32, output_tokens: u32) -> Self { + Self { + input_tokens, + output_tokens, + } + } + pub(crate) const fn total_tokens(self) -> u32 { self.input_tokens.saturating_add(self.output_tokens) } @@ -142,7 +136,6 @@ pub(crate) async fn agent_turn( sink: &dyn AgentSink, session: &SessionHandle, user_rx: &mut mpsc::Receiver, - mut auto_compact: Option>, ) -> AbortResult { let tool_defs = tools.definitions(); let mut pending_prompts: Vec = Vec::new(); @@ -194,17 +187,6 @@ pub(crate) async fn agent_turn( commit_round_writes(session, sink, &assistant_msg, &tool_result_msg, sidecars).await; messages.push(assistant_msg); messages.push(tool_result_msg); - auto_compact_if_needed( - client, - session, - messages, - sink, - user_rx, - &mut pending_prompts, - auto_compact.as_mut(), - usage, - ) - .await?; record_drained_prompts(pending_prompts.drain(..), messages, session, sink).await; } @@ -240,23 +222,28 @@ pub(crate) async fn auto_compact_if_needed( return Ok(false); } - let summary 
= - match await_unless_aborted(client.compact_session(messages, None), user_rx, pending).await? - { - Ok(summary) => summary, - Err(e) => { - *auto.failures += 1; - warn!("auto-compaction failed: {e:#}"); - return Ok(false); - } - }; - let compacted = compaction::replace_session_with_summary( + let summary = match await_unless_aborted( + compaction::compact_session(client, messages, None), + user_rx, + pending, + ) + .await? + { + Ok(summary) => summary, + Err(e) => { + *auto.failures += 1; + warn!("auto-compaction failed: {e:#}"); + return Ok(false); + } + }; + let compacted = compact_boundary::replace_session_with_summary( session, auto.file_tracker, messages, sink, summary, None, + true, ) .await; if compacted { @@ -636,6 +623,7 @@ mod tests { use std::pin::Pin; use std::sync::Arc; use std::sync::Mutex as StdMutex; + use std::sync::atomic::{AtomicUsize, Ordering}; use serde_json::json; use tokio::sync::Notify; @@ -707,15 +695,63 @@ mod tests { _user_context: Option<&str>, _tools: &[ToolDefinition], ) -> Result>> { - unreachable!("auto-compaction tests do not stream turns") + Err(anyhow!("summarizer unavailable")) + } + } + + struct CountingFailingClient { + calls: AtomicUsize, + } + + impl CountingFailingClient { + fn new() -> Self { + Self { + calls: AtomicUsize::new(0), + } + } + + fn calls(&self) -> usize { + self.calls.load(Ordering::SeqCst) + } + } + + impl AgentClient for CountingFailingClient { + fn stream_message( + &self, + _messages: &[Message], + _system_sections: &[&str], + _user_context: Option<&str>, + _tools: &[ToolDefinition], + ) -> Result>> { + self.calls.fetch_add(1, Ordering::SeqCst); + Err(anyhow!("summarizer unavailable")) } + } + + struct DelayedSummaryClient { + started: Arc, + release: Arc, + } - fn compact_session<'a>( - &'a self, - _transcript: &'a [Message], - _instructions: Option<&'a str>, - ) -> Pin> + Send + 'a>> { - Box::pin(async { Err(anyhow!("summarizer unavailable")) }) + impl AgentClient for DelayedSummaryClient { + fn 
stream_message( + &self, + _messages: &[Message], + _system_sections: &[&str], + _user_context: Option<&str>, + _tools: &[ToolDefinition], + ) -> Result>> { + let (tx, rx) = mpsc::channel(8); + let started = self.started.clone(); + let release = self.release.clone(); + tokio::spawn(async move { + started.notify_one(); + release.notified().await; + for event in text_turn("auto summary") { + tx.send(Ok(event)).await.expect("test receiver alive"); + } + }); + Ok(rx) } } @@ -734,14 +770,6 @@ mod tests { } Ok(rx) } - - fn compact_session<'a>( - &'a self, - _transcript: &'a [Message], - _instructions: Option<&'a str>, - ) -> Pin> + Send + 'a>> { - Box::pin(async { Ok("auto summary".to_owned()) }) - } } fn text_turn(text: &str) -> Vec { @@ -969,7 +997,12 @@ mod tests { let client = FakeClient::new(Vec::new()); let sink = CapturingSink::new(); let tracker = FileTracker::default(); - let mut messages = vec![Message::user("hi"), Message::assistant("there")]; + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; let mut pending = Vec::new(); let mut failures = 0; @@ -1034,7 +1067,7 @@ mod tests { .await .unwrap(); assert!(!below_threshold); - assert_eq!(messages.len(), 2); + assert_eq!(messages.len(), 4); assert_eq!(failures, 0); } @@ -1044,7 +1077,12 @@ mod tests { let session = test_session(dir.path()); let sink = CapturingSink::new(); let tracker = FileTracker::default(); - let mut messages = vec![Message::user("hi"), Message::assistant("there")]; + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; let mut pending = Vec::new(); let mut failures = 0; @@ -1079,10 +1117,15 @@ mod tests { #[tokio::test] async fn auto_compact_if_needed_counts_persist_failure_without_replacing_messages() { let session = dead_test_session(); - let client = FakeClient::new(Vec::new()); + let client = 
FakeClient::new(vec![text_turn("auto summary")]); let sink = CapturingSink::new(); let tracker = FileTracker::default(); - let mut messages = vec![Message::user("hi"), Message::assistant("there")]; + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; let mut pending = Vec::new(); let mut failures = 0; @@ -1119,6 +1162,148 @@ mod tests { ); } + #[tokio::test] + async fn auto_compact_if_needed_stops_after_failure_limit() { + let dir = tempfile::tempdir().unwrap(); + let session = test_session(dir.path()); + let client = CountingFailingClient::new(); + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; + let mut pending = Vec::new(); + let mut failures = MAX_AUTO_COMPACT_FAILURES - 1; + let usage = Some(TokenUsage { + input_tokens: 50_000, + output_tokens: 1, + }); + + let first = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(50_000), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + usage, + ) + .await + .unwrap(); + let second = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut inert_user_rx(), + &mut pending, + Some(&mut AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(50_000), + }, + failures: &mut failures, + file_tracker: &tracker, + }), + usage, + ) + .await + .unwrap(); + + assert!(!first); + assert!(!second); + assert_eq!(failures, MAX_AUTO_COMPACT_FAILURES); + assert_eq!(client.calls(), 1); + } + + #[tokio::test] + async fn auto_compact_if_needed_queues_submit_while_summarizing() { + let dir = tempfile::tempdir().unwrap(); + let session = 
test_session(dir.path()); + let started = Arc::new(Notify::new()); + let release = Arc::new(Notify::new()); + let client = DelayedSummaryClient { + started: started.clone(), + release: release.clone(), + }; + let sink = CapturingSink::new(); + let tracker = FileTracker::default(); + let mut messages = vec![ + Message::user("hi"), + Message::assistant("there"), + Message::user("next"), + Message::assistant("done"), + ]; + let mut pending = Vec::new(); + let mut failures = 0; + let (tx, mut rx) = mpsc::channel::(1); + let mut auto = AutoCompact { + config: AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(10), + }, + failures: &mut failures, + file_tracker: &tracker, + }; + + let compact = auto_compact_if_needed( + &client, + &session, + &mut messages, + &sink, + &mut rx, + &mut pending, + Some(&mut auto), + Some(TokenUsage { + input_tokens: 20, + output_tokens: 1, + }), + ); + let queue_prompt = async { + started.notified().await; + tx.send(UserAction::SubmitPrompt("queued after summary".into())) + .await + .unwrap(); + tokio::task::yield_now().await; + release.notify_one(); + }; + let (compacted, ()) = tokio::join!(compact, queue_prompt); + let compacted = compacted.unwrap(); + + assert!(compacted); + assert_eq!(pending, vec!["queued after summary"]); + assert_eq!(*auto.failures, 0); + assert_eq!( + sink.events() + .iter() + .filter(|event| matches!(event, AgentEvent::PromptDrained(_))) + .count(), + 0 + ); + assert!(sink.events().iter().any(|event| matches!( + event, + AgentEvent::SessionCompacted { + automatic: true, + .. + } + ))); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("auto summary")) + ); + } + #[tokio::test] async fn agent_turn_dead_session_surfaces_write_failure_on_first_call() { // Write errors must not abort the turn; one Error event surfaces and the turn returns Ok. 
@@ -1136,7 +1321,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); @@ -1174,7 +1358,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); @@ -1208,7 +1391,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); @@ -1247,7 +1429,6 @@ mod tests { &sink, &session, &mut user_rx, - None, ) .await .unwrap(); @@ -1284,7 +1465,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); @@ -1312,7 +1492,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); @@ -1352,7 +1531,7 @@ mod tests { } #[tokio::test] - async fn agent_turn_auto_compacts_after_tool_round_crosses_threshold() { + async fn agent_turn_does_not_auto_compact_between_tool_result_and_follow_up() { let dir = tempfile::tempdir().unwrap(); let session = test_session(dir.path()); let client = FakeClient::new(vec![ @@ -1361,8 +1540,6 @@ mod tests { ]); let tools = ToolRegistry::new(vec![Box::new(EchoTool)]); let sink = CapturingSink::new(); - let tracker = FileTracker::default(); - let mut failures = 0; let mut messages = vec![ Message::user("run echo"), Message::assistant("earlier"), @@ -1377,31 +1554,19 @@ mod tests { &sink, &session, &mut inert_user_rx(), - Some(AutoCompact { - config: AutoCompactionConfig { - enabled: true, - threshold_tokens: Some(10), - }, - failures: &mut failures, - file_tracker: &tracker, - }), ) .await .unwrap(); assert_eq!(report.usage.map(TokenUsage::total_tokens), Some(3)); - assert_eq!(failures, 0); assert_eq!( sink.events() .iter() .filter(|event| matches!(event, AgentEvent::SessionCompacted { .. 
})) .count(), - 1 - ); - assert!( - matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("auto summary")) + 0 ); - assert!(matches!(&messages[1].content[0], ContentBlock::Text { text } if text == "Done")); + assert!(matches!(&messages[5].content[0], ContentBlock::Text { text } if text == "Done")); } #[tokio::test] @@ -1431,7 +1596,6 @@ mod tests { &sink, &session, &mut rx, - None, ) .await .expect("turn must complete"); @@ -1493,7 +1657,6 @@ mod tests { &sink, &session, &mut rx, - None, ) .await .expect("turn must complete"); @@ -1543,7 +1706,6 @@ mod tests { &sink, &session, &mut rx, - None, ) .await .expect_err("cancel must surface as Err(Cancelled)"); @@ -1573,7 +1735,6 @@ mod tests { &sink, &session, &mut rx, - None, ) .await .expect_err("quit must surface as Err(Quit)"); @@ -1602,7 +1763,6 @@ mod tests { &sink, &session, &mut rx, - None, ) .await .expect_err("dead channel must surface as Err(Quit)"); @@ -1643,7 +1803,6 @@ mod tests { &sink, &session, &mut rx, - None, ) .await .unwrap_or_else(|_| panic!("turn must complete despite {action:?}")); @@ -1682,7 +1841,6 @@ mod tests { &sink, &session, &mut rx, - None, ), async { started.notified().await; @@ -1733,7 +1891,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); @@ -1773,7 +1930,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); @@ -1827,7 +1983,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .expect_err("cap must trip"); @@ -1861,7 +2016,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .expect_err("api error must propagate"); @@ -1905,7 +2059,6 @@ mod tests { &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); @@ -1965,7 +2118,6 @@ data: {"type":"message_stop"} &sink, &session, &mut inert_user_rx(), - None, ) .await .unwrap(); diff --git a/crates/oxide-code/src/agent/compact_boundary.rs b/crates/oxide-code/src/agent/compact_boundary.rs new file mode 
100644 index 00000000..bbd6a452 --- /dev/null +++ b/crates/oxide-code/src/agent/compact_boundary.rs @@ -0,0 +1,176 @@ +//! Compact-boundary persistence and live transcript replacement. + +use crate::agent::compaction::synthesize_post_compact_message; +use crate::agent::event::{AgentEvent, AgentSink}; +use crate::file_tracker::FileTracker; +use crate::message::Message; +use crate::session::handle::SessionHandle; + +/// Persists a compact boundary and swaps the live transcript to the synthetic summary root. +pub(crate) async fn replace_session_with_summary( + session: &SessionHandle, + file_tracker: &FileTracker, + messages: &mut Vec, + sink: &dyn AgentSink, + summary: String, + instructions: Option, + automatic: bool, +) -> bool { + let synthetic = synthesize_post_compact_message(&summary); + let outcome = session + .compact(summary.clone(), instructions.clone(), synthetic.clone()) + .await; + sink.session_write_error(outcome.failure.as_deref()); + if outcome.failure.is_some() { + return false; + } + + file_tracker.clear(); + *messages = vec![synthetic]; + if let Err(e) = sink.send(AgentEvent::SessionCompacted { + summary, + pre_count: outcome.pre_count, + instructions, + automatic, + }) { + tracing::error!("session-compacted event dropped: {e}"); + } + true +} + +#[cfg(test)] +mod tests { + use std::time::SystemTime; + + use anyhow::anyhow; + + use super::*; + use crate::agent::event::{AgentSink, CapturingSink}; + use crate::file_tracker::LastView; + use crate::message::{ContentBlock, Message}; + use crate::session::handle; + use crate::session::store::test_store; + + struct FailingSink; + + impl AgentSink for FailingSink { + fn send(&self, _event: AgentEvent) -> anyhow::Result<()> { + Err(anyhow!("sink closed")) + } + } + + fn fake_transcript() -> Vec { + vec![ + Message::user("fix the bug"), + Message::assistant("looking now"), + Message::user("any progress?"), + Message::assistant("found it"), + ] + } + + #[tokio::test] + async fn 
replace_session_with_summary_clears_tracker_and_replaces_messages() { + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = handle::start(&store, "claude-sonnet-4-6"); + let tracker = FileTracker::default(); + tracker.record_read( + std::path::Path::new("/tmp/example.rs"), + b"old", + SystemTime::UNIX_EPOCH, + 3, + LastView::Full, + ); + let mut messages = fake_transcript(); + let sink = CapturingSink::new(); + + let compacted = replace_session_with_summary( + &session, + &tracker, + &mut messages, + &sink, + "fixed login bug".to_owned(), + None, + true, + ) + .await; + + assert!(compacted); + assert!(tracker.snapshot_all().is_empty()); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("fixed login bug")) + ); + } + + #[tokio::test] + async fn replace_session_with_summary_preserves_state_when_persist_fails() { + let session = handle::testing::dead("dead-compact-session"); + let tracker = FileTracker::default(); + let path = std::path::PathBuf::from("/tmp/example.rs"); + tracker.record_read(&path, b"old", SystemTime::UNIX_EPOCH, 3, LastView::Full); + let original = fake_transcript(); + let mut messages = original.clone(); + let sink = CapturingSink::new(); + + let compacted = replace_session_with_summary( + &session, + &tracker, + &mut messages, + &sink, + "fixed login bug".to_owned(), + None, + true, + ) + .await; + + assert!(!compacted); + assert_eq!(messages.len(), original.len()); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text == "fix the bug") + ); + assert!( + matches!(&messages[3].content[0], ContentBlock::Text { text } if text == "found it") + ); + assert_eq!(tracker.snapshot_all().len(), 1); + assert!( + sink.events() + .iter() + .any(|event| matches!(event, AgentEvent::Error(message) if message.contains("Session write failed"))) + ); + } + + #[tokio::test] + async fn 
replace_session_with_summary_still_replaces_messages_when_event_send_fails() { + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = handle::start(&store, "claude-sonnet-4-6"); + let tracker = FileTracker::default(); + tracker.record_read( + std::path::Path::new("/tmp/example.rs"), + b"old", + SystemTime::UNIX_EPOCH, + 3, + LastView::Full, + ); + let mut messages = fake_transcript(); + + let compacted = replace_session_with_summary( + &session, + &tracker, + &mut messages, + &FailingSink, + "fixed login bug".to_owned(), + None, + true, + ) + .await; + + assert!(compacted); + assert!(tracker.snapshot_all().is_empty()); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("fixed login bug")) + ); + } +} diff --git a/crates/oxide-code/src/agent/compaction.rs b/crates/oxide-code/src/agent/compaction.rs index b42ff02e..337e1eec 100644 --- a/crates/oxide-code/src/agent/compaction.rs +++ b/crates/oxide-code/src/agent/compaction.rs @@ -6,12 +6,9 @@ use anyhow::{Result, bail}; use indoc::{formatdoc, indoc}; -use crate::agent::event::{AgentEvent, AgentSink}; -use crate::client::anthropic::Client; +use crate::agent::AgentClient; use crate::client::anthropic::wire::{ContentBlockInfo, Delta, StreamEvent}; -use crate::file_tracker::FileTracker; use crate::message::{ContentBlock, Message, Role}; -use crate::session::handle::SessionHandle; /// Minimum messages required for compaction to be worthwhile. Below this, the summary is /// usually longer than the transcript itself. @@ -54,7 +51,7 @@ const SUMMARY_PREFIX: &str = indoc! {r" /// Errors when the transcript is too short to compact, when the API errors mid-stream, or when /// the model returns an empty / whitespace-only response. 
pub(crate) async fn compact_session( - client: &Client, + client: &dyn AgentClient, transcript: &[Message], instructions: Option<&str>, ) -> Result { @@ -147,36 +144,6 @@ pub(crate) fn synthesize_post_compact_message(summary: &str) -> Message { ", prefix = SUMMARY_PREFIX.trim(), summary = summary.trim()}) } -/// Persists a compact boundary and swaps the live transcript to the synthetic summary root. -pub(crate) async fn replace_session_with_summary( - session: &SessionHandle, - file_tracker: &FileTracker, - messages: &mut Vec, - sink: &dyn AgentSink, - summary: String, - instructions: Option, -) -> bool { - let synthetic = synthesize_post_compact_message(&summary); - let outcome = session - .compact(summary.clone(), instructions.clone(), synthetic.clone()) - .await; - sink.session_write_error(outcome.failure.as_deref()); - if outcome.failure.is_some() { - return false; - } - - file_tracker.clear(); - *messages = vec![synthetic]; - if let Err(e) = sink.send(AgentEvent::SessionCompacted { - summary, - pre_count: outcome.pre_count, - instructions, - }) { - tracing::error!("session-compacted event dropped: {e}"); - } - true -} - /// Removes the synthetic summary prefix from a resumed post-compact root message. 
pub(crate) fn strip_synthetic_post_compact_prefix(message: &mut Message) -> bool { if message.role != Role::User { @@ -195,24 +162,13 @@ pub(crate) fn strip_synthetic_post_compact_prefix(message: &mut Message) -> bool #[cfg(test)] mod tests { - use anyhow::anyhow; use serde_json::json; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, Request, ResponseTemplate}; use super::*; - use crate::agent::AgentClient; use crate::client::anthropic::testing::{Captured, api_key, captured, test_client}; use crate::message::Role; - use crate::session::store::test_store; - - struct FailingSink; - - impl AgentSink for FailingSink { - fn send(&self, _event: AgentEvent) -> anyhow::Result<()> { - Err(anyhow!("sink closed")) - } - } // ── compact_session ── @@ -302,39 +258,12 @@ mod tests { .await; let client = test_client(server.uri(), api_key(), "claude-haiku-4-5"); - let summary = AgentClient::compact_session(&client, &fake_transcript(), None) + let summary = compact_session(&client, &fake_transcript(), None) .await .unwrap(); assert_eq!(summary, "fixed login bug"); } - #[tokio::test] - async fn replace_session_with_summary_still_replaces_messages_when_event_send_fails() { - let dir = tempfile::tempdir().unwrap(); - let store = test_store(dir.path()); - let session = crate::session::handle::start(&store, "claude-sonnet-4-6"); - let outcome = session.record_message(Message::user("fix the bug")).await; - assert!(outcome.failure.is_none(), "{:?}", outcome.failure); - let tracker = FileTracker::default(); - let mut messages = fake_transcript(); - - let compacted = replace_session_with_summary( - &session, - &tracker, - &mut messages, - &FailingSink, - "fixed login bug".to_owned(), - None, - ) - .await; - - assert!(compacted); - assert_eq!(messages.len(), 1); - assert!( - matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("fixed login bug")) - ); - } - #[tokio::test] async fn 
compact_session_collects_initial_text_from_content_block_start() { let server = MockServer::start().await; diff --git a/crates/oxide-code/src/agent/event.rs b/crates/oxide-code/src/agent/event.rs index 1f0c32f4..d48b9664 100644 --- a/crates/oxide-code/src/agent/event.rs +++ b/crates/oxide-code/src/agent/event.rs @@ -5,7 +5,7 @@ use std::sync::Arc; use anyhow::Result; use tokio::sync::mpsc; -use crate::config::Effort; +use crate::config::{CompactionConfig, Effort}; use crate::model::ResolvedModelId; use crate::tool::ToolRegistry; @@ -63,16 +63,20 @@ pub(crate) enum AgentEvent { /// `/compact` finished — summary captures the prior transcript, `pre_count` is for the /// post-compact UI line. The agent loop has already swapped the in-memory transcript to /// the synthetic continuation; the UI clears its chat and replays a single boundary block. + /// Automatic compaction can happen before a submitted prompt starts, so the TUI keeps the + /// busy state until the following turn completes. SessionCompacted { summary: String, pre_count: u32, instructions: Option, + automatic: bool, }, /// Live config after a [`UserAction::SwapConfig`]. `effort` is the resolved value (post-clamp); /// `requested_effort` is the user's explicit pick, used to surface `(clamped from X)`. ConfigChanged { model_id: String, effort: Option, + compaction: CompactionConfig, requested_effort: Option, }, /// User-visible error from the agent loop, session writer, or tool dispatch. 
Renders as a @@ -373,6 +377,7 @@ mod tests { AgentEvent::ConfigChanged { model_id: "claude-opus-4-7".to_owned(), effort: Some(Effort::Xhigh), + compaction: CompactionConfig::disabled(), requested_effort: Some(Effort::Xhigh), }, ] { diff --git a/crates/oxide-code/src/client/anthropic.rs b/crates/oxide-code/src/client/anthropic.rs index 71313cdf..06d68da7 100644 --- a/crates/oxide-code/src/client/anthropic.rs +++ b/crates/oxide-code/src/client/anthropic.rs @@ -165,13 +165,18 @@ impl Client { self.session_id = id; } - /// Swaps the active model and re-clamps `config.effort` against the new caps. - pub(crate) fn set_model(&mut self, model: String) -> Option { + /// Swaps the active model and re-clamps model-derived config against the new caps. + pub(crate) fn set_model(&mut self, model: String) -> Result> { let caps = crate::model::capabilities_for(&model); let effort = caps.resolve_effort(self.config.effort); + let compaction = self + .config + .compaction + .for_model(&model, self.config.max_tokens)?; self.config.effort = effort; + self.config.compaction = compaction; self.config.model = model; - effort + Ok(effort) } /// Swaps the active effort, clamped against the current model's caps. 
@@ -481,12 +486,10 @@ mod tests { #[test] fn new_exposes_compaction_config() { let mut config = test_config(OFFLINE_URL, Auth::ApiKey("sk-test".to_owned()), TEST_MODEL); - config.compaction = CompactionConfig { - auto: AutoCompactionConfig { - enabled: true, - threshold_tokens: Some(123_456), - }, - }; + config.compaction = CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(123_456), + }); let client = Client::new(config, None).unwrap(); assert_eq!(client.compaction().auto.threshold_tokens, Some(123_456)); @@ -632,7 +635,7 @@ mod tests { ), ] { let mut client = client_with(from_model, from_effort); - let returned = client.set_model(swap_to.to_owned()); + let returned = client.set_model(swap_to.to_owned()).unwrap(); assert_eq!(returned, expect, "{from_model} → {swap_to}: returned"); assert_eq!( client.config.effort, expect, @@ -647,8 +650,38 @@ mod tests { // `[1m]` is a client-side opt-in; the swap must store it verbatim so `compute_betas` keeps // sending the 1M context beta. Regressing this drops 1M context silently. 
let mut client = client_with("claude-opus-4-6", Some(Effort::Max)); - client.set_model("claude-opus-4-7[1m]".to_owned()); + client.set_model("claude-opus-4-7[1m]".to_owned()).unwrap(); + assert_eq!(client.model(), "claude-opus-4-7[1m]"); + } + + #[test] + fn set_model_recomputes_compaction_threshold_from_new_context_window() { + let mut config = test_config(OFFLINE_URL, api_key(), "claude-opus-4-7[1m]"); + config.max_tokens = 64_000; + config.compaction = CompactionConfig::default_for_test(&config.model, config.max_tokens); + let mut client = Client::new(config, Some("sid".to_owned())).unwrap(); + + let effort = client.set_model("claude-sonnet-4-6".to_owned()).unwrap(); + + assert_eq!(effort, Some(Effort::High)); + assert_eq!(client.model(), "claude-sonnet-4-6"); + assert_eq!(client.compaction().auto.threshold_tokens, Some(167_000)); + } + + #[test] + fn set_model_rejects_threshold_above_new_context_window() { + let mut config = test_config(OFFLINE_URL, api_key(), "claude-opus-4-7[1m]"); + config.compaction = CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(967_000), + }); + let mut client = Client::new(config, Some("sid".to_owned())).unwrap(); + + client + .set_model("claude-sonnet-4-6".to_owned()) + .expect_err("threshold above the smaller window must reject the swap"); assert_eq!(client.model(), "claude-opus-4-7[1m]"); + assert_eq!(client.compaction().auto.threshold_tokens, Some(967_000)); } // ── Client::set_effort ── diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index 25dc90c4..0a8a19f7 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -194,6 +194,7 @@ impl FromStr for PromptCacheTtl { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub(crate) struct CompactionConfig { pub(crate) auto: AutoCompactionConfig, + policy: AutoCompactionPolicy, } impl CompactionConfig { @@ -201,8 +202,31 @@ impl CompactionConfig { pub(crate) const fn disabled() -> 
Self { Self { auto: AutoCompactionConfig::disabled(), + policy: AutoCompactionPolicy::Disabled, } } + + pub(crate) fn for_model(self, model: &str, max_tokens: u32) -> Result { + resolve_compaction_policy(self.policy, model, max_tokens) + } + + #[cfg(test)] + pub(crate) const fn resolved_for_test(auto: AutoCompactionConfig) -> Self { + let policy = if auto.enabled { + match auto.threshold_tokens { + Some(tokens) => AutoCompactionPolicy::Tokens(tokens), + None => AutoCompactionPolicy::Default, + } + } else { + AutoCompactionPolicy::Disabled + }; + Self { auto, policy } + } + + #[cfg(test)] + pub(crate) fn default_for_test(model: &str, max_tokens: u32) -> Self { + resolve_compaction_policy(AutoCompactionPolicy::Default, model, max_tokens).unwrap() + } } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -227,6 +251,14 @@ impl AutoCompactionConfig { } } +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum AutoCompactionPolicy { + Disabled, + Default, + Tokens(u32), + Percent(u8), +} + // ── Config ── /// Resolved configuration. @@ -395,32 +427,44 @@ fn resolve_compaction( let auto_requested = env::bool("OX_COMPACTION_AUTO_ENABLED") .or_else(|| file.as_ref().and_then(|c| c.enabled)) .unwrap_or(true); - let auto = if auto_requested { - resolve_auto_compaction(file.as_ref(), model, max_tokens)? + + let policy = if auto_requested { + resolve_auto_policy(file.as_ref())? 
} else { - AutoCompactionConfig::disabled() + AutoCompactionPolicy::Disabled }; + resolve_compaction_policy(policy, model, max_tokens) +} - Ok(CompactionConfig { auto }) +fn resolve_compaction_policy( + policy: AutoCompactionPolicy, + model: &str, + max_tokens: u32, +) -> Result { + let auto = resolve_auto_compaction(policy, model, max_tokens)?; + Ok(CompactionConfig { auto, policy }) } fn resolve_auto_compaction( - file: Option<&file::CompactionConfig>, + policy: AutoCompactionPolicy, model: &str, max_tokens: u32, ) -> Result { - let threshold = resolve_auto_threshold(file, model, max_tokens)?; + let threshold = match policy { + AutoCompactionPolicy::Disabled => return Ok(AutoCompactionConfig::disabled()), + AutoCompactionPolicy::Default => default_auto_threshold(model, max_tokens), + AutoCompactionPolicy::Tokens(tokens) => threshold_from_tokens(tokens, model, max_tokens)?, + AutoCompactionPolicy::Percent(percent) => { + threshold_from_percent(percent, model, max_tokens)? + } + }; Ok(AutoCompactionConfig { enabled: threshold.is_some(), threshold_tokens: threshold, }) } -fn resolve_auto_threshold( - file: Option<&file::CompactionConfig>, - model: &str, - max_tokens: u32, -) -> Result> { +fn resolve_auto_policy(file: Option<&file::CompactionConfig>) -> Result { let env_tokens = env_u32("OX_COMPACTION_AUTO_THRESHOLD_TOKENS")?; let env_percent = env_u8("OX_COMPACTION_AUTO_THRESHOLD_PERCENT")?; let env_threshold_set = env_tokens.is_some() || env_percent.is_some(); @@ -436,13 +480,23 @@ fn resolve_auto_threshold( (Some(_), Some(_)) => { bail!("set only one of auto_threshold_tokens or auto_threshold_percent for compaction") } - (Some(tokens), None) => validate_auto_threshold_tokens(tokens).map(Some), - (None, Some(percent)) => threshold_from_percent(percent, model, max_tokens), - (None, None) => Ok(default_auto_threshold(model, max_tokens)), + (Some(tokens), None) => Ok(AutoCompactionPolicy::Tokens(tokens)), + (None, Some(percent)) => 
Ok(AutoCompactionPolicy::Percent(percent)), + (None, None) => Ok(AutoCompactionPolicy::Default), } } -fn validate_auto_threshold_tokens(tokens: u32) -> Result { +fn threshold_from_tokens(tokens: u32, model: &str, max_tokens: u32) -> Result> { + validate_auto_threshold_floor(tokens)?; + if let Some(max) = default_auto_threshold(model, max_tokens) + && tokens > max + { + bail!("auto compaction threshold must be at most {max} tokens for model {model:?}"); + } + Ok(Some(tokens)) +} + +fn validate_auto_threshold_floor(tokens: u32) -> Result { if tokens < MIN_AUTO_COMPACTION_THRESHOLD_TOKENS { bail!( "auto compaction threshold must be at least \ @@ -461,7 +515,7 @@ fn threshold_from_percent(percent: u8, model: &str, max_tokens: u32) -> Result Self { + let other_sets_threshold = + other.threshold_tokens.is_some() || other.threshold_percent.is_some(); + let (threshold_tokens, threshold_percent) = if other_sets_threshold { + (other.threshold_tokens, other.threshold_percent) + } else { + (self.threshold_tokens, self.threshold_percent) + }; + Self { enabled: other.enabled.or(self.enabled), - threshold_tokens: other.threshold_tokens.or(self.threshold_tokens), - threshold_percent: other.threshold_percent.or(self.threshold_percent), + threshold_tokens, + threshold_percent, } } } @@ -294,13 +302,32 @@ mod tests { ); let compaction = client.compaction.expect("compaction section should merge"); assert_eq!(compaction.enabled, Some(true)); - assert_eq!(compaction.threshold_tokens, Some(400_000)); + assert_eq!(compaction.threshold_tokens, None); assert_eq!(compaction.threshold_percent, Some(40)); let tui = merged.tui.expect("tui section should be present"); assert_eq!(tui.show_thinking, Some(true)); } + #[test] + fn merge_compaction_enabled_does_not_clear_base_threshold() { + let base = CompactionConfig { + enabled: Some(false), + threshold_tokens: Some(400_000), + threshold_percent: None, + }; + let other = CompactionConfig { + enabled: Some(true), + threshold_tokens: None, + 
threshold_percent: None, + }; + let merged = base.merge(other); + + assert_eq!(merged.enabled, Some(true)); + assert_eq!(merged.threshold_tokens, Some(400_000)); + assert_eq!(merged.threshold_percent, None); + } + #[test] fn merge_falls_back_to_base_when_other_is_none() { let base = FileConfig { diff --git a/crates/oxide-code/src/main.rs b/crates/oxide-code/src/main.rs index 508ad316..944741c7 100644 --- a/crates/oxide-code/src/main.rs +++ b/crates/oxide-code/src/main.rs @@ -522,7 +522,6 @@ impl AgentLoopTask { } let prompt = prompt::build_prompt(self.client.model()).await; - let auto_config = self.client.compaction().auto; let outcome = agent_turn( &self.client, &self.tools, @@ -531,11 +530,6 @@ impl AgentLoopTask { &self.sink, &self.session, &mut self.user_rx, - Some(AutoCompact { - config: auto_config, - failures: &mut self.auto_compaction_failures, - file_tracker: &self.file_tracker, - }), ) .await; match outcome { @@ -682,13 +676,14 @@ async fn apply_compact( ) .await? .map_err(|e| TurnAbort::Failed(anyhow!("Compaction failed: {e:#}")))?; - Ok(agent::compaction::replace_session_with_summary( + Ok(agent::compact_boundary::replace_session_with_summary( session, file_tracker, messages, sink, summary, instructions, + false, ) .await) } @@ -715,7 +710,10 @@ fn apply_swap_config( effort: Option, ) { if let Some(id) = model { - client.set_model(id.into_inner()); + if let Err(e) = client.set_model(id.into_inner()) { + _ = sink.send(AgentEvent::Error(format!("Config change failed: {e:#}"))); + return; + } } let resolved = match effort { Some(pick) => client.set_effort(pick), @@ -724,6 +722,7 @@ fn apply_swap_config( if let Err(e) = sink.send(AgentEvent::ConfigChanged { model_id: client.model().to_owned(), effort: resolved, + compaction: client.compaction(), requested_effort: effort, }) { // Dropping this leaves the status bar showing the previous model / effort even though the @@ -815,11 +814,6 @@ async fn bare_repl( &sink, &session, &mut user_rx, - Some(AutoCompact { 
- config: client.compaction().auto, - failures: &mut auto_compaction_failures, - file_tracker: &file_tracker, - }), ); let turn_result = tokio::select! { r = turn => r, @@ -869,7 +863,6 @@ async fn headless( let prompt = prompt::build_prompt(model).await; let mut shutdown_fired = false; let (_user_tx, mut user_rx) = inert_user_action_channel(); - let mut auto_compaction_failures = 0_u8; let turn = agent_turn( client, &tools, @@ -878,11 +871,6 @@ async fn headless( &sink, &session, &mut user_rx, - Some(AutoCompact { - config: client.compaction().auto, - failures: &mut auto_compaction_failures, - file_tracker: &file_tracker, - }), ); let result: Result<()> = tokio::select! { r = turn => match r { @@ -905,3 +893,101 @@ async fn headless( println!(); Ok(()) } + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use wiremock::matchers::{method, path}; + use wiremock::{Mock, MockServer, ResponseTemplate}; + + use super::*; + use crate::agent::event::CapturingSink; + use crate::client::anthropic::testing::{api_key, test_config}; + use crate::config::{AutoCompactionConfig, CompactionConfig}; + use crate::message::ContentBlock; + use crate::session::store::test_store; + + fn streamed_summary_body(text: &str) -> String { + let start = serde_json::json!({ + "type": "message_start", + "message": {"id": "m", "model": "claude-haiku-4-5"}, + }); + let block = serde_json::json!({ + "type": "content_block_start", + "index": 0, + "content_block": {"type": "text", "text": text}, + }); + format!( + "event: message_start\ndata: {start}\n\n\ + event: content_block_start\ndata: {block}\n\n\ + event: message_stop\ndata: {{\"type\":\"message_stop\"}}\n\n", + ) + } + + #[tokio::test] + async fn auto_compact_before_prompt_compacts_previous_turn_before_recording_new_prompt() { + let server = MockServer::start().await; + Mock::given(method("POST")) + .and(path("/v1/messages")) + .respond_with( + ResponseTemplate::new(200) + .set_body_string(streamed_summary_body("auto summary")) + 
.insert_header("content-type", "text/event-stream"), + ) + .mount(&server) + .await; + + let mut config = test_config(server.uri(), api_key(), "claude-opus-4-7[1m]"); + config.compaction = CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(50_000), + }); + let client = Client::new(config, Some("sid".to_owned())).unwrap(); + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = session::handle::start(&store, "claude-opus-4-7[1m]"); + let file_tracker = Arc::new(FileTracker::default()); + let sink = CapturingSink::new(); + let (_tx, mut user_rx) = agent::event::inert_user_action_channel(); + let mut pending = Vec::new(); + let mut failures = 0; + let mut messages = vec![ + Message::user("one"), + Message::assistant("two"), + Message::user("three"), + Message::assistant("four"), + ]; + + let compacted = auto_compact_before_prompt( + &client, + &session, + &file_tracker, + &mut messages, + &sink, + &mut user_rx, + &mut pending, + &mut failures, + Some(TokenUsage::new(50_000, 1)), + ) + .await + .unwrap(); + + assert!(compacted); + assert!(pending.is_empty()); + assert_eq!(failures, 0); + assert_eq!(messages.len(), 1); + assert!( + matches!(&messages[0].content[0], ContentBlock::Text { text } if text.contains("auto summary")) + ); + assert!(sink.events().iter().any(|event| { + matches!( + event, + AgentEvent::SessionCompacted { + automatic: true, + .. 
+ } + ) + })); + } +} diff --git a/crates/oxide-code/src/slash.rs b/crates/oxide-code/src/slash.rs index 3b90ce5e..ed44821c 100644 --- a/crates/oxide-code/src/slash.rs +++ b/crates/oxide-code/src/slash.rs @@ -136,12 +136,10 @@ pub(crate) fn test_session_info() -> LiveSessionInfo { effort: Some(Effort::High), max_tokens: 32_000, prompt_cache_ttl: PromptCacheTtl::OneHour, - compaction: CompactionConfig { - auto: AutoCompactionConfig { - enabled: true, - threshold_tokens: Some(155_000), - }, - }, + compaction: CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(155_000), + }), show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), diff --git a/crates/oxide-code/src/slash/config.rs b/crates/oxide-code/src/slash/config.rs index cc9582a7..48782f00 100644 --- a/crates/oxide-code/src/slash/config.rs +++ b/crates/oxide-code/src/slash/config.rs @@ -99,6 +99,28 @@ mod tests { use crate::tui::modal::Modal; use crate::tui::theme::Theme; + fn render_modal(modal: &KvOverview, width: u16) -> String { + use ratatui::Terminal; + use ratatui::backend::TestBackend; + use ratatui::layout::Rect; + + let height = modal.height(width); + let theme = Theme::default(); + let mut terminal = Terminal::new(TestBackend::new(width, height)).unwrap(); + terminal + .draw(|frame| modal.render(frame, Rect::new(0, 0, width, height), &theme)) + .unwrap(); + let buffer = terminal.backend().buffer(); + let mut out = String::new(); + for y in 0..height { + for x in 0..width { + out.push_str(buffer[(x, y)].symbol()); + } + out.push('\n'); + } + out + } + // ── ConfigCmd metadata ── #[test] @@ -134,6 +156,16 @@ mod tests { assert_eq!(m.height(80), 20); } + #[test] + fn build_modal_renders_resolved_auto_compaction() { + let info = test_session_info(); + let m = build_modal(&info, None, None); + let rendered = render_modal(&m, 80); + + assert!(rendered.contains("Auto Compaction"), "{rendered}"); + assert!(rendered.contains("on at 155000 
tokens"), "{rendered}"); + } + // ── display_path ── #[test] diff --git a/crates/oxide-code/src/slash/status.rs b/crates/oxide-code/src/slash/status.rs index 4e11fbb0..17c5dbd7 100644 --- a/crates/oxide-code/src/slash/status.rs +++ b/crates/oxide-code/src/slash/status.rs @@ -64,6 +64,28 @@ mod tests { use crate::tui::modal::Modal; use crate::tui::theme::Theme; + fn render_modal(modal: &KvOverview, width: u16) -> String { + use ratatui::Terminal; + use ratatui::backend::TestBackend; + use ratatui::layout::Rect; + + let height = modal.height(width); + let theme = Theme::default(); + let mut terminal = Terminal::new(TestBackend::new(width, height)).unwrap(); + terminal + .draw(|frame| modal.render(frame, Rect::new(0, 0, width, height), &theme)) + .unwrap(); + let buffer = terminal.backend().buffer(); + let mut out = String::new(); + for y in 0..height { + for x in 0..width { + out.push_str(buffer[(x, y)].symbol()); + } + out.push('\n'); + } + out + } + // ── StatusCmd metadata ── #[test] @@ -98,4 +120,14 @@ mod tests { // Title + blank + 10 rows + blank + footer = 14. 
assert_eq!(m.height(80), 14); } + + #[test] + fn build_modal_renders_resolved_auto_compaction() { + let info = test_session_info(); + let m = build_modal(&info); + let rendered = render_modal(&m, 80); + + assert!(rendered.contains("Auto Compaction"), "{rendered}"); + assert!(rendered.contains("on at 155000 tokens"), "{rendered}"); + } } diff --git a/crates/oxide-code/src/tui/app.rs b/crates/oxide-code/src/tui/app.rs index 70a1a4b8..44731fe7 100644 --- a/crates/oxide-code/src/tui/app.rs +++ b/crates/oxide-code/src/tui/app.rs @@ -478,10 +478,17 @@ impl App { summary, pre_count, instructions, - } => self.apply_session_compacted(&summary, pre_count, instructions.as_deref()), + automatic, + } => self.apply_session_compacted( + &summary, + pre_count, + instructions.as_deref(), + automatic, + ), AgentEvent::ConfigChanged { model_id, effort, + compaction, requested_effort, } => { let model_changed = model_id != self.session_info.config.model_id; @@ -499,6 +506,7 @@ impl App { } self.session_info.config.model_id = model_id; self.session_info.config.effort = effort; + self.session_info.config.compaction = compaction; self.chat.push_system_message(confirmation); } AgentEvent::Error(msg) => { @@ -549,13 +557,16 @@ impl App { summary: &str, pre_count: u32, instructions: Option<&str>, + automatic: bool, ) { self.chat.clear_history(); self.pending_calls.clear(); self.chat .push_compacted_block(pre_count, instructions, summary.to_owned()); self.clear_modals(); - self.finalize_idle(); + if !automatic { + self.finalize_idle(); + } } /// Resets to idle, clears orphan calls, re-enables input, and drains queued prompts. 
@@ -861,12 +872,10 @@ mod tests { effort: Some(Effort::High), max_tokens: 32_000, prompt_cache_ttl: PromptCacheTtl::OneHour, - compaction: CompactionConfig { - auto: AutoCompactionConfig { - enabled: true, - threshold_tokens: Some(155_000), - }, - }, + compaction: CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(155_000), + }), show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), @@ -1975,6 +1984,12 @@ mod tests { app.handle_agent_event(AgentEvent::ConfigChanged { model_id: "claude-sonnet-4-6".to_owned(), effort: Some(crate::config::Effort::High), + compaction: crate::config::CompactionConfig::resolved_for_test( + crate::config::AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(167_000), + }, + ), requested_effort: None, }); @@ -1984,6 +1999,10 @@ mod tests { Some(crate::config::Effort::High), ); assert_eq!(app.status_bar.model(), "Claude Sonnet 4.6"); + assert_eq!( + app.session_info.config.compaction.auto.threshold_tokens, + Some(167_000), + ); let body = app.chat.last_system_text().expect("confirmation block"); assert_eq!( body, @@ -2001,6 +2020,7 @@ mod tests { app.handle_agent_event(AgentEvent::ConfigChanged { model_id: app.session_info.config.model_id.clone(), effort: Some(crate::config::Effort::Xhigh), + compaction: app.session_info.config.compaction, requested_effort: Some(crate::config::Effort::Xhigh), }); assert_eq!( @@ -2301,6 +2321,7 @@ mod tests { summary: "## Recap\n\nDid the thing.".to_owned(), pre_count: 4, instructions: Some("focus on auth".to_owned()), + automatic: false, }); assert_eq!( @@ -2330,6 +2351,7 @@ mod tests { summary: "s".to_owned(), pre_count: 2, instructions: None, + automatic: false, }); let forwarded = rx.recv().await.expect("drained prompt reaches the agent"); @@ -2348,10 +2370,36 @@ mod tests { summary: "summary only".to_owned(), pre_count: 2, instructions: None, + automatic: false, }); assert_eq!(app.chat.entry_count(), 1, "exactly one 
boundary block"); } + #[tokio::test] + async fn handle_session_compacted_automatic_keeps_busy_state_and_queued_prompts() { + let (mut app, mut rx, _agent_tx) = test_app(None); + app.input.set_enabled(false); + app.status_bar.set_status(Status::Streaming); + app.pending_prompts + .push_back("queued while busy".to_owned()); + + app.handle_agent_event(AgentEvent::SessionCompacted { + summary: "auto summary".to_owned(), + pre_count: 4, + instructions: None, + automatic: true, + }); + + assert_eq!(app.chat.entry_count(), 1); + assert_eq!(app.status_bar.status(), &Status::Streaming); + assert!(!app.input.is_enabled()); + assert_eq!(app.pending_prompts.len(), 1); + assert!( + rx.try_recv().is_err(), + "automatic compact must not drain early" + ); + } + #[test] fn handle_stream_token_switches_to_streaming_and_disables_input() { let (mut app, _rx, _agent_tx) = test_app(None); diff --git a/crates/oxide-code/src/tui/components/welcome.rs b/crates/oxide-code/src/tui/components/welcome.rs index 984575f7..08226488 100644 --- a/crates/oxide-code/src/tui/components/welcome.rs +++ b/crates/oxide-code/src/tui/components/welcome.rs @@ -345,12 +345,10 @@ mod tests { effort: Some(Effort::Xhigh), max_tokens: 64_000, prompt_cache_ttl: PromptCacheTtl::OneHour, - compaction: CompactionConfig { - auto: AutoCompactionConfig { - enabled: true, - threshold_tokens: Some(967_000), - }, - }, + compaction: CompactionConfig::resolved_for_test(AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(967_000), + }), show_thinking: false, show_welcome: true, theme_name: "mocha".to_owned(), From 599aa456234714a5723521e957020febf966e169 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 18:21:00 +0800 Subject: [PATCH 09/13] docs(compaction): update automatic trigger design --- CLAUDE.md | 1 + docs/design/agent/auto-compaction.md | 29 ++++++++++++-------------- docs/guide/configuration.md | 8 ++++--- docs/research/agent/auto-compaction.md | 2 +- 4 files changed, 20 insertions(+), 
20 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 5dd243af..5265bff7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -25,6 +25,7 @@ ox # Start an interactive session . ├── agent.rs # Agent turn loop, stream accumulation, tool dispatch ├── agent/ +│ ├── compact_boundary.rs # Compact boundary persistence, file-tracker reset, live transcript replacement │ ├── compaction.rs # /compact driver: stripped-transcript summarization request + summary-prefix wrapping │ └── event.rs # AgentEvent, UserAction, AgentSink trait, StdioSink ├── client.rs # Client module root diff --git a/docs/design/agent/auto-compaction.md b/docs/design/agent/auto-compaction.md index 61eabc27..15a89338 100644 --- a/docs/design/agent/auto-compaction.md +++ b/docs/design/agent/auto-compaction.md @@ -6,13 +6,9 @@ Companion docs: [research/agent/auto-compaction.md](../../research/agent/auto-co ## Scope -Auto-compaction is **default on** and can be disabled independently from manual `/compact`. The first implementation runs at safe boundaries: +Auto-compaction is **default on** and can be disabled independently from manual `/compact`. The trigger runs before recording a new user prompt when the previous completed turn left usage over threshold. Tool results are compacted only after the assistant has consumed them and returned a final response. -- after a complete text-only assistant turn; -- after a tool round is persisted, before the next sampling request; -- before starting a new user prompt if the previous turn left usage over threshold. - -It does not interrupt an in-flight stream or tool call. If a queued prompt exists, the prompt remains queued during compaction and drains afterward through the existing prompt-queue path. +It does not interrupt an in-flight stream or tool call. If another prompt arrives while summarization is running, the prompt remains queued during compaction and drains afterward through the existing prompt-queue path. 
## Token Signal @@ -58,21 +54,22 @@ Environment: | ---------------------------------------- | ------------------------------------------- | | `OX_COMPACTION_AUTO_ENABLED` | Overrides `client.compaction.auto_enabled` | | `OX_COMPACTION_AUTO_THRESHOLD_TOKENS` | Absolute automatic trigger threshold | -| `OX_COMPACTION_AUTO_THRESHOLD_PERCENT` | Percent of the model context window | +| `OX_COMPACTION_AUTO_THRESHOLD_PERCENT` | Percent of context, capped by safe trigger | Manual `/compact` remains available. The config controls only whether automatic compaction triggers and where that trigger fires. Token and percent thresholds are mutually exclusive so the resolved trigger stays obvious. -Explicit thresholds must resolve to at least `50_000` tokens. Lower values create frequent summarization loops, extra latency, and avoidable summary loss long before context pressure exists. +Explicit token thresholds must be at least `50_000` tokens and, for models with known context windows, no higher than the model-derived safe trigger. Percent thresholds must be 1-100 and are capped by the same safe trigger after they resolve to tokens. Lower values create frequent summarization loops, extra latency, and avoidable summary loss long before context pressure exists. ## Trigger Flow -`agent_turn` owns the automatic trigger because it has the live transcript, token usage, session handle, file tracker, sink, and user-action receiver. +The main loop owns the automatic trigger because it can compact before a new prompt is recorded. The agent turn reports the latest usage signal after each completed turn. -1. Stream a model response and update the latest token usage in `StreamOutcome`. -2. Persist the assistant message and any tool-result message for the round. -3. If auto-compaction is enabled and the latest total crosses the threshold, call the same compact driver used by `/compact`. -4. On success, replace `messages` with the synthetic post-compact message and emit `SessionCompacted`. -5. 
On failure, increment the auto-compaction failure counter and continue without changing the transcript. +1. `agent_turn` streams a complete turn, persists the transcript tail, and returns the latest token usage from `StreamOutcome`. +2. The main loop stores that usage as the pending automatic trigger signal. +3. When the next `SubmitPrompt` arrives, `auto_compact_before_prompt` checks the stored usage before recording the prompt. +4. If the total crosses the threshold, it calls the same compact driver used by `/compact`. +5. On success, `compact_boundary` persists the compact boundary, clears the file tracker, replaces `messages` with the synthetic post-compact message, and emits `SessionCompacted`. +6. On failure, the loop increments the auto-compaction failure counter and records the new prompt against the unchanged transcript. The failure counter is per agent-loop task. Three consecutive automatic failures disable further automatic attempts for the current session. Manual `/compact` does not consult this counter and resets it on success. @@ -80,7 +77,7 @@ The failure counter is per agent-loop task. Three consecutive automatic failures Manual and automatic compaction use the same visible `CompactedBlock`. Automatic compaction does not need a separate chat error on failure; repeated automatic failure is a background recovery problem, and the user's next regular request should proceed. The error still lands in logs. -During TUI auto-compaction, the status bar uses the existing `Compacting` state. In bare REPL / headless mode, `StdioSink` already renders `SessionCompacted` as a stderr boundary line. +During TUI auto-compaction, the status bar uses the existing `Compacting` state. Automatic `SessionCompacted` events keep the TUI busy until the queued prompt drains or the prompt submission finishes. In bare REPL / headless mode, `StdioSink` already renders `SessionCompacted` as a stderr boundary line. 
## Design Decisions @@ -88,7 +85,7 @@ During TUI auto-compaction, the status bar uses the existing `Compacting` state. 2. **Response usage over preflight counting.** The stream already carries usage. A count-tokens request would add latency and still be approximate once dynamic system context and tool definitions are included. -3. **Boundary-only compaction.** The first version compacts only after a coherent transcript unit is persisted. This avoids partial tool loops and makes session replay identical to manual `/compact`. +3. **Boundary-only compaction.** The first version compacts after a coherent transcript unit is persisted and before the next prompt starts. This avoids partial tool loops and makes session replay identical to manual `/compact`. 4. **Same summarizer as `/compact`.** No separate compaction model knob yet. The current `Client::stream_message` path already handles auth, model, effort, betas, prompt caching, and first-party gateway constraints. diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index b732d3f3..cf6c5810 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -79,10 +79,12 @@ Auto-compaction is enabled by default for known model context windows. The defau | Key | Type | Default | Description | | ------------------------ | ------- | ------------------- | ------------------------------------------ | | `auto_enabled` | boolean | `true` | Enable automatic context compaction | -| `auto_threshold_tokens` | integer | model-derived | Absolute trigger, minimum `50000` tokens | -| `auto_threshold_percent` | integer | model-derived | Percent of context window, capped safely | +| `auto_threshold_tokens` | integer | model-derived | Absolute trigger, `50000` token minimum | +| `auto_threshold_percent` | integer | model-derived | Percent of context, capped by safe trigger | -`auto_threshold_tokens` and `auto_threshold_percent` are mutually exclusive. 
Percent thresholds resolve to tokens before validation, so the effective trigger must still be at least `50000` tokens. +`auto_threshold_tokens` and `auto_threshold_percent` are mutually exclusive. Absolute thresholds must be at least `50000` tokens. For models with known context windows, absolute thresholds must also stay within the model-derived safe trigger. Percent thresholds must be 1-100 and are capped by that safe trigger after they resolve to tokens. + +For models without known context windows, the default and percent-based automatic triggers stay off. An explicit token threshold still works after floor validation. #### 1M Context Window: `[1m]` Tag diff --git a/docs/research/agent/auto-compaction.md b/docs/research/agent/auto-compaction.md index 480abf44..47d4b6c4 100644 --- a/docs/research/agent/auto-compaction.md +++ b/docs/research/agent/auto-compaction.md @@ -93,7 +93,7 @@ Key files: ## Patterns to Defer -1. **Mid-turn compaction.** Requires pausing a tool loop or assistant continuation, replacing history, and resuming the same logical turn. The first oxide-code version should compact after a complete round and before the next user-visible continuation. +1. **Mid-turn compaction.** Requires pausing a tool loop or assistant continuation, replacing history, and resuming the same logical turn. The first oxide-code version should compact before recording the next user prompt after a completed turn crosses the trigger. 2. **Microcompact / prune.** Clearing old tool outputs can save tokens, but it is a separate retention policy with its own UI and persistence implications. 
From 6ac60253cf89f9ba0d7bc18c9cdfadc41e19ae98 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 18:23:14 +0800 Subject: [PATCH 10/13] refactor(tui): keep compaction event handling focused --- crates/oxide-code/src/agent.rs | 4 --- crates/oxide-code/src/main.rs | 10 +++---- crates/oxide-code/src/tui/app.rs | 47 +++++++++++++++++++------------- 3 files changed, 33 insertions(+), 28 deletions(-) diff --git a/crates/oxide-code/src/agent.rs b/crates/oxide-code/src/agent.rs index 93a7843c..3e4ff8fe 100644 --- a/crates/oxide-code/src/agent.rs +++ b/crates/oxide-code/src/agent.rs @@ -124,10 +124,6 @@ pub(crate) struct AutoCompact<'a> { /// - [`TurnAbort::Cancelled`] / [`TurnAbort::Quit`] on the matching [`UserAction`]. /// - [`TurnAbort::Failed`] for stream errors, tool-dispatch failures, or hitting /// [`MAX_TOOL_ROUNDS`] without a final response. -#[expect( - clippy::too_many_arguments, - reason = "the turn driver keeps the live mutable state explicit at the call site" -)] pub(crate) async fn agent_turn( client: &dyn AgentClient, tools: &ToolRegistry, diff --git a/crates/oxide-code/src/main.rs b/crates/oxide-code/src/main.rs index 944741c7..2aad7355 100644 --- a/crates/oxide-code/src/main.rs +++ b/crates/oxide-code/src/main.rs @@ -709,11 +709,11 @@ fn apply_swap_config( model: Option, effort: Option, ) { - if let Some(id) = model { - if let Err(e) = client.set_model(id.into_inner()) { - _ = sink.send(AgentEvent::Error(format!("Config change failed: {e:#}"))); - return; - } + if let Some(id) = model + && let Err(e) = client.set_model(id.into_inner()) + { + _ = sink.send(AgentEvent::Error(format!("Config change failed: {e:#}"))); + return; } let resolved = match effort { Some(pick) => client.set_effort(pick), diff --git a/crates/oxide-code/src/tui/app.rs b/crates/oxide-code/src/tui/app.rs index 44731fe7..a947f9c6 100644 --- a/crates/oxide-code/src/tui/app.rs +++ b/crates/oxide-code/src/tui/app.rs @@ -26,6 +26,7 @@ use super::pending_calls::{PendingCall, 
PendingCalls, result_header}; use super::terminal::{Tui, draw_sync}; use super::theme::Theme; use crate::agent::event::{AgentEvent, UserAction}; +use crate::config::{CompactionConfig, Effort}; use crate::message::Message; use crate::session::entry::CompactInfo; use crate::slash::{self, LiveSessionInfo, SlashContext, SlashKind}; @@ -490,25 +491,7 @@ impl App { effort, compaction, requested_effort, - } => { - let model_changed = model_id != self.session_info.config.model_id; - let prev_effort = self.session_info.config.effort; - let confirmation = format_config_change( - &model_id, - model_changed, - prev_effort, - effort, - requested_effort, - ); - if model_changed { - self.status_bar - .set_model(crate::model::display_name(&model_id).into_owned()); - } - self.session_info.config.model_id = model_id; - self.session_info.config.effort = effort; - self.session_info.config.compaction = compaction; - self.chat.push_system_message(confirmation); - } + } => self.apply_config_changed(model_id, effort, compaction, requested_effort), AgentEvent::Error(msg) => { self.chat.push_error(&msg); self.finish_turn(); @@ -569,6 +552,32 @@ impl App { } } + fn apply_config_changed( + &mut self, + model_id: String, + effort: Option, + compaction: CompactionConfig, + requested_effort: Option, + ) { + let model_changed = model_id != self.session_info.config.model_id; + let prev_effort = self.session_info.config.effort; + let confirmation = format_config_change( + &model_id, + model_changed, + prev_effort, + effort, + requested_effort, + ); + if model_changed { + self.status_bar + .set_model(crate::model::display_name(&model_id).into_owned()); + } + self.session_info.config.model_id = model_id; + self.session_info.config.effort = effort; + self.session_info.config.compaction = compaction; + self.chat.push_system_message(confirmation); + } + /// Resets to idle, clears orphan calls, re-enables input, and drains queued prompts. 
fn finalize_idle(&mut self) { self.pending_calls.clear(); From 9e162f3435abd4f6c1b04bf1719b6de5b0c3e67f Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 18:52:10 +0800 Subject: [PATCH 11/13] fix(compaction): close auto-compact review gaps --- crates/oxide-code/src/agent.rs | 1 + crates/oxide-code/src/agent/event.rs | 43 +++++- crates/oxide-code/src/config.rs | 20 ++- crates/oxide-code/src/main.rs | 49 ++++++- crates/oxide-code/src/slash/status.rs | 2 +- crates/oxide-code/src/tui/app.rs | 183 ++++++++++++++++++++++---- 6 files changed, 267 insertions(+), 31 deletions(-) diff --git a/crates/oxide-code/src/agent.rs b/crates/oxide-code/src/agent.rs index 3e4ff8fe..eb931cca 100644 --- a/crates/oxide-code/src/agent.rs +++ b/crates/oxide-code/src/agent.rs @@ -218,6 +218,7 @@ pub(crate) async fn auto_compact_if_needed( return Ok(false); } + _ = sink.send(AgentEvent::AutoCompactionStarted); let summary = match await_unless_aborted( compaction::compact_session(client, messages, None), user_rx, diff --git a/crates/oxide-code/src/agent/event.rs b/crates/oxide-code/src/agent/event.rs index d48b9664..67a90a0c 100644 --- a/crates/oxide-code/src/agent/event.rs +++ b/crates/oxide-code/src/agent/event.rs @@ -47,6 +47,9 @@ pub(crate) enum AgentEvent { /// User cancelled mid-turn ([`UserAction::Cancel`]); the in-flight reply is truncated and the /// inline [`INTERRUPTED_MARKER`] is rendered. Cancelled, + /// Automatic compaction started before the submitted prompt runs. TUI switches to compacting + /// status while the summarizer request streams. + AutoCompactionStarted, /// Background title generator finished; UI updates the chrome label. SessionTitleUpdated { session_id: String, title: String }, /// `/clear` rolled the session — a new session UUID is now active. @@ -207,12 +210,24 @@ impl StdioSink { } writeln!(stderr)?; } + AgentEvent::SessionCompacted { + pre_count, + automatic, + .. 
+ } => { + let label = if automatic { + "Auto-compacted" + } else { + "Compacted" + }; + writeln!(stderr, "{label} {pre_count} messages into summary")?; + } // TUI-only — no stdio surface to update. AgentEvent::PromptDrained(_) + | AgentEvent::AutoCompactionStarted | AgentEvent::SessionTitleUpdated { .. } | AgentEvent::SessionRolled { .. } | AgentEvent::SessionResumed { .. } - | AgentEvent::SessionCompacted { .. } | AgentEvent::ConfigChanged { .. } => {} AgentEvent::TurnComplete => { writeln!(stdout)?; @@ -367,6 +382,7 @@ mod tests { fn render_tui_only_events_emit_nothing_on_either_stream() { for event in [ AgentEvent::PromptDrained("queued".to_owned()), + AgentEvent::AutoCompactionStarted, AgentEvent::SessionTitleUpdated { session_id: "sid".to_owned(), title: "New title".to_owned(), @@ -387,6 +403,31 @@ mod tests { } } + #[test] + fn render_session_compacted_writes_stderr_boundary() { + let (_, stderr) = render_one( + &test_sink(false), + AgentEvent::SessionCompacted { + summary: "summary".to_owned(), + pre_count: 4, + instructions: None, + automatic: false, + }, + ); + assert_eq!(stderr, "Compacted 4 messages into summary\n"); + + let (_, stderr) = render_one( + &test_sink(false), + AgentEvent::SessionCompacted { + summary: "summary".to_owned(), + pre_count: 4, + instructions: None, + automatic: true, + }, + ); + assert_eq!(stderr, "Auto-compacted 4 messages into summary\n"); + } + #[test] fn render_turn_complete_writes_trailing_newline_to_stdout() { let (stdout, stderr) = render_one(&test_sink(false), AgentEvent::TurnComplete); diff --git a/crates/oxide-code/src/config.rs b/crates/oxide-code/src/config.rs index 0a8a19f7..b4f9d9c8 100644 --- a/crates/oxide-code/src/config.rs +++ b/crates/oxide-code/src/config.rs @@ -407,6 +407,7 @@ pub(crate) fn display_bool(flag: bool) -> &'static str { pub(crate) fn display_auto_compaction(auto: AutoCompactionConfig) -> String { match (auto.enabled, auto.threshold_tokens) { (true, Some(threshold)) => format!("on at {threshold} 
tokens"), + (true, None) => "off (no threshold)".to_owned(), _ => "off".to_owned(), } } @@ -514,8 +515,14 @@ fn threshold_from_percent(percent: u8, model: &str, max_tokens: u32) -> Result { - apply_swap_config(&mut self.client, &self.sink, model, effort); + if apply_swap_config(&mut self.client, &self.sink, model, effort) { + self.auto_compaction_failures = 0; + } LoopControl::Continue } UserAction::Quit => LoopControl::Stop, @@ -708,12 +710,12 @@ fn apply_swap_config( sink: &dyn AgentSink, model: Option, effort: Option, -) { +) -> bool { if let Some(id) = model && let Err(e) = client.set_model(id.into_inner()) { _ = sink.send(AgentEvent::Error(format!("Config change failed: {e:#}"))); - return; + return false; } let resolved = match effort { Some(pick) => client.set_effort(pick), @@ -730,6 +732,7 @@ fn apply_swap_config( // wrong after a /model or /effort swap. tracing::error!("config-changed event dropped: {e}"); } + true } // ── Bare REPL Mode ── @@ -990,4 +993,44 @@ mod tests { ) })); } + + #[tokio::test] + async fn handle_action_swap_config_resets_auto_compaction_breaker() { + let server = MockServer::start().await; + let config = test_config(server.uri(), api_key(), "claude-opus-4-7[1m]"); + let client = Client::new(config, Some("sid".to_owned())).unwrap(); + let dir = tempfile::tempdir().unwrap(); + let store = test_store(dir.path()); + let session = session::handle::start(&store, "claude-opus-4-7[1m]"); + let file_tracker = Arc::new(FileTracker::default()); + let (sink, mut event_rx) = tui::event::channel(); + let (_user_tx, user_rx) = agent::event::inert_user_action_channel(); + let mut task = AgentLoopTask { + client, + tools: Arc::new(ToolRegistry::new(Vec::new())), + sink, + user_rx, + session, + messages: Vec::new(), + store, + file_tracker, + auto_compaction_failures: 3, + last_usage: Some(TokenUsage::new(100_000, 1)), + }; + + let control = task + .handle_action(UserAction::SwapConfig { + model: None, + effort: Some(Effort::Xhigh), + }) + .await; + + 
assert!(matches!(control, LoopControl::Continue)); + assert_eq!(task.auto_compaction_failures, 0); + assert_eq!(task.last_usage, Some(TokenUsage::new(100_000, 1))); + assert!(matches!( + event_rx.recv().await, + Some(AgentEvent::ConfigChanged { .. }) + )); + } } diff --git a/crates/oxide-code/src/slash/status.rs b/crates/oxide-code/src/slash/status.rs index 17c5dbd7..f76f25aa 100644 --- a/crates/oxide-code/src/slash/status.rs +++ b/crates/oxide-code/src/slash/status.rs @@ -37,7 +37,7 @@ fn build_modal(info: &LiveSessionInfo) -> KvOverview { ("Auth".to_owned(), info.config.auth_label.to_owned()), ("Version".to_owned(), info.version.to_owned()), ( - "Context Cache".to_owned(), + "Prompt Cache TTL".to_owned(), info.config.prompt_cache_ttl.to_string(), ), ( diff --git a/crates/oxide-code/src/tui/app.rs b/crates/oxide-code/src/tui/app.rs index a947f9c6..d2bb832f 100644 --- a/crates/oxide-code/src/tui/app.rs +++ b/crates/oxide-code/src/tui/app.rs @@ -26,7 +26,7 @@ use super::pending_calls::{PendingCall, PendingCalls, result_header}; use super::terminal::{Tui, draw_sync}; use super::theme::Theme; use crate::agent::event::{AgentEvent, UserAction}; -use crate::config::{CompactionConfig, Effort}; +use crate::config::{CompactionConfig, Effort, display_auto_compaction}; use crate::message::Message; use crate::session::entry::CompactInfo; use crate::slash::{self, LiveSessionInfo, SlashContext, SlashKind}; @@ -53,6 +53,8 @@ pub(crate) struct App { tools: Arc, /// Correlates `ToolCallStart` with its matching `ToolCallEnd`. pending_calls: PendingCalls, + /// Prompt already painted for the in-flight turn. Replayed if auto-compaction clears chat. + active_prompt: Option, /// FIFO of prompts submitted mid-turn. Drained at turn boundaries. 
pending_prompts: VecDeque, modals: ModalStack, @@ -102,6 +104,7 @@ impl App { user_tx, tools, pending_calls: PendingCalls::new(), + active_prompt: None, pending_prompts: VecDeque::new(), modals: ModalStack::new(), preview_theme_snapshot: None, @@ -364,6 +367,9 @@ impl App { } if let Some(action) = synthesized { if matches!(action, UserAction::SubmitPrompt(_)) { + if slash::echoes_input(&parsed) { + self.active_prompt = Some(text.to_owned()); + } self.input.set_enabled(false); self.status_bar.set_status(Status::Streaming); self.forward_to_agent(action); @@ -374,6 +380,7 @@ impl App { return false; } self.chat.push_user_message(text.to_owned()); + self.active_prompt = Some(text.to_owned()); self.input.set_enabled(false); self.status_bar.set_status(Status::Streaming); return true; @@ -456,8 +463,13 @@ impl App { } AgentEvent::Cancelled => { self.chat.push_interrupted_marker(); + self.active_prompt = None; self.finalize_idle(); } + AgentEvent::AutoCompactionStarted => { + self.set_active_status(Status::Compacting); + self.input.set_enabled(false); + } AgentEvent::SessionTitleUpdated { session_id, title } => { if session_id == self.session_info.session_id { self.status_bar.set_title(Some(title)); @@ -467,6 +479,7 @@ impl App { self.session_info.session_id = id; self.status_bar.set_title(None); self.chat.clear_history(); + self.active_prompt = None; } AgentEvent::SessionResumed { id, @@ -502,6 +515,7 @@ impl App { fn finish_turn(&mut self) { self.chat.commit_streaming(); + self.active_prompt = None; self.finalize_idle(); } @@ -520,6 +534,7 @@ impl App { self.chat .load_history(messages, compact, tool_metadata, self.tools.as_ref()); self.pending_calls.clear(); + self.active_prompt = None; // Queued prompts belonged to the previous thread, so resume drops them. 
let dropped = self.pending_prompts.len(); self.pending_prompts.clear(); @@ -546,8 +561,12 @@ impl App { self.pending_calls.clear(); self.chat .push_compacted_block(pre_count, instructions, summary.to_owned()); + if automatic && let Some(prompt) = &self.active_prompt { + self.chat.push_user_message(prompt.clone()); + } self.clear_modals(); if !automatic { + self.active_prompt = None; self.finalize_idle(); } } @@ -561,12 +580,15 @@ impl App { ) { let model_changed = model_id != self.session_info.config.model_id; let prev_effort = self.session_info.config.effort; + let prev_compaction = self.session_info.config.compaction; let confirmation = format_config_change( &model_id, model_changed, prev_effort, effort, requested_effort, + prev_compaction, + compaction, ); if model_changed { self.status_bar @@ -770,9 +792,11 @@ fn format_config_change( prev_effort: Option, new_effort: Option, requested_effort: Option, + prev_compaction: CompactionConfig, + new_compaction: CompactionConfig, ) -> String { - if !model_changed { - return match (requested_effort, new_effort) { + let message = if !model_changed { + match (requested_effort, new_effort) { (Some(req), Some(eff)) if req == eff => format!("Effort set to {eff}."), (Some(req), Some(eff)) => format!("Effort set to {eff} (clamped from {req})."), (Some(req), None) => { @@ -780,26 +804,44 @@ fn format_config_change( } // Slash dispatch keeps this unreachable, but a clear fallback beats a panic. (None, _) => "Config unchanged.".to_owned(), - }; - } - let head = format!( - "Switched to {} ({model_id})", - crate::model::display_name(model_id) - ); - match (requested_effort, prev_effort, new_effort) { - (Some(req), _, Some(eff)) if req == eff => format!("{head} · effort {eff}."), - (Some(req), _, Some(eff)) => format!("{head} · effort {eff} (clamped from {req})."), - (Some(req), _, None) => { - format!("{head}. 
Effort unchanged — model has no effort tier (asked for {req}).") } - (None, None, None) => format!("{head}."), - (None, Some(_), None) => format!("{head}. Effort cleared (model has no effort tier)."), - (None, None, Some(eff)) => format!("{head} · effort {eff} (model default)."), - (None, Some(prev), Some(new)) if new < prev => { - format!("{head} · effort {new} (clamped from {prev}).") + } else { + let head = format!( + "Switched to {} ({model_id})", + crate::model::display_name(model_id) + ); + match (requested_effort, prev_effort, new_effort) { + (Some(req), _, Some(eff)) if req == eff => format!("{head} · effort {eff}."), + (Some(req), _, Some(eff)) => format!("{head} · effort {eff} (clamped from {req})."), + (Some(req), _, None) => { + format!("{head}. Effort unchanged — model has no effort tier (asked for {req}).") + } + (None, None, None) => format!("{head}."), + (None, Some(_), None) => format!("{head}. Effort cleared (model has no effort tier)."), + (None, None, Some(eff)) => format!("{head} · effort {eff} (model default)."), + (None, Some(prev), Some(new)) if new < prev => { + format!("{head} · effort {new} (clamped from {prev}).") + } + (None, Some(_), Some(eff)) => format!("{head} · effort {eff}."), } - (None, Some(_), Some(eff)) => format!("{head} · effort {eff}."), + }; + if model_changed && prev_compaction.auto != new_compaction.auto { + return append_sentence( + message, + format!( + "Auto compaction {}", + display_auto_compaction(new_compaction.auto) + ), + ); + } + message +} + +fn append_sentence(mut message: String, sentence: String) -> String { + if message.ends_with('.') { + message.pop(); } + format!("{message}. {sentence}.") } #[cfg(test)] @@ -892,6 +934,13 @@ mod tests { } } + fn base_compaction() -> CompactionConfig { + CompactionConfig::resolved_for_test(crate::config::AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(155_000), + }) + } + /// Minimal modal for layout tests: paints `title` on its only row, ignores keys. 
struct FakeModal { title: String, @@ -2015,7 +2064,7 @@ mod tests { let body = app.chat.last_system_text().expect("confirmation block"); assert_eq!( body, - "Switched to Claude Sonnet 4.6 (claude-sonnet-4-6) · effort high.", + "Switched to Claude Sonnet 4.6 (claude-sonnet-4-6) · effort high. Auto compaction on at 167000 tokens.", ); assert!(app.dirty); } @@ -2048,7 +2097,15 @@ mod tests { fn format_config_change_swap_both_none_omits_effort_clause() { // Pin: no `effort` substring at all, never a stray "none" // word. Mutation that prints `effort none.` would surface here. - let s = format_config_change("claude-haiku-4-5", true, None, None, None); + let s = format_config_change( + "claude-haiku-4-5", + true, + None, + None, + None, + base_compaction(), + base_compaction(), + ); assert_eq!(s, "Switched to Claude Haiku 4.5 (claude-haiku-4-5)."); } @@ -2062,6 +2119,8 @@ mod tests { Some(crate::config::Effort::Xhigh), None, None, + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2080,6 +2139,8 @@ mod tests { None, Some(crate::config::Effort::Xhigh), None, + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2096,6 +2157,8 @@ mod tests { Some(crate::config::Effort::Xhigh), Some(crate::config::Effort::High), None, + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2113,6 +2176,8 @@ mod tests { Some(crate::config::Effort::High), Some(crate::config::Effort::High), None, + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2130,6 +2195,8 @@ mod tests { Some(crate::config::Effort::Medium), Some(crate::config::Effort::High), Some(crate::config::Effort::Xhigh), + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2145,6 +2212,8 @@ mod tests { Some(crate::config::Effort::High), Some(crate::config::Effort::Xhigh), Some(crate::config::Effort::Xhigh), + base_compaction(), + base_compaction(), ); assert_eq!(s, "Effort set to xhigh."); } @@ -2157,6 +2226,8 @@ mod tests { Some(crate::config::Effort::Medium), 
Some(crate::config::Effort::High), Some(crate::config::Effort::Xhigh), + base_compaction(), + base_compaction(), ); assert_eq!(s, "Effort set to high (clamped from xhigh)."); } @@ -2171,6 +2242,8 @@ mod tests { None, None, Some(crate::config::Effort::High), + base_compaction(), + base_compaction(), ); assert_eq!( s, @@ -2178,6 +2251,30 @@ mod tests { ); } + #[test] + fn format_config_change_model_swap_mentions_compaction_threshold_change() { + let new_compaction = + CompactionConfig::resolved_for_test(crate::config::AutoCompactionConfig { + enabled: true, + threshold_tokens: Some(167_000), + }); + + let s = format_config_change( + "claude-sonnet-4-6", + true, + Some(crate::config::Effort::High), + Some(crate::config::Effort::High), + None, + base_compaction(), + new_compaction, + ); + + assert_eq!( + s, + "Switched to Claude Sonnet 4.6 (claude-sonnet-4-6) · effort high. Auto compaction on at 167000 tokens." + ); + } + #[test] fn handle_session_rolled_clears_chat_rebinds_id_and_drops_stale_title() { let (mut app, _rx, _agent_tx) = test_app(Some("Old session title")); @@ -2388,7 +2485,7 @@ mod tests { async fn handle_session_compacted_automatic_keeps_busy_state_and_queued_prompts() { let (mut app, mut rx, _agent_tx) = test_app(None); app.input.set_enabled(false); - app.status_bar.set_status(Status::Streaming); + app.status_bar.set_status(Status::Compacting); app.pending_prompts .push_back("queued while busy".to_owned()); @@ -2400,7 +2497,7 @@ mod tests { }); assert_eq!(app.chat.entry_count(), 1); - assert_eq!(app.status_bar.status(), &Status::Streaming); + assert_eq!(app.status_bar.status(), &Status::Compacting); assert!(!app.input.is_enabled()); assert_eq!(app.pending_prompts.len(), 1); assert!( @@ -2409,6 +2506,44 @@ mod tests { ); } + #[test] + fn handle_auto_compaction_started_sets_compacting_status() { + let (mut app, _rx, _agent_tx) = test_app(None); + app.dispatch_user_action(UserAction::SubmitPrompt("active question".to_owned())); + + 
app.handle_agent_event(AgentEvent::AutoCompactionStarted); + + assert_eq!(app.status_bar.status(), &Status::Compacting); + assert!(!app.input.is_enabled()); + } + + #[tokio::test] + async fn handle_session_compacted_automatic_replays_active_prompt_after_summary() { + let (mut app, mut rx, _agent_tx) = test_app(None); + + app.dispatch_user_action(UserAction::SubmitPrompt("active question".to_owned())); + let forwarded = rx.recv().await.expect("prompt reaches the agent"); + assert_eq!( + forwarded, + UserAction::SubmitPrompt("active question".to_owned()) + ); + app.handle_agent_event(AgentEvent::AutoCompactionStarted); + + app.handle_agent_event(AgentEvent::SessionCompacted { + summary: "auto summary".to_owned(), + pre_count: 4, + instructions: None, + automatic: true, + }); + + assert_eq!(app.chat.entry_count(), 2); + assert_eq!(app.status_bar.status(), &Status::Compacting); + assert!(!app.input.is_enabled()); + let text = rendered_text(&mut app, 80, 10); + assert!(text.contains("auto summary")); + assert!(text.contains("active question")); + } + #[test] fn handle_stream_token_switches_to_streaming_and_disables_input() { let (mut app, _rx, _agent_tx) = test_app(None); From d55a4be48e6f2d7fc872ef359358e5d4b4853046 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 18:52:16 +0800 Subject: [PATCH 12/13] docs(compaction): align auto-compact review notes --- docs/design/agent/auto-compaction.md | 7 ++++--- docs/guide/configuration.md | 2 +- docs/roadmap.md | 6 +++--- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/docs/design/agent/auto-compaction.md b/docs/design/agent/auto-compaction.md index 15a89338..1fdba10d 100644 --- a/docs/design/agent/auto-compaction.md +++ b/docs/design/agent/auto-compaction.md @@ -58,7 +58,7 @@ Environment: Manual `/compact` remains available. The config controls only whether automatic compaction triggers and where that trigger fires. 
Token and percent thresholds are mutually exclusive so the resolved trigger stays obvious. -Explicit token thresholds must be at least `50_000` tokens and, for models with known context windows, no higher than the model-derived safe trigger. Percent thresholds must be 1-100 and are capped by the same safe trigger after they resolve to tokens. Lower values create frequent summarization loops, extra latency, and avoidable summary loss long before context pressure exists. +Explicit token thresholds must be at least `50_000` tokens and, for models with known context windows, no higher than the model-derived safe trigger. Percent thresholds must be 1-100, are capped by the same safe trigger after they resolve to tokens, and must still resolve to at least `50_000` tokens. Lower values create frequent summarization loops, extra latency, and avoidable summary loss long before context pressure exists. ## Trigger Flow @@ -68,8 +68,9 @@ The main loop owns the automatic trigger because it can compact before a new pro 2. The main loop stores that usage as the pending automatic trigger signal. 3. When the next `SubmitPrompt` arrives, `auto_compact_before_prompt` checks the stored usage before recording the prompt. 4. If the total crosses the threshold, it calls the same compact driver used by `/compact`. -5. On success, `compact_boundary` persists the compact boundary, clears the file tracker, replaces `messages` with the synthetic post-compact message, and emits `SessionCompacted`. -6. On failure, the loop increments the auto-compaction failure counter and records the new prompt against the unchanged transcript. +5. The agent loop emits `AutoCompactionStarted` so the TUI can show compaction status while the summarizer runs. +6. On success, `compact_boundary` persists the compact boundary, clears the file tracker, replaces `messages` with the synthetic post-compact message, and emits `SessionCompacted`. +7. 
On failure, the loop increments the auto-compaction failure counter and records the new prompt against the unchanged transcript. The failure counter is per agent-loop task. Three consecutive automatic failures disable further automatic attempts for the current session. Manual `/compact` does not consult this counter and resets it on success. diff --git a/docs/guide/configuration.md b/docs/guide/configuration.md index cf6c5810..8229135b 100644 --- a/docs/guide/configuration.md +++ b/docs/guide/configuration.md @@ -82,7 +82,7 @@ Auto-compaction is enabled by default for known model context windows. The defau | `auto_threshold_tokens` | integer | model-derived | Absolute trigger, `50000` token minimum | | `auto_threshold_percent` | integer | model-derived | Percent of context, capped by safe trigger | -`auto_threshold_tokens` and `auto_threshold_percent` are mutually exclusive. Absolute thresholds must be at least `50000` tokens. For models with known context windows, absolute thresholds must also stay within the model-derived safe trigger. Percent thresholds must be 1-100 and are capped by that safe trigger after they resolve to tokens. +`auto_threshold_tokens` and `auto_threshold_percent` are mutually exclusive. Absolute thresholds must be at least `50000` tokens. For models with known context windows, absolute thresholds must also stay within the model-derived safe trigger. Percent thresholds must be 1-100, are capped by that safe trigger after they resolve to tokens, and must still resolve to at least `50000` tokens. For models without known context windows, the default and percent-based automatic triggers stay off. An explicit token threshold still works after floor validation. diff --git a/docs/roadmap.md b/docs/roadmap.md index 4d04fa3c..65e78cf2 100644 --- a/docs/roadmap.md +++ b/docs/roadmap.md @@ -4,9 +4,9 @@ oxide-code is still early. 
This roadmap is the high-level product view: what wor The direction is simple: -- Build a fast terminal AI coding assistant for real project work. -- Keep the assistant transparent about context, tools, sessions, and state. -- Grow the product carefully so daily workflows stay understandable. +- Keep the terminal as the primary interface: streaming chat, tool output, and session controls stay keyboard-first. +- Keep context and state visible: model, instructions, compaction, queued prompts, and session identity should be inspectable from the UI. +- Add workflow depth only when it fits the current agent-harness model. ## Working Today From 40a6f7daa7e92a8aded6ef71d8b9e5667a6108a2 Mon Sep 17 00:00:00 2001 From: Hakula Chen Date: Tue, 12 May 2026 18:53:52 +0800 Subject: [PATCH 13/13] refactor(tui): simplify config change formatter --- crates/oxide-code/src/tui/app.rs | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/crates/oxide-code/src/tui/app.rs b/crates/oxide-code/src/tui/app.rs index d2bb832f..0da516e7 100644 --- a/crates/oxide-code/src/tui/app.rs +++ b/crates/oxide-code/src/tui/app.rs @@ -795,17 +795,7 @@ fn format_config_change( prev_compaction: CompactionConfig, new_compaction: CompactionConfig, ) -> String { - let message = if !model_changed { - match (requested_effort, new_effort) { - (Some(req), Some(eff)) if req == eff => format!("Effort set to {eff}."), - (Some(req), Some(eff)) => format!("Effort set to {eff} (clamped from {req})."), - (Some(req), None) => { - format!("Effort unchanged — model has no effort tier (asked for {req}).") - } - // Slash dispatch keeps this unreachable, but a clear fallback beats a panic. 
- (None, _) => "Config unchanged.".to_owned(), - } - } else { + let message = if model_changed { let head = format!( "Switched to {} ({model_id})", crate::model::display_name(model_id) @@ -824,11 +814,21 @@ fn format_config_change( } (None, Some(_), Some(eff)) => format!("{head} · effort {eff}."), } + } else { + match (requested_effort, new_effort) { + (Some(req), Some(eff)) if req == eff => format!("Effort set to {eff}."), + (Some(req), Some(eff)) => format!("Effort set to {eff} (clamped from {req})."), + (Some(req), None) => { + format!("Effort unchanged — model has no effort tier (asked for {req}).") + } + // Slash dispatch keeps this unreachable, but a clear fallback beats a panic. + (None, _) => "Config unchanged.".to_owned(), + } }; if model_changed && prev_compaction.auto != new_compaction.auto { return append_sentence( message, - format!( + &format!( "Auto compaction {}", display_auto_compaction(new_compaction.auto) ), @@ -837,7 +837,7 @@ fn format_config_change( message } -fn append_sentence(mut message: String, sentence: String) -> String { +fn append_sentence(mut message: String, sentence: &str) -> String { if message.ends_with('.') { message.pop(); }