diff --git a/.cursor-plugin/marketplace.json b/.cursor-plugin/marketplace.json index f3adf10..1396952 100644 --- a/.cursor-plugin/marketplace.json +++ b/.cursor-plugin/marketplace.json @@ -52,6 +52,11 @@ "name": "docs-canvas", "source": "docs-canvas", "description": "Render documentation — architecture notes, API references, runbooks, and codebase walkthroughs — as a navigable Cursor Canvas with sections, table of contents, diagrams, and cross-references." + }, + { + "name": "cursor-sdk", + "source": "cursor-sdk", + "description": "Build apps, scripts, CI pipelines, and automations on top of the Cursor TypeScript SDK (@cursor/sdk) — runtime selection, auth, streaming, MCP, error handling, and ready-to-extend integration patterns." } ] } diff --git a/README.md b/README.md index 01f0fdc..ce4943e 100644 --- a/README.md +++ b/README.md @@ -13,6 +13,7 @@ Official Cursor plugins for popular developer tools, frameworks, and SaaS produc | `cli-for-agent` | [CLI for Agents](cli-for-agent/) | Cursor | Developer Tools | Patterns for designing CLIs that coding agents can run reliably: flags, help with examples, pipelines, errors, idempotency, dry-run. | | `pr-review-canvas` | [PR Review Canvas](pr-review-canvas/) | Cursor | Developer Tools | Render PR diffs as interactive Cursor Canvases organized for reviewer comprehension — groups changes by importance, separates boilerplate from core logic, and highlights tricky or unexpected code. | | `docs-canvas` | [Docs Canvas](docs-canvas/) | Cursor | Developer Tools | Render documentation — architecture notes, API references, runbooks, and codebase walkthroughs — as a navigable Cursor Canvas with sections, table of contents, diagrams, and cross-references. | +| `cursor-sdk` | [Cursor SDK](cursor-sdk/) | Cursor | Developer Tools | Build apps, scripts, CI pipelines, and automations on top of the Cursor TypeScript SDK (@cursor/sdk) — runtime selection, auth, streaming, MCP, error handling, and ready-to-extend integration patterns. 
| Author values match each plugin’s `plugin.json` `author.name` (Cursor lists `plugins@cursor.com` in the manifest). diff --git a/cursor-sdk/.cursor-plugin/plugin.json b/cursor-sdk/.cursor-plugin/plugin.json new file mode 100644 index 0000000..64e4bdc --- /dev/null +++ b/cursor-sdk/.cursor-plugin/plugin.json @@ -0,0 +1,32 @@ +{ + "name": "cursor-sdk", + "displayName": "Cursor SDK", + "version": "1.0.0", + "description": "Build apps, scripts, CI pipelines, and automations on top of the Cursor TypeScript SDK (@cursor/sdk) — runtime selection, auth, streaming, MCP, error handling, and ready-to-extend integration patterns.", + "author": { + "name": "Cursor", + "email": "plugins@cursor.com" + }, + "homepage": "https://github.com/cursor/plugins/tree/main/cursor-sdk", + "repository": "https://github.com/cursor/plugins", + "license": "MIT", + "keywords": [ + "cursor-sdk", + "cursor", + "sdk", + "typescript", + "agents", + "automation", + "ci", + "mcp", + "streaming" + ], + "category": "developer-tools", + "tags": [ + "sdk", + "agents", + "automation", + "developer-tools" + ], + "skills": "./skills/" +} diff --git a/cursor-sdk/LICENSE b/cursor-sdk/LICENSE new file mode 100644 index 0000000..ca2bba7 --- /dev/null +++ b/cursor-sdk/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2026 Cursor + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/cursor-sdk/README.md b/cursor-sdk/README.md new file mode 100644 index 0000000..d988f4f --- /dev/null +++ b/cursor-sdk/README.md @@ -0,0 +1,33 @@ +# Cursor SDK + +Cursor plugin with a single skill that helps users build on top of the Cursor TypeScript SDK (`@cursor/sdk`). The skill covers the three invocation patterns (`Agent.prompt`, `Agent.create` + `agent.send`, `Agent.resume`), the top traps for new integrations, runtime choice (local vs cloud), auth, streaming, MCP, error handling, and ready-to-extend patterns for CI, scheduled jobs, chat, and webhooks. + +The skill is short by design — it points at focused reference files only when the user's task clearly falls into one of them. + +## What it includes + +- `cursor-sdk`: design and integration guidance for building with `@cursor/sdk`, plus reference files for runtime choice, auth, error handling, streaming, MCP, advanced features, and integration patterns. + +## When to use it + +Use whenever the user is integrating, installing, or writing code against the Cursor SDK; mentions `Agent.create`, `Agent.prompt`, `Agent.resume`, `agent.send`, `run.stream`, `CursorAgentError`, or `@cursor/sdk`; wants to run Cursor agents from a script, CI/CD pipeline, GitHub Action, backend service, bot, or webhook; is choosing between local and cloud runtime; is configuring MCP servers for an SDK agent; or is porting REST `/v1/agents` calls to the SDK. 
+ +The skill is the source of truth for the external `@cursor/sdk` package and is meant to be loaded eagerly rather than answered from memory. + +## Reference files + +The skill keeps the main `SKILL.md` short and reads a reference file only when the user's task clearly falls inside it: + +| If the user is... | Reference | +| ------------------------------------------------------------------------------------ | -------------------------------------- | +| Picking between local and cloud runtime | `references/runtime-choice.md` | +| Debugging auth (401s, missing key, team vs user keys) | `references/auth.md` | +| Handling errors, retries, rate limits, `CursorAgentError`, `result.status === error` | `references/error-handling.md` | +| Consuming streams, picking event types, cancelling, or stream vs wait | `references/streaming.md` | +| Configuring MCP servers (HTTP, stdio, transport, auth injection) | `references/mcp.md` | +| Sub-agents, resume, artifacts, listing/inspecting agents, `Agent.messages` | `references/advanced.md` | +| Building a specific integration (CI review bot, triage, chat, webhook) | `references/patterns.md` | + +## License + +MIT diff --git a/cursor-sdk/skills/cursor-sdk/SKILL.md b/cursor-sdk/skills/cursor-sdk/SKILL.md new file mode 100644 index 0000000..bb070b3 --- /dev/null +++ b/cursor-sdk/skills/cursor-sdk/SKILL.md @@ -0,0 +1,239 @@ +--- +name: cursor-sdk +description: Guide users building apps, scripts, CI pipelines, or automations on top of the Cursor TypeScript SDK (`@cursor/sdk`). 
Use this skill whenever the user mentions integrating, installing, or writing code against the Cursor SDK; whenever they say `Agent.create`, `Agent.prompt`, `Agent.resume`, `agent.send`, `run.stream`, `CursorAgentError`, or `@cursor/sdk`; whenever they ask to run Cursor agents programmatically from a script, CI/CD pipeline, GitHub Action, backend service, or any other code that isn't the Cursor IDE itself; and whenever they want to pick between local and cloud runtime, configure MCP servers for an SDK agent, or handle streaming, cancellation, or errors from an SDK agent. Also trigger when a user is wiring Cursor into an automation, writing a bot that runs Cursor, or porting REST `/v1/agents` calls to the SDK, even if they don't explicitly name the package. Use this eagerly rather than answering from memory; the SDK surface evolves and this skill plus its references are the source of truth for the external package. +--- + +# Cursor SDK + +The Cursor TypeScript SDK (`@cursor/sdk`) runs Cursor agents programmatically. The same interface drives the local runtime (agent runs on your machine against your files) and the cloud runtime (agent runs on Cursor-hosted or self-hosted infrastructure against a cloned repo and opens PRs). + +Use this skill to help someone **bootstrap a working integration quickly** and **avoid the handful of traps that bite new users**. Canonical docs live at [https://cursor.com/docs/api/sdk/typescript](https://cursor.com/docs/api/sdk/typescript); this skill only adds decision-making, failure-mode prevention, and ready-to-extend patterns. + +## Voice and Posture + +This skill helps the user **build** with the SDK. It is not the place to validate, congratulate, or sell the SDK as a choice. The user's intent is the input; your job is execution. + +- **When the user names the SDK explicitly** (says "Cursor SDK", `@cursor/sdk`, `Agent.create`, `Agent.prompt`, etc.): assume they know what the SDK is and have decided to use it. 
Skip framing, skip pep talk, go straight to producing the integration. No "good news", no "the SDK is perfect for this", no "this is almost exactly the pattern X is designed for". +- **When the user describes a problem the SDK fits but doesn't name it** ("I want a bot that reviews my PRs", "I want a script that asks Cursor questions about my repo"): the SDK isn't yet a confirmed choice. Surface it as a question, briefly, then wait: *"The Cursor SDK is what I'd reach for here — want me to design it that way, or do you have a different runtime in mind?"* If they confirm, proceed. If they push back or want options, give options. +- **In either case, don't restate the user's intent back to them.** They know what they want. Get to the design. + +Avoid these specific openers (and their close cousins): + +- "Good news: this is exactly the pattern…" +- "The SDK is built for this shape." +- "Great, you've come to the right place." +- "This is almost exactly the X the SDK is designed for." +- Any lede that compliments the user's choice or restates their goal in flattering terms. + +Prefer: + +- Open with the design decision or the first thing they need to know. +- If you genuinely have a design choice to flag (local vs cloud, prompt vs send, sync vs stream), name it in one sentence and explain why; don't preface it with validation. + +## When to open a reference file + +Keep this page short. Read a reference file only when the user's task clearly falls inside it: + + +| If the user is... 
| Read | +| ------------------------------------------------------------------------------------ | -------------------------------------------------------------- | +| Picking between local and cloud runtime, or not sure which they should use | [`references/runtime-choice.md`](references/runtime-choice.md) | +| Debugging auth (401s, "Missing CURSOR_API_KEY", team-vs-user keys, local vs prod) | [`references/auth.md`](references/auth.md) | +| Handling errors, retries, rate limits, `CursorAgentError`, `result.status === error` | [`references/error-handling.md`](references/error-handling.md) | +| Consuming streams, picking event types, cancelling, or deciding stream vs wait | [`references/streaming.md`](references/streaming.md) | +| Configuring MCP servers (HTTP, stdio, cloud vs local transport, auth injection) | [`references/mcp.md`](references/mcp.md) | +| Using sub-agents, resume, artifacts, listing/inspecting agents, `Agent.messages` | [`references/advanced.md`](references/advanced.md) | +| Building a specific integration (CI review bot, scheduled triage, chat, webhook) | [`references/patterns.md`](references/patterns.md) | + + +Everything below is the minimum needed for 80% of tasks. + +## The Three Invocation Patterns + +Almost every SDK integration collapses to one of three shapes. Pick the one that fits the job, don't mix them. + +### 1. `Agent.prompt(...)` — one-shot + +```typescript +import { Agent } from "@cursor/sdk"; + +const result = await Agent.prompt("Refactor src/utils.ts for readability", { + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, +}); +console.log(result.status, result.result); +``` + +Use for fire-and-forget scripts, GitHub Actions steps, or any "send this prompt, get a result, exit" flow. No streaming, no follow-ups, no cleanup to remember. If you're reaching for this and then immediately resuming, you wanted pattern 2 instead. + +### 2. 
`Agent.create(...)` + `agent.send(...)` — durable with follow-ups + +```typescript +import { Agent } from "@cursor/sdk"; + +const agent = Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, +}); + +try { + const run = await agent.send("Find the bug in src/auth.ts"); + for await (const event of run.stream()) { + if (event.type === "assistant") { + for (const block of event.message.content) { + if (block.type === "text") process.stdout.write(block.text); + } + } + } + const result = await run.wait(); + + // Follow-up keeps full conversation context. + const run2 = await agent.send("Now write a regression test for it"); + await run2.wait(); +} finally { + await agent[Symbol.asyncDispose](); +} +``` + +Use when you need streaming, multi-turn conversation, or lifecycle operations (cancel, status listener). This is the shape of most non-trivial integrations. + +### 3. `Agent.resume(...)` — pick up an existing agent later + +```typescript +const agent = Agent.resume(previousAgentId, { + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, +}); +const run = await agent.send("Also update the changelog"); +await run.wait(); +``` + +Use across process boundaries: a cron that continues last night's cleanup, a webhook that extends a user's agent, an interactive CLI that reloads conversation state. **Inline `mcpServers` are not persisted across resume** — pass them again on the resume call. + +## Top Five Traps (read these before writing code) + +These trip up almost every new integration. They're all easy to prevent once you know about them. + +### 1. Missing `cloud: { repos }` silently defaults to local + +`AgentOptions` doesn't require `local` or `cloud`; if you omit both, the SDK selects the local runtime. The trap: if you intended a cloud agent and forgot the `cloud:` field, you get a local agent silently — no error, just a local agent ID and a local executor. 
Always pass `cloud: { repos }` explicitly when you want cloud, and pass `local: { cwd }` explicitly for local even though it's the default. Picking the right runtime: see [`references/runtime-choice.md`](references/runtime-choice.md). + +### 2. Two different kinds of failure, one instinct to conflate them + +```typescript +try { + const run = await agent.send(prompt); + const result = await run.wait(); + if (result.status === "error") { + // Agent started but failed mid-run. Inspect transcript, git state, tool outputs. + console.error(`run failed: ${result.id}`); + process.exit(2); + } +} catch (err) { + if (err instanceof CursorAgentError) { + // Didn't start. Auth, config, network. Fix environment, retry. + console.error(`startup failed: ${err.message}, retryable=${err.isRetryable}`); + process.exit(1); + } + throw err; +} +``` + +`CursorAgentError` thrown → the run never executed (auth, config, network). `result.status === "error"` → the agent did work, and that work failed. Different fixes, different exit codes, different observability. Full taxonomy in [`references/error-handling.md`](references/error-handling.md). + +### 3. Forgetting `await agent[Symbol.asyncDispose]()` leaks resources + +The SDK holds handles to local executors, persisted run stores, and cloud API clients. Not disposing means leaked child processes, open databases, and in long-running services, memory growth. Always dispose in a `finally`, or use `Agent.prompt()` (disposes for you), or use the `await using` syntax if your tsconfig targets it: + +```typescript +await using agent = Agent.create({ /* ... */ }); +``` + +### 4. Streaming is optional but `wait()` is (almost) required + +`run.stream()` is how you observe; `run.wait()` is how you get the terminal result. You can skip streaming, but skipping `wait()` means you can't tell whether the run finished, errored, or was cancelled, and you'll leak the run's internal watchers. Always call `wait()`. 
If you don't want live output, just call `wait()` alone. See [`references/streaming.md`](references/streaming.md) for event type reference. + +### 5. Not every `run` operation is supported on every runtime + +`Run` exposes four operations — `stream`, `wait`, `cancel`, `conversation` — and the runtime may or may not support each. Always guard with `run.supports("...")` before calling, rather than assuming: + +```typescript +if (run.supports("cancel")) await run.cancel(); +if (run.supports("conversation")) console.log(await run.conversation()); +``` + +Current gap worth knowing about: detached/re-hydrated runs (you got the handle from `Agent.getRun(...)` after the live event store has closed) may not support `stream()` and may have empty `conversation()`. `run.unsupportedReason(op)` tells you why. Cloud `run.conversation()` IS supported — it accumulates best-effort from the stream. + +## Local vs Cloud, in one sentence each + +- **Local** — runs on the caller's machine against `cwd`, reuses their environment and credentials, good for dev loops and CI that already has a repo checkout. +- **Cloud** — runs on a Cursor-hosted VM against a freshly cloned `repos[].url`, good for long jobs, fire-and-forget automation, and opening real PRs (`autoCreatePR: true`). + +Decision tree, capability differences, and capability gaps (artifacts, cancel, MCP transport): [`references/runtime-choice.md`](references/runtime-choice.md). + +## Auth, minimum viable + +```bash +export CURSOR_API_KEY="cursor_..." # user API key or team service-account key +``` + +The SDK reads `CURSOR_API_KEY` if `apiKey` isn't passed. Both user keys (from [https://cursor.com/dashboard/cloud-agents](https://cursor.com/dashboard/cloud-agents)) and team service-account keys (Team Settings → Service accounts) work for local and cloud runs. 
+ +If you're seeing 401s, the usual suspects are: key pasted with surrounding whitespace, key minted against a different environment, or the key belongs to a user without repo access for a cloud run. Full troubleshooting: [`references/auth.md`](references/auth.md). + +## Model Selection + +```typescript +import { Cursor } from "@cursor/sdk"; + +const models = await Cursor.models.list({ apiKey: process.env.CURSOR_API_KEY! }); +``` + +`composer-2` is the current default for most integrations. `{ id: "auto" }` lets the server pick. Model IDs change; don't hardcode exotic ones without calling `Cursor.models.list()` first to confirm the caller has access. + +Model is **required for local**, **optional for cloud** (the server resolves a default from the caller's account). + +## Production Best Practices + +Apply these to any integration that runs unattended: + +1. **Wrap every `Agent.create` / `Agent.resume` in a try/finally with `[Symbol.asyncDispose]()`** (`Agent.prompt` disposes for you). Non-negotiable. +2. **Distinguish startup failures from run failures** — exit code 1 for `CursorAgentError`, exit code 2 for `result.status === "error"`, exit code 0 only for `finished`. Makes CI failures actually readable. +3. **Log `run.id` and `agent.agentId` immediately after `send()`** before streaming. If the stream hangs, the IDs are what you need to investigate in the dashboard or via `Agent.getRun(...)`. +4. **Respect `error.isRetryable`** — it's the backend telling you the specific failure is safe to retry. Blind retries can cause duplicate cloud runs; respecting the flag doesn't. +5. **Use `local: { settingSources: [] }` (default) unless you need ambient config.** Opting into `"all"` loads project/user/team/MDM settings from the caller's environment, which is rarely what you want from a service. Note: `settingSources` lives under `local`, not at the top level; it has no effect on cloud agents (cloud always honors team/project/plugins). +6. 
**For cloud agents in CI, set `skipReviewerRequest: true`** unless a human should be paged — it suppresses the reviewer-request step and keeps PR notifications quiet. +7. **Always pass `apiKey` explicitly** in shared-infrastructure code instead of relying on the env var. Makes the credential dependency obvious and prevents cross-tenant mistakes. +8. **Prefer `Agent.prompt(...)` for true one-shots** — it disposes for you and is harder to leak. + +Longer version with examples: [`references/patterns.md`](references/patterns.md). + +## Observing a Run You Didn't Launch + +You can inspect any agent/run by ID later: + +```typescript +// Cloud: IDs that start with "bc-" auto-route to the cloud API +const info = await Agent.get("bc-abc123", { apiKey }); +const run = await Agent.getRun(runId, { runtime: "cloud", agentId: "bc-abc123", apiKey }); + +// Local: you need the cwd where the agent was created +const localInfo = await Agent.list({ runtime: "local", cwd: process.cwd() }); +``` + +A cloud `bc-`-prefixed agent ID is **not** a run ID. If you only have a run ID (from a log or a webhook), pass it to `Agent.getRun` with the runtime hint; don't confuse the two. + +## Offering a Canvas + +If the user's integration monitors, lists, or visualizes agents — dashboards of active runs, conversation replays, tool-call timelines — offer a Cursor Canvas to render it. If they accept, defer entirely to the `canvas` skill. + +## What This Skill Doesn't Cover + +- The Cloud Agents REST API (`/v1/agents/*`). If the user needs a non-TS client, the REST API is documented separately in Cursor's Cloud Agents API docs; check there for current capabilities before assuming parity with the SDK. +- `.cursor/hooks.json` hooks. Cloud agents execute them but the SDK doesn't manage them; see Cursor's Hooks docs. +- Private workers / self-hosted cloud. Send users to the Private Workers docs. +- Python / non-TS SDKs. There is no first-party SDK in other languages at time of writing; REST is the portable option. 
+ diff --git a/cursor-sdk/skills/cursor-sdk/references/advanced.md b/cursor-sdk/skills/cursor-sdk/references/advanced.md new file mode 100644 index 0000000..0ae42f2 --- /dev/null +++ b/cursor-sdk/skills/cursor-sdk/references/advanced.md @@ -0,0 +1,238 @@ +# Advanced: Sub-agents, Resume, Artifacts, Inspection + +You have a working `Agent.create` + `send` + `wait` loop. These are the capabilities you reach for next. + +## Sub-agents + +Sub-agents are **cloud-only at v1**. The `agents:` field on `AgentOptions` is wired through `customSubagents` on the cloud create call; the local executor silently drops it. If you pass `agents` alongside `local: { ... }`, nothing happens. Scope sub-agent designs to cloud agents until local parity ships. + +Define named sub-agents that the main agent can spawn via the `Agent` tool: + +```typescript +const agent = Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + cloud: { + repos: [{ url: "https://github.com/your-org/your-repo", startingRef: "main" }], + }, + agents: { + "code-reviewer": { + description: "Expert code reviewer for quality and security.", + prompt: "Review code for bugs, security issues, and proven approaches. Be concrete and cite file:line.", + model: "inherit", + }, + "test-writer": { + description: "Writes tests for code changes.", + prompt: "Write comprehensive unit and integration tests. Use the project's test framework.", + }, + }, +}); +``` + +The key-name (`"code-reviewer"`) is how the main agent refers to the sub-agent. The `description` tells it when to invoke; the `prompt` is the sub-agent's system prompt. + +### Good use cases + +- **Specialized review** — spawn a reviewer sub-agent to audit changes the main agent just made. +- **Parallel research** — the main agent delegates "summarize X", "summarize Y", "summarize Z" to three sub-agents simultaneously. +- **Risk quarantine** — isolate destructive operations in a named sub-agent with a restricted prompt. 
+ +### Bad use cases + +- Replacing normal helper functions. If it doesn't need LLM reasoning, don't make it a sub-agent. +- Deeply nested "agents calling agents calling agents" chains. One level of sub-agents is almost always enough; beyond that you usually want a different architecture. + +### Sub-agent MCP + +Cloud sub-agents reference parent MCP servers by **name**, not inline config. Inline `McpServerConfig` entries are rejected at v1 with `ConfigurationError("cloud custom subagents only support string references in v1.")`. Configure the server on the parent's `mcpServers` map and reference it by key from the sub-agent: + +```typescript +const agent = Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + cloud: { repos: [{ url: "https://github.com/your-org/your-repo", startingRef: "main" }] }, + mcpServers: { + postgres: { + type: "http", + url: "https://mcp.example.com/postgres", + headers: { Authorization: `Bearer ${process.env.PG_RO_TOKEN!}` }, + }, + }, + agents: { + "db-reader": { + description: "Answers read-only questions against the database.", + prompt: "Query with read-only SQL. Never execute writes.", + mcpServers: ["postgres"], + }, + }, +}); +``` + +The parent's `mcpServers` is the truth; sub-agents pick by name from that map. + +## Resuming Agents + +Agent IDs persist. Resume later from any process with the right options: + +```typescript +const agent = Agent.resume(agentId, { + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, + mcpServers: sameInlineMcpYouUsedBefore, // inline MCP is not persisted! +}); +const run = await agent.send("continue where we left off"); +``` + +### What persists + +- Conversation history and agent state. +- Agent-scoped settings that were baked in at creation time server-side. +- Cloud: the whole agent (clone, branch, PR state if any). + +### What does not persist + +- **Inline `mcpServers`** — pass them again on resume. 
+- Local: the `cwd` is identified by path. If the path is gone, resume can't find the local agent. +- **`local.settingSources`** — ambient configuration is reloaded based on current environment. + +### Finding agents to resume + +```typescript +// Local agents under a cwd: +const local = await Agent.list({ runtime: "local", cwd: process.cwd(), limit: 20 }); + +// Cloud agents for the caller: +const cloud = await Agent.list({ runtime: "cloud", apiKey }); + +// Or a specific cloud agent by ID: +const one = await Agent.get("bc-abc123", { apiKey }); +``` + +Both list calls return `{ items, nextCursor }`. Use `nextCursor` for pagination. + +## Inspecting Runs + +```typescript +const runs = await Agent.listRuns(agentId, { runtime: "local", cwd: process.cwd() }); +const run = await Agent.getRun(runs.items[0].id, { runtime: "local", cwd: process.cwd() }); + +// Replay the stream (works when the persisted events are available): +if (run.supports("stream")) { + for await (const event of run.stream()) { /* ... */ } +} +``` + +`run.conversation()` returns accumulated `ConversationTurn[]` — useful for rendering a transcript UI. Live local runs include tool-call details; replayed runs reconstruct from the persisted stream and may be sparser if the originating runtime didn't capture tool args/results. + +### Cloud run IDs vs. agent IDs + +Cloud agent IDs start with `bc-`. Cloud run IDs look like regular UUIDs. **Don't pass a `bc-` ID to `getRun` expecting it to work** — you need the run ID. + +To get a run ID from a cloud `bc-` agent: + +```typescript +const runs = await Agent.listRuns("bc-abc123", { runtime: "cloud", apiKey }); +for (const r of runs.items) console.log(r.id); +``` + +## Persisted Messages + +`run.conversation()` is scoped to one run. 
To get the full persisted conversation across all runs of an agent: + +```typescript +const messages = await Agent.messages.list(agentId, { runtime: "local", cwd: process.cwd() }); +``` + +Messages are a raw, schema-stable shape — user turns, assistant turns, metadata. You'll usually pass them through `extractReadableMessages(...)`-style helpers in your code to render. + +## Artifacts + +On cloud agents, the agent can produce artifact files beyond the git diff — think test results, coverage reports, generated assets: + +```typescript +const artifacts = await agent.listArtifacts(); +for (const a of artifacts) console.log(a.path, a.sizeBytes); + +const buffer = await agent.downloadArtifact(artifacts[0].path); +``` + +Local agents currently return an empty list from `listArtifacts()` and throw from `downloadArtifact()`. Treat artifact flows as cloud-only today. + +## Lifecycle: archive, unarchive, delete + +```typescript +await Agent.archive("bc-abc123", { apiKey }); +await Agent.unarchive("bc-abc123", { apiKey }); +await Agent.delete("bc-abc123", { apiKey }); +``` + +- **Archive** hides the agent from default lists but keeps history (`includeArchived: true` on `Agent.list({ runtime: "cloud" })` to include them). +- **Unarchive** reverses it. +- **Delete** is destructive — no undo. Scope cautiously. + +Missing IDs throw. Don't swallow the error; a missing ID usually means your bookkeeping is wrong. + +## Account and Catalog + +```typescript +import { Cursor } from "@cursor/sdk"; + +const me = await Cursor.me({ apiKey }); // apiKeyName, userEmail, createdAt +const models = await Cursor.models.list({ apiKey }); // available model IDs +const repos = await Cursor.repositories.list({ apiKey }); // GitHub repos the caller has connected +``` + +- `Cursor.models.list()` — call before constructing options if you don't know what's available. Don't hardcode exotic model IDs. +- `Cursor.repositories.list()` — gives you `cloud.repos[].url` entries the caller can actually use. 
If you're building a UI that asks the user to pick a repo, this is your source. +- `Cursor.me()` — confirms the key's identity. Useful in ops tooling. + +All three are cloud-only and require an API key. + +## `agent.reload()` + +Call after you change local settings (`.cursor/*`, MCP config files, hook files) if you want the underlying local executor to pick them up without recreating the agent: + +```typescript +await agent.reload(); +``` + +Doesn't apply to in-flight runs — it affects future `send()` calls. + +## Putting It Together — Long-Lived Service + +```typescript +import { Agent } from "@cursor/sdk"; + +export class MyAgentOrchestrator { + async runOrContinue(agentId: string | undefined, prompt: string) { + const apiKey = process.env.CURSOR_API_KEY!; + const options = { + apiKey, + model: { id: "composer-2" }, + local: { cwd: "/var/app/repo" }, + mcpServers: { + linear: { + type: "http" as const, + url: "https://mcp.linear.app/sse", + headers: { Authorization: `Bearer ${process.env.LINEAR_API_KEY!}` }, + }, + }, + }; + await using agent = agentId + ? Agent.resume(agentId, options) + : Agent.create(options); + + const run = await agent.send(prompt); + const result = await run.wait(); + return { agentId: agent.agentId, runId: run.id, status: result.status }; + } +} +``` + +Key choices: + +- `await using` syntax means you can't forget to dispose. +- MCP config re-passed every call so resume works. +- Persisted `agentId` in the caller's DB; resume on subsequent requests. + diff --git a/cursor-sdk/skills/cursor-sdk/references/auth.md b/cursor-sdk/skills/cursor-sdk/references/auth.md new file mode 100644 index 0000000..5306b62 --- /dev/null +++ b/cursor-sdk/skills/cursor-sdk/references/auth.md @@ -0,0 +1,119 @@ +# Authentication + +The short version: `export CURSOR_API_KEY=cursor_...` and `Agent.create({ apiKey: process.env.CURSOR_API_KEY! })`. Everything else in this page is what you read when that doesn't work. 
+ +## Key types + +The SDK accepts two key kinds; both work for local and cloud. + + +| Key kind | Minted at | When you'd use it | +| ------------------------ | -------------------------------------------------------------------------------------- | ------------------------------------------------------------------------- | +| User API key | [https://cursor.com/dashboard/cloud-agents](https://cursor.com/dashboard/cloud-agents) | Dev tools, personal scripts, running as a specific user | +| Team service-account key | Team Settings → Service accounts | Shared CI, backend services, anywhere a real person shouldn't own the key | + + +Both sit in the same `apiKey` / `CURSOR_API_KEY` slot — no second-class citizen. The token format is the same; you can't tell them apart by inspection. + +## How the SDK finds the key + +Priority order: + +1. `apiKey` passed to the options object (`Agent.create`, `Agent.prompt`, `Agent.resume`, `Agent.get`, etc.) +2. `process.env.CURSOR_API_KEY` + +That's it — there's no config file, no keychain integration in the SDK itself. For shared infrastructure code, **always pass `apiKey` explicitly** rather than relying on the env var, so the credential dependency is obvious at the call site. + +Some cloud-only helpers (`Agent.archive`, `Agent.delete`, `Cursor.me`, `Cursor.models.list`, `Cursor.repositories.list`) accept `apiKey` as a named option too. When they don't receive one, they fall back to `CURSOR_API_KEY`. + +## Minimum viable setup + +```bash +export CURSOR_API_KEY="cursor_..." +``` + +```typescript +import { Agent } from "@cursor/sdk"; + +const agent = Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, +}); +``` + +The non-null assertion (`!`) only satisfies the TypeScript compiler — it is erased at compile time and performs no runtime check. If the env var is missing, the SDK will throw an auth error later and the stack trace won't point at the env var. 
+ +A slightly more polite pattern: + +```typescript +const apiKey = process.env.CURSOR_API_KEY; +if (!apiKey) { + console.error("Missing CURSOR_API_KEY. Mint one at https://cursor.com/dashboard/cloud-agents."); + process.exit(1); +} +``` + +## Symptoms of bad auth + + +| Symptom | Diagnosis | +| -------------------------------------------------------- | ------------------------------------------------------------------------------ | +| `AuthenticationError: ...` on first `send()` | Key missing, expired, or malformed (including whitespace) | +| `ConfigurationError: BAD_USER_API_KEY` | Key is syntactically invalid | +| `AuthenticationError` only on cloud, local works | Key can't reach the cloud agents surface — could be a permissions issue | +| Cloud run errors with `ERROR_GITHUB_NO_USER_CREDENTIALS` | Caller doesn't have a GitHub connection to the target repo | +| Works locally, 401s in CI | Env var isn't making it into the CI job (quoting, secret scoping, scope, etc.) | +| Intermittent 401s | Almost always a key rotation issue or two conflicting `CURSOR_API_KEY` values | + + +### `ERROR_GITHUB_NO_USER_CREDENTIALS` is not a code bug + +Cloud runs clone the target repo using the caller's GitHub credentials. If the user behind `CURSOR_API_KEY` has never connected GitHub in the Cursor dashboard, the cloud agent can't access the repo and the run fails immediately. Fix: caller links GitHub in the dashboard. No code change. + +For service-account keys, the service account needs GitHub access configured just like a user would. + +## Rotating keys + +- Rotate on a schedule for production workloads; don't bake a key into an image. +- When a key leaks, revoke in the dashboard first, then mint a new one. The SDK picks up new env values on next process start. +- Prefer per-environment keys (staging, prod) over one global key so revocations are scoped. + +## Multiple keys in one process + +Totally supported — each `Agent.create` / `Agent.prompt` call is independent. 
Pass `apiKey` explicitly so you don't accidentally fall back to an env var the caller wasn't thinking about: + +```typescript +const userAgent = Agent.create({ apiKey: userKey, /* ... */ }); +const botAgent = Agent.create({ apiKey: botKey, /* ... */ }); +``` + +## Local development + +If you're developing integrations and iterating fast, keep your key in a `.env` file that's gitignored and loaded via something like `dotenv`: + +```typescript +import "dotenv/config"; +// Now process.env.CURSOR_API_KEY is populated. +``` + +Don't commit `.env`. The SDK does not read `.env` itself. + +## CI checklist + +- Put the key in the secrets store, not a workflow file. +- Scope the secret to the specific job/workflow that needs it. +- For GitHub Actions, use repo-level or environment-level secrets (environment gives you approval gates too). +- Print only a key-prefix (first 6 chars) to logs if you need to confirm which key is in use, never the full value. +- Fail fast if the key is missing — don't let the job run for 10 minutes and 401 at the end. + +## Service accounts for production integrations + +For anything that isn't a personal dev script: + +1. Create a team service account in Team Settings. +2. Give it only the permissions it needs. +3. Mint a key for it and store the key in your secrets manager. +4. If the integration spawns cloud agents, link GitHub for the service account (not the operator's personal account). +5. Monitor usage against the service account — per-caller attribution is what you lose if everything is running under one key. + diff --git a/cursor-sdk/skills/cursor-sdk/references/error-handling.md b/cursor-sdk/skills/cursor-sdk/references/error-handling.md new file mode 100644 index 0000000..cc59da7 --- /dev/null +++ b/cursor-sdk/skills/cursor-sdk/references/error-handling.md @@ -0,0 +1,167 @@ +# Error Handling + +The single most common source of integration bugs: treating "agent couldn't start" and "agent did work and that work failed" as the same error. 
They aren't. + +## The Two Failure Axes + +``` + didn't start started, didn't finish cleanly + ──────────── ────────────────────────────── + throws CursorAgentError returns RunResult { status: "error" | "cancelled" } + .isRetryable .id (look it up in the dashboard) + .code / .protoErrorCode .durationMs .git .result +``` + +Always handle both. A try/catch alone won't catch a failed run; checking `result.status` alone won't catch auth failures. + +```typescript +import { Agent, CursorAgentError } from "@cursor/sdk"; + +async function runOnce(): Promise { + await using agent = Agent.create({ /* ... */ }); + try { + const run = await agent.send(prompt); + const result = await run.wait(); + + switch (result.status) { + case "finished": + console.log(`ok: ${result.id}`); + return; + case "cancelled": + console.warn(`cancelled: ${result.id}`); + return; + case "error": + throw new Error(`run ${result.id} failed after executing; inspect run state`); + default: { + const _exhaustive: never = result.status; + throw new Error(`unexpected status: ${_exhaustive}`); + } + } + } catch (err) { + if (err instanceof CursorAgentError) { + console.error(`startup error (${err.constructor.name}): ${err.message}`); + if (err.isRetryable) { + // Backoff-and-retry path + } + throw err; + } + throw err; + } +} +``` + +## `CursorAgentError` Subtypes + +Every SDK-thrown error extends `CursorAgentError`. Check the concrete subclass to decide what to do. + + +| Class | Typical HTTP | What it means | Fix | +| --------------------- | ------------ | --------------------------------------------------- | ---------------------------------------------------------- | +| `AuthenticationError` | 401 | Invalid/expired/missing key, wrong permissions | Fix `CURSOR_API_KEY` (see `[auth.md](auth.md)`) | +| `RateLimitError` | 429 | Hit request or usage cap | Backoff; the error carries `isRetryable` | +| `ConfigurationError` | 400/404 | Bad model id, malformed request, resource not found | Don't retry. 
Fix the call. | +| `NetworkError` | 503/504 | Upstream timeout, transient infra | Retry with jitter if `isRetryable` | +| `UnknownAgentError` | — | Classified neither by proto code nor HTTP code | Log and surface; check `.cause` for the raw `ConnectError` | + + +They all carry: + +- `message` — user-facing description (already stripped of Connect's `[unknown]` prefix) +- `isRetryable` — authoritative from the backend, not a heuristic +- `code` — the underlying Connect/gRPC `Code` when relevant +- `protoErrorCode` — fine-grained backend error code; stable enum values +- `cause` — original `ConnectError` for deep debugging; don't leak it to end users + +### `UnsupportedRunOperationError` + +Distinct base — it's about the SDK, not the backend. Thrown when you call `run.stream()`, `run.wait()`, `run.cancel()`, or `run.conversation()` on a `Run` that doesn't support that operation. Common trigger: `cancel()`/`stream()` on a detached handle obtained from `Agent.getRun(...)` after the live event store closed. (Cloud `conversation()` IS supported — it accumulates best-effort from the stream.) + +Always prefer `run.supports(...)` over `try/catch`: + +```typescript +if (run.supports("cancel")) { + await run.cancel(); +} else { + console.warn(`cancel not supported: ${run.unsupportedReason("cancel")}`); +} +``` + +## Retry Patterns + +**Do retry** (with backoff + jitter): + +- `NetworkError` with `isRetryable === true` +- `RateLimitError` with `isRetryable === true` (rare; usually the backend wants you to wait longer than a tight retry) +- `UnknownAgentError` with `isRetryable === true` — the backend is telling you it was transient + +**Don't retry**: + +- `AuthenticationError` — the key won't get better +- `ConfigurationError` — bad input won't get better +- Anything with `isRetryable === false` — the backend is telling you it's terminal + +Keep retries small (≤3) for agent startup; agents are expensive to re-launch. 
If the first attempt fails with `RateLimitError` and `isRetryable === true`, back off at least 30 seconds. + +```typescript +import { Agent, CursorAgentError, RateLimitError, NetworkError } from "@cursor/sdk"; + +async function createWithRetry(options: Parameters<typeof Agent.create>[0]) { + const maxAttempts = 3; + for (let attempt = 1; attempt <= maxAttempts; attempt++) { + try { + return Agent.create(options); + } catch (err) { + const retryable = + err instanceof CursorAgentError && + err.isRetryable && + (err instanceof NetworkError || err instanceof RateLimitError); + if (!retryable || attempt === maxAttempts) throw err; + const backoffMs = 2 ** attempt * 1000 + Math.random() * 500; + await new Promise(r => setTimeout(r, backoffMs)); + } + } + throw new Error("unreachable"); +} +``` + +`Agent.create` is lazy — it doesn't hit the backend until `send()`. Most "startup" errors surface there. Wrap `agent.send(...)` with retries, not just `Agent.create(...)`. + +## `RunResult.status === "error"` — What To Do + +The run executed at least partially, hit something the agent couldn't recover from, and reported error. There's no stack trace in `result`; the signal is: + +- `result.id` — the run ID. Fetch it with `Agent.getRun(result.id, { runtime: "cloud", agentId, apiKey })` (cloud) or `Agent.getRun(result.id, { runtime: "local", cwd })` (local), then read `run.conversation()` to see what the agent tried. +- `result.durationMs` — if 0 or tiny, the failure was very early (unlikely to be a runtime issue). +- `result.git` — on cloud, tells you whether a branch was created before failing. +- `result.model` — confirms which model ran; useful when you're testing multiple. + +You usually *don't* retry `status: "error"` automatically. The agent already burned tokens and committed to a direction; a blind retry is likely to do the same thing. Design for human triage: log the ID, surface a dashboard link, and escalate. 
+ +Retry is defensible when: + +- You're doing bulk/fan-out work and a small error rate is acceptable. +- The prompt is purely read-only and idempotent. +- You've also inspected the conversation and know the failure was environmental (e.g., a flaky MCP server). + +## `status: "cancelled"` — What To Do + +Runs report this after a successful `run.cancel()` (local or cloud). Treat cancellation as non-fatal: log, clean up, move on. For cloud runs that were cancelled server-side (e.g., via the dashboard or a sibling caller), you'll also see `"cancelled"` when you eventually `wait()`. + +## Debugging in Production + +Always log at least: + +- `agent.agentId` — right after create/resume, before any `send()` +- `run.id` — right after `send()`, before the stream +- `result.status`, `result.durationMs`, `result.git` — after `wait()` +- On error: the full `err.message`, `err.constructor.name`, `err.isRetryable`, and (for internal logs only) `err.protoErrorCode` + +Those five are enough to correlate anything a user reports with a specific run in the Cursor dashboard. + +## Don't + +- **Don't `process.exit(1)` on every error**. A `RateLimitError` with `isRetryable: true` wants a backoff loop, not an exit. +- **Don't log `err.cause`** to end users — it's the raw `ConnectError` with internal fields. Log `err.message` to humans, `err.cause` to internal observability only. +- **Don't swallow `CursorAgentError`** silently into a generic `console.warn`. The subclass is the signal. +- **Don't retry `AuthenticationError` more than once** even if `isRetryable: true` — it almost always means the key is still bad. + diff --git a/cursor-sdk/skills/cursor-sdk/references/mcp.md b/cursor-sdk/skills/cursor-sdk/references/mcp.md new file mode 100644 index 0000000..775fa49 --- /dev/null +++ b/cursor-sdk/skills/cursor-sdk/references/mcp.md @@ -0,0 +1,189 @@ +# MCP Servers + +MCP (Model Context Protocol) servers give the agent extra tools. 
The SDK supports them on both local and cloud runtimes, but the transport semantics and credential-handling rules differ, and getting them wrong is usually silent ("why doesn't the agent have my Linear tools?"). + +## Two transports, three deployment shapes + +```typescript +type McpServerConfig = + | { type?: "stdio"; command: string; args?: string[]; env?: Record; cwd?: string } + | { type?: "http" | "sse"; url: string; headers?: Record; auth?: { CLIENT_ID: string; CLIENT_SECRET?: string; scopes?: string[] } }; +``` + +Three patterns you'll actually use: + +1. **Local stdio** — spawn a subprocess on the caller's machine. Default for most `@modelcontextprotocol/server-*` packages. +2. **Local HTTP / remote HTTP** — a URL the SDK (local) or the backend (cloud) calls on behalf of the agent. +3. **Cloud stdio** — subprocess spawned **inside the cloud VM**, not on the caller's machine. + +The shape looks similar in both runtimes; what changes is the execution location. One concrete delta worth knowing: cloud stdio servers reject `cwd` — the VM controls the working directory, and `cloud-mcp-utils.ts` throws `ConfigurationError("Cloud MCP server cannot include cwd.")` if you try to set it. Omit `cwd` for cloud stdio. + +## Local Runtime — MCP Config + +```typescript +const agent = Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, + mcpServers: { + filesystem: { + type: "stdio", + command: "npx", + args: ["-y", "@modelcontextprotocol/server-filesystem", process.cwd()], + cwd: process.cwd(), + env: { NODE_OPTIONS: "--max-old-space-size=4096" }, + }, + docs: { + type: "http", + url: "https://example.com/mcp", + headers: { Authorization: `Bearer ${process.env.DOCS_TOKEN!}` }, + }, + }, +}); +``` + +- Stdio servers run as child processes of your Node process. Make sure `command` is on PATH or use an absolute path. Dispose the agent cleanly to reap these. 
+- HTTP servers are called directly from the local SDK. `headers` go on every request. +- `auth: { CLIENT_ID, CLIENT_SECRET?, scopes? }` triggers OAuth flow — for first-party integrations that issue proper OAuth tokens. Most self-managed MCP servers just need `headers`. + +## Cloud Runtime — MCP Config + +```typescript +const agent = Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + cloud: { + repos: [{ url: "https://github.com/your-org/your-repo", startingRef: "main" }], + }, + mcpServers: { + linear: { + type: "http", + url: "https://mcp.linear.app/sse", + headers: { Authorization: `Bearer ${process.env.LINEAR_API_KEY!}` }, + }, + github: { + type: "stdio", + command: "npx", + args: ["-y", "@modelcontextprotocol/server-github"], + env: { GITHUB_TOKEN: process.env.GITHUB_TOKEN! }, + }, + }, +}); +``` + +Critical differences from local: + +- **HTTP `headers` and `auth` are proxied by the Cursor backend.** Sensitive header values are redacted server-side and do not reach the cloud VM. Safe place for OAuth tokens, API keys, etc. +- **Stdio `env` values are injected into the cloud VM.** Treat them like any production secret — they will be visible to processes running inside the VM. Don't ship end-user credentials this way. +- **`command`/`args` must resolve inside the cloud VM**, which has a standard Linux image. `npx`, `node`, and common binaries work; expect other tools to require a `command` that resolves in the VM or a container-provided binary. + +Dashboard-configured MCP servers are also respected on cloud. Users configure them once at [https://cursor.com/agents](https://cursor.com/agents); inline config on `Agent.create` stacks on top. + +## Persistence Across `Agent.resume(...)` + +**Inline `mcpServers` are not persisted.** If you resume an agent and expect the same MCP tool access, pass the config again: + +```typescript +const baseMcp = { /* ... 
*/ }; +const agent = Agent.resume(agentId, { + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, + mcpServers: baseMcp, +}); +``` + +Dashboard-configured servers do persist (they're keyed to the user, not the agent), so the common mistake is to switch to inline config for convenience and then wonder why resume loses it. + +## Settings-sourced MCP servers + +By default, local SDK agents do **not** load ambient user/team/project MCP config. Opt in via `settingSources`: + +```typescript +const agent = Agent.create({ + apiKey, + model: { id: "composer-2" }, + local: { + cwd: process.cwd(), + settingSources: ["project", "user"], // or "all" + }, +}); +``` + +`settingSources` lives **inside `local`** — putting it at the top level will fail TypeScript and silently no-op in JavaScript. It has no effect on cloud agents; cloud always honors `project` / `team` / `plugins`. + +Valid sources: `"project"`, `"user"`, `"team"`, `"mdm"`, `"plugins"`, or `"all"` to include everything. Use the narrowest set that gives you what you need; `"all"` can pull in team policies and plugins that surprise production code. + +Inline `mcpServers` always win: they're explicit input, not ambient configuration. You can combine inline and settings-sourced servers. + +## Choosing Between HTTP and stdio + +For a remote service (Linear, GitHub, Jira, Figma, your own internal tool): + +- **Prefer HTTP** when the service has a stable MCP endpoint. Cursor's backend handles auth proxying on cloud; you don't have to ship secrets into the VM. +- **Use stdio** when your integration is a local-first helper (filesystem, a CLI, a development tool) or when you need process-level isolation. On cloud, stdio servers run inside the Cursor VM — fine for stateless helpers, dangerous for anything that holds long-lived credentials. + +For first-party MCP servers that support OAuth, use `auth: { CLIENT_ID, CLIENT_SECRET?, scopes? }` instead of hardcoding `Authorization` headers. 
+ +## Reloading MCP after file changes + +If you change `.cursor/mcp.json` or other setting sources on local disk while an agent is running and want the agent to pick it up without recreating: + +```typescript +await agent.reload(); +``` + +This only affects future `send()` calls; in-flight runs use the config they started with. + +## Debugging MCP failures + +MCP setup problems tend to show up as "the agent doesn't have the tool" rather than a thrown error. Checklist: + +1. **Did the server actually register?** Watch the `system` event at stream start — it sometimes includes the tool catalog. If not, the server isn't wired up. +2. **Stdio on cloud** — is the `command` available in the cloud VM image? `npx` is. Your custom `/usr/local/bin/…` isn't. +3. **HTTP headers missing?** On cloud, sensitive headers are redacted but still forwarded; missing headers mean the SDK config didn't include them. Double-check the spelling (`Authorization`, not `authorization` in some servers). +4. **Resume without re-passing MCP?** You lost the inline servers. Re-pass them on the resume options. +5. **`settingSources` not set?** The agent isn't loading your project's `.cursor/mcp.json`. Add `"project"`. + +## Copy-paste starters + +### Local filesystem helper + +```typescript +mcpServers: { + filesystem: { + type: "stdio", + command: "npx", + args: ["-y", "@modelcontextprotocol/server-filesystem", process.cwd()], + }, +}, +``` + +### Cloud + Linear (HTTP, cloud-proxied auth) + +```typescript +mcpServers: { + linear: { + type: "http", + url: "https://mcp.linear.app/sse", + headers: { Authorization: `Bearer ${process.env.LINEAR_API_KEY!}` }, + }, +}, +``` + +### Cloud + GitHub (stdio, secret injected into the VM) + +```typescript +mcpServers: { + github: { + type: "stdio", + command: "npx", + args: ["-y", "@modelcontextprotocol/server-github"], + env: { GITHUB_TOKEN: process.env.GITHUB_TOKEN! 
}, + }, +}, +``` + +### Cursor dashboard-configured server (no inline config needed) + +Users with a dashboard-configured server get it for free as long as the cloud agent is allowed to use team MCP. No inline config required; stack it only if you also want an additional server. \ No newline at end of file diff --git a/cursor-sdk/skills/cursor-sdk/references/patterns.md b/cursor-sdk/skills/cursor-sdk/references/patterns.md new file mode 100644 index 0000000..484b153 --- /dev/null +++ b/cursor-sdk/skills/cursor-sdk/references/patterns.md @@ -0,0 +1,340 @@ +# Integration Patterns + +Five starting templates for the shapes people actually build. Copy one, delete what you don't need. + +Each pattern applies the [error-handling](error-handling.md) and [streaming](streaming.md) best practices from the rest of this skill — don't strip them when adapting. + +--- + +## 1. GitHub Action: automated code review on PRs + +Goal: when a PR opens, run a cloud agent against it, post review comments. + +```typescript +import { Agent, CursorAgentError } from "@cursor/sdk"; + +async function main() { + const { + PR_URL, + REPO_URL, + HEAD_REF, + BASE_REF, + CURSOR_API_KEY, + GITHUB_TOKEN, + } = process.env; + + if (!CURSOR_API_KEY || !REPO_URL || !HEAD_REF) { + console.error("Missing required env: CURSOR_API_KEY, REPO_URL, HEAD_REF"); + process.exit(1); + } + + await using agent = Agent.create({ + apiKey: CURSOR_API_KEY, + model: { id: "composer-2" }, + cloud: { + repos: [{ url: REPO_URL, startingRef: HEAD_REF }], + workOnCurrentBranch: true, + skipReviewerRequest: true, // don't re-page reviewers + }, + mcpServers: GITHUB_TOKEN + ? { + github: { + type: "stdio", + command: "npx", + args: ["-y", "@modelcontextprotocol/server-github"], + env: { GITHUB_TOKEN }, + }, + } + : undefined, + }); + + const prompt = `Review the changes on ${HEAD_REF} vs ${BASE_REF} for ${PR_URL}. +Focus on: correctness, security, readability. Post GitHub review comments inline +for concrete issues. 
No praise-only comments. If nothing to flag, say so.`; + + try { + const run = await agent.send(prompt); + console.log(`[review] agent=${agent.agentId} run=${run.id}`); + + for await (const event of run.stream()) { + if (event.type === "status") console.log(`[review] ${event.status}`); + if (event.type === "tool_call" && event.status !== "running") { + console.log(`[review] tool: ${event.name} -> ${event.status}`); + } + } + + const result = await run.wait(); + if (result.status !== "finished") { + console.error(`[review] run ${result.id} ended as ${result.status}`); + process.exit(2); + } + console.log(`[review] done: ${result.durationMs}ms`); + } catch (err) { + if (err instanceof CursorAgentError) { + console.error(`[review] startup failed: ${err.message}`); + process.exit(err.isRetryable ? 75 : 1); // EX_TEMPFAIL for transient + } + throw err; + } +} + +main(); +``` + +Why this shape: + +- Cloud runtime (needs to post GitHub comments; works independently of the runner). +- `skipReviewerRequest: true` keeps the action quiet in CI. +- Exit codes: `0` finished, `1` permanent startup failure, `2` run ended with a non-`finished` status (`error` or `cancelled`), `75` transient retryable failure. + +--- + +## 2. Scheduled triage: cron-driven cloud runs with resume + +Goal: every morning, resume yesterday's triage agent and ask it to triage today's new Linear tickets. + +```typescript +import { Agent, CursorAgentError } from "@cursor/sdk"; +import { readFile, writeFile } from "node:fs/promises"; + +const STATE_PATH = "/var/lib/triage/state.json"; + +const mcpServers = { + linear: { + type: "http" as const, + url: "https://mcp.linear.app/sse", + headers: { Authorization: `Bearer ${process.env.LINEAR_API_KEY!}` }, + }, +}; + +async function main() { + const state = await readState(); + + await using agent = state.agentId + ? 
Agent.resume(state.agentId, { + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + cloud: { repos: [{ url: process.env.REPO_URL!, startingRef: "main" }] }, + mcpServers, // must re-pass on resume + }) + : Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + cloud: { repos: [{ url: process.env.REPO_URL!, startingRef: "main" }] }, + mcpServers, + }); + + console.log(`[triage] agent=${agent.agentId}`); + + try { + const run = await agent.send( + "Triage new Linear tickets opened in the last 24h. Label, assign, comment with next steps." + ); + const result = await run.wait(); + if (result.status === "error") { + console.error(`[triage] run ${result.id} errored`); + } + await writeState({ agentId: agent.agentId, lastRunId: result.id }); + } catch (err) { + if (err instanceof CursorAgentError && err.isRetryable) { + console.error(`[triage] transient: ${err.message}, will retry next tick`); + return; + } + throw err; + } +} + +async function readState(): Promise<{ agentId?: string; lastRunId?: string }> { + try { return JSON.parse(await readFile(STATE_PATH, "utf-8")); } + catch { return {}; } +} +async function writeState(s: { agentId: string; lastRunId: string }) { + await writeFile(STATE_PATH, JSON.stringify(s)); +} + +main(); +``` + +Why this shape: + +- Persisted `agentId` across cron invocations keeps conversation memory (e.g., "remember which tickets we already triaged yesterday"). +- MCP re-passed on every resume. +- Graceful on retryable errors: skip this tick, try next. + +--- + +## 3. One-shot analysis script + +Goal: dev runs a command against a local repo and gets a written analysis. 
+ +```typescript +#!/usr/bin/env node +import { Agent } from "@cursor/sdk"; + +const prompt = process.argv.slice(2).join(" ").trim(); +if (!prompt) { + console.error("Usage: analyze.ts "); + process.exit(1); +} + +const result = await Agent.prompt(prompt, { + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, +}); + +console.log(result.result ?? "(no output)"); +process.exit(result.status === "finished" ? 0 : 2); +``` + +Why this shape: + +- `Agent.prompt` disposes for you. Perfect for throwaway CLIs. +- No streaming — it's a one-shot. +- Exit code carries status; use in shell pipelines (`&&`, `|| fallback`). + +--- + +## 4. Backend service: user-scoped agents behind an HTTP API + +Goal: your backend service exposes an endpoint that runs a Cursor agent on a user's behalf. Each user has their own durable agent. + +```typescript +import express from "express"; +import { Agent, CursorAgentError } from "@cursor/sdk"; + +const app = express(); +app.use(express.json()); + +// Pretend this lives in your DB +const userAgents = new Map(); + +app.post("/agents/:userId/send", async (req, res) => { + const { userId } = req.params; + const { prompt } = req.body; + const apiKey = process.env.CURSOR_SERVICE_ACCOUNT_KEY!; + + const existing = userAgents.get(userId); + const options = { + apiKey, + model: { id: "composer-2" as const }, + cloud: { repos: [{ url: "https://github.com/your-org/workspace", startingRef: "main" }] }, + }; + + try { + await using agent = existing + ? Agent.resume(existing, options) + : Agent.create(options); + + const run = await agent.send(prompt); + const result = await run.wait(); + + userAgents.set(userId, agent.agentId); + res.json({ + agentId: agent.agentId, + runId: result.id, + status: result.status, + durationMs: result.durationMs, + }); + } catch (err) { + if (err instanceof CursorAgentError) { + res.status(err.code === undefined ? 
500 : 502).json({ + error: err.constructor.name, + message: err.message, + retryable: err.isRetryable, + }); + return; + } + res.status(500).json({ error: "internal" }); + } +}); + +app.listen(3000); +``` + +Why this shape: + +- Service-account key, not user keys — this is shared infrastructure. +- `await using` per request; no lingering agent handles between requests. +- `userAgents` is a stand-in for your database; persist `agentId` per user for resume. +- Error surface passes `isRetryable` through so callers can back off intelligently. + +Don't do this if you need response streaming to the client — switch to `run.stream()` into a server-sent-events endpoint. + +--- + +## 5. Fan-out: run an agent against many repos in parallel + +Goal: spin off a cloud agent per repo in a list, collect results. + +```typescript +import { Agent, CursorAgentError } from "@cursor/sdk"; + +async function dispatchOne(repoUrl: string, apiKey: string, prompt: string) { + await using agent = Agent.create({ + apiKey, + model: { id: "composer-2" }, + cloud: { + repos: [{ url: repoUrl, startingRef: "main" }], + autoCreatePR: false, + skipReviewerRequest: true, + }, + }); + try { + const run = await agent.send(prompt); + const result = await run.wait(); + return { repoUrl, agentId: agent.agentId, status: result.status, runId: result.id }; + } catch (err) { + if (err instanceof CursorAgentError) { + return { repoUrl, error: err.constructor.name, message: err.message }; + } + throw err; + } +} + +const repos = [ + "https://github.com/your-org/service-a", + "https://github.com/your-org/service-b", + "https://github.com/your-org/service-c", +]; + +const prompt = "Audit Dockerfile for outdated base images. 
Propose an update."; +const apiKey = process.env.CURSOR_API_KEY!; + +const results = await Promise.allSettled(repos.map(r => dispatchOne(r, apiKey, prompt))); +console.log(JSON.stringify(results, null, 2)); +``` + +Why this shape: + +- `Promise.allSettled` — one repo failing doesn't torpedo the others. +- Cloud runtime because cloud agents are actually independent VMs; local fan-out would serialize through the caller's machine. +- Per-agent `await using` so each completes cleanup even when others are still running. +- `autoCreatePR: false` because we want to review results first; run a follow-up to open PRs once you've picked which ones pass the audit. + +**Rate-limit awareness**: fanning out 100 agents at once will hit backend limits. For large N, batch: + +```typescript +async function runInBatches<T, R>(items: T[], size: number, fn: (item: T) => Promise<R>) { + const results: R[] = []; + for (let i = 0; i < items.length; i += size) { + const batch = items.slice(i, i + size); + results.push(...await Promise.all(batch.map(fn))); + } + return results; +} +``` + +--- + +## Cross-cutting best practices + +Applied to all five patterns: + +- **Log `agent.agentId` and `run.id` before the stream.** Every failure investigation starts from those two IDs. +- **Distinguish exit/response codes by failure type.** Your ops team should be able to tell "couldn't authenticate" from "agent did work and it went wrong" at a glance. +- **Respect `isRetryable`.** Back off on transient, don't retry on terminal. Blind retries on a failed cloud run spawn duplicate PRs. +- **`await using` or explicit `finally`.** Every `Agent.create` / `Agent.resume` needs an unambiguous disposal path. +- **Pass `apiKey` explicitly.** Don't rely on ambient env in shared infrastructure code. +- **Don't commit `CURSOR_API_KEY`.** Don't log it. Prefix-only (`cursor_01ab...`) when you need to confirm which key is in use. 
+ diff --git a/cursor-sdk/skills/cursor-sdk/references/runtime-choice.md b/cursor-sdk/skills/cursor-sdk/references/runtime-choice.md new file mode 100644 index 0000000..c6117d4 --- /dev/null +++ b/cursor-sdk/skills/cursor-sdk/references/runtime-choice.md @@ -0,0 +1,111 @@ +# Runtime Choice: Local vs Cloud + +The SDK exposes one surface on top of two very different runtimes. Picking the wrong one isn't fatal but will burn hours. Pick deliberately. + +## Decision Tree + +Start here: + +1. **Does the agent need to open a PR on GitHub?** → Cloud. Local agents modify files in `cwd`; they don't branch or push. +2. **Will the agent run longer than the caller's process can stay alive?** (cron that fires a fire-and-forget job, webhook that spawns overnight work) → Cloud. The agent outlives your script. +3. **Does the agent need compute, isolation, or credentials the caller's machine doesn't have?** (sandboxed eval, controlled env vars, pinned runtime) → Cloud. +4. **Is this a dev-loop script, CI step that already checked out the repo, or a CLI against the user's current project?** → Local. The repo is already on disk; cloud would re-clone it for no reason. +5. **Does the user want to run without a network call to GitHub?** (air-gapped, non-GitHub repo, experiments on uncommitted code) → Local. Cloud requires a GitHub repo URL. + +If two points pull opposite ways, Cloud is usually the safer pick for production integrations and Local for dev tooling. 
+ +## Capability Matrix + + +| Capability | Local | Cloud (Cursor-hosted) | +| ---------------------------------- | ------------------------------------- | ------------------------------------------------------------- | +| Opens real PRs | No | Yes (`cloud.autoCreatePR: true`) | +| Works on uncommitted local changes | Yes | No — clones from `startingRef` | +| Outlives caller process | No | Yes — resumable by `agentId` | +| Cancellable mid-run | Yes | Yes (server-side; check `run.supports("cancel")` defensively) | +| Artifact download | Not implemented yet | Yes | +| MCP stdio transport | Yes | Yes (command runs inside the cloud VM) | +| MCP HTTP transport | Yes | Yes | +| Ambient Cursor settings | Opt-in via `settingSources` | Always enterprise/team hooks respected | +| Requires GitHub repo | No | Yes (`cloud.repos[].url`) | +| Requires API key | For remote model calls (most prompts) | Always | + + +## Local Runtime — How It Actually Works + +```typescript +const agent = Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + local: { cwd: "/absolute/or/relative/path/to/repo" }, +}); +``` + +- `cwd` is where the agent reads and writes files. The type accepts a string array, but the local executor currently uses only the first entry (`platform.ts`'s `getCwd` returns `cwd[0]`); pass a single path until multi-root ships. +- The agent spawns in-process helpers (tool execution, shell runner, MCP stdio processes). Dispose cleanly to reap them. +- Persisted state lives under `cwd`'s Cursor data directory. `Agent.list({ runtime: "local", cwd })` surfaces previously-created agents there. +- Ambient settings (project rules, team policies, team-configured MCP servers) are **not** loaded by default. Pass `settingSources` **inside `local`** (e.g. `local: { cwd, settingSources: ["project"] }`, or `"all"` for everything) to opt in. +- Local runs execute tools on the caller's machine with the caller's permissions. 
Treat the agent like you'd treat `rm -rf` — scoped `cwd`, no secrets in env vars you don't want exposed. + +When to prefer local: + +- CLI tooling, editor integrations, dev scripts. +- CI steps where the repo is already checked out and you want to inspect the tree directly. +- Fast iteration: no network clone, no PR, no reviewer notification. + +## Cloud Runtime — How It Actually Works + +```typescript +const agent = Agent.create({ + apiKey: process.env.CURSOR_API_KEY!, + model: { id: "composer-2" }, + cloud: { + repos: [{ url: "https://github.com/your-org/your-repo", startingRef: "main" }], + autoCreatePR: true, + skipReviewerRequest: true, // Keep CI quiet; flip to false for review-worthy changes + }, +}); +``` + +- Cursor provisions a VM, clones `repos[].url` at `startingRef`, runs the agent, pushes a branch, and (if `autoCreatePR`) opens a PR. +- Agent IDs are prefixed `bc-` (background composer). SDK helpers (`Agent.get`, `Agent.archive`, etc.) auto-route on that prefix. +- The caller (the user behind `CURSOR_API_KEY`) must have a GitHub connection to the target repo. If not, the cloud side returns `ERROR_GITHUB_NO_USER_CREDENTIALS` — it's an environment setup issue, not a code bug. +- `run.cancel()` is supported on cloud (server-side cancel); still guard with `run.supports("cancel")` for defensive portability. +- Set `workOnCurrentBranch: true` only when you want the agent to push to an existing branch — rare, and usually means you're trying to emulate local; use local instead. + +When to prefer cloud: + +- Anything that opens a PR for a human to review. +- Scheduled/automated work that shouldn't block a local process. +- Parallel fan-out across many repos or branches. +- Running against a repo the caller doesn't have checked out. + +## Common "I meant the other one" Symptoms + +- **"Agent created but nothing happened on GitHub"** — you passed `local:` when you meant `cloud:`. Local doesn't push. 
+- **"Cloud agent can't see my uncommitted changes"** — by design. Commit or use local. +- **"Cloud agent said it can't find my GitHub repo"** — the caller's Cursor account doesn't have a GitHub connection for that repo. Not a code bug; sort it in the dashboard. +- **"`run.cancel()` throws on my run"** — usually a detached run handle (`Agent.getRun(...)` on a run whose live channel is gone). Guard with `run.supports("cancel")` before calling. +- **"I tried to reuse an agent across machines"** — cloud agents resume anywhere (`Agent.resume(bcId, { ... })`). Local agents are scoped to their `cwd`'s data directory; resume from another machine gives you a fresh agent. + +## Hybrid: running a local and a cloud agent from the same script + +Perfectly fine, common pattern (e.g., local inspection + cloud PR). Each agent is independent; dispose both. + +```typescript +const localAgent = Agent.create({ /* ... local */ }); +const cloudAgent = Agent.create({ /* ... cloud */ }); +try { + const summary = await Agent.prompt("Summarize the diff on HEAD", { + apiKey, + model: { id: "composer-2" }, + local: { cwd: process.cwd() }, + }); + const cloudRun = await cloudAgent.send(`Follow up on: ${summary.result}`); + await cloudRun.wait(); +} finally { + await localAgent[Symbol.asyncDispose](); + await cloudAgent[Symbol.asyncDispose](); +} +``` + diff --git a/cursor-sdk/skills/cursor-sdk/references/streaming.md b/cursor-sdk/skills/cursor-sdk/references/streaming.md new file mode 100644 index 0000000..b9f2eee --- /dev/null +++ b/cursor-sdk/skills/cursor-sdk/references/streaming.md @@ -0,0 +1,207 @@ +# Streaming and Run Lifecycle + +`run.stream()` is an async generator of `SDKMessage` events. Same event shapes for local and cloud runtimes — write one consumer and it works everywhere. + +## When to stream vs. just `wait()` + + +| Situation | Stream? | `wait()`? 
| +| ----------------------------------------------------- | --------------- | --------- | +| Rendering live output to a user (CLI, chat, web UI) | Yes | Yes | +| Fire-and-forget script that just needs success/fail | No | Yes | +| CI step that wants to log tool calls for debugging | Yes, to stderr | Yes | +| Observability: recording every event for later replay | Yes, persist it | Yes | +| Polling another run you didn't launch | Stream is fine | Yes | + + +You almost always want `wait()`. You sometimes don't want `stream()`. There is no "stream without wait" pattern that's correct — the stream tells you what happened, `wait()` tells you whether it succeeded. + +## The Canonical Consumer + +```typescript +for await (const event of run.stream()) { + switch (event.type) { + case "assistant": + for (const block of event.message.content) { + if (block.type === "text") process.stdout.write(block.text); + // block.type === "tool_use" means the assistant announced a tool call; + // the actual execution will follow via tool_call events. + } + break; + case "thinking": + // Reasoning content. Usually hidden from end users, kept for logs. + process.stderr.write(`[thinking] ${event.text}\n`); + break; + case "tool_call": + console.error(`[tool] ${event.name} ${event.status} (${event.call_id})`); + if (event.args !== undefined) console.error(` args: ${JSON.stringify(event.args)}`); + if (event.result !== undefined) console.error(` result: ${JSON.stringify(event.result)}`); + break; + case "status": + console.error(`[status] ${event.status}`); + break; + case "task": + if (event.text) console.error(`[task] ${event.text}`); + break; + case "user": + // Echo of the prompt. Usually ignorable. + break; + case "system": + // Init metadata (model, tool list). Useful for logs. + break; + case "request": + // Request tracking. Log event.request_id for correlation. + break; + } +} + +const result = await run.wait(); +``` + +## Event Reference + +Every event has `agent_id` and `run_id`. 
The `type` discriminates everything else. + +### `"assistant"` + +Model text or tool-use announcements. + +```typescript +{ + type: "assistant", + message: { role: "assistant", content: Array<TextBlock | ToolUseBlock> } +} +``` + +- `TextBlock` = `{ type: "text", text: string }` — render this. +- `ToolUseBlock` = `{ type: "tool_use", id, name, input }` — the assistant is asking to call a tool. You don't need to act on it; the runtime will execute and emit `tool_call` events. Useful for UIs that want to show "calling `grep`…" the moment the LLM asks. + +### `"thinking"` + +Reasoning content. Keep it out of primary UI (users don't need it), keep it in logs (it's invaluable when debugging). + +```typescript +{ type: "thinking", text: string, thinking_duration_ms?: number } +``` + +### `"tool_call"` + +Actual tool execution lifecycle. + +```typescript +{ + type: "tool_call", + call_id: string, + name: string, + status: "running" | "completed" | "error", + args?: unknown, + result?: unknown, + truncated?: { args?: boolean; result?: boolean } +} +``` + +Emitted once with `status: "running"` (args available, result undefined), then again with `status: "completed"` or `"error"` (result available). `truncated` flags mean the payload was trimmed server-side — don't try to parse it fully. + +### `"status"` + +Run lifecycle transitions. Matches `SDKStatusMessage`: + + +| `status` value | Means | +| -------------- | --------------------------------------- | +| `"CREATING"` | Cloud run is being set up (clone, boot) | +| `"RUNNING"` | Actively executing | +| `"FINISHED"` | Completed successfully | +| `"ERROR"` | Run failed mid-flight | +| `"CANCELLED"` | Run was cancelled | +| `"EXPIRED"` | Run aged out | + + +**Don't treat the `FINISHED` status event as "I can skip `wait()`"** — it's a heads-up, not a terminal result. `wait()` returns a `RunResult` with usage/duration/git info you can't get from the stream. + +### `"task"` + +Higher-level task status messages (e.g., "Planning", "Editing files"). 
Optional; useful for summarized progress UI. + +### `"user"` + +Echo of the user prompt at the start of the run. Usually ignored by consumers. + +### `"system"` + +Init metadata: model actually used, tool catalog. Good to log once at stream start. + +### `"request"` + +Request-ID tracking for correlation with server-side observability. Log `event.request_id` if you have an internal tracing system. + +## Callbacks vs. the Stream + +`agent.send(...)` also takes callbacks: + +```typescript +await agent.send(prompt, { + onDelta: ({ update }) => { /* raw executor delta */ }, + onStep: ({ step }) => { /* batched step after text/thinking/tool settle */ }, +}); +``` + +- `onDelta` fires on every raw executor delta — much finer grain than the `SDKMessage` stream. Useful for local UIs that want sub-block updates. Rare in integrations. +- `onStep` fires when a logical step completes (text + thinking + tools bundled). Similar to walking `assistant`+`tool_call` from the stream but pre-assembled. + +Both callbacks are awaited before the next update is pipelined — you can apply backpressure by returning a Promise. Don't put slow I/O in `onDelta` without care; it can stall the run. + +Prefer `run.stream()` for most consumers. Reach for `onDelta` / `onStep` only when you're building a local UI and need the finer shape. + +## Cancellation + +```typescript +if (run.supports("cancel")) { + setTimeout(() => run.cancel(), 30_000); +} +``` + +`run.cancel()` is supported on both local and cloud runs. For cloud it POSTs to the server's cancel endpoint and reconciles local status from the server's authoritative response. Guard with `run.supports("cancel")` anyway — detached/replayed run handles (`Agent.getRun(...)`) may not have a live cancellation channel, and the guard is the right defensive posture. + +After cancel, continue consuming the stream until it ends; you'll see a terminal `status` event and `run.wait()` will resolve with `status: "cancelled"`. 
+ +## Status Listener (stream-free observation) + +If you just want to know when the run transitions and don't care about content: + +```typescript +const unsubscribe = run.onDidChangeStatus(status => { + console.error(`[status-listener] ${status}`); +}); + +await run.wait(); +unsubscribe(); +``` + +This doesn't require `stream()` and fires for every transition. Useful for a progress UI that doesn't render agent output. + +## Observing a Run You Didn't Launch + +```typescript +const existing = await Agent.getRun(runId, { runtime: "cloud", agentId: "bc-abc123", apiKey }); +if (existing.supports("stream")) { + for await (const event of existing.stream()) { + // same loop + } +} +const result = await existing.wait(); +``` + +Replayed streams reconstruct events from persisted state. Tool payloads (args/result) are present when the original run captured them; older runs may be sparser. + +## Backpressure and Long Streams + +The async iterator applies backpressure naturally — the runtime won't produce events faster than your `for await` can drain them. But if your consumer does heavy per-event work (DB writes, network calls), you can stall the run for its lifetime. Queue and process out of band when that matters. + +## Common Mistakes + +- **Not draining the stream** — leaves resources open. If you open `run.stream()`, you must consume it fully or call `run.cancel()`. +- **Assuming tool `args`/`result` are always present** — they're optional; check before destructuring. +- **Parsing `tool_call.result` as a specific shape without checking `name`** — every tool has its own shape. If you need strong typing, branch on `event.name` first. +- **Reacting to `"status": "FINISHED"` as the end** — it's the terminal status, but `wait()` still has to resolve to give you usage/git/duration. Always `await run.wait()` too. +