From bca612957941f8bad424d1856e7f46226a122d60 Mon Sep 17 00:00:00 2001 From: ericzakariasson Date: Thu, 30 Apr 2026 10:05:05 +0200 Subject: [PATCH 1/3] Update cursor team kit workflows Made-with: Cursor --- .cursor-plugin/marketplace.json | 2 +- README.md | 2 +- cursor-team-kit/.cursor-plugin/plugin.json | 12 +- cursor-team-kit/README.md | 8 +- cursor-team-kit/agents/ci-watcher.md | 15 +-- .../skills/cli-automation-harness/SKILL.md | 109 ++++++++++++++++++ cursor-team-kit/skills/fix-ci/SKILL.md | 11 +- .../skills/get-pr-comments/SKILL.md | 2 +- cursor-team-kit/skills/loop-on-ci/SKILL.md | 30 +++-- .../skills/make-pr-easy-to-review/SKILL.md | 59 ++++++++++ .../skills/maximum-throughput/SKILL.md | 82 +++++++++++++ .../skills/review-and-ship/SKILL.md | 17 +-- .../skills/ui-automation-harness/SKILL.md | 109 ++++++++++++++++++ cursor-team-kit/skills/verify-this/SKILL.md | 74 ++++++++++++ .../skills/what-did-i-get-done/SKILL.md | 2 +- .../skills/workflow-from-chats/SKILL.md | 50 ++++++++ 16 files changed, 545 insertions(+), 39 deletions(-) create mode 100644 cursor-team-kit/skills/cli-automation-harness/SKILL.md create mode 100644 cursor-team-kit/skills/make-pr-easy-to-review/SKILL.md create mode 100644 cursor-team-kit/skills/maximum-throughput/SKILL.md create mode 100644 cursor-team-kit/skills/ui-automation-harness/SKILL.md create mode 100644 cursor-team-kit/skills/verify-this/SKILL.md create mode 100644 cursor-team-kit/skills/workflow-from-chats/SKILL.md diff --git a/.cursor-plugin/marketplace.json b/.cursor-plugin/marketplace.json index 1396952..a15da0f 100644 --- a/.cursor-plugin/marketplace.json +++ b/.cursor-plugin/marketplace.json @@ -21,7 +21,7 @@ { "name": "cursor-team-kit", "source": "cursor-team-kit", - "description": "Internal team workflows used by Cursor developers for CI, code review, and shipping." + "description": "Internal team workflows used by Cursor developers for CI, code review, shipping, local automation, and verification." 
}, { "name": "create-plugin", diff --git a/README.md b/README.md index ce4943e..455f2f7 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Official Cursor plugins for popular developer tools, frameworks, and SaaS produc | `name` | Plugin | Author | Category | `description` (from marketplace) | |:-------|:-------|:-------|:---------|:-------------------------------------| | `continual-learning` | [Continual Learning](continual-learning/) | Cursor | Developer Tools | Incremental transcript-driven memory updates for AGENTS.md using high-signal bullet points only. | -| `cursor-team-kit` | [Cursor Team Kit](cursor-team-kit/) | Cursor | Developer Tools | Internal team workflows used by Cursor developers for CI, code review, and shipping. | +| `cursor-team-kit` | [Cursor Team Kit](cursor-team-kit/) | Cursor | Developer Tools | Internal team workflows used by Cursor developers for CI, code review, shipping, local automation, and verification. | | `create-plugin` | [Create Plugin](create-plugin/) | Cursor | Developer Tools | Scaffold and validate new Cursor plugins. | | `agent-compatibility` | [Agent Compatibility](agent-compatibility/) | Cursor | Developer Tools | CLI-backed repo compatibility scans plus Cursor agents that audit startup, validation, and docs against reality. | | `cli-for-agent` | [CLI for Agents](cli-for-agent/) | Cursor | Developer Tools | Patterns for designing CLIs that coding agents can run reliably: flags, help with examples, pipelines, errors, idempotency, dry-run. | diff --git a/cursor-team-kit/.cursor-plugin/plugin.json b/cursor-team-kit/.cursor-plugin/plugin.json index 549ba3d..ef6181f 100644 --- a/cursor-team-kit/.cursor-plugin/plugin.json +++ b/cursor-team-kit/.cursor-plugin/plugin.json @@ -1,8 +1,8 @@ { "name": "cursor-team-kit", "displayName": "Cursor Team Kit", - "version": "1.0.0", - "description": "Internal workflows used by Cursor developers for CI, code review, and shipping. 
Covers the full dev loop: CI monitoring and fixing, PR creation, merge conflicts, smoke tests, compiler checks, code cleanup, and work summaries.", + "version": "1.1.0", + "description": "Internal workflows used by Cursor developers for CI, code review, shipping, local CLI/UI automation harnesses, verify-this, maximum-throughput, test reliability, code cleanup, and work summaries. Designed to work without requiring third-party service integrations.", "author": { "name": "Cursor", "email": "plugins@cursor.com" @@ -16,13 +16,17 @@ "ci", "code-review", "shipping", - "testing" + "testing", + "verification", + "local-automation" ], "category": "developer-tools", "tags": [ "internal-workflows", "quality", - "delivery" + "delivery", + "review", + "automation" ], "skills": "./skills/", "agents": "./agents/", diff --git a/cursor-team-kit/README.md b/cursor-team-kit/README.md index 91e938f..89e8ca7 100644 --- a/cursor-team-kit/README.md +++ b/cursor-team-kit/README.md @@ -1,6 +1,6 @@ # Cursor Team Kit plugin -Internal-style workflows for CI, code review, shipping, and test reliability. +Internal-style workflows for CI, code review, shipping, and test reliability. The kit is designed to be plug and play without requiring third-party service integrations. ## Installation @@ -17,6 +17,11 @@ Internal-style workflows for CI, code review, shipping, and test reliability. 
| `loop-on-ci` | Watch CI runs and iterate on failures until checks pass | | `review-and-ship` | Run a structured review, commit changes, and open a PR | | `pr-review-canvas` | Generate an interactive HTML PR walkthrough with annotated, categorized diffs | +| `maximum-throughput` | Split independent engineering work into concurrent lanes with clean handoffs | +| `verify-this` | Prove or disprove claims with baseline/treatment artifacts and a clear verdict | +| `cli-automation-harness` | Build or adapt a local harness to drive and profile interactive CLIs or TUIs | +| `ui-automation-harness` | Build or adapt a local browser/CDP harness for web or Electron UIs | +| `make-pr-easy-to-review` | Clean noisy PR history, improve descriptions, and add reviewer guidance | | `run-smoke-tests` | Run Playwright smoke tests and triage failures | | `fix-ci` | Find failing CI jobs, inspect logs, and apply focused fixes | | `new-branch-and-pr` | Create a fresh branch, complete work, and open a pull request | @@ -26,6 +31,7 @@ Internal-style workflows for CI, code review, shipping, and test reliability. | `weekly-review` | Generate a weekly recap of shipped work with bugfix/tech-debt/net-new highlights | | `fix-merge-conflicts` | Resolve merge conflicts, validate build/tests, and summarize decisions | | `deslop` | Remove AI-generated code slop and clean up code style | +| `workflow-from-chats` | Extract durable working preferences from chats into skills, rules, or docs | ### Agents diff --git a/cursor-team-kit/agents/ci-watcher.md b/cursor-team-kit/agents/ci-watcher.md index 8801f81..8246bf1 100644 --- a/cursor-team-kit/agents/ci-watcher.md +++ b/cursor-team-kit/agents/ci-watcher.md @@ -1,13 +1,13 @@ --- name: ci-watcher -description: Watch GitHub CI for the current branch and report pass/fail with relevant failure logs. Use when waiting for CI results or CI has failed. Use proactively to monitor branch CI. 
+description: Watch PR CI for the current branch and report pass/fail with relevant failure links. Use when waiting for CI results or CI has failed. Use proactively to monitor branch CI. model: fast is_background: true --- # CI watcher -CI monitoring specialist for GitHub Actions. +CI monitoring specialist for PR-attached checks. ## Trigger @@ -16,12 +16,13 @@ Use when waiting for CI results, CI has failed, or when proactively monitoring b ## Workflow 1. Determine current branch: `git branch --show-current` -2. Find latest run for that branch: `gh run list --branch --limit 1` -3. Watch to completion: `gh run watch --exit-status` -4. If failed, fetch failed logs: `gh run view --log-failed` +2. Resolve the PR: `gh pr view --json number,url,headRefName` +3. Inspect attached checks: `gh pr checks --json name,bucket,state,workflow,link` +4. If checks are pending, watch: `gh pr checks --watch --fail-fast` +5. If a GitHub Actions check failed, fetch logs with `gh run view --log-failed`; otherwise, return the check link and concise next step. ## Output - CI status (passed/failed) -- Workflow/run metadata -- If failed: concise failure excerpt and likely next step +- PR and check metadata +- If failed: concise failure excerpt or external check link and likely next step diff --git a/cursor-team-kit/skills/cli-automation-harness/SKILL.md b/cursor-team-kit/skills/cli-automation-harness/SKILL.md new file mode 100644 index 0000000..39c91b2 --- /dev/null +++ b/cursor-team-kit/skills/cli-automation-harness/SKILL.md @@ -0,0 +1,109 @@ +--- +name: cli-automation-harness +description: Build or adapt a local harness to drive, inspect, and profile an interactive CLI or TUI without external services. Use for CLI UX checks, startup regressions, memory leaks, hangs, prompt flows, or terminal demos. +--- + +# CLI Automation Harness + +Use a repeatable local harness to exercise an interactive CLI instead of poking at it manually. 
First reuse the repo's own test/demo harness if it exists; otherwise assemble a temporary harness from standard local tools. + +## What It Is Used For + +- Reproducing CLI/TUI bugs with deterministic input. +- Verifying keyboard flows, prompts, interrupts, resize behavior, and terminal layout. +- Capturing before/after transcripts for bug fixes. +- Profiling startup time, slow operations, hangs, or memory growth. +- Recording a short terminal demo when output is easier to show than explain. + +## Harness Loop + +1. Identify the command under test and the smallest reproducible workspace. +2. Discover existing local harnesses: package scripts, e2e tests, demo recorders, expect scripts, or PTY helpers. +3. If no harness exists, launch the CLI in an isolated terminal session with deterministic env vars. +4. Capture the current screen before interacting. +5. Send one action at a time: text, Enter, arrows, Escape, Ctrl-C, resize. +6. Wait for a concrete screen pattern or prompt before the next action. +7. Save the transcript and any profile artifacts. +8. Kill the session cleanly. + +## Harness Options + +- Repo-native harness: prefer checked-in scripts because they know the app's startup, env, and prompts. +- `tmux`: managed sessions, `capture-pane`, `send-keys`, attach/detach. +- PTY probe: use a short Python, Node, or Expect script when tmux is unavailable. +- Runtime inspector: use Node or Bun inspector for CPU profiles, heap snapshots, and live evaluation. +- Terminal recorder: use repo-local demo tools or asciinema-compatible tools when the user asks for a demo. 
+ +## Minimal tmux Harness + +```bash +SESSION="cli-harness-$(date +%s)" +tmux new-session -d -s "$SESSION" -- <command> +tmux capture-pane -pt "$SESSION" +tmux send-keys -t "$SESSION" "help" Enter +tmux capture-pane -pt "$SESSION" +tmux kill-session -t "$SESSION" +``` + +For Node CLIs: + +```bash +NODE_OPTIONS="--inspect=127.0.0.1:0" tmux new-session -d -s "$SESSION" -- <command> +``` + +Read the terminal output to find the inspector URL, then use Chrome DevTools-compatible tooling if profiling is needed. + +## Minimal PTY Harness + +Use a PTY script when you need deterministic waits in a repo that does not have tmux or a demo harness. Keep it temporary unless the user asks to add a reusable test. + +```python +import os +import pty +import select +import subprocess +import time + +master_fd, slave_fd = pty.openpty() +proc = subprocess.Popen( + ["<command>", "<arg>"], + stdin=slave_fd, + stdout=slave_fd, + stderr=slave_fd, + close_fds=True, +) +os.close(slave_fd) + +deadline = time.time() + 30 +buffer = b"" +while time.time() < deadline: + ready, _, _ = select.select([master_fd], [], [], 0.25) + if not ready: + continue + chunk = os.read(master_fd, 4096) + buffer += chunk + if b"<expected-prompt>" in buffer: + os.write(master_fd, b"help\n") + break + +print(buffer.decode(errors="replace")) +proc.terminate() +os.close(master_fd) +``` + +If the CLI needs richer terminal control, use `pty.fork()` or an existing PTY library. + +## Profiling Recipes + +- Startup regression: capture baseline and treatment startup timings under the same machine, env, and command. +- Slow operation: start a CPU profile, perform the operation, stop the profile, and compare top self-time functions. +- Memory leak: force GC if available, take a heap snapshot, perform the operation repeatedly, force GC again, and take another snapshot. +- Hang: capture the screen, active handles/resources, and a stack/CPU sample before interrupting. + +## Guardrails + +- Prefer deterministic waits over sleeps. If you must sleep, explain why.
+- Do not send credentials or destructive commands into a controlled session. +- Keep the harness in `/tmp` unless the repo already has a testing/demo harness. +- Do not hard-code paths from another repository. Adapt commands to the current repo's scripts and runtime. +- Clean up tmux sessions, temp dirs, inspector processes, and demo artifacts unless the user asks to keep them. diff --git a/cursor-team-kit/skills/fix-ci/SKILL.md b/cursor-team-kit/skills/fix-ci/SKILL.md index 7f9fb3f..60adbdd 100644 --- a/cursor-team-kit/skills/fix-ci/SKILL.md +++ b/cursor-team-kit/skills/fix-ci/SKILL.md @@ -1,25 +1,26 @@ --- name: fix-ci -description: Find failing CI jobs, inspect logs, and apply focused fixes +description: Find failing PR checks, inspect logs or external check links, and apply focused fixes --- # Fix CI ## Trigger -Branch CI is failing and needs a fast, iterative path to green checks. +Branch or PR CI is failing and needs a fast, iterative path to green checks. ## Workflow -1. Identify the latest run for the current branch. -2. Inspect failed jobs and extract the first actionable error. +1. Resolve the active PR and inspect `gh pr checks --json name,bucket,state,workflow,link`. +2. Inspect failed jobs and extract the first actionable error. Use GitHub Actions logs when available; otherwise use the check link to identify the failing command or service. 3. Apply the smallest safe fix. -4. Re-run CI and repeat until green. +4. Push, re-check the PR check set, and repeat until green. ## Guardrails - Fix one actionable failure at a time. - Prefer minimal, low-risk changes before broader refactors. +- Keep `gh pr checks` as the source of truth for overall PR CI state. 
## Output diff --git a/cursor-team-kit/skills/get-pr-comments/SKILL.md b/cursor-team-kit/skills/get-pr-comments/SKILL.md index af50321..396aad5 100644 --- a/cursor-team-kit/skills/get-pr-comments/SKILL.md +++ b/cursor-team-kit/skills/get-pr-comments/SKILL.md @@ -20,4 +20,4 @@ Need a concise, actionable summary of feedback on the active pull request. - Grouped feedback summary - Action list ordered by priority -- Open questions that still need clarification +- Open questions that still need clarification \ No newline at end of file diff --git a/cursor-team-kit/skills/loop-on-ci/SKILL.md b/cursor-team-kit/skills/loop-on-ci/SKILL.md index aad5565..23a3860 100644 --- a/cursor-team-kit/skills/loop-on-ci/SKILL.md +++ b/cursor-team-kit/skills/loop-on-ci/SKILL.md @@ -1,31 +1,37 @@ --- name: loop-on-ci -description: Watch CI runs and iterate on failures until all checks pass +description: Monitor PR checks and fix failures until green. Uses gh pr checks as the source of truth for PR-attached checks. --- # Loop on CI ## Trigger -Need to watch branch CI and iterate on failures until green. +Need to watch a branch or pull request and iterate on CI failures until all required checks are green. + +Use `gh pr checks` as the source of truth. It includes all PR-attached checks, while `gh run list` only covers GitHub Actions. ## Workflow -1. Find the current branch and latest workflow run. -2. Wait for CI completion with `gh run watch --exit-status`. -3. If failed, inspect failed logs, implement a focused fix, commit, and push. -4. Repeat until all required checks pass. +1. Resolve the PR for the current branch. +2. Inspect current PR checks before waiting. +3. If checks already failed, diagnose those failures first. +4. If checks are pending, watch with `gh pr checks --watch --fail-fast`. +5. After each push, re-check the full PR check set and repeat until green. 
## Commands ```bash -# Latest run for current branch -gh run list --branch "$(git branch --show-current)" --limit 5 +# Resolve the active PR +gh pr view --json number,url,headRefName + +# Inspect all attached checks +gh pr checks --json name,bucket,state,workflow,link -# Block until completion (0 on pass, non-zero on fail) -gh run watch --exit-status +# Watch pending checks and fail fast +gh pr checks --watch --fail-fast -# Inspect failed jobs +# GitHub Actions logs, when the failing check links to a GHA run gh run view --log-failed ``` @@ -33,7 +39,9 @@ gh run view --log-failed - Keep each fix scoped to a single failure cause when possible. - Do not bypass hooks (`--no-verify`) to force progress. +- If the failure is clearly unrelated to the PR and appears fixed on main, merge latest main instead of bloating the PR with unrelated fixes. - If failures are flaky, retry once and report flake evidence. +- Re-run `gh pr checks --json name,bucket,state,workflow,link` after every push; the check set can change. ## Output diff --git a/cursor-team-kit/skills/make-pr-easy-to-review/SKILL.md b/cursor-team-kit/skills/make-pr-easy-to-review/SKILL.md new file mode 100644 index 0000000..f5343c0 --- /dev/null +++ b/cursor-team-kit/skills/make-pr-easy-to-review/SKILL.md @@ -0,0 +1,59 @@ +--- +name: make-pr-easy-to-review +description: Prepare PRs for review by cleaning noisy history, improving PR descriptions, and adding reviewer guidance without changing code behavior. Use for "make this easy to review", "tidy this PR", "clean up commits", or "annotate the diff". +--- + +# Make PR Easy to Review + +Prepare a PR so a reviewer can quickly understand the intent, important files, and risk. The default goal is reviewability without behavior changes. + +## Workflow + +1. Resolve the target PR from the user-provided URL or current branch. +2. Inspect commits, diff size, changed paths, generated files, and PR description. +3. 
Identify reviewability issues: noisy commits, stale description, unrelated changes, mixed mechanical and logic changes, missing tests, or unclear reviewer entry points. +4. Propose a plan before rewriting history or force-pushing. +5. Apply safe improvements, then verify the tree or diff still matches the intended code. + +## History Cleanup + +Only rewrite history when the user asks for it or agrees to the plan. Before rewriting: + +```bash +gh pr view --json title,headRefName,baseRefName,state,commits +git fetch origin +ORIGINAL_TREE=$(git rev-parse origin/<head-branch>^{tree}) +``` + +Good commit groupings usually follow dependency order: + +1. Schema/storage or generated API definitions. +2. Core logic. +3. Wiring and integration. +4. UI or surface behavior. +5. Tests. + +After rewriting, verify content identity: + +```bash +echo "Original tree: $ORIGINAL_TREE" +echo "Current tree: $(git rev-parse HEAD^{tree})" +git diff origin/<head-branch> --stat +``` + +Do not push if the tree changed unintentionally. + +## Reviewer Guidance + +When code behavior should stay untouched, prefer PR description and review notes: + +- Add a TL;DR that matches the actual diff. +- Separate core files from generated or mechanical files. +- Call out risky behavior changes, migration order, rollout plan, and test coverage. +- Link issue trackers, dashboards, or design docs when they explain intent. + +## Guardrails + +- Never hide meaningful behavior changes inside "cleanup". +- Do not bypass hooks unless the user explicitly asks. +- If the PR is too large to make reviewable with notes, recommend splitting instead of polishing around the problem.
diff --git a/cursor-team-kit/skills/maximum-throughput/SKILL.md b/cursor-team-kit/skills/maximum-throughput/SKILL.md new file mode 100644 index 0000000..361c256 --- /dev/null +++ b/cursor-team-kit/skills/maximum-throughput/SKILL.md @@ -0,0 +1,82 @@ +--- +name: maximum-throughput +description: Split independent engineering work into concurrent lanes with clear contracts and mergeable handoffs. Use when a task can be explored, reviewed, tested, or implemented in parallel. +--- + +# Maximum Throughput + +When work can run independently, split it early. The goal is not more agents; it is shorter feedback loops with clean synthesis. + +## Decide What Can Split + +Good split points: + +- Different subsystems or directories. +- Independent hypotheses for a bug. +- Separate review lenses: correctness, security, performance, tests. +- One shared interface with independent implementations. +- Gathering once, then parallel reasoning over the same context. + +Bad split points: + +- Two tasks that constantly need each other's intermediate decisions. +- Multiple agents editing the same small file. +- A single causal chain where the next step depends on the previous result. +- Tiny tasks where coordination costs more than the work. + +## Delegation Modes + +Blank-slate: + +- Use for independent exploration or scoped implementation. +- Include all required context in the prompt: paths, constraints, commands, expected output. +- Ask for concrete handoff fields: files read, findings, risks, tests, next steps. + +Shared-context: + +- Gather expensive context once. +- Write it to a local file or concise note. +- Send parallel lanes to reason over that same context from different angles. +- Merge findings by theme, not by which lane produced them. + +## Contracts + +Before parallel implementation, define the boundary: + +- Data shape or interface. +- Ownership of files. +- Test command each lane should run. +- What counts as done. +- Handoff format. 
+ +If no contract exists, create one before dispatching work. + +## Synthesis + +- Deduplicate repeated findings. +- Treat independently confirmed findings as higher confidence. +- Call out conflicts directly. +- Check that independently produced changes compose into a valid build. +- Run the narrowest validation that covers the merged result. + +## Handoff Format + +Ask each lane to return: + +```text +Summary: +Files touched/read: +Key findings: +Risks: +Validation: +Follow-ups: +``` + +For review tasks, include severity and exact file/symbol references. For implementation tasks, include test output and any skipped checks. + +## Guardrails + +- Do not parallelize destructive or stateful operations against the same resource. +- Do not hide uncertainty during synthesis. +- Do not merge code from parallel lanes without reading the diff. +- Prefer a smaller number of well-separated lanes over broad, redundant fan-out. diff --git a/cursor-team-kit/skills/review-and-ship/SKILL.md b/cursor-team-kit/skills/review-and-ship/SKILL.md index aaf0e56..000e753 100644 --- a/cursor-team-kit/skills/review-and-ship/SKILL.md +++ b/cursor-team-kit/skills/review-and-ship/SKILL.md @@ -1,21 +1,22 @@ --- name: review-and-ship -description: Run a structured review, close key issues, and ship changes via PR +description: Review the current branch for bugs, intent fit, and test coverage; run or write tests; commit focused work; open or update a PR. --- # Review and ship ## Trigger -Reviewing changes before shipping. Close key issues and open/update PR. +Reviewing changes before shipping. Close key issues, verify behavior, and open or update a PR. ## Workflow -1. Review diff against base branch and identify behavior-impacting risks. -2. Run or update tests for changed behavior. -3. Fix critical issues before finalizing. -4. Commit selective files with a concise message. -5. Push branch and open or update a PR. +1. 
Gather context: diff against base branch, uncommitted changes, recent commits, changed files, and user intent from recent relevant chats if useful. +2. Run targeted tests for changed behavior. If no focused tests exist, decide whether to add them or document the gap. +3. Review for correctness, regressions, security, and intent fit. Use parallel subagents for larger diffs. +4. Fix critical issues before finalizing and re-run affected tests. +5. Commit selective files with a concise message. +6. Push branch and open or update a PR. ## Suggested Checks @@ -23,6 +24,7 @@ Reviewing changes before shipping. Close key issues and open/update PR. git fetch origin main git diff origin/main...HEAD git status +gh pr checks --json name,bucket,state,workflow,link ``` ## Guardrails @@ -30,6 +32,7 @@ git status - Prioritize correctness, security, and regressions over style-only comments. - Keep commits focused and avoid unrelated file changes. - If pre-commit checks fail, fix the issues rather than bypassing hooks. +- Use `gh pr checks` instead of GitHub Actions-only commands when judging PR readiness. ## Output diff --git a/cursor-team-kit/skills/ui-automation-harness/SKILL.md b/cursor-team-kit/skills/ui-automation-harness/SKILL.md new file mode 100644 index 0000000..f6fc6db --- /dev/null +++ b/cursor-team-kit/skills/ui-automation-harness/SKILL.md @@ -0,0 +1,109 @@ +--- +name: ui-automation-harness +description: Build or adapt a local browser/CDP harness to drive and inspect a web, IDE, or Electron UI. Use for local UI verification, screenshots, accessibility snapshots, perf profiles, visual diffs, or reproducing UI bugs. +--- + +# UI Automation Harness + +Use local browser automation to verify UI behavior with evidence. First reuse the repo's own Playwright, browser, or Electron harness if it exists; otherwise assemble a temporary local harness around the app's dev server or Chromium debug port. 
+ +## What It Is Used For + +- Reproducing UI bugs that depend on real browser focus, keyboard input, scrolling, resizing, or rendering. +- Verifying visual or accessibility changes with screenshots and snapshots. +- Checking local web, IDE, or Electron behavior before shipping. +- Capturing console logs, network logs, CPU profiles, traces, or heap snapshots. +- Creating before/after evidence for `verify-this`. + +## Setup Pattern + +1. Start the app locally using the repo's documented dev command. +2. Discover existing local harnesses: Playwright tests, Cypress specs, Storybook, browser scripts, Electron launch scripts, or snapshot tools. +3. For a web app, connect to the local URL with the existing browser tooling. +4. For Electron/Chromium, enable a remote debugging port when supported. +5. Select the correct page by stable app markers, not by tab order alone. +6. Prefer accessibility roles, labels, and stable `data-*` selectors over coordinates. + +## Generic Web Harness + +Use the repo's installed browser tooling when possible. If the repo already has Playwright, a minimal one-off probe looks like: + +```javascript +import { chromium } from "playwright"; + +const browser = await chromium.launch(); +const page = await browser.newPage({ viewport: { width: 1280, height: 800 } }); +await page.goto("http://127.0.0.1:<port>"); +await page.getByRole("button", { name: /submit/i }).click(); +await page.screenshot({ path: "/tmp/ui-harness-after.png", fullPage: true }); +await browser.close(); +``` + +Do not add Playwright as a project dependency just for this probe unless the user asks. Prefer existing dev dependencies or external browser tools already available in the environment.
+ +## Generic CDP Harness + +For Electron or a Chromium app launched with `--remote-debugging-port=<port>`, connect over CDP: + +```javascript +import { chromium } from "playwright"; + +const browser = await chromium.connectOverCDP("http://127.0.0.1:<port>"); +const pages = browser.contexts().flatMap((context) => context.pages()); +let page; +for (const candidate of pages) { + if (await candidate.locator("<app-marker-selector>").count()) { + page = candidate; + break; + } +} + +if (!page) { + console.log(await Promise.all(pages.map(async (p) => ({ + title: await p.title(), + url: p.url(), + })))); + throw new Error("No matching app page found"); +} + +await page.screenshot({ path: "/tmp/ui-harness-cdp.png", fullPage: true }); +await browser.close(); +``` + +Replace `<app-marker-selector>` with a stable marker from the current repo, such as a root app node, landmark, or product-specific `data-*` attribute. + +## Interaction Loop + +1. Capture a page snapshot or screenshot before acting. +2. Choose a target from the latest page structure. +3. Perform exactly one structural action: click, type, keypress, drag, scroll, navigate, or resize. +4. Capture a fresh snapshot/screenshot. +5. Verify the expected state change. +6. Save artifacts for before/after comparisons when the user asked for proof. + +## CDP Capabilities + +Use raw CDP only when higher-level browser APIs are insufficient: + +- Performance: CPU profiles, traces, paint flashing, FPS meter, layout shift inspection. +- Memory: heap snapshots and forced GC for leak investigations. +- Network: request blocking, throttling, cache disablement, request/response logs. +- Rendering: viewport changes, color scheme emulation, reduced motion, accessibility checks. +- Debugging: console streaming, exception capture, DOM snapshots. + +## Page Selection + +When multiple app windows/tabs share a debug port: + +- Prefer a positive marker for the surface under test, such as an app root selector. +- Use a negative marker to avoid the wrong surface when necessary.
+- If no page matches, list available page titles and URLs instead of guessing. + +## Guardrails + +- Do not rely on stale element references after navigation or structural changes. +- Avoid coordinate clicks unless a fresh screenshot was captured immediately before the click. +- Keep test data local and disposable. +- Do not store screenshots or heap snapshots from privacy-sensitive workspaces unless the user explicitly agrees. +- Do not hard-code selectors, ports, or script paths from another repository. Discover the current repo's local app markers. +- Clean up dev servers, debug sessions, and temp profiles when done. diff --git a/cursor-team-kit/skills/verify-this/SKILL.md b/cursor-team-kit/skills/verify-this/SKILL.md new file mode 100644 index 0000000..4f7e348 --- /dev/null +++ b/cursor-team-kit/skills/verify-this/SKILL.md @@ -0,0 +1,74 @@ +--- +name: verify-this +description: "Verify a claim with fresh local evidence: restate it falsifiably, capture baseline and treatment, compare artifacts, and return VERIFIED, NOT VERIFIED, or INCONCLUSIVE." +--- + +# Verify This + +Verification is not a recap. It proves or disproves a specific claim with repeatable evidence. + +## When To Use + +- The user asks "verify this", "prove it works", "did this fix it", or "show me the evidence". +- A bug fix needs a before/after repro. +- A UI, CLI, API, performance, or memory claim needs measurement. +- A test passes but the user-visible behavior still needs confirmation. + +Do not use this for vague claims like "the code is cleaner". Ask for a measurable claim first. + +## Workflow + +1. Restate the claim in falsifiable form: condition, metric, and threshold. +2. Pick the smallest local surface that can disprove it. +3. Capture a baseline from the old state: merge base, parent commit, failing branch, or current broken repro. +4. Capture treatment from the changed state with the same command, data, warmup, and environment. +5.
Compare raw artifacts: numbers, screenshots, terminal transcripts, HTTP responses, profiles, heap snapshots, or test output. +6. Return exactly one verdict: `VERIFIED`, `NOT VERIFIED`, or `INCONCLUSIVE`. + +## Local Surfaces + +- Code behavior: focused unit/integration tests or a minimal repro script. +- CLI/TUI behavior: `cli-automation-harness`, terminal transcript, or demo recording. +- UI behavior: `ui-automation-harness`, screenshots, accessibility snapshots, or browser traces. +- API behavior: local HTTP/RPC request and response diff. +- Performance: same-machine baseline/treatment timings or CPU profiles. +- Memory: heap snapshots before and after the suspected operation. + +## Artifact Layout + +When safe to write artifacts: + +```text +/tmp/verify-this/<claim-slug>/ +├── claim.md +├── timeline.md +├── baseline/ +├── treatment/ +├── diff/ +└── verdict.md +``` + +If artifacts may contain sensitive code, prompts, screenshots, HTTP bodies, or heap data, keep only the minimal inline evidence unless the user agrees to disk storage. + +## Verdict Rules + +- `VERIFIED`: baseline and treatment differ in the predicted direction, by the claimed threshold, with no obvious confound. +- `NOT VERIFIED`: the behavior is unchanged, moves the wrong way, or misses the threshold. +- `INCONCLUSIVE`: no valid baseline, noisy signal, failed measurement, or an environment difference invalidates the comparison. + +## Output + +Use this shape: + +```text +VERIFIED | NOT VERIFIED | INCONCLUSIVE +Claim: <falsifiable claim> + +Evidence: +<metric>: baseline=<...>, treatment=<...>, delta=<...>, threshold=<...> + +Reasoning: <why the evidence supports the verdict> + +``` + +Do not soften a negative result. A clear `NOT VERIFIED` is useful.
diff --git a/cursor-team-kit/skills/what-did-i-get-done/SKILL.md b/cursor-team-kit/skills/what-did-i-get-done/SKILL.md index 51995c1..e6772b2 100644 --- a/cursor-team-kit/skills/what-did-i-get-done/SKILL.md +++ b/cursor-team-kit/skills/what-did-i-get-done/SKILL.md @@ -26,6 +26,6 @@ Need a short, high-signal summary of work completed in a specific time range (fo ## Output -- One short summary suitable for Slack +- One short summary suitable for a status update - Real date range - Optional 2-5 bullets for major changes only diff --git a/cursor-team-kit/skills/workflow-from-chats/SKILL.md b/cursor-team-kit/skills/workflow-from-chats/SKILL.md new file mode 100644 index 0000000..a5170df --- /dev/null +++ b/cursor-team-kit/skills/workflow-from-chats/SKILL.md @@ -0,0 +1,50 @@ +--- +name: workflow-from-chats +description: Extract durable working preferences from recent Cursor chats and convert them into skills, rules, or workflow docs. Use when asked to learn preferences, mine feedback, personalize workflows, or generate team/person-specific agent guidance. +--- + +# Workflow From Chats + +Infer durable working preferences from recent chats. Do not summarize chats; extract reusable workflow guidance. + +## Scope + +- Default to the last 7 days unless the user asks for a different window. +- Read parent transcripts and relevant subagent transcripts. Use subagent content as evidence, but cite only parent conversations. +- Do not expose local transcript paths, secrets, customer data, private chat content, or credentials. + +## Workflow + +1. State the target workflow or preference surface in one paragraph. +2. Build an internal transcript inventory: title/topic, parent conversation ID, approximate date, completion state, relevant subagents, and why it may contain preference evidence. +3. Scan for explicit preferences, corrections, and workflow markers such as "I prefer", "always", "never", "not what I asked", "stop", "review", "PR", "CI", "logs", and "skill". +4. 
Extract preference atoms: trigger, workflow step, decision rule, quality bar, stop condition, evidence, and confidence. +5. Rate confidence as strong, medium, weak, or contradicted. +6. Cluster by workflow shape rather than transcript: shipping, review, simplification, debugging, capture, communication, delegation, or validation. +7. Choose the artifact: new skill, skill edit, rule, workflow doc, or no artifact. +8. Draft only the reusable guidance. Filter anecdotes that will not help future tasks. + +## Confidence + +- Strong: explicit user preference, workflow-changing correction, repeated parent-chat pattern, or direct request to encode behavior. +- Medium: accepted workflow, repeated tool/model/validation preference, or subagent consensus that the parent used successfully. +- Weak: agent-chosen behavior with no user feedback, one ambiguous transcript, or a likely task-specific correction. +- Contradicted: evidence points in incompatible directions; ask the user before writing files. + +## Artifact Choice + +- Skill: recurring multi-step workflow with clear triggers. +- Rule: general behavior that should apply broadly. +- Workflow doc: useful context that is not reliably triggerable. +- No artifact: situational, stale, or low-confidence observation. + +## Output + +Return a concise synthesis first: + +- Target workflow. +- Evidence corpus with parent conversation citations only. +- Preference profile. +- Adopt, consider, dismissed. +- Proposed artifacts. +- Open questions only if they block writing. 
From d6e675f00cbce2b0531c8307d0a0fc1900db63a0 Mon Sep 17 00:00:00 2001 From: ericzakariasson Date: Thu, 30 Apr 2026 10:31:57 +0200 Subject: [PATCH 2/3] Remove maximum-throughput skill from team kit Made-with: Cursor --- cursor-team-kit/.cursor-plugin/plugin.json | 2 +- cursor-team-kit/README.md | 1 - .../skills/maximum-throughput/SKILL.md | 82 ------------------- 3 files changed, 1 insertion(+), 84 deletions(-) delete mode 100644 cursor-team-kit/skills/maximum-throughput/SKILL.md diff --git a/cursor-team-kit/.cursor-plugin/plugin.json b/cursor-team-kit/.cursor-plugin/plugin.json index ef6181f..6c22cae 100644 --- a/cursor-team-kit/.cursor-plugin/plugin.json +++ b/cursor-team-kit/.cursor-plugin/plugin.json @@ -2,7 +2,7 @@ "name": "cursor-team-kit", "displayName": "Cursor Team Kit", "version": "1.1.0", - "description": "Internal workflows used by Cursor developers for CI, code review, shipping, local CLI/UI automation harnesses, verify-this, maximum-throughput, test reliability, code cleanup, and work summaries. Designed to work without requiring third-party service integrations.", + "description": "Internal workflows used by Cursor developers for CI, code review, shipping, local CLI/UI automation harnesses, verify-this, test reliability, code cleanup, and work summaries. Designed to work without requiring third-party service integrations.", "author": { "name": "Cursor", "email": "plugins@cursor.com" diff --git a/cursor-team-kit/README.md b/cursor-team-kit/README.md index 89e8ca7..be5b176 100644 --- a/cursor-team-kit/README.md +++ b/cursor-team-kit/README.md @@ -17,7 +17,6 @@ Internal-style workflows for CI, code review, shipping, and test reliability. 
Th | `loop-on-ci` | Watch CI runs and iterate on failures until checks pass | | `review-and-ship` | Run a structured review, commit changes, and open a PR | | `pr-review-canvas` | Generate an interactive HTML PR walkthrough with annotated, categorized diffs | -| `maximum-throughput` | Split independent engineering work into concurrent lanes with clean handoffs | | `verify-this` | Prove or disprove claims with baseline/treatment artifacts and a clear verdict | | `cli-automation-harness` | Build or adapt a local harness to drive and profile interactive CLIs or TUIs | | `ui-automation-harness` | Build or adapt a local browser/CDP harness for web or Electron UIs | diff --git a/cursor-team-kit/skills/maximum-throughput/SKILL.md b/cursor-team-kit/skills/maximum-throughput/SKILL.md deleted file mode 100644 index 361c256..0000000 --- a/cursor-team-kit/skills/maximum-throughput/SKILL.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -name: maximum-throughput -description: Split independent engineering work into concurrent lanes with clear contracts and mergeable handoffs. Use when a task can be explored, reviewed, tested, or implemented in parallel. ---- - -# Maximum Throughput - -When work can run independently, split it early. The goal is not more agents; it is shorter feedback loops with clean synthesis. - -## Decide What Can Split - -Good split points: - -- Different subsystems or directories. -- Independent hypotheses for a bug. -- Separate review lenses: correctness, security, performance, tests. -- One shared interface with independent implementations. -- Gathering once, then parallel reasoning over the same context. - -Bad split points: - -- Two tasks that constantly need each other's intermediate decisions. -- Multiple agents editing the same small file. -- A single causal chain where the next step depends on the previous result. -- Tiny tasks where coordination costs more than the work. 
- -## Delegation Modes - -Blank-slate: - -- Use for independent exploration or scoped implementation. -- Include all required context in the prompt: paths, constraints, commands, expected output. -- Ask for concrete handoff fields: files read, findings, risks, tests, next steps. - -Shared-context: - -- Gather expensive context once. -- Write it to a local file or concise note. -- Send parallel lanes to reason over that same context from different angles. -- Merge findings by theme, not by which lane produced them. - -## Contracts - -Before parallel implementation, define the boundary: - -- Data shape or interface. -- Ownership of files. -- Test command each lane should run. -- What counts as done. -- Handoff format. - -If no contract exists, create one before dispatching work. - -## Synthesis - -- Deduplicate repeated findings. -- Treat independently confirmed findings as higher confidence. -- Call out conflicts directly. -- Check that independently produced changes compose into a valid build. -- Run the narrowest validation that covers the merged result. - -## Handoff Format - -Ask each lane to return: - -```text -Summary: -Files touched/read: -Key findings: -Risks: -Validation: -Follow-ups: -``` - -For review tasks, include severity and exact file/symbol references. For implementation tasks, include test output and any skipped checks. - -## Guardrails - -- Do not parallelize destructive or stateful operations against the same resource. -- Do not hide uncertainty during synthesis. -- Do not merge code from parallel lanes without reading the diff. -- Prefer a smaller number of well-separated lanes over broad, redundant fan-out. 
From d018dc4d9161cc5bfa6be12fcf330445970a6b7c Mon Sep 17 00:00:00 2001 From: ericzakariasson Date: Thu, 30 Apr 2026 13:12:11 +0200 Subject: [PATCH 3/3] Rename automation harness skills to control-cli and control-ui Made-with: Cursor --- cursor-team-kit/.cursor-plugin/plugin.json | 2 +- cursor-team-kit/README.md | 4 ++-- .../skills/{cli-automation-harness => control-cli}/SKILL.md | 4 ++-- .../skills/{ui-automation-harness => control-ui}/SKILL.md | 4 ++-- cursor-team-kit/skills/verify-this/SKILL.md | 4 ++-- 5 files changed, 9 insertions(+), 9 deletions(-) rename cursor-team-kit/skills/{cli-automation-harness => control-cli}/SKILL.md (98%) rename cursor-team-kit/skills/{ui-automation-harness => control-ui}/SKILL.md (98%) diff --git a/cursor-team-kit/.cursor-plugin/plugin.json b/cursor-team-kit/.cursor-plugin/plugin.json index 6c22cae..8f9f213 100644 --- a/cursor-team-kit/.cursor-plugin/plugin.json +++ b/cursor-team-kit/.cursor-plugin/plugin.json @@ -2,7 +2,7 @@ "name": "cursor-team-kit", "displayName": "Cursor Team Kit", "version": "1.1.0", - "description": "Internal workflows used by Cursor developers for CI, code review, shipping, local CLI/UI automation harnesses, verify-this, test reliability, code cleanup, and work summaries. Designed to work without requiring third-party service integrations.", + "description": "Internal workflows used by Cursor developers for CI, code review, shipping, control-cli, control-ui, verify-this, test reliability, code cleanup, and work summaries. Designed to work without requiring third-party service integrations.", "author": { "name": "Cursor", "email": "plugins@cursor.com" diff --git a/cursor-team-kit/README.md b/cursor-team-kit/README.md index be5b176..dbaa8e7 100644 --- a/cursor-team-kit/README.md +++ b/cursor-team-kit/README.md @@ -18,8 +18,8 @@ Internal-style workflows for CI, code review, shipping, and test reliability. 
Th | `review-and-ship` | Run a structured review, commit changes, and open a PR | | `pr-review-canvas` | Generate an interactive HTML PR walkthrough with annotated, categorized diffs | | `verify-this` | Prove or disprove claims with baseline/treatment artifacts and a clear verdict | -| `cli-automation-harness` | Build or adapt a local harness to drive and profile interactive CLIs or TUIs | -| `ui-automation-harness` | Build or adapt a local browser/CDP harness for web or Electron UIs | +| `control-cli` | Build or adapt a local harness to drive and profile interactive CLIs or TUIs | +| `control-ui` | Build or adapt a local browser/CDP harness for web or Electron UIs | | `make-pr-easy-to-review` | Clean noisy PR history, improve descriptions, and add reviewer guidance | | `run-smoke-tests` | Run Playwright smoke tests and triage failures | | `fix-ci` | Find failing CI jobs, inspect logs, and apply focused fixes | diff --git a/cursor-team-kit/skills/cli-automation-harness/SKILL.md b/cursor-team-kit/skills/control-cli/SKILL.md similarity index 98% rename from cursor-team-kit/skills/cli-automation-harness/SKILL.md rename to cursor-team-kit/skills/control-cli/SKILL.md index 39c91b2..69289ac 100644 --- a/cursor-team-kit/skills/cli-automation-harness/SKILL.md +++ b/cursor-team-kit/skills/control-cli/SKILL.md @@ -1,9 +1,9 @@ --- -name: cli-automation-harness +name: control-cli description: Build or adapt a local harness to drive, inspect, and profile an interactive CLI or TUI without external services. Use for CLI UX checks, startup regressions, memory leaks, hangs, prompt flows, or terminal demos. --- -# CLI Automation Harness +# Control CLI Use a repeatable local harness to exercise an interactive CLI instead of poking at it manually. First reuse the repo's own test/demo harness if it exists; otherwise assemble a temporary harness from standard local tools. 
diff --git a/cursor-team-kit/skills/ui-automation-harness/SKILL.md b/cursor-team-kit/skills/control-ui/SKILL.md similarity index 98% rename from cursor-team-kit/skills/ui-automation-harness/SKILL.md rename to cursor-team-kit/skills/control-ui/SKILL.md index f6fc6db..68484fd 100644 --- a/cursor-team-kit/skills/ui-automation-harness/SKILL.md +++ b/cursor-team-kit/skills/control-ui/SKILL.md @@ -1,9 +1,9 @@ --- -name: ui-automation-harness +name: control-ui description: Build or adapt a local browser/CDP harness to drive and inspect a web, IDE, or Electron UI. Use for local UI verification, screenshots, accessibility snapshots, perf profiles, visual diffs, or reproducing UI bugs. --- -# UI Automation Harness +# Control UI Use local browser automation to verify UI behavior with evidence. First reuse the repo's own Playwright, browser, or Electron harness if it exists; otherwise assemble a temporary local harness around the app's dev server or Chromium debug port. diff --git a/cursor-team-kit/skills/verify-this/SKILL.md b/cursor-team-kit/skills/verify-this/SKILL.md index 4f7e348..e17217e 100644 --- a/cursor-team-kit/skills/verify-this/SKILL.md +++ b/cursor-team-kit/skills/verify-this/SKILL.md @@ -28,8 +28,8 @@ Do not use this for vague claims like "the code is cleaner". Ask for a measurabl ## Local Surfaces - Code behavior: focused unit/integration tests or a minimal repro script. -- CLI/TUI behavior: `cli-automation-harness`, terminal transcript, or demo recording. -- UI behavior: `ui-automation-harness`, screenshots, accessibility snapshots, or browser traces. +- CLI/TUI behavior: `control-cli`, terminal transcript, or demo recording. +- UI behavior: `control-ui`, screenshots, accessibility snapshots, or browser traces. - API behavior: local HTTP/RPC request and response diff. - Performance: same-machine baseline/treatment timings or CPU profiles. - Memory: heap snapshots before and after the suspected operation.