diff --git a/.cursor-plugin/marketplace.json b/.cursor-plugin/marketplace.json index 1396952..a15da0f 100644 --- a/.cursor-plugin/marketplace.json +++ b/.cursor-plugin/marketplace.json @@ -21,7 +21,7 @@ { "name": "cursor-team-kit", "source": "cursor-team-kit", - "description": "Internal team workflows used by Cursor developers for CI, code review, and shipping." + "description": "Internal team workflows used by Cursor developers for CI, code review, shipping, local automation, and verification." }, { "name": "create-plugin", diff --git a/README.md b/README.md index ce4943e..455f2f7 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ Official Cursor plugins for popular developer tools, frameworks, and SaaS produc | `name` | Plugin | Author | Category | `description` (from marketplace) | |:-------|:-------|:-------|:---------|:-------------------------------------| | `continual-learning` | [Continual Learning](continual-learning/) | Cursor | Developer Tools | Incremental transcript-driven memory updates for AGENTS.md using high-signal bullet points only. | -| `cursor-team-kit` | [Cursor Team Kit](cursor-team-kit/) | Cursor | Developer Tools | Internal team workflows used by Cursor developers for CI, code review, and shipping. | +| `cursor-team-kit` | [Cursor Team Kit](cursor-team-kit/) | Cursor | Developer Tools | Internal team workflows used by Cursor developers for CI, code review, shipping, local automation, and verification. | | `create-plugin` | [Create Plugin](create-plugin/) | Cursor | Developer Tools | Scaffold and validate new Cursor plugins. | | `agent-compatibility` | [Agent Compatibility](agent-compatibility/) | Cursor | Developer Tools | CLI-backed repo compatibility scans plus Cursor agents that audit startup, validation, and docs against reality. 
| | `cli-for-agent` | [CLI for Agents](cli-for-agent/) | Cursor | Developer Tools | Patterns for designing CLIs that coding agents can run reliably: flags, help with examples, pipelines, errors, idempotency, dry-run. | diff --git a/cursor-team-kit/.cursor-plugin/plugin.json b/cursor-team-kit/.cursor-plugin/plugin.json index 549ba3d..8f9f213 100644 --- a/cursor-team-kit/.cursor-plugin/plugin.json +++ b/cursor-team-kit/.cursor-plugin/plugin.json @@ -1,8 +1,8 @@ { "name": "cursor-team-kit", "displayName": "Cursor Team Kit", - "version": "1.0.0", - "description": "Internal workflows used by Cursor developers for CI, code review, and shipping. Covers the full dev loop: CI monitoring and fixing, PR creation, merge conflicts, smoke tests, compiler checks, code cleanup, and work summaries.", + "version": "1.1.0", + "description": "Internal workflows used by Cursor developers for CI, code review, shipping, control-cli, control-ui, verify-this, test reliability, code cleanup, and work summaries. Designed to work without requiring third-party service integrations.", "author": { "name": "Cursor", "email": "plugins@cursor.com" @@ -16,13 +16,17 @@ "ci", "code-review", "shipping", - "testing" + "testing", + "verification", + "local-automation" ], "category": "developer-tools", "tags": [ "internal-workflows", "quality", - "delivery" + "delivery", + "review", + "automation" ], "skills": "./skills/", "agents": "./agents/", diff --git a/cursor-team-kit/README.md b/cursor-team-kit/README.md index 91e938f..dbaa8e7 100644 --- a/cursor-team-kit/README.md +++ b/cursor-team-kit/README.md @@ -1,6 +1,6 @@ # Cursor Team Kit plugin -Internal-style workflows for CI, code review, shipping, and test reliability. +Internal-style workflows for CI, code review, shipping, and test reliability. The kit is designed to be plug and play without requiring third-party service integrations. ## Installation @@ -17,6 +17,10 @@ Internal-style workflows for CI, code review, shipping, and test reliability. 
| `loop-on-ci` | Watch CI runs and iterate on failures until checks pass | | `review-and-ship` | Run a structured review, commit changes, and open a PR | | `pr-review-canvas` | Generate an interactive HTML PR walkthrough with annotated, categorized diffs | +| `verify-this` | Prove or disprove claims with baseline/treatment artifacts and a clear verdict | +| `control-cli` | Build or adapt a local harness to drive and profile interactive CLIs or TUIs | +| `control-ui` | Build or adapt a local browser/CDP harness for web or Electron UIs | +| `make-pr-easy-to-review` | Clean noisy PR history, improve descriptions, and add reviewer guidance | | `run-smoke-tests` | Run Playwright smoke tests and triage failures | | `fix-ci` | Find failing CI jobs, inspect logs, and apply focused fixes | | `new-branch-and-pr` | Create a fresh branch, complete work, and open a pull request | @@ -26,6 +30,7 @@ Internal-style workflows for CI, code review, shipping, and test reliability. | `weekly-review` | Generate a weekly recap of shipped work with bugfix/tech-debt/net-new highlights | | `fix-merge-conflicts` | Resolve merge conflicts, validate build/tests, and summarize decisions | | `deslop` | Remove AI-generated code slop and clean up code style | +| `workflow-from-chats` | Extract durable working preferences from chats into skills, rules, or docs | ### Agents diff --git a/cursor-team-kit/agents/ci-watcher.md b/cursor-team-kit/agents/ci-watcher.md index 8801f81..8246bf1 100644 --- a/cursor-team-kit/agents/ci-watcher.md +++ b/cursor-team-kit/agents/ci-watcher.md @@ -1,13 +1,13 @@ --- name: ci-watcher -description: Watch GitHub CI for the current branch and report pass/fail with relevant failure logs. Use when waiting for CI results or CI has failed. Use proactively to monitor branch CI. +description: Watch PR CI for the current branch and report pass/fail with relevant failure links. Use when waiting for CI results or CI has failed. Use proactively to monitor branch CI. 
model: fast is_background: true --- # CI watcher -CI monitoring specialist for GitHub Actions. +CI monitoring specialist for PR-attached checks. ## Trigger @@ -16,12 +16,13 @@ Use when waiting for CI results, CI has failed, or when proactively monitoring b ## Workflow 1. Determine current branch: `git branch --show-current` -2. Find latest run for that branch: `gh run list --branch --limit 1` -3. Watch to completion: `gh run watch --exit-status` -4. If failed, fetch failed logs: `gh run view --log-failed` +2. Resolve the PR: `gh pr view --json number,url,headRefName` +3. Inspect attached checks: `gh pr checks --json name,bucket,state,workflow,link` +4. If checks are pending, watch: `gh pr checks --watch --fail-fast` +5. If a GitHub Actions check failed, fetch logs with `gh run view <run-id> --log-failed`; otherwise, return the check link and concise next step. ## Output - CI status (passed/failed) -- Workflow/run metadata -- If failed: concise failure excerpt and likely next step +- PR and check metadata +- If failed: concise failure excerpt or external check link and likely next step diff --git a/cursor-team-kit/skills/control-cli/SKILL.md b/cursor-team-kit/skills/control-cli/SKILL.md new file mode 100644 index 0000000..69289ac --- /dev/null +++ b/cursor-team-kit/skills/control-cli/SKILL.md @@ -0,0 +1,109 @@ +--- +name: control-cli +description: Build or adapt a local harness to drive, inspect, and profile an interactive CLI or TUI without external services. Use for CLI UX checks, startup regressions, memory leaks, hangs, prompt flows, or terminal demos. +--- + +# Control CLI + +Use a repeatable local harness to exercise an interactive CLI instead of poking at it manually. First reuse the repo's own test/demo harness if it exists; otherwise assemble a temporary harness from standard local tools. + +## What It Is Used For + +- Reproducing CLI/TUI bugs with deterministic input. +- Verifying keyboard flows, prompts, interrupts, resize behavior, and terminal layout.
+- Capturing before/after transcripts for bug fixes. +- Profiling startup time, slow operations, hangs, or memory growth. +- Recording a short terminal demo when output is easier to show than explain. + +## Harness Loop + +1. Identify the command under test and the smallest reproducible workspace. +2. Discover existing local harnesses: package scripts, e2e tests, demo recorders, expect scripts, or PTY helpers. +3. If no harness exists, launch the CLI in an isolated terminal session with deterministic env vars. +4. Capture the current screen before interacting. +5. Send one action at a time: text, Enter, arrows, Escape, Ctrl-C, resize. +6. Wait for a concrete screen pattern or prompt before the next action. +7. Save the transcript and any profile artifacts. +8. Kill the session cleanly. + +## Harness Options + +- Repo-native harness: prefer checked-in scripts because they know the app's startup, env, and prompts. +- `tmux`: managed sessions, `capture-pane`, `send-keys`, attach/detach. +- PTY probe: use a short Python, Node, or Expect script when tmux is unavailable. +- Runtime inspector: use Node or Bun inspector for CPU profiles, heap snapshots, and live evaluation. +- Terminal recorder: use repo-local demo tools or asciinema-compatible tools when the user asks for a demo. + +## Minimal tmux Harness + +```bash +SESSION="cli-harness-$(date +%s)" +tmux new-session -d -s "$SESSION" -- <command> +tmux capture-pane -pt "$SESSION" +tmux send-keys -t "$SESSION" "help" Enter +tmux capture-pane -pt "$SESSION" +tmux kill-session -t "$SESSION" +``` + +For Node CLIs: + +```bash +NODE_OPTIONS="--inspect=127.0.0.1:0" tmux new-session -d -s "$SESSION" -- <node-cli-command> +``` + +Read the terminal output to find the inspector URL, then use Chrome DevTools-compatible tooling if profiling is needed. + +## Minimal PTY Harness + +Use a PTY script when you need deterministic waits in a repo that does not have tmux or a demo harness. Keep it temporary unless the user asks to add a reusable test.
+ +```python +import os +import pty +import select +import subprocess +import time + +master_fd, slave_fd = pty.openpty() +proc = subprocess.Popen( + ["<command>", "<arg>"], + stdin=slave_fd, + stdout=slave_fd, + stderr=slave_fd, + close_fds=True, +) +os.close(slave_fd) + +deadline = time.time() + 30 +buffer = b"" +while time.time() < deadline: + ready, _, _ = select.select([master_fd], [], [], 0.25) + if not ready: + continue + chunk = os.read(master_fd, 4096) + buffer += chunk + if b"<expected prompt>" in buffer: + os.write(master_fd, b"help\n") + break + +print(buffer.decode(errors="replace")) +proc.terminate() +os.close(master_fd) +``` + +If the CLI needs richer terminal control, use `pty.fork()` or an existing PTY library. + +## Profiling Recipes + +- Startup regression: capture baseline and treatment startup timings under the same machine, env, and command. +- Slow operation: start a CPU profile, perform the operation, stop the profile, and compare top self-time functions. +- Memory leak: force GC if available, take a heap snapshot, perform the operation repeatedly, force GC again, and take another snapshot. +- Hang: capture the screen, active handles/resources, and a stack/CPU sample before interrupting. + +## Guardrails + +- Prefer deterministic waits over sleeps. If you must sleep, explain why. +- Do not send credentials or destructive commands into a controlled session. +- Keep the harness in `/tmp` unless the repo already has a testing/demo harness. +- Do not hard-code paths from another repository. Adapt commands to the current repo's scripts and runtime. +- Clean up tmux sessions, temp dirs, inspector processes, and demo artifacts unless the user asks to keep them.
diff --git a/cursor-team-kit/skills/control-ui/SKILL.md b/cursor-team-kit/skills/control-ui/SKILL.md new file mode 100644 index 0000000..68484fd --- /dev/null +++ b/cursor-team-kit/skills/control-ui/SKILL.md @@ -0,0 +1,109 @@ +--- +name: control-ui +description: Build or adapt a local browser/CDP harness to drive and inspect a web, IDE, or Electron UI. Use for local UI verification, screenshots, accessibility snapshots, perf profiles, visual diffs, or reproducing UI bugs. +--- + +# Control UI + +Use local browser automation to verify UI behavior with evidence. First reuse the repo's own Playwright, browser, or Electron harness if it exists; otherwise assemble a temporary local harness around the app's dev server or Chromium debug port. + +## What It Is Used For + +- Reproducing UI bugs that depend on real browser focus, keyboard input, scrolling, resizing, or rendering. +- Verifying visual or accessibility changes with screenshots and snapshots. +- Checking local web, IDE, or Electron behavior before shipping. +- Capturing console logs, network logs, CPU profiles, traces, or heap snapshots. +- Creating before/after evidence for `verify-this`. + +## Setup Pattern + +1. Start the app locally using the repo's documented dev command. +2. Discover existing local harnesses: Playwright tests, Cypress specs, Storybook, browser scripts, Electron launch scripts, or snapshot tools. +3. For a web app, connect to the local URL with the existing browser tooling. +4. For Electron/Chromium, enable a remote debugging port when supported. +5. Select the correct page by stable app markers, not by tab order alone. +6. Prefer accessibility roles, labels, and stable `data-*` selectors over coordinates. + +## Generic Web Harness + +Use the repo's installed browser tooling when possible. 
If the repo already has Playwright, a minimal one-off probe looks like: + +```javascript +import { chromium } from "playwright"; + +const browser = await chromium.launch(); +const page = await browser.newPage({ viewport: { width: 1280, height: 800 } }); +await page.goto("http://127.0.0.1:<port>"); +await page.getByRole("button", { name: /submit/i }).click(); +await page.screenshot({ path: "/tmp/ui-harness-after.png", fullPage: true }); +await browser.close(); +``` + +Do not add Playwright as a project dependency just for this probe unless the user asks. Prefer existing dev dependencies or external browser tools already available in the environment. + +## Generic CDP Harness + +For Electron or a Chromium app launched with `--remote-debugging-port=<port>`, connect over CDP: + +```javascript +import { chromium } from "playwright"; + +const browser = await chromium.connectOverCDP("http://127.0.0.1:<port>"); +const pages = browser.contexts().flatMap((context) => context.pages()); +let page; +for (const candidate of pages) { + if (await candidate.locator("<app-marker-selector>").count()) { + page = candidate; + break; + } +} + +if (!page) { + console.log(await Promise.all(pages.map(async (p) => ({ + title: await p.title(), + url: p.url(), + })))); + throw new Error("No matching app page found"); +} + +await page.screenshot({ path: "/tmp/ui-harness-cdp.png", fullPage: true }); +await browser.close(); +``` + +Replace `<app-marker-selector>` with a stable marker from the current repo, such as a root app node, landmark, or product-specific `data-*` attribute. + +## Interaction Loop + +1. Capture a page snapshot or screenshot before acting. +2. Choose a target from the latest page structure. +3. Perform exactly one structural action: click, type, keypress, drag, scroll, navigate, or resize. +4. Capture a fresh snapshot/screenshot. +5. Verify the expected state change. +6. Save artifacts for before/after comparisons when the user asked for proof.
+ +## CDP Capabilities + +Use raw CDP only when higher-level browser APIs are insufficient: + +- Performance: CPU profiles, traces, paint flashing, FPS meter, layout shift inspection. +- Memory: heap snapshots and forced GC for leak investigations. +- Network: request blocking, throttling, cache disablement, request/response logs. +- Rendering: viewport changes, color scheme emulation, reduced motion, accessibility checks. +- Debugging: console streaming, exception capture, DOM snapshots. + +## Page Selection + +When multiple app windows/tabs share a debug port: + +- Prefer a positive marker for the surface under test, such as an app root selector. +- Use a negative marker to avoid the wrong surface when necessary. +- If no page matches, list available page titles and URLs instead of guessing. + +## Guardrails + +- Do not rely on stale element references after navigation or structural changes. +- Avoid coordinate clicks unless a fresh screenshot was captured immediately before the click. +- Keep test data local and disposable. +- Do not store screenshots or heap snapshots from privacy-sensitive workspaces unless the user explicitly agrees. +- Do not hard-code selectors, ports, or script paths from another repository. Discover the current repo's local app markers. +- Clean up dev servers, debug sessions, and temp profiles when done. diff --git a/cursor-team-kit/skills/fix-ci/SKILL.md b/cursor-team-kit/skills/fix-ci/SKILL.md index 7f9fb3f..60adbdd 100644 --- a/cursor-team-kit/skills/fix-ci/SKILL.md +++ b/cursor-team-kit/skills/fix-ci/SKILL.md @@ -1,25 +1,26 @@ --- name: fix-ci -description: Find failing CI jobs, inspect logs, and apply focused fixes +description: Find failing PR checks, inspect logs or external check links, and apply focused fixes --- # Fix CI ## Trigger -Branch CI is failing and needs a fast, iterative path to green checks. +Branch or PR CI is failing and needs a fast, iterative path to green checks. ## Workflow -1. 
Identify the latest run for the current branch. -2. Inspect failed jobs and extract the first actionable error. +1. Resolve the active PR and inspect `gh pr checks --json name,bucket,state,workflow,link`. +2. Inspect failed jobs and extract the first actionable error. Use GitHub Actions logs when available; otherwise use the check link to identify the failing command or service. 3. Apply the smallest safe fix. -4. Re-run CI and repeat until green. +4. Push, re-check the PR check set, and repeat until green. ## Guardrails - Fix one actionable failure at a time. - Prefer minimal, low-risk changes before broader refactors. +- Keep `gh pr checks` as the source of truth for overall PR CI state. ## Output diff --git a/cursor-team-kit/skills/get-pr-comments/SKILL.md b/cursor-team-kit/skills/get-pr-comments/SKILL.md index af50321..396aad5 100644 --- a/cursor-team-kit/skills/get-pr-comments/SKILL.md +++ b/cursor-team-kit/skills/get-pr-comments/SKILL.md @@ -20,4 +20,4 @@ Need a concise, actionable summary of feedback on the active pull request. - Grouped feedback summary - Action list ordered by priority -- Open questions that still need clarification +- Open questions that still need clarification \ No newline at end of file diff --git a/cursor-team-kit/skills/loop-on-ci/SKILL.md b/cursor-team-kit/skills/loop-on-ci/SKILL.md index aad5565..23a3860 100644 --- a/cursor-team-kit/skills/loop-on-ci/SKILL.md +++ b/cursor-team-kit/skills/loop-on-ci/SKILL.md @@ -1,31 +1,37 @@ --- name: loop-on-ci -description: Watch CI runs and iterate on failures until all checks pass +description: Monitor PR checks and fix failures until green. Uses gh pr checks as the source of truth for PR-attached checks. --- # Loop on CI ## Trigger -Need to watch branch CI and iterate on failures until green. +Need to watch a branch or pull request and iterate on CI failures until all required checks are green. + +Use `gh pr checks` as the source of truth. 
It includes all PR-attached checks, while `gh run list` only covers GitHub Actions. ## Workflow -1. Find the current branch and latest workflow run. -2. Wait for CI completion with `gh run watch --exit-status`. -3. If failed, inspect failed logs, implement a focused fix, commit, and push. -4. Repeat until all required checks pass. +1. Resolve the PR for the current branch. +2. Inspect current PR checks before waiting. +3. If checks already failed, diagnose those failures first. +4. If checks are pending, watch with `gh pr checks --watch --fail-fast`. +5. After each push, re-check the full PR check set and repeat until green. ## Commands ```bash -# Latest run for current branch -gh run list --branch "$(git branch --show-current)" --limit 5 +# Resolve the active PR +gh pr view --json number,url,headRefName + +# Inspect all attached checks +gh pr checks --json name,bucket,state,workflow,link -# Block until completion (0 on pass, non-zero on fail) -gh run watch --exit-status +# Watch pending checks and fail fast +gh pr checks --watch --fail-fast -# Inspect failed jobs +# GitHub Actions logs, when the failing check links to a GHA run gh run view --log-failed ``` @@ -33,7 +39,9 @@ gh run view --log-failed - Keep each fix scoped to a single failure cause when possible. - Do not bypass hooks (`--no-verify`) to force progress. +- If the failure is clearly unrelated to the PR and appears fixed on main, merge latest main instead of bloating the PR with unrelated fixes. - If failures are flaky, retry once and report flake evidence. +- Re-run `gh pr checks --json name,bucket,state,workflow,link` after every push; the check set can change. 
## Output diff --git a/cursor-team-kit/skills/make-pr-easy-to-review/SKILL.md b/cursor-team-kit/skills/make-pr-easy-to-review/SKILL.md new file mode 100644 index 0000000..f5343c0 --- /dev/null +++ b/cursor-team-kit/skills/make-pr-easy-to-review/SKILL.md @@ -0,0 +1,59 @@ +--- +name: make-pr-easy-to-review +description: Prepare PRs for review by cleaning noisy history, improving PR descriptions, and adding reviewer guidance without changing code behavior. Use for "make this easy to review", "tidy this PR", "clean up commits", or "annotate the diff". +--- + +# Make PR Easy to Review + +Prepare a PR so a reviewer can quickly understand the intent, important files, and risk. The default goal is reviewability without behavior changes. + +## Workflow + +1. Resolve the target PR from the user-provided URL or current branch. +2. Inspect commits, diff size, changed paths, generated files, and PR description. +3. Identify reviewability issues: noisy commits, stale description, unrelated changes, mixed mechanical and logic changes, missing tests, or unclear reviewer entry points. +4. Propose a plan before rewriting history or force-pushing. +5. Apply safe improvements, then verify the tree or diff still matches the intended code. + +## History Cleanup + +Only rewrite history when the user asks for it or agrees to the plan. Before rewriting: + +```bash +gh pr view --json title,headRefName,baseRefName,state,commits +git fetch origin +ORIGINAL_TREE=$(git rev-parse origin/<head-branch>^{tree}) +``` + +Good commit groupings usually follow dependency order: + +1. Schema/storage or generated API definitions. +2. Core logic. +3. Wiring and integration. +4. UI or surface behavior. +5. Tests. + +After rewriting, verify content identity: + +```bash +echo "Original tree: $ORIGINAL_TREE" +echo "Current tree: $(git rev-parse HEAD^{tree})" +git diff origin/<head-branch> --stat +``` + +Do not push if the tree changed unintentionally.
+ +## Reviewer Guidance + +When code behavior should stay untouched, prefer PR description and review notes: + +- Add a TL;DR that matches the actual diff. +- Separate core files from generated or mechanical files. +- Call out risky behavior changes, migration order, rollout plan, and test coverage. +- Link issue trackers, dashboards, or design docs when they explain intent. + +## Guardrails + +- Never hide meaningful behavior changes inside "cleanup". +- Do not bypass hooks unless the user explicitly asks. +- If the PR is too large to make reviewable with notes, recommend splitting instead of polishing around the problem. diff --git a/cursor-team-kit/skills/review-and-ship/SKILL.md b/cursor-team-kit/skills/review-and-ship/SKILL.md index aaf0e56..000e753 100644 --- a/cursor-team-kit/skills/review-and-ship/SKILL.md +++ b/cursor-team-kit/skills/review-and-ship/SKILL.md @@ -1,21 +1,22 @@ --- name: review-and-ship -description: Run a structured review, close key issues, and ship changes via PR +description: Review the current branch for bugs, intent fit, and test coverage; run or write tests; commit focused work; open or update a PR. --- # Review and ship ## Trigger -Reviewing changes before shipping. Close key issues and open/update PR. +Reviewing changes before shipping. Close key issues, verify behavior, and open or update a PR. ## Workflow -1. Review diff against base branch and identify behavior-impacting risks. -2. Run or update tests for changed behavior. -3. Fix critical issues before finalizing. -4. Commit selective files with a concise message. -5. Push branch and open or update a PR. +1. Gather context: diff against base branch, uncommitted changes, recent commits, changed files, and user intent from recent relevant chats if useful. +2. Run targeted tests for changed behavior. If no focused tests exist, decide whether to add them or document the gap. +3. Review for correctness, regressions, security, and intent fit. Use parallel subagents for larger diffs. 
+4. Fix critical issues before finalizing and re-run affected tests. +5. Commit selective files with a concise message. +6. Push branch and open or update a PR. ## Suggested Checks @@ -23,6 +24,7 @@ Reviewing changes before shipping. Close key issues and open/update PR. git fetch origin main git diff origin/main...HEAD git status +gh pr checks --json name,bucket,state,workflow,link ``` ## Guardrails @@ -30,6 +32,7 @@ git status - Prioritize correctness, security, and regressions over style-only comments. - Keep commits focused and avoid unrelated file changes. - If pre-commit checks fail, fix the issues rather than bypassing hooks. +- Use `gh pr checks` instead of GitHub Actions-only commands when judging PR readiness. ## Output diff --git a/cursor-team-kit/skills/verify-this/SKILL.md b/cursor-team-kit/skills/verify-this/SKILL.md new file mode 100644 index 0000000..e17217e --- /dev/null +++ b/cursor-team-kit/skills/verify-this/SKILL.md @@ -0,0 +1,74 @@ +--- +name: verify-this +description: "Verify a claim with fresh local evidence: restate it falsifiably, capture baseline and treatment, compare artifacts, and return VERIFIED, NOT VERIFIED, or INCONCLUSIVE." +--- + +# Verify This + +Verification is not a recap. It proves or disproves a specific claim with repeatable evidence. + +## When To Use + +- The user asks "verify this", "prove it works", "did this fix it", or "show me the evidence". +- A bug fix needs a before/after repro. +- A UI, CLI, API, performance, or memory claim needs measurement. +- A test passes but the user-visible behavior still needs confirmation. + +Do not use this for vague claims like "the code is cleaner". Ask for a measurable claim first. + +## Workflow + +1. Restate the claim in falsifiable form: condition, metric, and threshold. +2. Pick the smallest local surface that can disprove it. +3. Capture a baseline from the old state: merge base, parent commit, failing branch, or current broken repro. +4.
Capture treatment from the changed state with the same command, data, warmup, and environment. +5. Compare raw artifacts: numbers, screenshots, terminal transcripts, HTTP responses, profiles, heap snapshots, or test output. +6. Return exactly one verdict: `VERIFIED`, `NOT VERIFIED`, or `INCONCLUSIVE`. + +## Local Surfaces + +- Code behavior: focused unit/integration tests or a minimal repro script. +- CLI/TUI behavior: `control-cli`, terminal transcript, or demo recording. +- UI behavior: `control-ui`, screenshots, accessibility snapshots, or browser traces. +- API behavior: local HTTP/RPC request and response diff. +- Performance: same-machine baseline/treatment timings or CPU profiles. +- Memory: heap snapshots before and after the suspected operation. + +## Artifact Layout + +When safe to write artifacts: + +```text +/tmp/verify-this/<claim-slug>/ +├── claim.md +├── timeline.md +├── baseline/ +├── treatment/ +├── diff/ +└── verdict.md +``` + +If artifacts may contain sensitive code, prompts, screenshots, HTTP bodies, or heap data, keep only the minimal inline evidence unless the user agrees to disk storage. + +## Verdict Rules + +- `VERIFIED`: baseline and treatment differ in the predicted direction, by the claimed threshold, with no obvious confound. +- `NOT VERIFIED`: the behavior is unchanged, moves the wrong way, or misses the threshold. +- `INCONCLUSIVE`: no valid baseline, noisy signal, failed measurement, or an environment difference invalidates the comparison. + +## Output + +Use this shape: + +```text +VERIFIED | NOT VERIFIED | INCONCLUSIVE +Claim: <falsifiable claim> + +Evidence: +<metric>: baseline=<...>, treatment=<...>, delta=<...>, threshold=<...> + +Reasoning: +<why the evidence supports this verdict> +``` + +Do not soften a negative result. A clear `NOT VERIFIED` is useful.
diff --git a/cursor-team-kit/skills/what-did-i-get-done/SKILL.md b/cursor-team-kit/skills/what-did-i-get-done/SKILL.md index 51995c1..e6772b2 100644 --- a/cursor-team-kit/skills/what-did-i-get-done/SKILL.md +++ b/cursor-team-kit/skills/what-did-i-get-done/SKILL.md @@ -26,6 +26,6 @@ Need a short, high-signal summary of work completed in a specific time range (fo ## Output -- One short summary suitable for Slack +- One short summary suitable for a status update - Real date range - Optional 2-5 bullets for major changes only diff --git a/cursor-team-kit/skills/workflow-from-chats/SKILL.md b/cursor-team-kit/skills/workflow-from-chats/SKILL.md new file mode 100644 index 0000000..a5170df --- /dev/null +++ b/cursor-team-kit/skills/workflow-from-chats/SKILL.md @@ -0,0 +1,50 @@ +--- +name: workflow-from-chats +description: Extract durable working preferences from recent Cursor chats and convert them into skills, rules, or workflow docs. Use when asked to learn preferences, mine feedback, personalize workflows, or generate team/person-specific agent guidance. +--- + +# Workflow From Chats + +Infer durable working preferences from recent chats. Do not summarize chats; extract reusable workflow guidance. + +## Scope + +- Default to the last 7 days unless the user asks for a different window. +- Read parent transcripts and relevant subagent transcripts. Use subagent content as evidence, but cite only parent conversations. +- Do not expose local transcript paths, secrets, customer data, private chat content, or credentials. + +## Workflow + +1. State the target workflow or preference surface in one paragraph. +2. Build an internal transcript inventory: title/topic, parent conversation ID, approximate date, completion state, relevant subagents, and why it may contain preference evidence. +3. Scan for explicit preferences, corrections, and workflow markers such as "I prefer", "always", "never", "not what I asked", "stop", "review", "PR", "CI", "logs", and "skill". +4. 
Extract preference atoms: trigger, workflow step, decision rule, quality bar, stop condition, evidence, and confidence. +5. Rate confidence as strong, medium, weak, or contradicted. +6. Cluster by workflow shape rather than transcript: shipping, review, simplification, debugging, capture, communication, delegation, or validation. +7. Choose the artifact: new skill, skill edit, rule, workflow doc, or no artifact. +8. Draft only the reusable guidance. Filter anecdotes that will not help future tasks. + +## Confidence + +- Strong: explicit user preference, workflow-changing correction, repeated parent-chat pattern, or direct request to encode behavior. +- Medium: accepted workflow, repeated tool/model/validation preference, or subagent consensus that the parent used successfully. +- Weak: agent-chosen behavior with no user feedback, one ambiguous transcript, or a likely task-specific correction. +- Contradicted: evidence points in incompatible directions; ask the user before writing files. + +## Artifact Choice + +- Skill: recurring multi-step workflow with clear triggers. +- Rule: general behavior that should apply broadly. +- Workflow doc: useful context that is not reliably triggerable. +- No artifact: situational, stale, or low-confidence observation. + +## Output + +Return a concise synthesis first: + +- Target workflow. +- Evidence corpus with parent conversation citations only. +- Preference profile. +- Adopt, consider, dismissed. +- Proposed artifacts. +- Open questions only if they block writing.