From 91fd9d458fc4c31eab0ec928e3ff5866a7870c65 Mon Sep 17 00:00:00 2001 From: John Myers Date: Tue, 2 Jun 2026 17:07:15 -0700 Subject: [PATCH 01/20] chore(gator): add gator gate skill --- .agents/skills/gator-gate/SKILL.md | 621 +++++++++++++++++++++++++++++ 1 file changed, 621 insertions(+) create mode 100644 .agents/skills/gator-gate/SKILL.md diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md new file mode 100644 index 000000000..85eb87d48 --- /dev/null +++ b/.agents/skills/gator-gate/SKILL.md @@ -0,0 +1,621 @@ +--- +name: gator-gate +description: Validate and monitor OpenShell GitHub issues and PRs using the gator:* state machine. Use when asked to triage issues/PRs for project validity, gate PRs, run gator, validate submissions, or monitor PRs toward merge readiness. +--- + +# Gator Gate + +Validate OpenShell GitHub issues and pull requests for project fit, then monitor valid PRs until they are ready for maintainer approval. + +This skill is a gating workflow. It can start from any issue or PR state, inspect the current `gator:*` label, and continue the correct next action. + +## Skill Location + +Codex and other agent harnesses should load this skill from the repository path `.agents/skills/gator-gate/SKILL.md`. After this branch is merged, the canonical GitHub location is <https://github.com/NVIDIA/OpenShell/blob/main/.agents/skills/gator-gate/SKILL.md>. + +## Prerequisites + +- The `gh` CLI must be authenticated (`gh auth status`) +- You must be in the OpenShell repository root +- GitHub write permissions are required to apply labels, comment, close issues/PRs, or post `/ok to test` + +## Authority Rules + +- Do not push commits to a contributor's PR branch by default. +- You may push changes only when explicitly instructed by a GitHub comment from a maintainer or by a direct operator prompt. +- Do not post `/ok to test <head-sha>` unless the current GitHub user has maintainer authority. +- Code review is code-only. 
Do not run pre-commit, unit tests, or E2E locally as part of the initial PR review unless explicitly instructed. +- Security vulnerabilities must not be triaged through public GitHub issues. Follow `SECURITY.md`. + +Maintainer authority means one of: + +- User is in the NVIDIA `openshell-maintainers` team +- User is a CODEOWNER listed in `.github/CODEOWNERS` +- Repository permission is `admin`, `maintain`, or `write` for maintainer-only actions such as `/ok to test` + +Use these checks where needed: + +```bash +gh api user --jq '.login' +gh api repos/NVIDIA/OpenShell/collaborators/<login>/permission --jq '{permission,role_name}' +gh api orgs/NVIDIA/teams/openshell-maintainers/members --jq '.[].login' +``` + +If a permission or team-membership query fails due to API access, fall back to CODEOWNERS and repository permission where possible. If authority cannot be verified, do not perform maintainer-only actions. + +## Comment Marker + +All comments posted by this skill must begin with this marker: + +```markdown +> **gator-agent** +``` + +Use one canonical gator comment per issue or PR when possible. Edit it for state summaries if practical; otherwise post a new marked comment for material transitions. + +## Labels + +There must be at most one `gator:*` label on an issue or PR at any time. 
+ +| Label | Meaning | +|-------|---------| +| `gator:follow-up-needed` | Needs submitter or maintainer clarification; 48 business-hour TTL applies | +| `gator:blocked` | Process blocker prevents validation or monitoring from progressing | +| `gator:validated` | Issue is valid and ready for work; no active PR monitoring needed | +| `gator:in-review` | PR is valid and in agent review or author-feedback loop | +| `gator:watch-pipeline` | Review feedback is resolved; CI/CD monitoring is active | +| `gator:approval-needed` | Agent work is complete; maintainer approval or merge decision remains | + +If labels are missing and you have permission to create them, create them with clear descriptions. Otherwise report the missing labels to the operator. + +```bash +gh label create "gator:follow-up-needed" --description "Gator needs submitter or maintainer follow-up" --color "FBCA04" +gh label create "gator:blocked" --description "Gator is blocked by process or repository gates" --color "BFD4F2" +gh label create "gator:validated" --description "Gator validated this issue as ready for work" --color "0E8A16" +gh label create "gator:in-review" --description "Gator is reviewing or awaiting PR review feedback" --color "1D76DB" +gh label create "gator:watch-pipeline" --description "Gator is monitoring PR CI/CD status" --color "5319E7" +gh label create "gator:approval-needed" --description "Gator completed review; maintainer approval needed" --color "C5DEF5" +``` + +When changing state, remove all existing `gator:*` labels first, then add the new one. + +```bash +gh issue edit <number> --remove-label "gator:follow-up-needed" --remove-label "gator:blocked" --remove-label "gator:validated" --remove-label "gator:in-review" --remove-label "gator:watch-pipeline" --remove-label "gator:approval-needed" +gh issue edit <number> --add-label "gator:<state>" +``` + +Pull requests are also GitHub issues for label operations, so `gh issue edit <number>` is valid for PR labels. 
+ +## Invocation Modes + +The user may provide: + +- A GitHub issue number +- A GitHub PR number +- Both an issue and a PR number +- No number, with an instruction to process untriaged or active gator items + +Resolve PRs and issues carefully: + +```bash +gh issue view <number> --json number,title,body,state,author,labels,comments,createdAt,updatedAt,closedAt,url +gh pr view <number> --json number,title,body,state,author,labels,comments,reviews,closingIssuesReferences,files,isDraft,mergeStateStatus,reviewDecision,headRefOid,headRefName,baseRefName,url +``` + +For a PR-only input, derive linked issues from `closingIssuesReferences`, PR body references such as `Fixes #123`, and issue comments that mention the PR. If no linked issue exists, validate the PR directly. + +## State Machine + +```text +No gator label + -> gator:follow-up-needed missing why, UX path, repro, RFC/roadmap link, or author action + -> gator:blocked process blocker prevents progress + -> gator:validated issue is valid and ready for work + -> gator:in-review PR is valid and enters monitoring + -> close not planned invalid or out of project scope + +gator:follow-up-needed + -> gator:validated issue clarified and valid + -> gator:in-review PR clarified and valid + -> gator:blocked process blocker discovered + -> close not planned 48 business-hour TTL expired + +gator:blocked + -> previous intended state blocker resolved + -> stay blocked blocker still present + -> nudge responsible party blocker unchanged after 48 business hours + -> stop closed by vouch gate; wait for vouch and reopen + +gator:validated + -> stop issue is already ready for work, no new PR or comments + -> gator:in-review linked PR appears and is valid + -> re-evaluate new substantive comments or labels change scope + +gator:in-review + -> gator:watch-pipeline review feedback resolved + -> nudge PR author review feedback unanswered after 48 business hours + -> gator:follow-up-needed author action needed + -> gator:blocked draft, vouch, DCO, merge 
conflict, or authority blocker + +gator:watch-pipeline + -> gator:approval-needed required checks are green + -> gator:in-review new review feedback or code changes need attention + -> gator:follow-up-needed author action needed for failures + -> gator:blocked process blocker prevents test execution + +gator:approval-needed + -> stop human maintainers take over + -> nudge maintainers no maintainer action after 48 business hours + -> gator:in-review maintainer requests changes or author updates PR +``` + +## Step 1: Fetch Context + +Fetch issue, PR, comments, reviews, files, labels, and linked references. Also inspect existing gator state. + +For PRs, record: + +- PR number and URL +- Head SHA from `headRefOid` +- Linked issue numbers +- Draft status +- Merge state +- Review decision +- Changed files and affected subsystems +- Existing `test:*` labels + +For issues, record: + +- Issue number and URL +- Author and author association where available +- Current labels +- Whether a linked PR exists +- Last human or maintainer comment after any gator follow-up request + +## Step 2: Recover From Current State + +If exactly one `gator:*` label exists, resume from that state in the state machine. + +If multiple `gator:*` labels exist: + +1. Treat this as label drift. +2. Read recent comments and labels to infer the most advanced safe state. +3. Comment with the correction. +4. Remove all but the chosen `gator:*` label. + +If no `gator:*` label exists, begin validation. + +## Watch Loop Rules + +Every gator state is a watch state. On each invocation, determine the current state, inspect the latest issue/PR activity, and either advance to the next state, keep waiting, or post a TTL nudge. + +Do not stop after a one-shot check when a PR is in an active waiting state unless the operator explicitly asks for a one-shot status check. Enter a polling loop and state the interval and stop conditions before waiting. 
+ +Default live-watch cadence: + +- Poll every 15 minutes for PRs in active states: `gator:in-review`, `gator:watch-pipeline`, `gator:approval-needed`, and `gator:blocked`. +- Watch PRs indefinitely across gator state transitions until they close, merge, or the operator stops the session. +- Poll every 60 minutes for issue-only `gator:follow-up-needed` or issue-only `gator:blocked` states until they progress, close, or reach a TTL threshold. +- Stop immediately for issue-only `gator:validated` items that have no associated PR. +- Do not stop PR monitoring just because the gator state changes, a human comments, or new commits arrive. Treat those as triggers to re-evaluate and continue from the new state. +- Stop PR monitoring only when the PR closes, merges, the operator stops the session, or an unrecoverable process blocker prevents further agent action. + +Use a concise loop summary before waiting, for example: "Watching PR #123 every 15 minutes until it closes, merges, or the session is stopped; comments, commits, and gator state changes will trigger re-evaluation and continued monitoring." + +Use 48 business hours as the default inactivity threshold for states that are waiting on a person. Business hours are Monday through Friday; do not count Saturday or Sunday. + +State-specific monitoring: + +- `gator:follow-up-needed`: wait for submitter or maintainer clarification. If no substantive response arrives after 48 business hours, close as not planned or close the PR with a TTL-expired comment. +- `gator:blocked`: re-check the blocker. If resolved, continue to the previous intended state. If still blocked after 48 business hours, nudge the responsible party unless the PR was auto-closed by the vouch system. +- `gator:validated`: for an issue-only item with no associated PR, stop; the issue is ready for work. If an associated PR exists or appears during a later invocation, validate the PR and move it to `gator:in-review`. 
If new information changes the scope, re-run validation. +- `gator:in-review`: watch for author commits, author responses, review comments, and unresolved gator findings. If feedback is addressed, move to E2E/test-label decision and then `gator:watch-pipeline`. If feedback is unanswered after 48 business hours, nudge the PR author. Continue watching after either action. +- `gator:watch-pipeline`: watch checks until green, failed, or blocked. Move to `gator:approval-needed` only when required checks are green and no review feedback remains. Continue watching after the state transition because maintainer feedback can arrive later. +- `gator:approval-needed`: watch for maintainer approval, merge, closure, new commits, author responses, or maintainer requested changes. If no maintainer action occurs after 48 business hours, nudge maintainers and CODEOWNERS. If humans request changes, move back to `gator:in-review` and continue watching author follow-up. + +When calculating a nudge TTL, use the latest relevant event for that state: + +- The first comment that entered the current state +- The most recent gator comment in the current state +- The most recent comment or review from the expected actor +- The most recent commit pushed to the PR, when waiting on code changes + +Do not post repeated nudges more often than once per 48 business hours for the same state and actor. + +## Step 3: Check Process Blockers + +Before project-validity review, check blockers. 
+ +Move to `gator:blocked` when any of these apply: + +- PR is draft and not ready for review +- PR is blocked by the vouch system or was auto-closed for lack of vouch +- DCO is missing or failing +- PR has merge conflicts or `mergeStateStatus` indicates dirty/blocked for conflict reasons +- Required `/ok to test <head-sha>` is needed and the current user lacks maintainer authority +- Required CI cannot run because the copy-pr mirror is missing or stale and maintainer authority is unavailable + +For auto-closed vouch-gate PRs, do not treat the proposal as invalid. Comment only if useful, then stop and wait until the author is vouched and the PR is reopened. + +For blocked open PRs, post a concise gator comment that lists the blocker and the exact next human action. On later invocations, re-check the blocker and nudge the responsible party after 48 business hours if it remains unresolved. + +## Step 4: Duplicate Detection + +For newer issues and PRs, check for duplicates before deciding validity. Duplicate detection is a project-fit input, not a substitute for human judgment. + +Search for existing issues and PRs using the title, subsystem labels, changed files, key error strings, and important feature terms: + +```bash +gh search issues --repo NVIDIA/OpenShell "<search terms>" --state open --json number,title,state,url,labels,updatedAt +gh search issues --repo NVIDIA/OpenShell "<search terms>" --state closed --json number,title,state,url,labels,updatedAt +gh search prs --repo NVIDIA/OpenShell "<search terms>" --state open --json number,title,state,url,labels,updatedAt +gh search prs --repo NVIDIA/OpenShell "<search terms>" --state closed --json number,title,state,url,labels,updatedAt +``` + +Treat items as duplicate candidates when they share the same user-visible problem, requested capability, affected subsystem, or implementation approach. Do not rely on title similarity alone. + +If a submission is an exact duplicate of an open validated issue or active PR: + +1. Comment with the matching issue or PR. +2. 
Apply `duplicate` if available. +3. Close only when the duplicate relationship is clear and no extra author-specific context is needed. + +If a submission appears related but may contain new constraints, reproduction details, or a different use case: + +1. Move to `gator:follow-up-needed`. +2. Link the duplicate candidates. +3. Ask the author to explain what is different or whether the older issue/PR covers their need. +4. Flag the candidate duplicate set for human review in the comment. + +If a PR duplicates another open PR or implements a feature already being reviewed elsewhere, move to `gator:follow-up-needed` unless a maintainer has already directed both PRs to proceed independently. + +## Step 5: Auto-Validation + +Auto-validate submissions from maintainers, but still review PR implementations. + +Auto-validation applies when the submitter is: + +- A CODEOWNER +- In `@NVIDIA/openshell-maintainers` + +For maintainer-authored issues without PRs, move to `gator:validated` unless the issue is clearly security-sensitive and belongs outside GitHub. + +For maintainer-authored PRs, move to `gator:in-review` and start PR monitoring. Auto-validation means the change is project-valid; it does not mean the implementation is merge-ready. + +## Step 6: Validate Issues and PRs + +Apply the criteria below in order. If evaluating an issue/PR pair, validate both as one submission but set each object to its appropriate current state: + +- Issue without PR: `gator:validated` +- PR with or without linked issue: `gator:in-review` +- Issue linked to a valid active PR: `gator:validated` on the issue and `gator:in-review` on the PR + +### Already Validated Issue + +If a PR is mapped to an issue that is already valid for the same work, consider the PR project-valid and enter `gator:in-review` unless the PR clearly exceeds the issue scope. 
+ +### RFCs + +For PRs that add or modify `rfc/**`, validate against `rfc/README.md` and `rfc/0000-template/README.md`: + +- RFC lives in `rfc/NNNN-short-name/README.md` +- Front matter includes `authors`, `state`, and `links` +- State is one of `draft`, `review`, `accepted`, `rejected`, `implemented`, `superseded` +- RFC has summary, motivation, non-goals, proposal, implementation plan, risks, alternatives, prior art, and open questions +- RFC is appropriate for cross-cutting, architectural, API, process, or multi-team decisions +- Small bug fixes, small single-component features, docs, dependency updates, and interface-preserving refactors should not use RFCs + +Distinguish structural validity from acceptance. A structurally valid RFC PR can enter `gator:in-review`, but implementation work should not be considered ready until the RFC is accepted or an explicit maintainer says otherwise. + +### Small Concentrated Work + +Validate small and concentrated work when it has clear motivation and one of these shapes: + +- One subsystem: gateway, CLI, supervisor, drivers, network proxy, policy, sandbox, TUI, docs, build/release +- Refactor that removes duplicate code or simplifies internals without UX or functional impact +- Logical packaging refactor, such as splitting crates or separating proto/native schema boundaries +- Test improvements for important code paths or features +- Concentrated bug fix with reproducibility steps and a clear test path +- TUI, CLI, or API quality-of-life improvement with a clear user path +- Driver improvement that makes sandbox lifecycle management easier or more efficient +- Documentation clarification, typo fix, errata, or missing documentation +- CI/CD/build/release improvement, including Snap, package, release, or test harness work + +Documentation changes from non-maintainers must not reorder ToC items, change fundamental hierarchy, or restructure docs without a clear maintainer-approved reason. 
+ +### Provider V2 and Credential Support + +Provider V2 work is a supported high-traction area, but require all of the following: + +- Clear UX path for how users configure and use the provider feature in OpenShell +- Clear statement of why the change is important +- Clear statement of who will use it +- Security boundary analysis for credential handling +- Explanation of whether secrets remain hidden from the sandbox agent + +Be skeptical of changes that expose raw credentials to agents or weaken the credential proxy model, even if the user story is clear. + +### Large or Cross-Cutting Work + +For larger changes that impact multiple subsystems, introduce major architecture changes, or touch high single-digit or double-digit file counts, require at least one: + +- Fits an existing `roadmap` issue +- Directly follows an already validated issue or PR +- Has an accepted or actively reviewed RFC for the design +- Has explicit maintainer confirmation in the issue or PR thread + +If this evidence is missing, use `gator:follow-up-needed` and ask for roadmap/RFC/linkage or maintainer clarification. + +### Follow-Up Triggers + +Use `gator:follow-up-needed` when the submission: + +- Does not meet validation criteria yet +- Lacks practical demonstration of why the author is submitting it +- Lacks reproduction steps for a bug +- Lacks a clear UX path for a user-facing feature +- Supports a narrow upstream project convenience without showing why OpenShell should own it +- Suggests swapping core OpenShell components for another project's technology without a strong OpenShell-specific reason +- Introduces CLI/API/UX changes that only work for one driver implementation +- Overlaps existing work and needs reconciliation with the linked issue/PR/RFC + +When requesting follow-up, ask only for the minimal missing information needed to validate. 
+ +### Invalid or Out of Scope + +Close as not planned or wontfix when the submission is clearly outside OpenShell's scope, duplicates a resolved decision, weakens a project invariant without acceptable rationale, or remains unvalidated after the follow-up TTL. + +Comment before closing and include a concise reason. Apply `wontfix` if appropriate and available. + +## Step 7: Follow-Up TTL + +When applying `gator:follow-up-needed`, post a comment with: + +- What information is missing +- Who needs to respond, usually the original submitter +- That the item may be closed if no author or maintainer response arrives within 48 business hours + +Business hours are Monday through Friday. Do not count Saturday or Sunday toward the 48-hour TTL. + +Any substantive comment from the original submitter or a maintainer resets the clock. Maintainers may also manually change labels; respect the latest maintainer-applied state. + +Bot comments and gator-agent comments do not reset the clock. + +If TTL expires: + +1. Comment that the TTL elapsed. +2. State that the issue or PR can be reopened or re-run through gator when the missing information is available. +3. Close the issue as not planned or close the PR. + +## Step 8: PR Review Loop + +When a PR enters `gator:in-review`, run an independent code-only review. + +Use the `principal-engineer-reviewer` sub-agent. 
Include: + +- PR title, body, linked issues, labels, and files +- Full diff or enough chunked diff context to review all changes +- Instruction to focus on correctness, regressions, security, maintainability, and missing tests +- Instruction not to rely on local test execution + +Post findings as a gator comment or a GitHub PR review: + +- Use inline comments for line-specific defects +- Use a general comment for design concerns, missing tests, or summary feedback +- Do not nitpick style unless it affects maintainability or project conventions + +If findings require author changes, remain in `gator:in-review` or move to `gator:follow-up-needed` if the author must clarify the proposal before code review can continue. + +If no blocking findings remain, decide whether E2E labels are needed, then move to `gator:watch-pipeline`. + +When resuming a PR already in `gator:in-review`, check whether gator review findings or maintainer review comments are still unanswered. If the PR author has pushed commits or replied after the latest feedback, re-review only the relevant changes and decide whether the feedback is resolved. + +If review feedback is waiting on the PR author for more than 48 business hours, post a single author nudge. Use the latest of these timestamps as the TTL start: + +- The gator review comment that requested changes +- The latest maintainer review requesting changes +- The latest gator author-nudge comment +- The latest author commit or author response + +Do not move to `gator:watch-pipeline` until review feedback is addressed or explicitly waived by a maintainer. + +## Step 9: E2E and Test Label Decision + +Apply or recommend `test:*` labels based on changed files and behavior. 
+ +Use `test:e2e` for changes that affect: + +- Sandbox lifecycle +- Gateway/supervisor interaction +- Policy enforcement +- Network proxy behavior +- Provider credential flow +- Docker, Podman, VM, or Kubernetes driver behavior +- Release packaging that needs a runtime smoke test + +Use `test:e2e-gpu` for GPU runtime, CDI, CUDA, GPU driver, or GPU policy behavior. + +Use `test:e2e-kubernetes` for Kubernetes HA, Helm, Agent Sandbox CRDs, Kubernetes scheduling, namespace, or controller behavior when the Kubernetes-specific suite is needed. + +After applying a `test:*` label, read the bot comment that is posted by the E2E Label Help workflow and follow its instructions. + +If a mirror is missing or stale and you have maintainer authority, post: + +```text +/ok to test <head-sha> +``` + +If you do not have maintainer authority, move to `gator:blocked` and state that a maintainer must post `/ok to test <head-sha>`. + +## Step 10: Pipeline Watch Loop + +When in `gator:watch-pipeline`, monitor PR checks and workflow runs. + +Use: + +```bash +gh pr checks <number> +gh run list --branch <head-branch> +``` + +Required gates include at least: + +- `OpenShell / Branch Checks` +- `OpenShell / Helm Lint` +- `OpenShell / E2E` when `test:e2e` is applied +- `OpenShell / GPU E2E` when `test:e2e-gpu` is applied + +If checks are pending, wait a reasonable interval and re-check. + +If checks fail: + +- Inspect failed logs with `gh run view <run-id> --log-failed` +- Determine whether the failure is PR-caused, flaky, or infrastructure-related +- If author changes are required, comment and move to `gator:in-review` or `gator:follow-up-needed` +- If maintainer action is required, move to `gator:blocked` +- If explicitly authorized to push fixes, make the minimal fix and continue watching + +When all required checks are green and no review feedback remains, move to `gator:approval-needed`. 
+ +## Step 11: Approval Needed + +When applying `gator:approval-needed`, post a concise handoff comment: + +- Validation summary +- Review status +- CI status +- E2E labels and outcomes +- Remaining action: maintainer approval/merge decision + +Do not approve or merge unless explicitly instructed and authorized. + +When resuming an item already in `gator:approval-needed`, check whether maintainer approval has been waiting for more than 48 business hours since the latest of: + +- The first `gator:approval-needed` handoff comment +- The most recent maintainer comment or review +- The most recent gator maintainer-nudge comment + +If more than 48 business hours have elapsed, post a single nudge comment tagging `@NVIDIA/openshell-maintainers` and any relevant CODEOWNERS. For PRs, derive relevant CODEOWNERS from `.github/CODEOWNERS` and the changed files; because OpenShell has broad ownership, include the broad owner set when no more specific owner exists. + +Do not post repeated nudges more often than once per 48 business hours. If the PR is no longer green, has new review feedback, or has changed materially, move it back to `gator:in-review` instead of nudging. + +## Comment Templates + +### Follow-Up Needed + +```markdown +> **gator-agent** + +## Follow-Up Needed + +I cannot validate this submission yet because <reason>. + +Please provide <missing information>. If the original submitter or a maintainer does not respond within 48 business hours, this may be closed as not planned. Weekend hours do not count toward the TTL. +``` + +### Blocked + +```markdown +> **gator-agent** + +## Blocked + +Gator is blocked by <blocker>. + +Next action: <responsible party and exact action>. +``` + +### Validated Issue + +```markdown +> **gator-agent** + +## Validated + +This issue is valid for OpenShell because <reason>. + +Recommended next step: <next step>. 
+``` + +### PR Review Handoff + +```markdown +> **gator-agent** + +## PR Review Status + +Validation: <validation summary> +Head SHA: `<head-sha>` + +Review findings: +- <finding> + +Next state: `gator:<state>` +``` + +### Approval Needed + +```markdown +> **gator-agent** + +## Maintainer Approval Needed + +Gator validation and PR monitoring are complete. + +Validation: <validation summary> +Review: <review status> +Checks: <CI status> +E2E: <E2E labels and outcomes> + +Human maintainer approval or merge decision is now required. +``` + +### Maintainer Nudge + +```markdown +> **gator-agent** + +## Maintainer Review Nudge + +This PR has been in `gator:approval-needed` for more than 48 business hours with no maintainer approval or merge decision. + +@NVIDIA/openshell-maintainers <relevant CODEOWNERS>, can someone review and either approve, request changes, or close this out? +``` + +### Author Nudge + +```markdown +> **gator-agent** + +## Author Follow-Up Nudge + +This PR has been in `gator:in-review` for more than 48 business hours with unresolved review feedback. + +@<author>, please respond to the review comments or push an update. If this is no longer planned, please say so and a maintainer can close it out. +``` + +### Blocker Nudge + +```markdown +> **gator-agent** + +## Blocker Follow-Up Nudge + +This item is still blocked by <blocker> after more than 48 business hours. + +Next action: <responsible party and exact action>. +``` + +### Possible Duplicate + +```markdown +> **gator-agent** + +## Possible Duplicate + +This looks related to existing work: + +- <issue or PR link>: <why it looks related> + +Please confirm whether this submission has different requirements or reproduction details. A maintainer should review the duplicate relationship before this proceeds. 
+``` From 1a21bba3fd8ef4ba0a68c0eb64daabb3827507b3 Mon Sep 17 00:00:00 2001 From: John Myers Date: Tue, 2 Jun 2026 17:59:11 -0700 Subject: [PATCH 02/20] chore(gator): add sandbox launcher scaffold --- openshell-agents/gator/README.md | 35 +++ openshell-agents/gator/policy.yaml | 18 ++ .../gator/providers/github-gator.yaml | 67 +++++ openshell-agents/gator/run.sh | 256 ++++++++++++++++++ openshell-agents/gator/sandbox-agent.sh | 81 ++++++ 5 files changed, 457 insertions(+) create mode 100644 openshell-agents/gator/README.md create mode 100644 openshell-agents/gator/policy.yaml create mode 100644 openshell-agents/gator/providers/github-gator.yaml create mode 100755 openshell-agents/gator/run.sh create mode 100755 openshell-agents/gator/sandbox-agent.sh diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md new file mode 100644 index 000000000..da9ba4567 --- /dev/null +++ b/openshell-agents/gator/README.md @@ -0,0 +1,35 @@ +# Gator Agent + +Launch a headless Codex sandbox that runs the `gator-gate` skill against OpenShell issues and pull requests. + +## Prerequisites + +- `gh` is authenticated on the host and has access to `NVIDIA/OpenShell` and `NVIDIA/OpenShell-Community`. +- `codex login` has created `$HOME/.codex/auth.json`. +- The active gateway has the default `codex` provider profile available. +- The sandbox image contains `codex`, `gh`, `git`, `node`, and `bash`. + +## Usage + +```shell +./openshell-agents/gator/run.sh \ + --gateway docker-dev \ + "Run gator on PR 1536 and keep watching until it closes or merges." +``` + +Use `--codex-bin "$(command -v codex)"` when the sandbox image has an older Codex CLI than the model requires. + +The launcher: + +- Imports `providers/github-gator.yaml`. +- Creates or updates the `github-gator` provider from `gh auth token`. +- Creates or updates the default `codex` provider from `$HOME/.codex/auth.json` using profile-backed `--from-existing` discovery. 
+- Requests a gateway refresh for the Codex access-token credential when refresh metadata is configured. +- Enables `providers_v2_enabled`, `agent_policy_proposals_enabled`, and `proposal_approval_mode=auto` at gateway scope. +- Uploads the current `.agents/skills/gator-gate/SKILL.md` into the sandbox payload. +- Optionally uploads a host Codex executable as `/sandbox/payload/codex`. +- Starts `codex exec` without a TTY. + +The GitHub provider profile intentionally does not allow GraphQL because OpenShell's GraphQL policy can constrain operation fields but not repository arguments. The sandbox prompt instructs the agent to use REST via `gh api` for the two allowed repositories. + +Set `GATOR_CODEX_ACCESS_CREDENTIAL_KEY` or pass `--codex-access-key` if the default Codex provider uses a credential key other than `CODEX_AUTH_ACCESS_TOKEN` for the short-lived access token. diff --git a/openshell-agents/gator/policy.yaml b/openshell-agents/gator/policy.yaml new file mode 100644 index 000000000..407baef22 --- /dev/null +++ b/openshell-agents/gator/policy.yaml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +version: 1 + +filesystem_policy: + include_workdir: true + read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log] + read_write: [/sandbox, /tmp, /dev/null] + +landlock: + compatibility: best_effort + +process: + run_as_user: sandbox + run_as_group: sandbox + +network_policies: {} diff --git a/openshell-agents/gator/providers/github-gator.yaml b/openshell-agents/gator/providers/github-gator.yaml new file mode 100644 index 000000000..178b740f1 --- /dev/null +++ b/openshell-agents/gator/providers/github-gator.yaml @@ -0,0 +1,67 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +id: github-gator +display_name: GitHub Gator +description: Repo-scoped GitHub access for the OpenShell gator gate agent +category: source_control +credentials: + - name: GITHUB_TOKEN + description: GitHub token used by the gator gate agent + env_vars: [GITHUB_TOKEN, GH_TOKEN] + required: true + auth_style: bearer + header_name: authorization +discovery: + credentials: [GITHUB_TOKEN] +endpoints: + - host: api.github.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: { method: GET, path: /repos/NVIDIA/OpenShell } + - allow: { method: GET, path: /repos/NVIDIA/OpenShell/** } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/issues/*/comments } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell/issues/comments/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/issues/*/labels } + - allow: { method: PUT, path: /repos/NVIDIA/OpenShell/issues/*/labels } + - allow: { method: DELETE, path: /repos/NVIDIA/OpenShell/issues/*/labels/* } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell/issues/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/labels } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell/labels/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/pulls/*/reviews } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/pulls/*/reviews/*/comments } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/statuses/* } + - allow: { method: GET, path: /repos/NVIDIA/OpenShell-Community } + - allow: { method: GET, path: /repos/NVIDIA/OpenShell-Community/** } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/issues/*/comments } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell-Community/issues/comments/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/issues/*/labels } + - allow: { method: PUT, path: /repos/NVIDIA/OpenShell-Community/issues/*/labels } + - allow: { method: DELETE, path: 
/repos/NVIDIA/OpenShell-Community/issues/*/labels/* } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell-Community/issues/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/labels } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell-Community/labels/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/reviews } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/reviews/*/comments } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/statuses/* } + - host: github.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: { method: GET, path: /NVIDIA/OpenShell } + - allow: { method: GET, path: /NVIDIA/OpenShell/** } + - allow: { method: POST, path: /NVIDIA/OpenShell/**/git-upload-pack } + - allow: { method: GET, path: /NVIDIA/OpenShell-Community } + - allow: { method: GET, path: /NVIDIA/OpenShell-Community/** } + - allow: { method: POST, path: /NVIDIA/OpenShell-Community/**/git-upload-pack } +binaries: + - /usr/bin/gh + - /usr/local/bin/gh + - /usr/bin/git + - /usr/local/bin/git + - /usr/bin/codex + - /usr/local/bin/codex + - /usr/lib/node_modules/@openai/** diff --git a/openshell-agents/gator/run.sh b/openshell-agents/gator/run.sh new file mode 100755 index 000000000..e0fe445d3 --- /dev/null +++ b/openshell-agents/gator/run.sh @@ -0,0 +1,256 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." 
&& pwd)" +GATOR_DIR="$ROOT_DIR/openshell-agents/gator" +SKILL_FILE="$ROOT_DIR/.agents/skills/gator-gate/SKILL.md" + +OPENSHELL_BIN="${OPENSHELL_BIN:-openshell}" +GATEWAY="${GATOR_GATEWAY:-docker-dev}" +SANDBOX_NAME="${GATOR_SANDBOX_NAME:-gator-$(date +%Y%m%d%H%M%S)}" +SANDBOX_FROM="${GATOR_SANDBOX_FROM:-base}" +GITHUB_PROVIDER="${GATOR_GITHUB_PROVIDER:-github-gator}" +CODEX_PROVIDER="${GATOR_CODEX_PROVIDER:-codex}" +CODEX_ACCESS_CREDENTIAL_KEY="${GATOR_CODEX_ACCESS_CREDENTIAL_KEY:-CODEX_AUTH_ACCESS_TOKEN}" +CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" +CODEX_REASONING="${CODEX_REASONING:-high}" +CODEX_LOCAL_BIN="${GATOR_CODEX_LOCAL_BIN:-}" +BACKGROUND=0 + +usage() { + cat <<'EOF' +Usage: openshell-agents/gator/run.sh [options] "gator prompt" + +Options: + --gateway NAME Gateway name to use (default: docker-dev) + --name NAME Sandbox name (default: gator-) + --from IMAGE Sandbox source/image (default: base) + --github-provider NAME GitHub provider name (default: github-gator) + --codex-provider NAME Codex provider name (default: codex) + --codex-access-key KEY Codex access-token credential key (default: CODEX_AUTH_ACCESS_TOKEN) + --codex-bin PATH Upload this Codex executable into the sandbox + --background Run sandbox create in the background and write a log + -h, --help Show this help +EOF +} + +fail() { + echo "error: $*" >&2 + exit 1 +} + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" +} + +openshell_cmd() { + "$OPENSHELL_BIN" --gateway "$GATEWAY" "$@" +} + +upsert_provider() { + local name="$1" + local type="$2" + shift 2 + + if openshell_cmd provider get "$name" >/dev/null 2>&1; then + openshell_cmd provider update "$name" "$@" >/dev/null + else + openshell_cmd provider create --name "$name" --type "$type" "$@" >/dev/null + fi +} + +refresh_provider_if_configured() { + local provider="$1" + local credential_key="$2" + local status_output + + status_output="$(openshell_cmd provider refresh status "$provider" --credential-key 
"$credential_key")" + if [[ "$status_output" == No\ refresh\ configuration* ]]; then + echo "No refresh metadata for $provider/$credential_key; using the currently stored credential." + return 0 + fi + + openshell_cmd provider refresh rotate "$provider" --credential-key "$credential_key" >/dev/null + echo "Requested gateway refresh for $provider/$credential_key." +} + +import_provider_profile() { + local profile_file="$1" + local import_output + + # Custom profile import is create-only. Replace it when possible so repeat + # runs track this checkout, but keep going if a live sandbox is still using + # the already-imported profile. + openshell_cmd provider profile delete github-gator >/dev/null 2>&1 || true + if import_output="$(openshell_cmd provider profile import --file "$profile_file" 2>&1)"; then + return 0 + fi + if [[ "$import_output" == *"already exists"* ]]; then + echo "Provider profile already exists: $profile_file" + return 0 + fi + + printf '%s\n' "$import_output" >&2 + return 1 +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --gateway) + [[ $# -ge 2 ]] || fail "--gateway requires a value" + GATEWAY="$2" + shift 2 + ;; + --name) + [[ $# -ge 2 ]] || fail "--name requires a value" + SANDBOX_NAME="$2" + shift 2 + ;; + --from) + [[ $# -ge 2 ]] || fail "--from requires a value" + SANDBOX_FROM="$2" + shift 2 + ;; + --github-provider) + [[ $# -ge 2 ]] || fail "--github-provider requires a value" + GITHUB_PROVIDER="$2" + shift 2 + ;; + --codex-provider) + [[ $# -ge 2 ]] || fail "--codex-provider requires a value" + CODEX_PROVIDER="$2" + shift 2 + ;; + --codex-access-key) + [[ $# -ge 2 ]] || fail "--codex-access-key requires a value" + CODEX_ACCESS_CREDENTIAL_KEY="$2" + shift 2 + ;; + --codex-bin) + [[ $# -ge 2 ]] || fail "--codex-bin requires a value" + CODEX_LOCAL_BIN="$2" + shift 2 + ;; + --background) + BACKGROUND=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + --) + shift + break + ;; + -*) + fail "unknown option: $1" + ;; + *) + break + ;; + esac +done 
+ +[[ $# -gt 0 ]] || { usage >&2; exit 2; } +USER_PROMPT="$*" + +require_cmd gh +require_cmd jq +require_cmd "$OPENSHELL_BIN" +[[ -f "$SKILL_FILE" ]] || fail "missing gator skill: $SKILL_FILE" +[[ -f "$HOME/.codex/auth.json" ]] || fail "missing local Codex auth; run: codex login" + +CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token // empty' "$HOME/.codex/auth.json")" +CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // empty' "$HOME/.codex/auth.json")" +CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' "$HOME/.codex/auth.json")" +CODEX_AUTH_ID_TOKEN="$(jq -r '.tokens.id_token // empty' "$HOME/.codex/auth.json")" +[[ -n "$CODEX_AUTH_ACCESS_TOKEN" ]] || fail "Codex auth is missing tokens.access_token" +[[ -n "$CODEX_AUTH_REFRESH_TOKEN" ]] || fail "Codex auth is missing tokens.refresh_token" +[[ -n "$CODEX_AUTH_ACCOUNT_ID" ]] || fail "Codex auth is missing tokens.account_id" + +GITHUB_TOKEN="$(gh auth token)" +[[ -n "$GITHUB_TOKEN" ]] || fail "gh auth token returned empty output" + +export CODEX_AUTH_ACCESS_TOKEN +export CODEX_AUTH_REFRESH_TOKEN +export CODEX_AUTH_ACCOUNT_ID +export CODEX_AUTH_ID_TOKEN +export GITHUB_TOKEN + +PAYLOAD_PARENT="$(mktemp -d "${TMPDIR:-/tmp}/openshell-gator.XXXXXX")" +PAYLOAD_DIR="$PAYLOAD_PARENT/payload" +cleanup() { + rm -rf "$PAYLOAD_PARENT" +} +trap cleanup EXIT + +mkdir -p "$PAYLOAD_DIR/.agents/skills/gator-gate" +cp "$SKILL_FILE" "$PAYLOAD_DIR/.agents/skills/gator-gate/SKILL.md" +cp "$GATOR_DIR/sandbox-agent.sh" "$PAYLOAD_DIR/sandbox-agent.sh" +chmod +x "$PAYLOAD_DIR/sandbox-agent.sh" +if [[ -n "$CODEX_LOCAL_BIN" ]]; then + [[ -x "$CODEX_LOCAL_BIN" ]] || fail "--codex-bin is not executable: $CODEX_LOCAL_BIN" + cp "$CODEX_LOCAL_BIN" "$PAYLOAD_DIR/codex" + chmod +x "$PAYLOAD_DIR/codex" +fi +cat > "$PAYLOAD_DIR/gator-prompt.md" </dev/null +openshell_cmd settings set --global --key agent_policy_proposals_enabled --value true --yes >/dev/null +openshell_cmd settings set --global --key proposal_approval_mode 
--value auto --yes >/dev/null + +SANDBOX_CMD=( + "$OPENSHELL_BIN" --gateway "$GATEWAY" sandbox create + --name "$SANDBOX_NAME" + --from "$SANDBOX_FROM" + --provider "$CODEX_PROVIDER" + --provider "$GITHUB_PROVIDER" + --policy "$GATOR_DIR/policy.yaml" + --upload "$PAYLOAD_DIR:/sandbox" + --no-git-ignore + --no-auto-providers + --no-tty + -- env "CODEX_MODEL=$CODEX_MODEL" "CODEX_REASONING=$CODEX_REASONING" bash /sandbox/payload/sandbox-agent.sh +) + +echo "Launching gator sandbox '$SANDBOX_NAME' on gateway '$GATEWAY'..." +if [[ "$BACKGROUND" == "1" ]]; then + mkdir -p "$GATOR_DIR/logs" + LOG_FILE="$GATOR_DIR/logs/${SANDBOX_NAME}.log" + trap - EXIT + ( + trap cleanup EXIT + "${SANDBOX_CMD[@]}" + ) >"$LOG_FILE" 2>&1 & + echo "Started in background. Log: $LOG_FILE" +else + "${SANDBOX_CMD[@]}" +fi diff --git a/openshell-agents/gator/sandbox-agent.sh b/openshell-agents/gator/sandbox-agent.sh new file mode 100755 index 000000000..716ed93c2 --- /dev/null +++ b/openshell-agents/gator/sandbox-agent.sh @@ -0,0 +1,81 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +require_env() { + local name="$1" + [[ -n "${!name:-}" ]] || { echo "missing required env: $name" >&2; exit 1; } +} + +require_env CODEX_AUTH_ACCESS_TOKEN +require_env CODEX_AUTH_REFRESH_TOKEN +require_env CODEX_AUTH_ACCOUNT_ID +require_env GITHUB_TOKEN + +export GH_TOKEN="$GITHUB_TOKEN" +export HOME=/sandbox/home + +mkdir -p "$HOME/.codex" +node - <<'NODE' +const fs = require("fs"); +const path = `${process.env.HOME}/.codex/auth.json`; +const b64u = (obj) => Buffer.from(JSON.stringify(obj)).toString("base64url"); +const now = Math.floor(Date.now() / 1000); +const fallbackIdToken = [ + b64u({ alg: "none", typ: "JWT" }), + b64u({ + iss: "https://auth.openai.com", + aud: "codex", + sub: "openshell-gator", + email: "gator@openshell.local", + iat: now, + exp: now + 3600, + }), + "placeholder", +].join("."); + +fs.writeFileSync(path, JSON.stringify({ + auth_mode: "chatgpt", + OPENAI_API_KEY: null, + tokens: { + id_token: fallbackIdToken, + access_token: process.env.CODEX_AUTH_ACCESS_TOKEN, + refresh_token: process.env.CODEX_AUTH_REFRESH_TOKEN, + account_id: process.env.CODEX_AUTH_ACCOUNT_ID, + }, + last_refresh: new Date().toISOString(), +}, null, 2)); +NODE +chmod 600 "$HOME/.codex/auth.json" + +WORK="$(mktemp -d)" +cd "$WORK" + +CODEX_BIN="${CODEX_BIN:-codex}" +if [[ -x /sandbox/payload/codex ]]; then + CODEX_BIN=/sandbox/payload/codex +fi +CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" +CODEX_REASONING="${CODEX_REASONING:-high}" + +CODEX_EXEC_ARGS=( + exec + --skip-git-repo-check + --sandbox danger-full-access + --ephemeral +) + +if "$CODEX_BIN" exec --help 2>/dev/null | grep -q -- "--ignore-user-config"; then + CODEX_EXEC_ARGS+=(--ignore-user-config) +fi +if "$CODEX_BIN" exec --help 2>/dev/null | grep -q -- "--ignore-rules"; then + CODEX_EXEC_ARGS+=(--ignore-rules) +fi + +exec "$CODEX_BIN" "${CODEX_EXEC_ARGS[@]}" \ + -c "model=\"${CODEX_MODEL}\"" \ + -c "model_reasoning_effort=\"${CODEX_REASONING}\"" \ + "$(cat 
/sandbox/payload/gator-prompt.md)" From ba80e91408c4fe59450d70317b00767832e27872 Mon Sep 17 00:00:00 2001 From: John Myers Date: Wed, 3 Jun 2026 12:34:06 -0700 Subject: [PATCH 03/20] chore(gator): add codex image and docs checks --- .agents/skills/gator-gate/SKILL.md | 8 ++ openshell-agents/gator/.gitignore | 1 + openshell-agents/gator/Dockerfile | 94 +++++++++++++++++++ openshell-agents/gator/README.md | 8 +- openshell-agents/gator/run.sh | 18 +++- .../gator/scripts/install-codex.sh | 17 ++++ 6 files changed, 141 insertions(+), 5 deletions(-) create mode 100644 openshell-agents/gator/.gitignore create mode 100644 openshell-agents/gator/Dockerfile create mode 100755 openshell-agents/gator/scripts/install-codex.sh diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index 85eb87d48..a536eb4b2 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -333,6 +333,8 @@ Provider V2 work is a supported high-traction area, but require all of the follo - Security boundary analysis for credential handling - Explanation of whether secrets remain hidden from the sandbox agent +Provider additions and updates must use providers v2 through provider profiles. Treat any new or modified legacy `ProviderDiscoverySpec` entries as a blocking review finding unless a maintainer explicitly requests the legacy path. Do not ask contributors to update both systems for compatibility; the provider profile is the source of truth for new provider network policy, credentials, discovery, and refresh metadata. + Be skeptical of changes that expose raw credentials to agents or weaken the credential proxy model, even if the user story is clear. ### Large or Cross-Cutting Work @@ -396,6 +398,7 @@ Use the `principal-engineer-reviewer` sub-agent. 
Include: - PR title, body, linked issues, labels, and files - Full diff or enough chunked diff context to review all changes - Instruction to focus on correctness, regressions, security, maintainability, and missing tests +- Instruction to check whether direct UX changes update the Fern docs under `docs/` and navigation when needed - Instruction not to rely on local test execution Post findings as a gator comment or a GitHub PR review: @@ -406,6 +409,8 @@ Post findings as a gator comment or a GitHub PR review: If findings require author changes, remain in `gator:in-review` or move to `gator:follow-up-needed` if the author must clarify the proposal before code review can continue. +For validated PRs with direct user-facing UX changes, require Fern docs updates before moving to `gator:watch-pipeline`. Direct UX changes include CLI commands/flags/output, sandbox behavior visible to users, provider setup flows, gateway configuration fields, TUI screens, published API behavior, policy syntax, installation/packaging behavior, and documented workflows. Accept either relevant updates under `docs/` plus `docs/index.yml` navigation when needed, or a clear maintainer-authored explanation in the PR that docs are intentionally unnecessary. If docs are missing and no explanation exists, treat it as review feedback. + If no blocking findings remain, decide whether E2E labels are needed, then move to `gator:watch-pipeline`. When resuming a PR already in `gator:in-review`, check whether gator review findings or maintainer review comments are still unanswered. If the PR author has pushed commits or replied after the latest feedback, re-review only the relevant changes and decide whether the feedback is resolved. @@ -550,6 +555,8 @@ Head SHA: `` Review findings: - +Docs: + Next state: `` ``` @@ -564,6 +571,7 @@ Gator validation and PR monitoring are complete. 
Validation: Review: +Docs: Checks: E2E: diff --git a/openshell-agents/gator/.gitignore b/openshell-agents/gator/.gitignore new file mode 100644 index 000000000..333c1e910 --- /dev/null +++ b/openshell-agents/gator/.gitignore @@ -0,0 +1 @@ +logs/ diff --git a/openshell-agents/gator/Dockerfile b/openshell-agents/gator/Dockerfile new file mode 100644 index 000000000..6437b24e0 --- /dev/null +++ b/openshell-agents/gator/Dockerfile @@ -0,0 +1,94 @@ +# syntax=docker/dockerfile:1 + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Gator sandbox image. +# +# This mirrors the OpenShell Community base image's core system and developer +# tooling, but keeps the agent surface focused on Codex + GitHub tooling for the +# gator-gate workflow. + +FROM nvcr.io/nvidia/base/ubuntu:noble-20251013 AS system + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /sandbox + +# Core system dependencies copied from the community base sandbox image. +# iproute2: network namespace management (ip netns, veth pairs) +# iptables: legacy bypass detection (kept for transition) +# nftables: bypass detection; log + reject rules for direct connection diagnostics +# dnsutils: dig, nslookup +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + dnsutils \ + iproute2 \ + iptables \ + nftables \ + iputils-ping \ + net-tools \ + netcat-openbsd \ + openssh-sftp-server \ + procps \ + traceroute \ + && rm -rf /var/lib/apt/lists/* + +RUN groupadd -r supervisor && useradd -r -g supervisor -s /usr/sbin/nologin supervisor && \ + groupadd -r sandbox && useradd -r -g sandbox -d /sandbox -s /bin/bash sandbox + +FROM system AS devtools + +# Node.js 22 + build toolchain. Keep the default apt installs aligned with the +# community base image, then add the small CLI tools gator commonly needs. 
+RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ + apt-get install -y --no-install-recommends \ + build-essential \ + git \ + jq \ + less \ + nodejs=22.22.1-1nodesource1 \ + ripgrep \ + vim-tiny \ + nano \ + && rm -rf /var/lib/apt/lists/* \ + && npm install -g npm@11.11.0 + +# GitHub CLI +RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + -o /usr/share/keyrings/githubcli-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + > /etc/apt/sources.list.d/github-cli.list && \ + apt-get update && apt-get install -y --no-install-recommends gh && \ + rm -rf /var/lib/apt/lists/* + +COPY scripts/install-codex.sh /usr/local/bin/install-codex.sh +ARG CODEX_VERSION=latest +RUN chmod 755 /usr/local/bin/install-codex.sh && \ + /usr/local/bin/install-codex.sh "$CODEX_VERSION" + +# Provider profiles include both /usr/bin and /usr/local/bin variants for common +# tools. Create the /usr/local/bin aliases in this image so sandbox symlink +# resolution does not warn about missing alternate paths during policy reloads. +RUN ln -sf /usr/bin/gh /usr/local/bin/gh && \ + ln -sf /usr/bin/git /usr/local/bin/git && \ + ln -sf /usr/bin/codex /usr/local/bin/codex + +FROM devtools AS final + +ENV PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin" + +RUN mkdir -p /etc/openshell +COPY policy.yaml /etc/openshell/policy.yaml + +RUN printf 'export PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin"\nexport PS1="\\u@\\h:\\w\\$ "\n' \ + > /sandbox/.bashrc && \ + printf '[ -f ~/.bashrc ] && . 
~/.bashrc\n' > /sandbox/.profile && \ + chown -R sandbox:sandbox /sandbox + +USER sandbox + +ENTRYPOINT ["/bin/bash"] diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md index da9ba4567..d2b609a8a 100644 --- a/openshell-agents/gator/README.md +++ b/openshell-agents/gator/README.md @@ -7,7 +7,7 @@ Launch a headless Codex sandbox that runs the `gator-gate` skill against OpenShe - `gh` is authenticated on the host and has access to `NVIDIA/OpenShell` and `NVIDIA/OpenShell-Community`. - `codex login` has created `$HOME/.codex/auth.json`. - The active gateway has the default `codex` provider profile available. -- The sandbox image contains `codex`, `gh`, `git`, `node`, and `bash`. +- A local gateway is available when using the default local Dockerfile source. ## Usage @@ -17,7 +17,9 @@ Launch a headless Codex sandbox that runs the `gator-gate` skill against OpenShe "Run gator on PR 1536 and keep watching until it closes or merges." ``` -Use `--codex-bin "$(command -v codex)"` when the sandbox image has an older Codex CLI than the model requires. +By default the launcher uses `openshell-agents/gator` as the sandbox source. Local gateways build `openshell-agents/gator/Dockerfile`, which installs the latest stable `@openai/codex` package at image build time. Use `--from ` to run a prebuilt image on remote gateways. + +Use `--codex-bin "$(command -v codex)"` only when the host executable is compatible with the sandbox OS and architecture. The launcher: @@ -26,9 +28,11 @@ The launcher: - Creates or updates the default `codex` provider from `$HOME/.codex/auth.json` using profile-backed `--from-existing` discovery. - Requests a gateway refresh for the Codex access-token credential when refresh metadata is configured. - Enables `providers_v2_enabled`, `agent_policy_proposals_enabled`, and `proposal_approval_mode=auto` at gateway scope. +- Uses the gator image policy copied to `/etc/openshell/policy.yaml`. 
- Uploads the current `.agents/skills/gator-gate/SKILL.md` into the sandbox payload. - Optionally uploads a host Codex executable as `/sandbox/payload/codex`. - Starts `codex exec` without a TTY. +- Deletes the sandbox automatically after Codex exits. Pass `--keep` to preserve it for debugging. The GitHub provider profile intentionally does not allow GraphQL because OpenShell's GraphQL policy can constrain operation fields but not repository arguments. The sandbox prompt instructs the agent to use REST via `gh api` for the two allowed repositories. diff --git a/openshell-agents/gator/run.sh b/openshell-agents/gator/run.sh index e0fe445d3..acd509e4a 100755 --- a/openshell-agents/gator/run.sh +++ b/openshell-agents/gator/run.sh @@ -12,7 +12,7 @@ SKILL_FILE="$ROOT_DIR/.agents/skills/gator-gate/SKILL.md" OPENSHELL_BIN="${OPENSHELL_BIN:-openshell}" GATEWAY="${GATOR_GATEWAY:-docker-dev}" SANDBOX_NAME="${GATOR_SANDBOX_NAME:-gator-$(date +%Y%m%d%H%M%S)}" -SANDBOX_FROM="${GATOR_SANDBOX_FROM:-base}" +SANDBOX_FROM="${GATOR_SANDBOX_FROM:-$GATOR_DIR}" GITHUB_PROVIDER="${GATOR_GITHUB_PROVIDER:-github-gator}" CODEX_PROVIDER="${GATOR_CODEX_PROVIDER:-codex}" CODEX_ACCESS_CREDENTIAL_KEY="${GATOR_CODEX_ACCESS_CREDENTIAL_KEY:-CODEX_AUTH_ACCESS_TOKEN}" @@ -20,6 +20,7 @@ CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" CODEX_REASONING="${CODEX_REASONING:-high}" CODEX_LOCAL_BIN="${GATOR_CODEX_LOCAL_BIN:-}" BACKGROUND=0 +KEEP_SANDBOX=0 usage() { cat <<'EOF' @@ -28,12 +29,13 @@ Usage: openshell-agents/gator/run.sh [options] "gator prompt" Options: --gateway NAME Gateway name to use (default: docker-dev) --name NAME Sandbox name (default: gator-) - --from IMAGE Sandbox source/image (default: base) + --from IMAGE Sandbox source/image (default: openshell-agents/gator) --github-provider NAME GitHub provider name (default: github-gator) --codex-provider NAME Codex provider name (default: codex) --codex-access-key KEY Codex access-token credential key (default: CODEX_AUTH_ACCESS_TOKEN) --codex-bin PATH 
Upload this Codex executable into the sandbox --background Run sandbox create in the background and write a log + --keep Keep the sandbox after Codex exits (default: delete on exit) -h, --help Show this help EOF } @@ -139,6 +141,10 @@ while [[ $# -gt 0 ]]; do BACKGROUND=1 shift ;; + --keep) + KEEP_SANDBOX=1 + shift + ;; -h|--help) usage exit 0 @@ -227,17 +233,23 @@ openshell_cmd settings set --global --key providers_v2_enabled --value true --ye openshell_cmd settings set --global --key agent_policy_proposals_enabled --value true --yes >/dev/null openshell_cmd settings set --global --key proposal_approval_mode --value auto --yes >/dev/null +KEEP_ARGS=() +if [[ "$KEEP_SANDBOX" != "1" ]]; then + KEEP_ARGS+=(--no-keep) +fi + SANDBOX_CMD=( + env -u OPENSHELL_SANDBOX_POLICY "$OPENSHELL_BIN" --gateway "$GATEWAY" sandbox create --name "$SANDBOX_NAME" --from "$SANDBOX_FROM" --provider "$CODEX_PROVIDER" --provider "$GITHUB_PROVIDER" - --policy "$GATOR_DIR/policy.yaml" --upload "$PAYLOAD_DIR:/sandbox" --no-git-ignore --no-auto-providers --no-tty + "${KEEP_ARGS[@]}" -- env "CODEX_MODEL=$CODEX_MODEL" "CODEX_REASONING=$CODEX_REASONING" bash /sandbox/payload/sandbox-agent.sh ) diff --git a/openshell-agents/gator/scripts/install-codex.sh b/openshell-agents/gator/scripts/install-codex.sh new file mode 100755 index 000000000..833ef9679 --- /dev/null +++ b/openshell-agents/gator/scripts/install-codex.sh @@ -0,0 +1,17 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +version="${1:-${CODEX_VERSION:-latest}}" + +if [[ "$version" != "latest" && ! 
"$version" =~ ^[0-9]+(\.[0-9]+){0,2}(-[0-9A-Za-z.-]+)?$ ]]; then + echo "unsupported Codex version: $version" >&2 + exit 2 +fi + +npm install -g "@openai/codex@${version}" +npm cache clean --force >/dev/null 2>&1 || true +codex --version From 9825e16abf3053716d600fe3e5b8d85bf6f1e7fb Mon Sep 17 00:00:00 2001 From: John Myers Date: Wed, 3 Jun 2026 13:25:40 -0700 Subject: [PATCH 04/20] chore(gator): fold approved provider policy rules --- .../gator/providers/github-gator.yaml | 24 +++++++++++++++++++ providers/codex.yaml | 5 ++++ 2 files changed, 29 insertions(+) diff --git a/openshell-agents/gator/providers/github-gator.yaml b/openshell-agents/gator/providers/github-gator.yaml index 178b740f1..54e3c3612 100644 --- a/openshell-agents/gator/providers/github-gator.yaml +++ b/openshell-agents/gator/providers/github-gator.yaml @@ -20,6 +20,15 @@ endpoints: protocol: rest enforcement: enforce rules: + - allow: { method: GET, path: /user } + - allow: + method: GET + path: /search/issues + query: + q: + any: + - "*repo:NVIDIA/OpenShell*" + - "*repo:NVIDIA/OpenShell-Community*" - allow: { method: GET, path: /repos/NVIDIA/OpenShell } - allow: { method: GET, path: /repos/NVIDIA/OpenShell/** } - allow: { method: POST, path: /repos/NVIDIA/OpenShell/issues/*/comments } @@ -30,6 +39,7 @@ endpoints: - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell/issues/* } - allow: { method: POST, path: /repos/NVIDIA/OpenShell/labels } - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell/labels/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/pulls/*/comments } - allow: { method: POST, path: /repos/NVIDIA/OpenShell/pulls/*/reviews } - allow: { method: POST, path: /repos/NVIDIA/OpenShell/pulls/*/reviews/*/comments } - allow: { method: POST, path: /repos/NVIDIA/OpenShell/statuses/* } @@ -43,6 +53,7 @@ endpoints: - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell-Community/issues/* } - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/labels } - allow: { 
method: PATCH, path: /repos/NVIDIA/OpenShell-Community/labels/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/comments } - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/reviews } - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/reviews/*/comments } - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/statuses/* } @@ -57,6 +68,19 @@ endpoints: - allow: { method: GET, path: /NVIDIA/OpenShell-Community } - allow: { method: GET, path: /NVIDIA/OpenShell-Community/** } - allow: { method: POST, path: /NVIDIA/OpenShell-Community/**/git-upload-pack } + - host: codeload.github.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: { method: GET, path: /NVIDIA/OpenShell/** } + - allow: { method: GET, path: /NVIDIA/OpenShell-Community/** } + - host: results-receiver.actions.githubusercontent.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: { method: GET, path: /rest/runs/** } binaries: - /usr/bin/gh - /usr/local/bin/gh diff --git a/providers/codex.yaml b/providers/codex.yaml index 7edd86a97..5396333bd 100644 --- a/providers/codex.yaml +++ b/providers/codex.yaml @@ -45,4 +45,9 @@ endpoints: protocol: rest access: read-write enforcement: enforce + - host: files.openai.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce binaries: [/usr/bin/codex, /usr/local/bin/codex, /usr/lib/node_modules/@openai/**] From 567bc88e95de8d323aa327c442883a00d26447e7 Mon Sep 17 00:00:00 2001 From: John Myers Date: Thu, 4 Jun 2026 07:34:41 -0700 Subject: [PATCH 05/20] chore(gator): add deterministic reviewer runner --- .agents/skills/gator-gate/SKILL.md | 2 + openshell-agents/gator/README.md | 1 + openshell-agents/gator/reviewer-agent.sh | 57 ++++++++++++++++++++++++ openshell-agents/gator/run.sh | 7 +++ 4 files changed, 67 insertions(+) create mode 100755 openshell-agents/gator/reviewer-agent.sh diff --git 
a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index a536eb4b2..5e466a942 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -401,6 +401,8 @@ Use the `principal-engineer-reviewer` sub-agent. Include: - Instruction to check whether direct UX changes update the Fern docs under `docs/` and navigation when needed - Instruction not to rely on local test execution +When running inside the `openshell-agents/gator` sandbox launcher, invoke the reviewer with `bash /sandbox/payload/reviewer-agent.sh < review-task.md`. Put the PR metadata, linked issue context, and diff/file context in `review-task.md`, save the reviewer output, and use it as the independent review result. The main gator process remains responsible for labels, comments, docs gates, and CI monitoring. + Post findings as a gator comment or a GitHub PR review: - Use inline comments for line-specific defects diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md index d2b609a8a..7e2565721 100644 --- a/openshell-agents/gator/README.md +++ b/openshell-agents/gator/README.md @@ -30,6 +30,7 @@ The launcher: - Enables `providers_v2_enabled`, `agent_policy_proposals_enabled`, and `proposal_approval_mode=auto` at gateway scope. - Uses the gator image policy copied to `/etc/openshell/policy.yaml`. - Uploads the current `.agents/skills/gator-gate/SKILL.md` into the sandbox payload. +- Uploads `.claude/agents/principal-engineer-reviewer.md` and `reviewer-agent.sh` so `gator-gate` can run a deterministic independent reviewer Codex execution. - Optionally uploads a host Codex executable as `/sandbox/payload/codex`. - Starts `codex exec` without a TTY. - Deletes the sandbox automatically after Codex exits. Pass `--keep` to preserve it for debugging. 
diff --git a/openshell-agents/gator/reviewer-agent.sh b/openshell-agents/gator/reviewer-agent.sh new file mode 100755 index 000000000..ab723bacd --- /dev/null +++ b/openshell-agents/gator/reviewer-agent.sh @@ -0,0 +1,57 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +REVIEWER_PROMPT="${REVIEWER_PROMPT:-/sandbox/payload/.claude/agents/principal-engineer-reviewer.md}" +[[ -f "$REVIEWER_PROMPT" ]] || { + echo "missing reviewer prompt: $REVIEWER_PROMPT" >&2 + exit 1 +} + +CODEX_BIN="${CODEX_BIN:-codex}" +if [[ -x /sandbox/payload/codex ]]; then + CODEX_BIN=/sandbox/payload/codex +fi + +CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" +CODEX_REASONING="${CODEX_REASONING:-high}" + +TASK_FILE="$(mktemp)" +PROMPT_FILE="$(mktemp)" +cleanup() { + rm -f "$TASK_FILE" "$PROMPT_FILE" +} +trap cleanup EXIT + +cat >"$TASK_FILE" + +{ + printf '%s\n\n' 'You are running as the principal-engineer-reviewer sub-agent for OpenShell gator-gate.' + printf '%s\n\n' 'Follow this agent definition exactly:' + cat "$REVIEWER_PROMPT" + printf '\n%s\n\n' 'Reviewer task:' + cat "$TASK_FILE" + printf '\n%s\n' 'Return the review only. Do not mutate repository state, labels, comments, or PRs.' 
+} >"$PROMPT_FILE" + +CODEX_EXEC_ARGS=( + exec + --skip-git-repo-check + --sandbox danger-full-access + --ephemeral +) + +if "$CODEX_BIN" exec --help 2>/dev/null | grep -q -- "--ignore-user-config"; then + CODEX_EXEC_ARGS+=(--ignore-user-config) +fi +if "$CODEX_BIN" exec --help 2>/dev/null | grep -q -- "--ignore-rules"; then + CODEX_EXEC_ARGS+=(--ignore-rules) +fi + +exec "$CODEX_BIN" "${CODEX_EXEC_ARGS[@]}" \ + -c "model=\"${CODEX_MODEL}\"" \ + -c "model_reasoning_effort=\"${CODEX_REASONING}\"" \ + "$(cat "$PROMPT_FILE")" diff --git a/openshell-agents/gator/run.sh b/openshell-agents/gator/run.sh index acd509e4a..9df1f7c9c 100755 --- a/openshell-agents/gator/run.sh +++ b/openshell-agents/gator/run.sh @@ -8,6 +8,7 @@ set -euo pipefail ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" GATOR_DIR="$ROOT_DIR/openshell-agents/gator" SKILL_FILE="$ROOT_DIR/.agents/skills/gator-gate/SKILL.md" +REVIEWER_AGENT_FILE="$ROOT_DIR/.claude/agents/principal-engineer-reviewer.md" OPENSHELL_BIN="${OPENSHELL_BIN:-openshell}" GATEWAY="${GATOR_GATEWAY:-docker-dev}" @@ -169,6 +170,7 @@ require_cmd gh require_cmd jq require_cmd "$OPENSHELL_BIN" [[ -f "$SKILL_FILE" ]] || fail "missing gator skill: $SKILL_FILE" +[[ -f "$REVIEWER_AGENT_FILE" ]] || fail "missing reviewer agent: $REVIEWER_AGENT_FILE" [[ -f "$HOME/.codex/auth.json" ]] || fail "missing local Codex auth; run: codex login" CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token // empty' "$HOME/.codex/auth.json")" @@ -196,9 +198,13 @@ cleanup() { trap cleanup EXIT mkdir -p "$PAYLOAD_DIR/.agents/skills/gator-gate" +mkdir -p "$PAYLOAD_DIR/.claude/agents" cp "$SKILL_FILE" "$PAYLOAD_DIR/.agents/skills/gator-gate/SKILL.md" +cp "$REVIEWER_AGENT_FILE" "$PAYLOAD_DIR/.claude/agents/principal-engineer-reviewer.md" cp "$GATOR_DIR/sandbox-agent.sh" "$PAYLOAD_DIR/sandbox-agent.sh" +cp "$GATOR_DIR/reviewer-agent.sh" "$PAYLOAD_DIR/reviewer-agent.sh" chmod +x "$PAYLOAD_DIR/sandbox-agent.sh" +chmod +x "$PAYLOAD_DIR/reviewer-agent.sh" if 
[[ -n "$CODEX_LOCAL_BIN" ]]; then [[ -x "$CODEX_LOCAL_BIN" ]] || fail "--codex-bin is not executable: $CODEX_LOCAL_BIN" cp "$CODEX_LOCAL_BIN" "$PAYLOAD_DIR/codex" @@ -218,6 +224,7 @@ Important sandbox constraints: - Keep watching active PRs until they close, merge, or the operator stops the sandbox. - Do not push to contributor branches unless the operator explicitly instructs you to do so. - If you receive 403 errors from the sandbox proxy, inspect the JSON response and propose a policy update to allow the requested action if the response contains a structured error message. +- When the gator skill requires the \`principal-engineer-reviewer\` sub-agent, run a bounded independent review with \`bash /sandbox/payload/reviewer-agent.sh < review-task.md\`. Include PR metadata and full diff/file context in \`review-task.md\`, save the output, and use it as the independent reviewer result while the main gator process continues labels, comments, docs, and CI gating. Operator request: From 3dd6607006df7a7b3c2c399b5b2681735be53fa4 Mon Sep 17 00:00:00 2001 From: John Myers Date: Thu, 4 Jun 2026 07:59:17 -0700 Subject: [PATCH 06/20] chore(gator): clarify ok-to-test comments --- .agents/skills/gator-gate/SKILL.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index 5e466a942..8894c8440 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -452,6 +452,8 @@ If a mirror is missing or stale and you have maintainer authority, post: /ok to test ``` +The `/ok to test ` comment must contain only that command. Do not include the `> **gator-agent**` marker, explanations, Markdown fences, or any other text in the same comment. + If you do not have maintainer authority, move to `gator:blocked` and state that a maintainer must post `/ok to test `. 
## Step 10: Pipeline Watch Loop From c3066ac8257747b41807879796468e199698522b Mon Sep 17 00:00:00 2001 From: John Myers Date: Thu, 4 Jun 2026 08:06:45 -0700 Subject: [PATCH 07/20] chore(gator): structure launcher harnesses --- .agents/skills/gator-gate/SKILL.md | 2 +- openshell-agents/gator/Dockerfile | 6 +- openshell-agents/gator/README.md | 22 ++--- .../codex}/install-codex.sh | 0 .../{ => harnesses/codex}/reviewer-agent.sh | 4 +- .../{ => harnesses/codex}/sandbox-agent.sh | 4 +- openshell-agents/gator/run.sh | 81 +++++++++++++------ 7 files changed, 77 insertions(+), 42 deletions(-) rename openshell-agents/gator/{scripts => harnesses/codex}/install-codex.sh (100%) rename openshell-agents/gator/{ => harnesses/codex}/reviewer-agent.sh (93%) rename openshell-agents/gator/{ => harnesses/codex}/sandbox-agent.sh (94%) diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index 8894c8440..f4631825f 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -401,7 +401,7 @@ Use the `principal-engineer-reviewer` sub-agent. Include: - Instruction to check whether direct UX changes update the Fern docs under `docs/` and navigation when needed - Instruction not to rely on local test execution -When running inside the `openshell-agents/gator` sandbox launcher, invoke the reviewer with `bash /sandbox/payload/reviewer-agent.sh < review-task.md`. Put the PR metadata, linked issue context, and diff/file context in `review-task.md`, save the reviewer output, and use it as the independent review result. The main gator process remains responsible for labels, comments, docs gates, and CI monitoring. +When running inside the `openshell-agents/gator` sandbox launcher, invoke the reviewer command specified in the sandbox prompt. For the Codex harness, use `bash /sandbox/payload/harnesses/codex/reviewer-agent.sh < review-task.md`. 
Put the PR metadata, linked issue context, and diff/file context in `review-task.md`, save the reviewer output, and use it as the independent review result. The main gator process remains responsible for labels, comments, docs gates, and CI monitoring. Post findings as a gator comment or a GitHub PR review: diff --git a/openshell-agents/gator/Dockerfile b/openshell-agents/gator/Dockerfile index 6437b24e0..4148ed4d4 100644 --- a/openshell-agents/gator/Dockerfile +++ b/openshell-agents/gator/Dockerfile @@ -6,8 +6,8 @@ # Gator sandbox image. # # This mirrors the OpenShell Community base image's core system and developer -# tooling, but keeps the agent surface focused on Codex + GitHub tooling for the -# gator-gate workflow. +# tooling, but keeps the initial agent surface focused on Codex + GitHub tooling +# for the gator-gate workflow. FROM nvcr.io/nvidia/base/ubuntu:noble-20251013 AS system @@ -65,7 +65,7 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ apt-get update && apt-get install -y --no-install-recommends gh && \ rm -rf /var/lib/apt/lists/* -COPY scripts/install-codex.sh /usr/local/bin/install-codex.sh +COPY harnesses/codex/install-codex.sh /usr/local/bin/install-codex.sh ARG CODEX_VERSION=latest RUN chmod 755 /usr/local/bin/install-codex.sh && \ /usr/local/bin/install-codex.sh "$CODEX_VERSION" diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md index 7e2565721..68e082521 100644 --- a/openshell-agents/gator/README.md +++ b/openshell-agents/gator/README.md @@ -1,12 +1,12 @@ # Gator Agent -Launch a headless Codex sandbox that runs the `gator-gate` skill against OpenShell issues and pull requests. +Launch a headless sandbox harness that runs the `gator-gate` skill against OpenShell issues and pull requests. The default and currently only supported harness is Codex. ## Prerequisites - `gh` is authenticated on the host and has access to `NVIDIA/OpenShell` and `NVIDIA/OpenShell-Community`. 
-- `codex login` has created `$HOME/.codex/auth.json`. -- The active gateway has the default `codex` provider profile available. +- For `--harness codex`, `codex login` has created `$HOME/.codex/auth.json`. +- For `--harness codex`, the active gateway has the default `codex` provider profile available. - A local gateway is available when using the default local Dockerfile source. ## Usage @@ -14,26 +14,30 @@ Launch a headless Codex sandbox that runs the `gator-gate` skill against OpenShe ```shell ./openshell-agents/gator/run.sh \ --gateway docker-dev \ + --harness codex \ "Run gator on PR 1536 and keep watching until it closes or merges." ``` By default the launcher uses `openshell-agents/gator` as the sandbox source. Local gateways build `openshell-agents/gator/Dockerfile`, which installs the latest stable `@openai/codex` package at image build time. Use `--from ` to run a prebuilt image on remote gateways. +Use `--harness codex` to select Codex explicitly. Other harness names are rejected until their support scripts and provider setup are added under `harnesses//`. + Use `--codex-bin "$(command -v codex)"` only when the host executable is compatible with the sandbox OS and architecture. The launcher: - Imports `providers/github-gator.yaml`. - Creates or updates the `github-gator` provider from `gh auth token`. -- Creates or updates the default `codex` provider from `$HOME/.codex/auth.json` using profile-backed `--from-existing` discovery. -- Requests a gateway refresh for the Codex access-token credential when refresh metadata is configured. +- Selects the requested harness and uploads its scripts from `harnesses//` into the sandbox payload. +- For `--harness codex`, creates or updates the default `codex` provider from `$HOME/.codex/auth.json` using profile-backed `--from-existing` discovery. +- For `--harness codex`, requests a gateway refresh for the Codex access-token credential when refresh metadata is configured. 
- Enables `providers_v2_enabled`, `agent_policy_proposals_enabled`, and `proposal_approval_mode=auto` at gateway scope. - Uses the gator image policy copied to `/etc/openshell/policy.yaml`. - Uploads the current `.agents/skills/gator-gate/SKILL.md` into the sandbox payload. -- Uploads `.claude/agents/principal-engineer-reviewer.md` and `reviewer-agent.sh` so `gator-gate` can run a deterministic independent reviewer Codex execution. -- Optionally uploads a host Codex executable as `/sandbox/payload/codex`. -- Starts `codex exec` without a TTY. -- Deletes the sandbox automatically after Codex exits. Pass `--keep` to preserve it for debugging. +- Uploads `.claude/agents/principal-engineer-reviewer.md` so the selected harness can run a deterministic independent reviewer execution. +- For `--harness codex`, optionally uploads a host Codex executable as `/sandbox/payload/harnesses/codex/codex`. +- Starts the selected harness without a TTY. +- Deletes the sandbox automatically after the harness exits. Pass `--keep` to preserve it for debugging. The GitHub provider profile intentionally does not allow GraphQL because OpenShell's GraphQL policy can constrain operation fields but not repository arguments. The sandbox prompt instructs the agent to use REST via `gh api` for the two allowed repositories. 
diff --git a/openshell-agents/gator/scripts/install-codex.sh b/openshell-agents/gator/harnesses/codex/install-codex.sh similarity index 100% rename from openshell-agents/gator/scripts/install-codex.sh rename to openshell-agents/gator/harnesses/codex/install-codex.sh diff --git a/openshell-agents/gator/reviewer-agent.sh b/openshell-agents/gator/harnesses/codex/reviewer-agent.sh similarity index 93% rename from openshell-agents/gator/reviewer-agent.sh rename to openshell-agents/gator/harnesses/codex/reviewer-agent.sh index ab723bacd..f66a8d6c0 100755 --- a/openshell-agents/gator/reviewer-agent.sh +++ b/openshell-agents/gator/harnesses/codex/reviewer-agent.sh @@ -12,8 +12,8 @@ REVIEWER_PROMPT="${REVIEWER_PROMPT:-/sandbox/payload/.claude/agents/principal-en } CODEX_BIN="${CODEX_BIN:-codex}" -if [[ -x /sandbox/payload/codex ]]; then - CODEX_BIN=/sandbox/payload/codex +if [[ -x /sandbox/payload/harnesses/codex/codex ]]; then + CODEX_BIN=/sandbox/payload/harnesses/codex/codex fi CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" diff --git a/openshell-agents/gator/sandbox-agent.sh b/openshell-agents/gator/harnesses/codex/sandbox-agent.sh similarity index 94% rename from openshell-agents/gator/sandbox-agent.sh rename to openshell-agents/gator/harnesses/codex/sandbox-agent.sh index 716ed93c2..2ff7e0d0f 100755 --- a/openshell-agents/gator/sandbox-agent.sh +++ b/openshell-agents/gator/harnesses/codex/sandbox-agent.sh @@ -55,8 +55,8 @@ WORK="$(mktemp -d)" cd "$WORK" CODEX_BIN="${CODEX_BIN:-codex}" -if [[ -x /sandbox/payload/codex ]]; then - CODEX_BIN=/sandbox/payload/codex +if [[ -x /sandbox/payload/harnesses/codex/codex ]]; then + CODEX_BIN=/sandbox/payload/harnesses/codex/codex fi CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" CODEX_REASONING="${CODEX_REASONING:-high}" diff --git a/openshell-agents/gator/run.sh b/openshell-agents/gator/run.sh index 9df1f7c9c..f4a560559 100755 --- a/openshell-agents/gator/run.sh +++ b/openshell-agents/gator/run.sh @@ -14,6 +14,7 @@ 
OPENSHELL_BIN="${OPENSHELL_BIN:-openshell}" GATEWAY="${GATOR_GATEWAY:-docker-dev}" SANDBOX_NAME="${GATOR_SANDBOX_NAME:-gator-$(date +%Y%m%d%H%M%S)}" SANDBOX_FROM="${GATOR_SANDBOX_FROM:-$GATOR_DIR}" +HARNESS="${GATOR_HARNESS:-codex}" GITHUB_PROVIDER="${GATOR_GITHUB_PROVIDER:-github-gator}" CODEX_PROVIDER="${GATOR_CODEX_PROVIDER:-codex}" CODEX_ACCESS_CREDENTIAL_KEY="${GATOR_CODEX_ACCESS_CREDENTIAL_KEY:-CODEX_AUTH_ACCESS_TOKEN}" @@ -31,12 +32,13 @@ Options: --gateway NAME Gateway name to use (default: docker-dev) --name NAME Sandbox name (default: gator-) --from IMAGE Sandbox source/image (default: openshell-agents/gator) + --harness NAME Agent harness to run (default: codex; supported: codex) --github-provider NAME GitHub provider name (default: github-gator) --codex-provider NAME Codex provider name (default: codex) --codex-access-key KEY Codex access-token credential key (default: CODEX_AUTH_ACCESS_TOKEN) --codex-bin PATH Upload this Codex executable into the sandbox --background Run sandbox create in the background and write a log - --keep Keep the sandbox after Codex exits (default: delete on exit) + --keep Keep the sandbox after the harness exits (default: delete on exit) -h, --help Show this help EOF } @@ -118,6 +120,11 @@ while [[ $# -gt 0 ]]; do SANDBOX_FROM="$2" shift 2 ;; + --harness) + [[ $# -ge 2 ]] || fail "--harness requires a value" + HARNESS="$2" + shift 2 + ;; --github-provider) [[ $# -ge 2 ]] || fail "--github-provider requires a value" GITHUB_PROVIDER="$2" @@ -167,27 +174,45 @@ done USER_PROMPT="$*" require_cmd gh -require_cmd jq require_cmd "$OPENSHELL_BIN" [[ -f "$SKILL_FILE" ]] || fail "missing gator skill: $SKILL_FILE" [[ -f "$REVIEWER_AGENT_FILE" ]] || fail "missing reviewer agent: $REVIEWER_AGENT_FILE" -[[ -f "$HOME/.codex/auth.json" ]] || fail "missing local Codex auth; run: codex login" -CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token // empty' "$HOME/.codex/auth.json")" -CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // 
empty' "$HOME/.codex/auth.json")" -CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' "$HOME/.codex/auth.json")" -CODEX_AUTH_ID_TOKEN="$(jq -r '.tokens.id_token // empty' "$HOME/.codex/auth.json")" -[[ -n "$CODEX_AUTH_ACCESS_TOKEN" ]] || fail "Codex auth is missing tokens.access_token" -[[ -n "$CODEX_AUTH_REFRESH_TOKEN" ]] || fail "Codex auth is missing tokens.refresh_token" -[[ -n "$CODEX_AUTH_ACCOUNT_ID" ]] || fail "Codex auth is missing tokens.account_id" +HARNESS_DIR="$GATOR_DIR/harnesses/$HARNESS" +HARNESS_ENTRYPOINT="/sandbox/payload/harnesses/$HARNESS/sandbox-agent.sh" +HARNESS_REVIEWER_COMMAND="bash /sandbox/payload/harnesses/$HARNESS/reviewer-agent.sh < review-task.md" +HARNESS_PROVIDER_ARGS=() +HARNESS_ENV_ARGS=() + +case "$HARNESS" in + codex) + require_cmd jq + [[ -d "$HARNESS_DIR" ]] || fail "missing harness directory: $HARNESS_DIR" + [[ -f "$HOME/.codex/auth.json" ]] || fail "missing local Codex auth; run: codex login" + + CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token // empty' "$HOME/.codex/auth.json")" + CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // empty' "$HOME/.codex/auth.json")" + CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' "$HOME/.codex/auth.json")" + CODEX_AUTH_ID_TOKEN="$(jq -r '.tokens.id_token // empty' "$HOME/.codex/auth.json")" + [[ -n "$CODEX_AUTH_ACCESS_TOKEN" ]] || fail "Codex auth is missing tokens.access_token" + [[ -n "$CODEX_AUTH_REFRESH_TOKEN" ]] || fail "Codex auth is missing tokens.refresh_token" + [[ -n "$CODEX_AUTH_ACCOUNT_ID" ]] || fail "Codex auth is missing tokens.account_id" + + export CODEX_AUTH_ACCESS_TOKEN + export CODEX_AUTH_REFRESH_TOKEN + export CODEX_AUTH_ACCOUNT_ID + export CODEX_AUTH_ID_TOKEN + HARNESS_PROVIDER_ARGS=(--provider "$CODEX_PROVIDER") + HARNESS_ENV_ARGS=("CODEX_MODEL=$CODEX_MODEL" "CODEX_REASONING=$CODEX_REASONING") + ;; + *) + fail "unsupported harness: $HARNESS (supported: codex)" + ;; +esac GITHUB_TOKEN="$(gh auth token)" [[ -n "$GITHUB_TOKEN" ]] || 
fail "gh auth token returned empty output" -export CODEX_AUTH_ACCESS_TOKEN -export CODEX_AUTH_REFRESH_TOKEN -export CODEX_AUTH_ACCOUNT_ID -export CODEX_AUTH_ID_TOKEN export GITHUB_TOKEN PAYLOAD_PARENT="$(mktemp -d "${TMPDIR:-/tmp}/openshell-gator.XXXXXX")" @@ -199,20 +224,22 @@ trap cleanup EXIT mkdir -p "$PAYLOAD_DIR/.agents/skills/gator-gate" mkdir -p "$PAYLOAD_DIR/.claude/agents" +mkdir -p "$PAYLOAD_DIR/harnesses" cp "$SKILL_FILE" "$PAYLOAD_DIR/.agents/skills/gator-gate/SKILL.md" cp "$REVIEWER_AGENT_FILE" "$PAYLOAD_DIR/.claude/agents/principal-engineer-reviewer.md" -cp "$GATOR_DIR/sandbox-agent.sh" "$PAYLOAD_DIR/sandbox-agent.sh" -cp "$GATOR_DIR/reviewer-agent.sh" "$PAYLOAD_DIR/reviewer-agent.sh" -chmod +x "$PAYLOAD_DIR/sandbox-agent.sh" -chmod +x "$PAYLOAD_DIR/reviewer-agent.sh" +cp -R "$HARNESS_DIR" "$PAYLOAD_DIR/harnesses/$HARNESS" +chmod +x "$PAYLOAD_DIR/harnesses/$HARNESS"/*.sh if [[ -n "$CODEX_LOCAL_BIN" ]]; then [[ -x "$CODEX_LOCAL_BIN" ]] || fail "--codex-bin is not executable: $CODEX_LOCAL_BIN" - cp "$CODEX_LOCAL_BIN" "$PAYLOAD_DIR/codex" - chmod +x "$PAYLOAD_DIR/codex" + [[ "$HARNESS" == "codex" ]] || fail "--codex-bin is only valid with --harness codex" + cp "$CODEX_LOCAL_BIN" "$PAYLOAD_DIR/harnesses/codex/codex" + chmod +x "$PAYLOAD_DIR/harnesses/codex/codex" fi cat > "$PAYLOAD_DIR/gator-prompt.md" </dev/null openshell_cmd settings set --global --key agent_policy_proposals_enabled --value true --yes >/dev/null @@ -250,14 +281,14 @@ SANDBOX_CMD=( "$OPENSHELL_BIN" --gateway "$GATEWAY" sandbox create --name "$SANDBOX_NAME" --from "$SANDBOX_FROM" - --provider "$CODEX_PROVIDER" --provider "$GITHUB_PROVIDER" + "${HARNESS_PROVIDER_ARGS[@]}" --upload "$PAYLOAD_DIR:/sandbox" --no-git-ignore --no-auto-providers --no-tty "${KEEP_ARGS[@]}" - -- env "CODEX_MODEL=$CODEX_MODEL" "CODEX_REASONING=$CODEX_REASONING" bash /sandbox/payload/sandbox-agent.sh + -- env "${HARNESS_ENV_ARGS[@]}" bash "$HARNESS_ENTRYPOINT" ) echo "Launching gator sandbox '$SANDBOX_NAME' on 
gateway '$GATEWAY'..." From 9141c1bbb858028c38ec960327efa372bd04f4ef Mon Sep 17 00:00:00 2001 From: John Myers Date: Thu, 4 Jun 2026 08:24:51 -0700 Subject: [PATCH 08/20] chore(gator): require e2e for dependabot --- .agents/skills/gator-gate/SKILL.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index f4631825f..dfb05c400 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -393,6 +393,8 @@ If TTL expires: When a PR enters `gator:in-review`, run an independent code-only review. +For PRs authored by `dependabot[bot]`, the primary gator responsibility is dependency-update validation, not normal feature review. Do a quick sanity check for suspicious changes outside expected dependency manifests or lockfiles, then ensure the full required test suite runs, including E2E, and watch for breakages caused by the update. + Use the `principal-engineer-reviewer` sub-agent. Include: - PR title, body, linked issues, labels, and files @@ -430,6 +432,8 @@ Do not move to `gator:watch-pipeline` until review feedback is addressed or expl Apply or recommend `test:*` labels based on changed files and behavior. +Always apply or require `test:e2e` for PRs authored by `dependabot[bot]`. Dependabot PRs must run the full required test suite, including E2E, even when the dependency update appears isolated to manifests or lockfiles. 
+ Use `test:e2e` for changes that affect: - Sandbox lifecycle From b8758801b0bc473630f1e4fbc454d1143dce224d Mon Sep 17 00:00:00 2001 From: John Myers Date: Thu, 4 Jun 2026 09:32:05 -0700 Subject: [PATCH 09/20] chore(gator): add codex refresh profile --- openshell-agents/gator/README.md | 8 +-- .../gator/harnesses/codex/sandbox-agent.sh | 5 +- .../gator/providers/codex-gator.yaml | 64 +++++++++++++++++++ openshell-agents/gator/run.sh | 55 ++++++++-------- 4 files changed, 98 insertions(+), 34 deletions(-) create mode 100644 openshell-agents/gator/providers/codex-gator.yaml diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md index 68e082521..41f4e5933 100644 --- a/openshell-agents/gator/README.md +++ b/openshell-agents/gator/README.md @@ -6,7 +6,7 @@ Launch a headless sandbox harness that runs the `gator-gate` skill against OpenS - `gh` is authenticated on the host and has access to `NVIDIA/OpenShell` and `NVIDIA/OpenShell-Community`. - For `--harness codex`, `codex login` has created `$HOME/.codex/auth.json`. -- For `--harness codex`, the active gateway has the default `codex` provider profile available. +- For `--harness codex`, local Codex auth must include an access token, refresh token, and account ID. - A local gateway is available when using the default local Dockerfile source. ## Usage @@ -29,8 +29,8 @@ The launcher: - Imports `providers/github-gator.yaml`. - Creates or updates the `github-gator` provider from `gh auth token`. - Selects the requested harness and uploads its scripts from `harnesses//` into the sandbox payload. -- For `--harness codex`, creates or updates the default `codex` provider from `$HOME/.codex/auth.json` using profile-backed `--from-existing` discovery. -- For `--harness codex`, requests a gateway refresh for the Codex access-token credential when refresh metadata is configured. 
+- For `--harness codex`, imports `providers/codex-gator.yaml`, creates or updates the `codex-gator` provider from `$HOME/.codex/auth.json`, and stores the refresh token as gateway-only refresh material. +- For `--harness codex`, configures gateway-managed refresh for `CODEX_AUTH_ACCESS_TOKEN` and rotates it before launching the sandbox. - Enables `providers_v2_enabled`, `agent_policy_proposals_enabled`, and `proposal_approval_mode=auto` at gateway scope. - Uses the gator image policy copied to `/etc/openshell/policy.yaml`. - Uploads the current `.agents/skills/gator-gate/SKILL.md` into the sandbox payload. @@ -41,4 +41,4 @@ The launcher: The GitHub provider profile intentionally does not allow GraphQL because OpenShell's GraphQL policy can constrain operation fields but not repository arguments. The sandbox prompt instructs the agent to use REST via `gh api` for the two allowed repositories. -Set `GATOR_CODEX_ACCESS_CREDENTIAL_KEY` or pass `--codex-access-key` if the default Codex provider uses a credential key other than `CODEX_AUTH_ACCESS_TOKEN` for the short-lived access token. +Set `GATOR_CODEX_ACCESS_CREDENTIAL_KEY` or pass `--codex-access-key` if the gator Codex profile uses a credential key other than `CODEX_AUTH_ACCESS_TOKEN` for the short-lived access token. 
diff --git a/openshell-agents/gator/harnesses/codex/sandbox-agent.sh b/openshell-agents/gator/harnesses/codex/sandbox-agent.sh index 2ff7e0d0f..a6f34a3b0 100755 --- a/openshell-agents/gator/harnesses/codex/sandbox-agent.sh +++ b/openshell-agents/gator/harnesses/codex/sandbox-agent.sh @@ -11,7 +11,6 @@ require_env() { } require_env CODEX_AUTH_ACCESS_TOKEN -require_env CODEX_AUTH_REFRESH_TOKEN require_env CODEX_AUTH_ACCOUNT_ID require_env GITHUB_TOKEN @@ -41,9 +40,9 @@ fs.writeFileSync(path, JSON.stringify({ auth_mode: "chatgpt", OPENAI_API_KEY: null, tokens: { - id_token: fallbackIdToken, + id_token: process.env.CODEX_AUTH_ID_TOKEN || fallbackIdToken, access_token: process.env.CODEX_AUTH_ACCESS_TOKEN, - refresh_token: process.env.CODEX_AUTH_REFRESH_TOKEN, + refresh_token: process.env.CODEX_AUTH_REFRESH_TOKEN || "gateway-managed-refresh-token", account_id: process.env.CODEX_AUTH_ACCOUNT_ID, }, last_refresh: new Date().toISOString(), diff --git a/openshell-agents/gator/providers/codex-gator.yaml b/openshell-agents/gator/providers/codex-gator.yaml new file mode 100644 index 000000000..99735fcd9 --- /dev/null +++ b/openshell-agents/gator/providers/codex-gator.yaml @@ -0,0 +1,64 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +id: codex-gator +display_name: Codex Gator +description: OpenAI Codex CLI profile for gator with gateway-managed access-token refresh +category: agent +inference_capable: true +credentials: + - name: access_token + description: Codex OAuth access token refreshed by the gateway from refresh material + env_vars: [CODEX_AUTH_ACCESS_TOKEN] + required: true + auth_style: bearer + header_name: authorization + refresh: + strategy: oauth2_refresh_token + token_url: https://auth.openai.com/oauth/token + refresh_before_seconds: 300 + max_lifetime_seconds: 3600 + material: + - name: client_id + description: Codex OAuth client ID + required: true + - name: refresh_token + description: Codex OAuth refresh token from local auth.json + required: true + secret: true + - name: account_id + description: Codex account identifier + env_vars: [CODEX_AUTH_ACCOUNT_ID] + required: true + - name: id_token + description: Codex OAuth ID token + env_vars: [CODEX_AUTH_ID_TOKEN] +discovery: + credentials: [access_token, account_id, id_token] +endpoints: + - host: api.openai.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: auth.openai.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: chatgpt.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: ab.chatgpt.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: files.openai.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce +binaries: [/usr/bin/codex, /usr/local/bin/codex, /usr/lib/node_modules/@openai/**] diff --git a/openshell-agents/gator/run.sh b/openshell-agents/gator/run.sh index f4a560559..5c7f073cc 100755 --- a/openshell-agents/gator/run.sh +++ b/openshell-agents/gator/run.sh @@ -16,8 +16,11 @@ SANDBOX_NAME="${GATOR_SANDBOX_NAME:-gator-$(date +%Y%m%d%H%M%S)}" SANDBOX_FROM="${GATOR_SANDBOX_FROM:-$GATOR_DIR}" 
HARNESS="${GATOR_HARNESS:-codex}" GITHUB_PROVIDER="${GATOR_GITHUB_PROVIDER:-github-gator}" -CODEX_PROVIDER="${GATOR_CODEX_PROVIDER:-codex}" +CODEX_PROVIDER="${GATOR_CODEX_PROVIDER:-codex-gator}" +CODEX_PROVIDER_PROFILE="${GATOR_CODEX_PROVIDER_PROFILE:-codex-gator}" CODEX_ACCESS_CREDENTIAL_KEY="${GATOR_CODEX_ACCESS_CREDENTIAL_KEY:-CODEX_AUTH_ACCESS_TOKEN}" +# Upstream Codex OAuth client ID from codex-rs/login/src/auth/manager.rs. +CODEX_OAUTH_CLIENT_ID="${GATOR_CODEX_OAUTH_CLIENT_ID:-app_EMoamEEZ73f0CkXaXp7hrann}" CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" CODEX_REASONING="${CODEX_REASONING:-high}" CODEX_LOCAL_BIN="${GATOR_CODEX_LOCAL_BIN:-}" @@ -34,7 +37,7 @@ Options: --from IMAGE Sandbox source/image (default: openshell-agents/gator) --harness NAME Agent harness to run (default: codex; supported: codex) --github-provider NAME GitHub provider name (default: github-gator) - --codex-provider NAME Codex provider name (default: codex) + --codex-provider NAME Codex provider name (default: codex-gator) --codex-access-key KEY Codex access-token credential key (default: CODEX_AUTH_ACCESS_TOKEN) --codex-bin PATH Upload this Codex executable into the sandbox --background Run sandbox create in the background and write a log @@ -68,29 +71,15 @@ upsert_provider() { fi } -refresh_provider_if_configured() { - local provider="$1" - local credential_key="$2" - local status_output - - status_output="$(openshell_cmd provider refresh status "$provider" --credential-key "$credential_key")" - if [[ "$status_output" == No\ refresh\ configuration* ]]; then - echo "No refresh metadata for $provider/$credential_key; using the currently stored credential." - return 0 - fi - - openshell_cmd provider refresh rotate "$provider" --credential-key "$credential_key" >/dev/null - echo "Requested gateway refresh for $provider/$credential_key." 
-} - import_provider_profile() { - local profile_file="$1" + local profile_id="$1" + local profile_file="$2" local import_output # Custom profile import is create-only. Replace it when possible so repeat # runs track this checkout, but keep going if a live sandbox is still using # the already-imported profile. - openshell_cmd provider profile delete github-gator >/dev/null 2>&1 || true + openshell_cmd provider profile delete "$profile_id" >/dev/null 2>&1 || true if import_output="$(openshell_cmd provider profile import --file "$profile_file" 2>&1)"; then return 0 fi @@ -103,6 +92,18 @@ import_provider_profile() { return 1 } +configure_codex_refresh() { + openshell_cmd provider refresh configure "$CODEX_PROVIDER" \ + --credential-key "$CODEX_ACCESS_CREDENTIAL_KEY" \ + --strategy oauth2_refresh_token \ + --material "client_id=$CODEX_OAUTH_CLIENT_ID" \ + --material "refresh_token=$CODEX_AUTH_REFRESH_TOKEN" \ + --secret-material-key refresh_token >/dev/null + openshell_cmd provider refresh rotate "$CODEX_PROVIDER" \ + --credential-key "$CODEX_ACCESS_CREDENTIAL_KEY" >/dev/null + echo "Configured gateway refresh for $CODEX_PROVIDER/$CODEX_ACCESS_CREDENTIAL_KEY." 
+} + while [[ $# -gt 0 ]]; do case "$1" in --gateway) @@ -199,7 +200,6 @@ case "$HARNESS" in [[ -n "$CODEX_AUTH_ACCOUNT_ID" ]] || fail "Codex auth is missing tokens.account_id" export CODEX_AUTH_ACCESS_TOKEN - export CODEX_AUTH_REFRESH_TOKEN export CODEX_AUTH_ACCOUNT_ID export CODEX_AUTH_ID_TOKEN HARNESS_PROVIDER_ARGS=(--provider "$CODEX_PROVIDER") @@ -258,19 +258,20 @@ Operator request: $USER_PROMPT EOF -import_provider_profile "$GATOR_DIR/providers/github-gator.yaml" +openshell_cmd settings set --global --key providers_v2_enabled --value true --yes >/dev/null +openshell_cmd settings set --global --key agent_policy_proposals_enabled --value true --yes >/dev/null +openshell_cmd settings set --global --key proposal_approval_mode --value auto --yes >/dev/null + +import_provider_profile github-gator "$GATOR_DIR/providers/github-gator.yaml" upsert_provider "$GITHUB_PROVIDER" github-gator --credential GITHUB_TOKEN case "$HARNESS" in codex) - upsert_provider "$CODEX_PROVIDER" codex --from-existing - refresh_provider_if_configured "$CODEX_PROVIDER" "$CODEX_ACCESS_CREDENTIAL_KEY" + import_provider_profile "$CODEX_PROVIDER_PROFILE" "$GATOR_DIR/providers/$CODEX_PROVIDER_PROFILE.yaml" + upsert_provider "$CODEX_PROVIDER" "$CODEX_PROVIDER_PROFILE" --from-existing + configure_codex_refresh ;; esac -openshell_cmd settings set --global --key providers_v2_enabled --value true --yes >/dev/null -openshell_cmd settings set --global --key agent_policy_proposals_enabled --value true --yes >/dev/null -openshell_cmd settings set --global --key proposal_approval_mode --value auto --yes >/dev/null - KEEP_ARGS=() if [[ "$KEEP_SANDBOX" != "1" ]]; then KEEP_ARGS+=(--no-keep) From c7306cdbc99e2dcb714a404f23ee3bf929ae9a48 Mon Sep 17 00:00:00 2001 From: John Myers Date: Fri, 5 Jun 2026 14:07:03 -0700 Subject: [PATCH 10/20] chore(gator): wip manifest agent launcher --- .agents/skills/gator-gate/SKILL.md | 2 +- architecture/build.md | 8 +- .../{gator/Dockerfile => Dockerfile.gator} | 4 +- 
openshell-agents/README.md | 132 ++++ openshell-agents/gator/README.md | 21 +- openshell-agents/gator/agent.yaml | 85 +++ openshell-agents/gator/prompts/gator.md | 20 + .../gator/providers/codex-gator.yaml | 5 +- .../gator/providers/github-gator.yaml | 12 + openshell-agents/gator/run.sh | 307 --------- openshell-agents/run.sh | 605 ++++++++++++++++++ openshell-agents/runtime/entrypoint.sh | 21 + .../harnesses/codex/exec.sh} | 16 +- .../harnesses/codex/install-codex.sh | 0 .../harnesses/codex/subagent.sh} | 25 +- openshell-agents/runtime/subagent.sh | 19 + 16 files changed, 943 insertions(+), 339 deletions(-) rename openshell-agents/{gator/Dockerfile => Dockerfile.gator} (96%) create mode 100644 openshell-agents/README.md create mode 100644 openshell-agents/gator/agent.yaml create mode 100644 openshell-agents/gator/prompts/gator.md delete mode 100755 openshell-agents/gator/run.sh create mode 100755 openshell-agents/run.sh create mode 100755 openshell-agents/runtime/entrypoint.sh rename openshell-agents/{gator/harnesses/codex/sandbox-agent.sh => runtime/harnesses/codex/exec.sh} (86%) rename openshell-agents/{gator => runtime}/harnesses/codex/install-codex.sh (100%) mode change 100755 => 100644 rename openshell-agents/{gator/harnesses/codex/reviewer-agent.sh => runtime/harnesses/codex/subagent.sh} (63%) create mode 100755 openshell-agents/runtime/subagent.sh diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index dfb05c400..595ac24ad 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -403,7 +403,7 @@ Use the `principal-engineer-reviewer` sub-agent. Include: - Instruction to check whether direct UX changes update the Fern docs under `docs/` and navigation when needed - Instruction not to rely on local test execution -When running inside the `openshell-agents/gator` sandbox launcher, invoke the reviewer command specified in the sandbox prompt. 
For the Codex harness, use `bash /sandbox/payload/harnesses/codex/reviewer-agent.sh < review-task.md`. Put the PR metadata, linked issue context, and diff/file context in `review-task.md`, save the reviewer output, and use it as the independent review result. The main gator process remains responsible for labels, comments, docs gates, and CI monitoring. +When running inside the `openshell-agents/gator` sandbox launcher, invoke the reviewer command specified in the sandbox prompt. Use `task.md` for the subagent input. Put the PR metadata, linked issue context, and diff/file context in `task.md`, save the reviewer output, and use it as the independent review result. The main gator process remains responsible for labels, comments, docs gates, and CI monitoring. Post findings as a gator comment or a GitHub PR review: diff --git a/architecture/build.md b/architecture/build.md index 200be8b1e..059a40459 100644 --- a/architecture/build.md +++ b/architecture/build.md @@ -16,9 +16,15 @@ OpenShell builds these main artifacts: | Supervisor container image | `deploy/docker/Dockerfile.supervisor` | | Helm chart | `deploy/helm/openshell` | | VM driver/runtime assets | `crates/openshell-driver-vm` | +| Agent sandbox launchers | `openshell-agents/` manifests, images, and shared runtime adapters | | Published docs site | `docs/` rendered by Fern config in `fern/` | -Sandbox community images are built outside this repository. +Sandbox community images are built outside this repository. Repository-owned +agent launchers use manifest files under `openshell-agents/<agent>/` to describe +agent intent, provider profile IDs, prompt templates, skills, subagents, and +harness defaults. Agent directories do not own harness implementations. The +shared runtime under `openshell-agents/runtime/` provides the sandbox entrypoint, +harness install helpers, and harness-specific execution adapters. 
## Linux Runtime Environments diff --git a/openshell-agents/gator/Dockerfile b/openshell-agents/Dockerfile.gator similarity index 96% rename from openshell-agents/gator/Dockerfile rename to openshell-agents/Dockerfile.gator index 4148ed4d4..03206ad16 100644 --- a/openshell-agents/gator/Dockerfile +++ b/openshell-agents/Dockerfile.gator @@ -65,7 +65,7 @@ RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ apt-get update && apt-get install -y --no-install-recommends gh && \ rm -rf /var/lib/apt/lists/* -COPY harnesses/codex/install-codex.sh /usr/local/bin/install-codex.sh +COPY runtime/harnesses/codex/install-codex.sh /usr/local/bin/install-codex.sh ARG CODEX_VERSION=latest RUN chmod 755 /usr/local/bin/install-codex.sh && \ /usr/local/bin/install-codex.sh "$CODEX_VERSION" @@ -82,7 +82,7 @@ FROM devtools AS final ENV PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin" RUN mkdir -p /etc/openshell -COPY policy.yaml /etc/openshell/policy.yaml +COPY gator/policy.yaml /etc/openshell/policy.yaml RUN printf 'export PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin"\nexport PS1="\\u@\\h:\\w\\$ "\n' \ > /sandbox/.bashrc && \ diff --git a/openshell-agents/README.md b/openshell-agents/README.md new file mode 100644 index 000000000..1ac0ab578 --- /dev/null +++ b/openshell-agents/README.md @@ -0,0 +1,132 @@ +# OpenShell Agents + +`openshell-agents/` contains repository-owned agent launchers. An agent is a +manifest plus prompt assets that the shared launcher turns into an OpenShell +sandbox run. Agents do not own harness implementations. Harness-specific setup +and execution live in `runtime/harnesses/<harness>/`. 
+ +## Directory Layout + +```text +openshell-agents/ + run.sh # Generic manifest-driven launcher + runtime/ # Shared in-sandbox runtime + entrypoint.sh # Dispatches to the selected harness adapter + subagent.sh # Generic subagent dispatcher + harnesses/ + codex/ # Codex install and execution adapter + <agent>/ + agent.yaml # Agent manifest + prompts/ # Prompt templates rendered at launch + providers/ # Provider profile YAML files for this agent + policy.yaml # Optional image policy source +``` + +Agent directories should contain agent-specific intent and payloads: manifests, +prompt templates, provider profiles, policies, and references to skills or +subagents. They should not contain `harnesses/codex`, `harnesses/opencode`, or +similar runtime code. + +## Agent Manifest + +Each agent has an `agent.yaml` manifest. The launcher currently reads these +sections: + +- `id`, `display_name`, `description`: human and runtime identity. +- `sandbox`: default sandbox name prefix, gateway, source image or Dockerfile, + and background log directory. +- `harness`: default harness and per-harness settings such as model and + reasoning effort. +- `profile_paths`: ordered directories to scan for provider profile YAML files. +- `settings`: gateway settings to apply before launch. +- `providers`: provider instances to create or update, credential sources, and + optional refresh configuration. +- `skills`: files to inject into the sandbox payload. +- `subagents`: subagent definitions to inject into the sandbox payload. +- `prompt_template`: prompt template rendered into `/sandbox/payload/agent-prompt.md`. + +Manifest paths support these prefixes: + +- `repo://path`: resolve from the repository root. +- `agent://path`: resolve from the agent directory. +- Relative paths without a prefix: resolve from the agent directory. +- Absolute paths: use as-is. + +## Launch Order + +`openshell-agents/run.sh` performs the launch in this order: + +1. 
Parse CLI flags and select the agent directory from `--agent`. +2. Load `agent.yaml`, select the requested harness, and reject unsupported + harness names. +3. Resolve sandbox defaults from the manifest and CLI/environment overrides. +4. Build a temporary payload directory. +5. Copy `runtime/` into the payload so every agent uses the same in-sandbox + entrypoint and harness adapters. +6. Optionally copy a host Codex binary into the shared Codex runtime path when + `--codex-bin` is supplied. +7. Copy manifest-declared skills and subagents into the payload. +8. Render the prompt template with runtime values such as `{{HARNESS}}`, + `{{SUBAGENT_COMMAND}}`, and `{{USER_PROMPT}}`. +9. Apply manifest-declared gateway settings. +10. Resolve provider profile IDs by scanning `profile_paths` in order. +11. Import each provider profile into the gateway. If an active profile already + exists, the launcher keeps going and uses it. +12. Resolve provider credentials from host commands, JSON files, or literal + manifest values. +13. Create or update each provider instance and attach every selected provider + to the sandbox. +14. Configure and rotate refresh-backed provider credentials when declared by + the manifest. +15. Run `openshell sandbox create` with the rendered payload uploaded to + `/sandbox`. +16. Inside the sandbox, run `/sandbox/payload/runtime/entrypoint.sh`. +17. The runtime entrypoint dispatches to + `/sandbox/payload/runtime/harnesses/<harness>/exec.sh`. +18. Harness adapters prepare harness-local auth/config and execute the agent + prompt headlessly. + +## Subagents + +The launcher injects subagent definitions under `/sandbox/payload/subagents/`. +Prompt templates should refer to the generic command instead of a harness-specific +script: + +```shell +bash /sandbox/payload/runtime/subagent.sh <subagent-id> < task.md +``` + +The shared subagent dispatcher forwards the task to the active harness adapter. 
+For Codex, this runs a separate bounded `codex exec` invocation using the same +model and reasoning defaults as the parent harness. + +## Providers + +Listing a provider in `agent.yaml` means the provider is attached to the sandbox. +Provider profiles describe credential shape, endpoint policy, discovery metadata, +and refresh metadata. The launcher only creates provider instances and supplies +runtime credential values. + +`profile_paths` are ordered. The first profile file with the requested `id` wins. +If the same directory contains duplicate profile IDs, the launcher fails. If a +later profile path contains a profile ID that was already found, the launcher +warns that the later file is shadowed. + +## Gator Example + +`gator/` is the first manifest-driven agent. It uses: + +- `gator/agent.yaml` for the launch contract. +- `gator/prompts/gator.md` for the rendered operator prompt. +- `gator/providers/` for scoped GitHub and Codex provider profiles. +- `Dockerfile.gator` for the local sandbox image. +- `runtime/harnesses/codex/` for Codex installation and execution. + +Run it through the generic launcher: + +```shell +./openshell-agents/run.sh \ + --agent gator \ + --gateway docker-dev \ + "Run gator on PR 1536 and keep watching until it closes or merges." +``` diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md index 41f4e5933..37a2d7bd0 100644 --- a/openshell-agents/gator/README.md +++ b/openshell-agents/gator/README.md @@ -1,6 +1,6 @@ # Gator Agent -Launch a headless sandbox harness that runs the `gator-gate` skill against OpenShell issues and pull requests. The default and currently only supported harness is Codex. +Launch a headless sandbox agent that runs the `gator-gate` skill against OpenShell issues and pull requests. The default and currently only supported harness is Codex. 
+ ## Prerequisites @@ -12,33 +12,36 @@ Launch a headless sandbox harness that runs the `gator-gate` skill against OpenS ## Usage ```shell -./openshell-agents/gator/run.sh \ +./openshell-agents/run.sh \ + --agent gator \ --gateway docker-dev \ --harness codex \ "Run gator on PR 1536 and keep watching until it closes or merges." ``` -By default the launcher uses `openshell-agents/gator` as the sandbox source. Local gateways build `openshell-agents/gator/Dockerfile`, which installs the latest stable `@openai/codex` package at image build time. Use `--from <image>` to run a prebuilt image on remote gateways. +By default the launcher uses `openshell-agents/Dockerfile.gator` as the sandbox source. Local gateways build that Dockerfile with `openshell-agents/` as the build context, which lets the image use shared harness install scripts from `runtime/` and gator-specific policy from `gator/policy.yaml`. Use `--from <image>` to run a prebuilt image on remote gateways. -Use `--harness codex` to select Codex explicitly. Other harness names are rejected until their support scripts and provider setup are added under `harnesses/<harness>/`. +Use `--harness codex` to select Codex explicitly. Other harness names are rejected until their support is added to `agent.yaml` and `openshell-agents/runtime/harnesses/<harness>/`. Agent directories do not carry their own harness implementations; they provide prompt templates and optional skills or subagents for the shared runtime to inject. Use `--codex-bin "$(command -v codex)"` only when the host executable is compatible with the sandbox OS and architecture. +The manifest-driven launcher at `openshell-agents/run.sh` reads `agent.yaml`, which defines the agent prompt template, provider profile IDs, provider credential sources, gateway settings, skills, subagents, sandbox defaults, and harness defaults. The shared sandbox entrypoint at `openshell-agents/runtime/entrypoint.sh` dispatches to the selected harness adapter. 
+ The launcher: -- Imports `providers/github-gator.yaml`. +- Scans `profile_paths` in manifest order and imports `providers/github-gator.yaml`. - Creates or updates the `github-gator` provider from `gh auth token`. -- Selects the requested harness and uploads its scripts from `harnesses//` into the sandbox payload. +- Selects the requested harness and uploads the common runtime into the sandbox payload. - For `--harness codex`, imports `providers/codex-gator.yaml`, creates or updates the `codex-gator` provider from `$HOME/.codex/auth.json`, and stores the refresh token as gateway-only refresh material. - For `--harness codex`, configures gateway-managed refresh for `CODEX_AUTH_ACCESS_TOKEN` and rotates it before launching the sandbox. - Enables `providers_v2_enabled`, `agent_policy_proposals_enabled`, and `proposal_approval_mode=auto` at gateway scope. - Uses the gator image policy copied to `/etc/openshell/policy.yaml`. - Uploads the current `.agents/skills/gator-gate/SKILL.md` into the sandbox payload. -- Uploads `.claude/agents/principal-engineer-reviewer.md` so the selected harness can run a deterministic independent reviewer execution. -- For `--harness codex`, optionally uploads a host Codex executable as `/sandbox/payload/harnesses/codex/codex`. +- Uploads `.claude/agents/principal-engineer-reviewer.md` so the selected harness can run a deterministic independent reviewer execution through `/sandbox/payload/runtime/subagent.sh principal-engineer-reviewer < task.md`. +- For `--harness codex`, optionally uploads a host Codex executable as `/sandbox/payload/runtime/harnesses/codex/codex`. - Starts the selected harness without a TTY. - Deletes the sandbox automatically after the harness exits. Pass `--keep` to preserve it for debugging. -The GitHub provider profile intentionally does not allow GraphQL because OpenShell's GraphQL policy can constrain operation fields but not repository arguments. 
The sandbox prompt instructs the agent to use REST via `gh api` for the two allowed repositories. +The GitHub provider profile allows read-only GraphQL queries on `api.github.com/graphql` so `gh` read paths can use GraphQL when needed. Write operations remain REST-only and scoped to the two allowed repositories. Set `GATOR_CODEX_ACCESS_CREDENTIAL_KEY` or pass `--codex-access-key` if the gator Codex profile uses a credential key other than `CODEX_AUTH_ACCESS_TOKEN` for the short-lived access token. diff --git a/openshell-agents/gator/agent.yaml b/openshell-agents/gator/agent.yaml new file mode 100644 index 000000000..43c4fd6a2 --- /dev/null +++ b/openshell-agents/gator/agent.yaml @@ -0,0 +1,85 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: gator +display_name: Gator Gate Agent +description: Validate and monitor OpenShell GitHub issues and pull requests through the gator state machine. 
+ +sandbox: + name_prefix: gator + from: agent://../Dockerfile.gator + gateway: docker-dev + background_log_dir: logs + +harness: + default: codex + supported: + codex: + model: gpt-5.5 + reasoning: high + +profile_paths: + - providers + +settings: + - key: providers_v2_enabled + value: true + - key: agent_policy_proposals_enabled + value: true + - key: proposal_approval_mode + value: auto + +providers: + - id: github + name: github-gator + profile: github-gator + credential_mode: explicit + credentials: + - env: GITHUB_TOKEN + source: + kind: host_command + command: gh auth token + export: true + + - id: codex + name: codex-gator + profile: codex-gator + harness: codex + credential_mode: from_existing + credentials: + - env: CODEX_AUTH_ACCESS_TOKEN + source: + kind: file_json + path: ~/.codex/auth.json + query: tokens.access_token + export: true + - env: CODEX_AUTH_ACCOUNT_ID + source: + kind: file_json + path: ~/.codex/auth.json + query: tokens.account_id + export: true + refresh: + credential_key: CODEX_AUTH_ACCESS_TOKEN + strategy: oauth2-refresh-token + materials: + - name: client_id + value: app_EMoamEEZ73f0CkXaXp7hrann + - name: refresh_token + secret: true + source: + kind: file_json + path: ~/.codex/auth.json + query: tokens.refresh_token + +skills: + - id: gator-gate + source: repo://.agents/skills/gator-gate/SKILL.md + destination: .agents/skills/gator-gate/SKILL.md + +subagents: + - id: principal-engineer-reviewer + source: repo://.claude/agents/principal-engineer-reviewer.md + destination: subagents/principal-engineer-reviewer.md + +prompt_template: prompts/gator.md diff --git a/openshell-agents/gator/prompts/gator.md b/openshell-agents/gator/prompts/gator.md new file mode 100644 index 000000000..ed1d741bc --- /dev/null +++ b/openshell-agents/gator/prompts/gator.md @@ -0,0 +1,20 @@ +You are running inside an OpenShell sandbox as the gator gate agent. + +Active harness: {{HARNESS}}. 
+ +Load and follow this skill exactly: + +/sandbox/payload/.agents/skills/gator-gate/SKILL.md + +Important sandbox constraints: + +- GitHub REST write access is scoped to NVIDIA/OpenShell and NVIDIA/OpenShell-Community. +- GitHub GraphQL access is read-only. Prefer REST endpoints for write actions and use GraphQL-backed `gh` reads when useful. +- Keep watching active PRs until they close, merge, or the operator stops the sandbox. +- Do not push to contributor branches unless the operator explicitly instructs you to do so. +- If you receive 403 errors from the sandbox proxy, inspect the JSON response and propose a policy update to allow the requested action if the response contains a structured error message. +- When the gator skill requires the `principal-engineer-reviewer` sub-agent, run a bounded independent review with `{{SUBAGENT_COMMAND}}`. Include PR metadata and full diff/file context in `task.md`, save the output, and use it as the independent reviewer result while the main gator process continues labels, comments, docs, and CI gating. 
+ +Operator request: + +{{USER_PROMPT}} diff --git a/openshell-agents/gator/providers/codex-gator.yaml b/openshell-agents/gator/providers/codex-gator.yaml index 99735fcd9..c0820e5c5 100644 --- a/openshell-agents/gator/providers/codex-gator.yaml +++ b/openshell-agents/gator/providers/codex-gator.yaml @@ -30,11 +30,8 @@ credentials: description: Codex account identifier env_vars: [CODEX_AUTH_ACCOUNT_ID] required: true - - name: id_token - description: Codex OAuth ID token - env_vars: [CODEX_AUTH_ID_TOKEN] discovery: - credentials: [access_token, account_id, id_token] + credentials: [access_token, account_id] endpoints: - host: api.openai.com port: 443 diff --git a/openshell-agents/gator/providers/github-gator.yaml b/openshell-agents/gator/providers/github-gator.yaml index 54e3c3612..d74e48025 100644 --- a/openshell-agents/gator/providers/github-gator.yaml +++ b/openshell-agents/gator/providers/github-gator.yaml @@ -57,6 +57,14 @@ endpoints: - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/reviews } - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/reviews/*/comments } - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/statuses/* } + - host: api.github.com + port: 443 + path: /graphql + protocol: graphql + enforcement: enforce + rules: + - allow: + operation_type: query - host: github.com port: 443 protocol: rest @@ -64,10 +72,14 @@ endpoints: rules: - allow: { method: GET, path: /NVIDIA/OpenShell } - allow: { method: GET, path: /NVIDIA/OpenShell/** } + - allow: { method: GET, path: /NVIDIA/OpenShell.git/** } - allow: { method: POST, path: /NVIDIA/OpenShell/**/git-upload-pack } + - allow: { method: POST, path: /NVIDIA/OpenShell.git/**/git-upload-pack } - allow: { method: GET, path: /NVIDIA/OpenShell-Community } - allow: { method: GET, path: /NVIDIA/OpenShell-Community/** } + - allow: { method: GET, path: /NVIDIA/OpenShell-Community.git/** } - allow: { method: POST, path: 
/NVIDIA/OpenShell-Community/**/git-upload-pack } + - allow: { method: POST, path: /NVIDIA/OpenShell-Community.git/**/git-upload-pack } - host: codeload.github.com port: 443 protocol: rest diff --git a/openshell-agents/gator/run.sh b/openshell-agents/gator/run.sh deleted file mode 100755 index 5c7f073cc..000000000 --- a/openshell-agents/gator/run.sh +++ /dev/null @@ -1,307 +0,0 @@ -#!/usr/bin/env bash - -# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# SPDX-License-Identifier: Apache-2.0 - -set -euo pipefail - -ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/../.." && pwd)" -GATOR_DIR="$ROOT_DIR/openshell-agents/gator" -SKILL_FILE="$ROOT_DIR/.agents/skills/gator-gate/SKILL.md" -REVIEWER_AGENT_FILE="$ROOT_DIR/.claude/agents/principal-engineer-reviewer.md" - -OPENSHELL_BIN="${OPENSHELL_BIN:-openshell}" -GATEWAY="${GATOR_GATEWAY:-docker-dev}" -SANDBOX_NAME="${GATOR_SANDBOX_NAME:-gator-$(date +%Y%m%d%H%M%S)}" -SANDBOX_FROM="${GATOR_SANDBOX_FROM:-$GATOR_DIR}" -HARNESS="${GATOR_HARNESS:-codex}" -GITHUB_PROVIDER="${GATOR_GITHUB_PROVIDER:-github-gator}" -CODEX_PROVIDER="${GATOR_CODEX_PROVIDER:-codex-gator}" -CODEX_PROVIDER_PROFILE="${GATOR_CODEX_PROVIDER_PROFILE:-codex-gator}" -CODEX_ACCESS_CREDENTIAL_KEY="${GATOR_CODEX_ACCESS_CREDENTIAL_KEY:-CODEX_AUTH_ACCESS_TOKEN}" -# Upstream Codex OAuth client ID from codex-rs/login/src/auth/manager.rs. 
-CODEX_OAUTH_CLIENT_ID="${GATOR_CODEX_OAUTH_CLIENT_ID:-app_EMoamEEZ73f0CkXaXp7hrann}" -CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" -CODEX_REASONING="${CODEX_REASONING:-high}" -CODEX_LOCAL_BIN="${GATOR_CODEX_LOCAL_BIN:-}" -BACKGROUND=0 -KEEP_SANDBOX=0 - -usage() { - cat <<'EOF' -Usage: openshell-agents/gator/run.sh [options] "gator prompt" - -Options: - --gateway NAME Gateway name to use (default: docker-dev) - --name NAME Sandbox name (default: gator-) - --from IMAGE Sandbox source/image (default: openshell-agents/gator) - --harness NAME Agent harness to run (default: codex; supported: codex) - --github-provider NAME GitHub provider name (default: github-gator) - --codex-provider NAME Codex provider name (default: codex-gator) - --codex-access-key KEY Codex access-token credential key (default: CODEX_AUTH_ACCESS_TOKEN) - --codex-bin PATH Upload this Codex executable into the sandbox - --background Run sandbox create in the background and write a log - --keep Keep the sandbox after the harness exits (default: delete on exit) - -h, --help Show this help -EOF -} - -fail() { - echo "error: $*" >&2 - exit 1 -} - -require_cmd() { - command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" -} - -openshell_cmd() { - "$OPENSHELL_BIN" --gateway "$GATEWAY" "$@" -} - -upsert_provider() { - local name="$1" - local type="$2" - shift 2 - - if openshell_cmd provider get "$name" >/dev/null 2>&1; then - openshell_cmd provider update "$name" "$@" >/dev/null - else - openshell_cmd provider create --name "$name" --type "$type" "$@" >/dev/null - fi -} - -import_provider_profile() { - local profile_id="$1" - local profile_file="$2" - local import_output - - # Custom profile import is create-only. Replace it when possible so repeat - # runs track this checkout, but keep going if a live sandbox is still using - # the already-imported profile. 
- openshell_cmd provider profile delete "$profile_id" >/dev/null 2>&1 || true - if import_output="$(openshell_cmd provider profile import --file "$profile_file" 2>&1)"; then - return 0 - fi - if [[ "$import_output" == *"already exists"* ]]; then - echo "Provider profile already exists: $profile_file" - return 0 - fi - - printf '%s\n' "$import_output" >&2 - return 1 -} - -configure_codex_refresh() { - openshell_cmd provider refresh configure "$CODEX_PROVIDER" \ - --credential-key "$CODEX_ACCESS_CREDENTIAL_KEY" \ - --strategy oauth2_refresh_token \ - --material "client_id=$CODEX_OAUTH_CLIENT_ID" \ - --material "refresh_token=$CODEX_AUTH_REFRESH_TOKEN" \ - --secret-material-key refresh_token >/dev/null - openshell_cmd provider refresh rotate "$CODEX_PROVIDER" \ - --credential-key "$CODEX_ACCESS_CREDENTIAL_KEY" >/dev/null - echo "Configured gateway refresh for $CODEX_PROVIDER/$CODEX_ACCESS_CREDENTIAL_KEY." -} - -while [[ $# -gt 0 ]]; do - case "$1" in - --gateway) - [[ $# -ge 2 ]] || fail "--gateway requires a value" - GATEWAY="$2" - shift 2 - ;; - --name) - [[ $# -ge 2 ]] || fail "--name requires a value" - SANDBOX_NAME="$2" - shift 2 - ;; - --from) - [[ $# -ge 2 ]] || fail "--from requires a value" - SANDBOX_FROM="$2" - shift 2 - ;; - --harness) - [[ $# -ge 2 ]] || fail "--harness requires a value" - HARNESS="$2" - shift 2 - ;; - --github-provider) - [[ $# -ge 2 ]] || fail "--github-provider requires a value" - GITHUB_PROVIDER="$2" - shift 2 - ;; - --codex-provider) - [[ $# -ge 2 ]] || fail "--codex-provider requires a value" - CODEX_PROVIDER="$2" - shift 2 - ;; - --codex-access-key) - [[ $# -ge 2 ]] || fail "--codex-access-key requires a value" - CODEX_ACCESS_CREDENTIAL_KEY="$2" - shift 2 - ;; - --codex-bin) - [[ $# -ge 2 ]] || fail "--codex-bin requires a value" - CODEX_LOCAL_BIN="$2" - shift 2 - ;; - --background) - BACKGROUND=1 - shift - ;; - --keep) - KEEP_SANDBOX=1 - shift - ;; - -h|--help) - usage - exit 0 - ;; - --) - shift - break - ;; - -*) - fail "unknown 
option: $1" - ;; - *) - break - ;; - esac -done - -[[ $# -gt 0 ]] || { usage >&2; exit 2; } -USER_PROMPT="$*" - -require_cmd gh -require_cmd "$OPENSHELL_BIN" -[[ -f "$SKILL_FILE" ]] || fail "missing gator skill: $SKILL_FILE" -[[ -f "$REVIEWER_AGENT_FILE" ]] || fail "missing reviewer agent: $REVIEWER_AGENT_FILE" - -HARNESS_DIR="$GATOR_DIR/harnesses/$HARNESS" -HARNESS_ENTRYPOINT="/sandbox/payload/harnesses/$HARNESS/sandbox-agent.sh" -HARNESS_REVIEWER_COMMAND="bash /sandbox/payload/harnesses/$HARNESS/reviewer-agent.sh < review-task.md" -HARNESS_PROVIDER_ARGS=() -HARNESS_ENV_ARGS=() - -case "$HARNESS" in - codex) - require_cmd jq - [[ -d "$HARNESS_DIR" ]] || fail "missing harness directory: $HARNESS_DIR" - [[ -f "$HOME/.codex/auth.json" ]] || fail "missing local Codex auth; run: codex login" - - CODEX_AUTH_ACCESS_TOKEN="$(jq -r '.tokens.access_token // empty' "$HOME/.codex/auth.json")" - CODEX_AUTH_REFRESH_TOKEN="$(jq -r '.tokens.refresh_token // empty' "$HOME/.codex/auth.json")" - CODEX_AUTH_ACCOUNT_ID="$(jq -r '.tokens.account_id // empty' "$HOME/.codex/auth.json")" - CODEX_AUTH_ID_TOKEN="$(jq -r '.tokens.id_token // empty' "$HOME/.codex/auth.json")" - [[ -n "$CODEX_AUTH_ACCESS_TOKEN" ]] || fail "Codex auth is missing tokens.access_token" - [[ -n "$CODEX_AUTH_REFRESH_TOKEN" ]] || fail "Codex auth is missing tokens.refresh_token" - [[ -n "$CODEX_AUTH_ACCOUNT_ID" ]] || fail "Codex auth is missing tokens.account_id" - - export CODEX_AUTH_ACCESS_TOKEN - export CODEX_AUTH_ACCOUNT_ID - export CODEX_AUTH_ID_TOKEN - HARNESS_PROVIDER_ARGS=(--provider "$CODEX_PROVIDER") - HARNESS_ENV_ARGS=("CODEX_MODEL=$CODEX_MODEL" "CODEX_REASONING=$CODEX_REASONING") - ;; - *) - fail "unsupported harness: $HARNESS (supported: codex)" - ;; -esac - -GITHUB_TOKEN="$(gh auth token)" -[[ -n "$GITHUB_TOKEN" ]] || fail "gh auth token returned empty output" - -export GITHUB_TOKEN - -PAYLOAD_PARENT="$(mktemp -d "${TMPDIR:-/tmp}/openshell-gator.XXXXXX")" -PAYLOAD_DIR="$PAYLOAD_PARENT/payload" 
-cleanup() { - rm -rf "$PAYLOAD_PARENT" -} -trap cleanup EXIT - -mkdir -p "$PAYLOAD_DIR/.agents/skills/gator-gate" -mkdir -p "$PAYLOAD_DIR/.claude/agents" -mkdir -p "$PAYLOAD_DIR/harnesses" -cp "$SKILL_FILE" "$PAYLOAD_DIR/.agents/skills/gator-gate/SKILL.md" -cp "$REVIEWER_AGENT_FILE" "$PAYLOAD_DIR/.claude/agents/principal-engineer-reviewer.md" -cp -R "$HARNESS_DIR" "$PAYLOAD_DIR/harnesses/$HARNESS" -chmod +x "$PAYLOAD_DIR/harnesses/$HARNESS"/*.sh -if [[ -n "$CODEX_LOCAL_BIN" ]]; then - [[ -x "$CODEX_LOCAL_BIN" ]] || fail "--codex-bin is not executable: $CODEX_LOCAL_BIN" - [[ "$HARNESS" == "codex" ]] || fail "--codex-bin is only valid with --harness codex" - cp "$CODEX_LOCAL_BIN" "$PAYLOAD_DIR/harnesses/codex/codex" - chmod +x "$PAYLOAD_DIR/harnesses/codex/codex" -fi -cat > "$PAYLOAD_DIR/gator-prompt.md" </dev/null -openshell_cmd settings set --global --key agent_policy_proposals_enabled --value true --yes >/dev/null -openshell_cmd settings set --global --key proposal_approval_mode --value auto --yes >/dev/null - -import_provider_profile github-gator "$GATOR_DIR/providers/github-gator.yaml" -upsert_provider "$GITHUB_PROVIDER" github-gator --credential GITHUB_TOKEN -case "$HARNESS" in - codex) - import_provider_profile "$CODEX_PROVIDER_PROFILE" "$GATOR_DIR/providers/$CODEX_PROVIDER_PROFILE.yaml" - upsert_provider "$CODEX_PROVIDER" "$CODEX_PROVIDER_PROFILE" --from-existing - configure_codex_refresh - ;; -esac - -KEEP_ARGS=() -if [[ "$KEEP_SANDBOX" != "1" ]]; then - KEEP_ARGS+=(--no-keep) -fi - -SANDBOX_CMD=( - env -u OPENSHELL_SANDBOX_POLICY - "$OPENSHELL_BIN" --gateway "$GATEWAY" sandbox create - --name "$SANDBOX_NAME" - --from "$SANDBOX_FROM" - --provider "$GITHUB_PROVIDER" - "${HARNESS_PROVIDER_ARGS[@]}" - --upload "$PAYLOAD_DIR:/sandbox" - --no-git-ignore - --no-auto-providers - --no-tty - "${KEEP_ARGS[@]}" - -- env "${HARNESS_ENV_ARGS[@]}" bash "$HARNESS_ENTRYPOINT" -) - -echo "Launching gator sandbox '$SANDBOX_NAME' on gateway '$GATEWAY'..." 
-if [[ "$BACKGROUND" == "1" ]]; then - mkdir -p "$GATOR_DIR/logs" - LOG_FILE="$GATOR_DIR/logs/${SANDBOX_NAME}.log" - trap - EXIT - ( - trap cleanup EXIT - "${SANDBOX_CMD[@]}" - ) >"$LOG_FILE" 2>&1 & - echo "Started in background. Log: $LOG_FILE" -else - "${SANDBOX_CMD[@]}" -fi diff --git a/openshell-agents/run.sh b/openshell-agents/run.sh new file mode 100755 index 000000000..c9b59331f --- /dev/null +++ b/openshell-agents/run.sh @@ -0,0 +1,605 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +OPENSHELL_BIN="${OPENSHELL_BIN:-openshell}" +AGENT_ARG="${OPENSHELL_AGENT_DIR:-}" +GATEWAY_OVERRIDE="" +SANDBOX_NAME_OVERRIDE="" +SANDBOX_FROM_OVERRIDE="" +HARNESS_OVERRIDE="${GATOR_HARNESS:-}" +GITHUB_PROVIDER_OVERRIDE="${GATOR_GITHUB_PROVIDER:-}" +CODEX_PROVIDER_OVERRIDE="${GATOR_CODEX_PROVIDER:-}" +CODEX_PROVIDER_PROFILE_OVERRIDE="${GATOR_CODEX_PROVIDER_PROFILE:-}" +CODEX_ACCESS_KEY_OVERRIDE="${GATOR_CODEX_ACCESS_CREDENTIAL_KEY:-}" +CODEX_LOCAL_BIN="${GATOR_CODEX_LOCAL_BIN:-}" +BACKGROUND=0 +KEEP_SANDBOX=0 + +usage() { + printf '%s\n' 'Usage: openshell-agents/run.sh --agent [options] "agent prompt"' + cat <<'EOF' + +Options: + --agent NAME|PATH Agent manifest directory or name under openshell-agents/ + --gateway NAME Gateway name to use + --name NAME Sandbox name + --from IMAGE Sandbox source/image + --harness NAME Agent harness to run + --github-provider NAME Override the github-gator provider instance name + --codex-provider NAME Override the codex-gator provider instance name + --codex-access-key KEY Override the Codex access-token credential key + --codex-bin PATH Upload this Codex executable into the sandbox + --background Run sandbox create in the background and write a log + --keep Keep the sandbox after the harness exits + 
-h, --help Show this help +EOF +} + +fail() { + echo "error: $*" >&2 + exit 1 +} + +require_cmd() { + command -v "$1" >/dev/null 2>&1 || fail "missing required command: $1" +} + +while [[ $# -gt 0 ]]; do + case "$1" in + --agent) + [[ $# -ge 2 ]] || fail "--agent requires a value" + AGENT_ARG="$2" + shift 2 + ;; + --gateway) + [[ $# -ge 2 ]] || fail "--gateway requires a value" + GATEWAY_OVERRIDE="$2" + shift 2 + ;; + --name) + [[ $# -ge 2 ]] || fail "--name requires a value" + SANDBOX_NAME_OVERRIDE="$2" + shift 2 + ;; + --from) + [[ $# -ge 2 ]] || fail "--from requires a value" + SANDBOX_FROM_OVERRIDE="$2" + shift 2 + ;; + --harness) + [[ $# -ge 2 ]] || fail "--harness requires a value" + HARNESS_OVERRIDE="$2" + shift 2 + ;; + --github-provider) + [[ $# -ge 2 ]] || fail "--github-provider requires a value" + GITHUB_PROVIDER_OVERRIDE="$2" + shift 2 + ;; + --codex-provider) + [[ $# -ge 2 ]] || fail "--codex-provider requires a value" + CODEX_PROVIDER_OVERRIDE="$2" + shift 2 + ;; + --codex-access-key) + [[ $# -ge 2 ]] || fail "--codex-access-key requires a value" + CODEX_ACCESS_KEY_OVERRIDE="$2" + shift 2 + ;; + --codex-bin) + [[ $# -ge 2 ]] || fail "--codex-bin requires a value" + CODEX_LOCAL_BIN="$2" + shift 2 + ;; + --background) + BACKGROUND=1 + shift + ;; + --keep) + KEEP_SANDBOX=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + --) + shift + break + ;; + -* ) + fail "unknown option: $1" + ;; + *) + break + ;; + esac +done + +[[ -n "$AGENT_ARG" ]] || { usage >&2; exit 2; } +[[ $# -gt 0 ]] || { usage >&2; exit 2; } +USER_PROMPT="$*" + +case "$AGENT_ARG" in + /*|*/*) + AGENT_DIR="$AGENT_ARG" + ;; + *) + AGENT_DIR="$SCRIPT_DIR/$AGENT_ARG" + ;; +esac + +[[ -d "$AGENT_DIR" ]] || fail "missing agent directory: $AGENT_DIR" +AGENT_DIR="$(cd "$AGENT_DIR" && pwd)" +MANIFEST_FILE="$AGENT_DIR/agent.yaml" +[[ -f "$MANIFEST_FILE" ]] || fail "missing agent manifest: $MANIFEST_FILE" + +require_cmd ruby +require_cmd "$OPENSHELL_BIN" + +CONFIG_FILE="$(mktemp 
"${TMPDIR:-/tmp}/openshell-agent-config.XXXXXX")" +cleanup_config() { + rm -f "$CONFIG_FILE" +} +trap cleanup_config EXIT + +ruby -ryaml -rshellwords - "$MANIFEST_FILE" "$HARNESS_OVERRIDE" >"$CONFIG_FILE" <<'RUBY' +manifest = YAML.load_file(ARGV[0]) || {} +harness = ARGV[1].to_s.empty? ? manifest.dig("harness", "default").to_s : ARGV[1].to_s +supported = manifest.dig("harness", "supported") || {} +abort "unsupported harness: #{harness} (supported: #{supported.keys.join(', ')})" unless supported.key?(harness) + +def sh(value) + Shellwords.escape(value.to_s) +end + +def emit(name, value) + puts "#{name}=#{sh(value)}" +end + +def emit_array(name, values) + puts "#{name}=(#{values.map { |value| sh(value) }.join(' ')})" +end + +harness_config = supported[harness] || {} +emit "AGENT_ID", manifest.fetch("id") +emit "AGENT_DISPLAY_NAME", manifest.fetch("display_name", manifest.fetch("id")) +emit "HARNESS", harness +emit "HARNESS_MODEL", harness_config.fetch("model", "") +emit "HARNESS_REASONING", harness_config.fetch("reasoning", "") +emit "SANDBOX_NAME_PREFIX", manifest.dig("sandbox", "name_prefix") || manifest.fetch("id") +emit "SANDBOX_FROM_DEFAULT", manifest.dig("sandbox", "from") || "agent://." +emit "GATEWAY_DEFAULT", manifest.dig("sandbox", "gateway") || "docker-dev" +emit "BACKGROUND_LOG_DIR", manifest.dig("sandbox", "background_log_dir") || "logs" +emit "PROMPT_TEMPLATE", manifest.fetch("prompt_template") +emit_array "PROFILE_PATHS", manifest.fetch("profile_paths", []) + +settings = manifest.fetch("settings", []) +emit "SETTING_COUNT", settings.length +settings.each_with_index do |setting, index| + emit "SETTING_#{index}_KEY", setting.fetch("key") + emit "SETTING_#{index}_VALUE", setting.fetch("value") +end + +providers = manifest.fetch("providers", []).select do |provider| + provider["harness"].nil? 
|| provider["harness"] == harness +end +emit "PROVIDER_COUNT", providers.length +providers.each_with_index do |provider, index| + emit "PROVIDER_#{index}_ID", provider.fetch("id") + emit "PROVIDER_#{index}_NAME", provider.fetch("name") + emit "PROVIDER_#{index}_PROFILE", provider.fetch("profile") + emit "PROVIDER_#{index}_CREDENTIAL_MODE", provider.fetch("credential_mode", "explicit") + credentials = provider.fetch("credentials", []) + emit "PROVIDER_#{index}_CREDENTIAL_COUNT", credentials.length + credentials.each_with_index do |credential, credential_index| + source = credential.fetch("source", {}) + prefix = "PROVIDER_#{index}_CREDENTIAL_#{credential_index}" + emit "#{prefix}_ENV", credential.fetch("env") + emit "#{prefix}_EXPORT", credential.fetch("export", true) + emit "#{prefix}_KIND", source.fetch("kind", "value") + emit "#{prefix}_COMMAND", source.fetch("command", "") + emit "#{prefix}_PATH", source.fetch("path", "") + emit "#{prefix}_QUERY", source.fetch("query", "") + emit "#{prefix}_VALUE", source.fetch("value", "") + end + + refresh = provider["refresh"] || {} + emit "PROVIDER_#{index}_REFRESH_ENABLED", refresh.empty? ? "false" : "true" + emit "PROVIDER_#{index}_REFRESH_CREDENTIAL_KEY", refresh.fetch("credential_key", "") + emit "PROVIDER_#{index}_REFRESH_STRATEGY", refresh.fetch("strategy", "") + materials = refresh.fetch("materials", []) + emit "PROVIDER_#{index}_REFRESH_MATERIAL_COUNT", materials.length + materials.each_with_index do |material, material_index| + source = material.fetch("source", {}) + prefix = "PROVIDER_#{index}_REFRESH_MATERIAL_#{material_index}" + emit "#{prefix}_NAME", material.fetch("name") + emit "#{prefix}_SECRET", material.fetch("secret", false) + emit "#{prefix}_KIND", source.fetch("kind", material.key?("value") ? 
"value" : "") + emit "#{prefix}_COMMAND", source.fetch("command", "") + emit "#{prefix}_PATH", source.fetch("path", "") + emit "#{prefix}_QUERY", source.fetch("query", "") + emit "#{prefix}_VALUE", material.fetch("value", source.fetch("value", "")) + end +end + +uploads = [] +manifest.fetch("skills", []).each do |skill| + uploads << [skill.fetch("source"), skill.fetch("destination")] +end +manifest.fetch("subagents", []).each do |subagent| + uploads << [subagent.fetch("source"), subagent.fetch("destination")] +end +emit "UPLOAD_COUNT", uploads.length +uploads.each_with_index do |(source, destination), index| + emit "UPLOAD_#{index}_SOURCE", source + emit "UPLOAD_#{index}_DESTINATION", destination +end +RUBY + +# shellcheck source=/dev/null +source "$CONFIG_FILE" + +set_var() { + printf -v "$1" '%s' "$2" +} + +resolve_manifest_path() { + local path="$1" + case "$path" in + repo://*) printf '%s/%s' "$ROOT_DIR" "${path#repo://}" ;; + agent://*) printf '%s/%s' "$AGENT_DIR" "${path#agent://}" ;; + /*) printf '%s' "$path" ;; + *) printf '%s/%s' "$AGENT_DIR" "$path" ;; + esac +} + +expand_home_path() { + local path="$1" + case "$path" in + \~) printf '%s' "$HOME" ;; + \~/*) printf '%s/%s' "$HOME" "${path#\~/}" ;; + *) printf '%s' "$path" ;; + esac +} + +openshell_cmd() { + "$OPENSHELL_BIN" --gateway "$GATEWAY" "$@" +} + +upsert_provider() { + local name="$1" + local type="$2" + shift 2 + + if openshell_cmd provider get "$name" >/dev/null 2>&1; then + openshell_cmd provider update "$name" "$@" >/dev/null + else + openshell_cmd provider create --name "$name" --type "$type" "$@" >/dev/null + fi +} + +import_provider_profile() { + local profile_id="$1" + local profile_file="$2" + local import_output + + openshell_cmd provider profile delete "$profile_id" >/dev/null 2>&1 || true + if import_output="$(openshell_cmd provider profile import --file "$profile_file" 2>&1)"; then + return 0 + fi + if [[ "$import_output" == *"already exists"* ]]; then + echo "Provider profile already 
exists: $profile_file" + return 0 + fi + + printf '%s\n' "$import_output" >&2 + return 1 +} + +resolve_profile_file() { + local profile_id="$1" + ruby -ryaml - "$MANIFEST_FILE" "$ROOT_DIR" "$AGENT_DIR" "$profile_id" <<'RUBY' +manifest_path, root_dir, agent_dir, profile_id = ARGV +manifest = YAML.load_file(manifest_path) || {} + +def resolve(path, root_dir, agent_dir) + case path.to_s + when /^repo:\/\// then File.expand_path(path.delete_prefix("repo://"), root_dir) + when /^agent:\/\// then File.expand_path(path.delete_prefix("agent://"), agent_dir) + when /^\// then path + else File.expand_path(path, agent_dir) + end +end + +selected = nil +manifest.fetch("profile_paths", []).each do |raw_path| + dir = resolve(raw_path, root_dir, agent_dir) + next unless File.directory?(dir) + + ids = {} + Dir.glob(File.join(dir, "*.{yaml,yml}")).sort.each do |file| + data = YAML.load_file(file) || {} + id = data["id"] + next if id.nil? || id.to_s.empty? + if ids.key?(id) + abort "duplicate provider profile id '#{id}' in #{dir}: #{ids[id]} and #{file}" + end + ids[id] = file + rescue Psych::SyntaxError => error + abort "invalid provider profile YAML #{file}: #{error.message}" + end + + match = ids[profile_id] + next unless match + if selected + warn "warning: provider profile #{profile_id} in #{match} is shadowed by #{selected}" + else + selected = match + end +end + +abort "provider profile not found in profile_paths: #{profile_id}" unless selected +puts selected +RUBY +} + +resolve_source_value() { + local kind="$1" + local command_value="$2" + local path_value="$3" + local query_value="$4" + local literal_value="$5" + + case "$kind" in + host_command) + bash -lc "$command_value" + ;; + file_json) + local expanded_path + expanded_path="$(expand_home_path "$path_value")" + [[ -f "$expanded_path" ]] || fail "missing credential file: $expanded_path" + ruby -rjson - "$expanded_path" "$query_value" <<'RUBY' +path, query = ARGV +value = JSON.parse(File.read(path)) 
+query.split(".").each do |part| + value = value.fetch(part) +end +print value.to_s +RUBY + ;; + value) + printf '%s' "$literal_value" + ;; + *) + fail "unsupported credential source kind: $kind" + ;; + esac +} + +configure_provider_refresh() { + local provider_index="$1" + local provider_name_var="PROVIDER_${provider_index}_NAME" + local key_var="PROVIDER_${provider_index}_REFRESH_CREDENTIAL_KEY" + local strategy_var="PROVIDER_${provider_index}_REFRESH_STRATEGY" + local count_var="PROVIDER_${provider_index}_REFRESH_MATERIAL_COUNT" + local provider_name="${!provider_name_var}" + local credential_key="${!key_var}" + local strategy="${!strategy_var}" + local material_count="${!count_var}" + local args=( + provider refresh configure "$provider_name" + --credential-key "$credential_key" + --strategy "$strategy" + ) + + local material_index + for ((material_index = 0; material_index < material_count; material_index++)); do + local prefix="PROVIDER_${provider_index}_REFRESH_MATERIAL_${material_index}" + local name_var="${prefix}_NAME" + local secret_var="${prefix}_SECRET" + local kind_var="${prefix}_KIND" + local command_var="${prefix}_COMMAND" + local path_var="${prefix}_PATH" + local query_var="${prefix}_QUERY" + local value_var="${prefix}_VALUE" + local material_name="${!name_var}" + local material_value + + if [[ "$material_name" == "client_id" && -n "${GATOR_CODEX_OAUTH_CLIENT_ID:-}" ]]; then + material_value="$GATOR_CODEX_OAUTH_CLIENT_ID" + else + material_value="$(resolve_source_value "${!kind_var}" "${!command_var}" "${!path_var}" "${!query_var}" "${!value_var}")" + fi + [[ -n "$material_value" ]] || fail "empty refresh material: $provider_name/$material_name" + args+=(--material "$material_name=$material_value") + if [[ "${!secret_var}" == "true" ]]; then + args+=(--secret-material-key "$material_name") + fi + done + + openshell_cmd "${args[@]}" >/dev/null + openshell_cmd provider refresh rotate "$provider_name" --credential-key "$credential_key" >/dev/null + 
echo "Configured gateway refresh for $provider_name/$credential_key." +} + +GATEWAY="${GATEWAY_OVERRIDE:-${GATOR_GATEWAY:-$GATEWAY_DEFAULT}}" +SANDBOX_NAME="${SANDBOX_NAME_OVERRIDE:-${GATOR_SANDBOX_NAME:-$SANDBOX_NAME_PREFIX-$(date +%Y%m%d%H%M%S)}}" +SANDBOX_FROM="${SANDBOX_FROM_OVERRIDE:-${GATOR_SANDBOX_FROM:-$(resolve_manifest_path "$SANDBOX_FROM_DEFAULT")}}" + +for ((provider_index = 0; provider_index < PROVIDER_COUNT; provider_index++)); do + profile_var="PROVIDER_${provider_index}_PROFILE" + name_var="PROVIDER_${provider_index}_NAME" + refresh_key_var="PROVIDER_${provider_index}_REFRESH_CREDENTIAL_KEY" + case "${!profile_var}" in + github-gator) + [[ -z "$GITHUB_PROVIDER_OVERRIDE" ]] || set_var "$name_var" "$GITHUB_PROVIDER_OVERRIDE" + ;; + codex-gator) + [[ -z "$CODEX_PROVIDER_OVERRIDE" ]] || set_var "$name_var" "$CODEX_PROVIDER_OVERRIDE" + [[ -z "$CODEX_PROVIDER_PROFILE_OVERRIDE" ]] || set_var "$profile_var" "$CODEX_PROVIDER_PROFILE_OVERRIDE" + [[ -z "$CODEX_ACCESS_KEY_OVERRIDE" ]] || set_var "$refresh_key_var" "$CODEX_ACCESS_KEY_OVERRIDE" + ;; + esac +done + +PAYLOAD_PARENT="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent.XXXXXX")" +PAYLOAD_DIR="$PAYLOAD_PARENT/payload" +cleanup_payload() { + rm -rf "$PAYLOAD_PARENT" +} +trap 'cleanup_config; cleanup_payload' EXIT + +mkdir -p "$PAYLOAD_DIR" +cp -R "$SCRIPT_DIR/runtime" "$PAYLOAD_DIR/runtime" +chmod +x "$PAYLOAD_DIR/runtime"/*.sh +chmod +x "$PAYLOAD_DIR/runtime/harnesses/$HARNESS"/*.sh + +if [[ -n "$CODEX_LOCAL_BIN" ]]; then + [[ -x "$CODEX_LOCAL_BIN" ]] || fail "--codex-bin is not executable: $CODEX_LOCAL_BIN" + [[ "$HARNESS" == "codex" ]] || fail "--codex-bin is only valid with --harness codex" + cp "$CODEX_LOCAL_BIN" "$PAYLOAD_DIR/runtime/harnesses/codex/codex" + chmod +x "$PAYLOAD_DIR/runtime/harnesses/codex/codex" +fi + +for ((upload_index = 0; upload_index < UPLOAD_COUNT; upload_index++)); do + source_var="UPLOAD_${upload_index}_SOURCE" + destination_var="UPLOAD_${upload_index}_DESTINATION" + 
source_path="$(resolve_manifest_path "${!source_var}")" + destination_path="$PAYLOAD_DIR/${!destination_var}" + [[ -f "$source_path" ]] || fail "missing payload source: $source_path" + mkdir -p "$(dirname "$destination_path")" + cp "$source_path" "$destination_path" +done + +SUBAGENT_COMMAND="bash /sandbox/payload/runtime/subagent.sh principal-engineer-reviewer < task.md" +PROMPT_TEMPLATE_PATH="$(resolve_manifest_path "$PROMPT_TEMPLATE")" +[[ -f "$PROMPT_TEMPLATE_PATH" ]] || fail "missing prompt template: $PROMPT_TEMPLATE_PATH" +ruby - "$PROMPT_TEMPLATE_PATH" "$PAYLOAD_DIR/agent-prompt.md" "$HARNESS" "$SUBAGENT_COMMAND" "$USER_PROMPT" <<'RUBY' +template_path, output_path, harness, subagent_command, user_prompt = ARGV +values = { + "HARNESS" => harness, + "SUBAGENT_COMMAND" => subagent_command, + "USER_PROMPT" => user_prompt, +} +template = File.read(template_path) +rendered = template.gsub(/\{\{([A-Z0-9_]+)\}\}/) do + values.fetch(Regexp.last_match(1)) +end +File.write(output_path, rendered) +RUBY + +for ((setting_index = 0; setting_index < SETTING_COUNT; setting_index++)); do + key_var="SETTING_${setting_index}_KEY" + value_var="SETTING_${setting_index}_VALUE" + openshell_cmd settings set --global --key "${!key_var}" --value "${!value_var}" --yes >/dev/null +done + +PROVIDER_ARGS=() +for ((provider_index = 0; provider_index < PROVIDER_COUNT; provider_index++)); do + name_var="PROVIDER_${provider_index}_NAME" + profile_var="PROVIDER_${provider_index}_PROFILE" + mode_var="PROVIDER_${provider_index}_CREDENTIAL_MODE" + credential_count_var="PROVIDER_${provider_index}_CREDENTIAL_COUNT" + refresh_enabled_var="PROVIDER_${provider_index}_REFRESH_ENABLED" + provider_name="${!name_var}" + profile_id="${!profile_var}" + credential_mode="${!mode_var}" + credential_count="${!credential_count_var}" + profile_file="$(resolve_profile_file "$profile_id")" + + import_provider_profile "$profile_id" "$profile_file" + + credential_args=() + for ((credential_index = 0; credential_index 
< credential_count; credential_index++)); do + prefix="PROVIDER_${provider_index}_CREDENTIAL_${credential_index}" + env_var="${prefix}_ENV" + export_var="${prefix}_EXPORT" + kind_var="${prefix}_KIND" + command_var="${prefix}_COMMAND" + path_var="${prefix}_PATH" + query_var="${prefix}_QUERY" + value_var="${prefix}_VALUE" + credential_env="${!env_var}" + credential_value="$(resolve_source_value "${!kind_var}" "${!command_var}" "${!path_var}" "${!query_var}" "${!value_var}")" + [[ -n "$credential_value" ]] || fail "empty credential value: $provider_name/$credential_env" + if [[ "${!export_var}" == "true" ]]; then + export "$credential_env=$credential_value" + fi + if [[ "$credential_mode" == "explicit" ]]; then + credential_args+=(--credential "$credential_env") + fi + done + + case "$credential_mode" in + explicit) + upsert_provider "$provider_name" "$profile_id" "${credential_args[@]}" + ;; + from_existing) + upsert_provider "$provider_name" "$profile_id" --from-existing + ;; + *) + fail "unsupported credential_mode for $provider_name: $credential_mode" + ;; + esac + + if [[ "${!refresh_enabled_var}" == "true" ]]; then + configure_provider_refresh "$provider_index" + fi + PROVIDER_ARGS+=(--provider "$provider_name") +done + +KEEP_ARGS=() +if [[ "$KEEP_SANDBOX" != "1" ]]; then + KEEP_ARGS+=(--no-keep) +fi + +HARNESS_ENV_ARGS=( + "OPENSHELL_AGENT_ID=$AGENT_ID" + "OPENSHELL_AGENT_HARNESS=$HARNESS" +) + +case "$HARNESS" in + codex) + HARNESS_ENV_ARGS+=( + "CODEX_MODEL=${CODEX_MODEL:-$HARNESS_MODEL}" + "CODEX_REASONING=${CODEX_REASONING:-$HARNESS_REASONING}" + ) + ;; +esac + +SANDBOX_CMD=( + env -u OPENSHELL_SANDBOX_POLICY + "$OPENSHELL_BIN" --gateway "$GATEWAY" sandbox create + --name "$SANDBOX_NAME" + --from "$SANDBOX_FROM" + "${PROVIDER_ARGS[@]}" + --upload "$PAYLOAD_DIR:/sandbox" + --no-git-ignore + --no-auto-providers + --no-tty + "${KEEP_ARGS[@]}" + -- env "${HARNESS_ENV_ARGS[@]}" bash /sandbox/payload/runtime/entrypoint.sh +) + +echo "Launching $AGENT_DISPLAY_NAME 
sandbox '$SANDBOX_NAME' on gateway '$GATEWAY'..." +if [[ "$BACKGROUND" == "1" ]]; then + LOG_DIR="$(resolve_manifest_path "$BACKGROUND_LOG_DIR")" + mkdir -p "$LOG_DIR" + LOG_FILE="$LOG_DIR/${SANDBOX_NAME}.log" + trap - EXIT + ( + trap 'cleanup_config; cleanup_payload' EXIT + "${SANDBOX_CMD[@]}" + ) >"$LOG_FILE" 2>&1 & + echo "Started in background. Log: $LOG_FILE" +else + "${SANDBOX_CMD[@]}" +fi diff --git a/openshell-agents/runtime/entrypoint.sh b/openshell-agents/runtime/entrypoint.sh new file mode 100755 index 000000000..a9dc8aa3e --- /dev/null +++ b/openshell-agents/runtime/entrypoint.sh @@ -0,0 +1,21 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +require_env() { + local name="$1" + [[ -n "${!name:-}" ]] || { echo "missing required env: $name" >&2; exit 1; } +} + +require_env OPENSHELL_AGENT_HARNESS + +PROMPT_FILE="${OPENSHELL_AGENT_PROMPT:-/sandbox/payload/agent-prompt.md}" +ADAPTER="/sandbox/payload/runtime/harnesses/$OPENSHELL_AGENT_HARNESS/exec.sh" + +[[ -f "$PROMPT_FILE" ]] || { echo "missing agent prompt: $PROMPT_FILE" >&2; exit 1; } +[[ -x "$ADAPTER" ]] || { echo "missing harness adapter: $ADAPTER" >&2; exit 1; } + +exec bash "$ADAPTER" "$PROMPT_FILE" diff --git a/openshell-agents/gator/harnesses/codex/sandbox-agent.sh b/openshell-agents/runtime/harnesses/codex/exec.sh similarity index 86% rename from openshell-agents/gator/harnesses/codex/sandbox-agent.sh rename to openshell-agents/runtime/harnesses/codex/exec.sh index a6f34a3b0..af89dcf96 100755 --- a/openshell-agents/gator/harnesses/codex/sandbox-agent.sh +++ b/openshell-agents/runtime/harnesses/codex/exec.sh @@ -5,6 +5,11 @@ set -euo pipefail +if [[ $# -ne 1 ]]; then + echo "usage: exec.sh " >&2 + exit 2 +fi + require_env() { local name="$1" [[ -n "${!name:-}" ]] || { echo "missing required env: $name" >&2; exit 1; } @@ -14,6 +19,7 @@ require_env 
CODEX_AUTH_ACCESS_TOKEN require_env CODEX_AUTH_ACCOUNT_ID require_env GITHUB_TOKEN +PROMPT_FILE="$1" export GH_TOKEN="$GITHUB_TOKEN" export HOME=/sandbox/home @@ -28,8 +34,8 @@ const fallbackIdToken = [ b64u({ iss: "https://auth.openai.com", aud: "codex", - sub: "openshell-gator", - email: "gator@openshell.local", + sub: "openshell-agent", + email: "agent@openshell.local", iat: now, exp: now + 3600, }), @@ -54,8 +60,8 @@ WORK="$(mktemp -d)" cd "$WORK" CODEX_BIN="${CODEX_BIN:-codex}" -if [[ -x /sandbox/payload/harnesses/codex/codex ]]; then - CODEX_BIN=/sandbox/payload/harnesses/codex/codex +if [[ -x /sandbox/payload/runtime/harnesses/codex/codex ]]; then + CODEX_BIN=/sandbox/payload/runtime/harnesses/codex/codex fi CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" CODEX_REASONING="${CODEX_REASONING:-high}" @@ -77,4 +83,4 @@ fi exec "$CODEX_BIN" "${CODEX_EXEC_ARGS[@]}" \ -c "model=\"${CODEX_MODEL}\"" \ -c "model_reasoning_effort=\"${CODEX_REASONING}\"" \ - "$(cat /sandbox/payload/gator-prompt.md)" + "$(<"$PROMPT_FILE")" diff --git a/openshell-agents/gator/harnesses/codex/install-codex.sh b/openshell-agents/runtime/harnesses/codex/install-codex.sh old mode 100755 new mode 100644 similarity index 100% rename from openshell-agents/gator/harnesses/codex/install-codex.sh rename to openshell-agents/runtime/harnesses/codex/install-codex.sh diff --git a/openshell-agents/gator/harnesses/codex/reviewer-agent.sh b/openshell-agents/runtime/harnesses/codex/subagent.sh similarity index 63% rename from openshell-agents/gator/harnesses/codex/reviewer-agent.sh rename to openshell-agents/runtime/harnesses/codex/subagent.sh index f66a8d6c0..119463492 100755 --- a/openshell-agents/gator/harnesses/codex/reviewer-agent.sh +++ b/openshell-agents/runtime/harnesses/codex/subagent.sh @@ -5,15 +5,21 @@ set -euo pipefail -REVIEWER_PROMPT="${REVIEWER_PROMPT:-/sandbox/payload/.claude/agents/principal-engineer-reviewer.md}" -[[ -f "$REVIEWER_PROMPT" ]] || { - echo "missing reviewer prompt: $REVIEWER_PROMPT" 
>&2 +if [[ $# -ne 1 ]]; then + echo "usage: subagent.sh < task.md" >&2 + exit 2 +fi + +SUBAGENT_ID="$1" +SUBAGENT_PROMPT="/sandbox/payload/subagents/$SUBAGENT_ID.md" +[[ -f "$SUBAGENT_PROMPT" ]] || { + echo "missing subagent prompt: $SUBAGENT_PROMPT" >&2 exit 1 } CODEX_BIN="${CODEX_BIN:-codex}" -if [[ -x /sandbox/payload/harnesses/codex/codex ]]; then - CODEX_BIN=/sandbox/payload/harnesses/codex/codex +if [[ -x /sandbox/payload/runtime/harnesses/codex/codex ]]; then + CODEX_BIN=/sandbox/payload/runtime/harnesses/codex/codex fi CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" @@ -29,12 +35,11 @@ trap cleanup EXIT cat >"$TASK_FILE" { - printf '%s\n\n' 'You are running as the principal-engineer-reviewer sub-agent for OpenShell gator-gate.' + printf '%s\n\n' "You are running as the $SUBAGENT_ID sub-agent inside an OpenShell sandbox." printf '%s\n\n' 'Follow this agent definition exactly:' - cat "$REVIEWER_PROMPT" - printf '\n%s\n\n' 'Reviewer task:' + cat "$SUBAGENT_PROMPT" + printf '\n%s\n\n' 'Task:' cat "$TASK_FILE" - printf '\n%s\n' 'Return the review only. Do not mutate repository state, labels, comments, or PRs.' } >"$PROMPT_FILE" CODEX_EXEC_ARGS=( @@ -54,4 +59,4 @@ fi exec "$CODEX_BIN" "${CODEX_EXEC_ARGS[@]}" \ -c "model=\"${CODEX_MODEL}\"" \ -c "model_reasoning_effort=\"${CODEX_REASONING}\"" \ - "$(cat "$PROMPT_FILE")" + - <"$PROMPT_FILE" diff --git a/openshell-agents/runtime/subagent.sh b/openshell-agents/runtime/subagent.sh new file mode 100755 index 000000000..f544487e6 --- /dev/null +++ b/openshell-agents/runtime/subagent.sh @@ -0,0 +1,19 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +if [[ $# -ne 1 ]]; then + echo "usage: /sandbox/payload/runtime/subagent.sh < task.md" >&2 + exit 2 +fi + +HARNESS="${OPENSHELL_AGENT_HARNESS:-}" +[[ -n "$HARNESS" ]] || { echo "missing required env: OPENSHELL_AGENT_HARNESS" >&2; exit 1; } + +ADAPTER="/sandbox/payload/runtime/harnesses/$HARNESS/subagent.sh" +[[ -x "$ADAPTER" ]] || { echo "missing subagent adapter: $ADAPTER" >&2; exit 1; } + +exec bash "$ADAPTER" "$1" From d810646908686c3036b0e29f01aa161aa23dd68b Mon Sep 17 00:00:00 2001 From: John Myers Date: Fri, 5 Jun 2026 15:07:57 -0700 Subject: [PATCH 11/20] feat(agents): supervise watch cycles in sandbox --- .agents/skills/gator-gate/SKILL.md | 10 +- architecture/build.md | 6 +- openshell-agents/README.md | 43 ++++++- openshell-agents/gator/README.md | 7 +- openshell-agents/gator/agent.yaml | 5 + openshell-agents/gator/prompts/gator.md | 4 + openshell-agents/run.sh | 44 ++++++- openshell-agents/runtime/entrypoint.sh | 8 +- openshell-agents/runtime/supervisor.sh | 147 ++++++++++++++++++++++++ 9 files changed, 258 insertions(+), 16 deletions(-) create mode 100755 openshell-agents/runtime/supervisor.sh diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index 595ac24ad..494a88841 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -189,7 +189,15 @@ If no `gator:*` label exists, begin validation. Every gator state is a watch state. On each invocation, determine the current state, inspect the latest issue/PR activity, and either advance to the next state, keep waiting, or post a TTL nudge. -Do not stop after a one-shot check when a PR is in an active waiting state unless the operator explicitly asks for a one-shot status check. Enter a polling loop and state the interval and stop conditions before waiting. +When `OPENSHELL_AGENT_RUN_MODE=watch`, the OpenShell agent supervisor owns the sleep/relaunch loop. 
In that mode, perform exactly one reconciliation cycle, do not run `sleep 900` or an unbounded polling loop inside the harness, and finish with a single final-line result sentinel: + +```text +OPENSHELL_AGENT_RESULT {"status":"waiting","next_poll_seconds":900,"reason":"checks_pending"} +``` + +Use `status=waiting` for routine CI/PR activity waits, `status=blocked` for human or process blockers, `status=complete` for closed/merged/terminal items, `status=terminal_failure` for unrecoverable errors, and `status=transient_failure` only when the supervisor should retry soon. The supervisor will sleep and invoke the harness again with fresh GitHub state. + +When not running under supervised watch mode, do not stop after a one-shot check when a PR is in an active waiting state unless the operator explicitly asks for a one-shot status check. Enter a polling loop and state the interval and stop conditions before waiting. Default live-watch cadence: diff --git a/architecture/build.md b/architecture/build.md index 059a40459..3a0c051f1 100644 --- a/architecture/build.md +++ b/architecture/build.md @@ -24,7 +24,11 @@ agent launchers use manifest files under `openshell-agents//` to describe agent intent, provider profile IDs, prompt templates, skills, subagents, and harness defaults. Agent directories do not own harness implementations. The shared runtime under `openshell-agents/runtime/` provides the sandbox entrypoint, -harness install helpers, and harness-specific execution adapters. +harness install helpers, an in-sandbox supervisor, and harness-specific execution +adapters. The supervisor supports one-shot execution and long-lived watch mode: +watch mode keeps the sandbox alive but runs harnesses as bounded child cycles, +sleeping between cycles without holding a model transport connection open. Agent +durable state remains domain-specific rather than stored in the sandbox runtime. 
 ## Linux Runtime Environments diff --git a/openshell-agents/README.md b/openshell-agents/README.md index 1ac0ab578..6b56cd53a 100644 --- a/openshell-agents/README.md +++ b/openshell-agents/README.md @@ -11,7 +11,8 @@ and execution live in `runtime/harnesses/<harness>/`. openshell-agents/ run.sh # Generic manifest-driven launcher runtime/ # Shared in-sandbox runtime - entrypoint.sh # Dispatches to the selected harness adapter + entrypoint.sh # Starts the in-sandbox supervisor + supervisor.sh # Runs bounded harness cycles in once/watch mode subagent.sh # Generic subagent dispatcher harnesses/ codex/ # Codex install and execution adapter @@ -37,6 +38,8 @@ sections: and background log directory. - `harness`: default harness and per-harness settings such as model and reasoning effort. +- `runtime`: in-sandbox run mode (`once` or `watch`), watch poll interval, and + transient failure retry limit. - `profile_paths`: ordered directories to scan for provider profile YAML files. - `settings`: gateway settings to apply before launch. - `providers`: provider instances to create or update, credential sources, and @@ -67,7 +70,8 @@ Manifest paths support these prefixes: `--codex-bin` is supplied. 7. Copy manifest-declared skills and subagents into the payload. 8. Render the prompt template with runtime values such as `{{HARNESS}}`, - `{{SUBAGENT_COMMAND}}`, and `{{USER_PROMPT}}`. + `{{RUN_MODE}}`, `{{POLL_INTERVAL_SECONDS}}`, `{{SUBAGENT_COMMAND}}`, and + `{{USER_PROMPT}}`. 9. Apply manifest-declared gateway settings. 10. Resolve provider profile IDs by scanning `profile_paths` in order. 11. Import each provider profile into the gateway. If an active profile already @@ -81,11 +85,40 @@ Manifest paths support these prefixes: 15. Run `openshell sandbox create` with the rendered payload uploaded to `/sandbox`. 16. Inside the sandbox, run `/sandbox/payload/runtime/entrypoint.sh`. -17. The runtime entrypoint dispatches to - `/sandbox/payload/runtime/harnesses/<harness>/exec.sh`. -18. 
Harness adapters prepare harness-local auth/config and execute the agent +17. The runtime entrypoint starts `/sandbox/payload/runtime/supervisor.sh`. +18. The supervisor invokes `/sandbox/payload/runtime/harnesses/<harness>/exec.sh` + as a bounded child execution. +19. Harness adapters prepare harness-local auth/config and execute the agent prompt headlessly. +## Runtime Modes + +Agents can run in `once` or `watch` mode. In `once` mode the supervisor runs one +harness cycle and exits with the harness result unless the agent emits an +`OPENSHELL_AGENT_RESULT` sentinel. + +In `watch` mode the sandbox stays alive while the supervisor repeatedly runs +bounded harness cycles. The harness must not sleep or poll indefinitely. Instead, +it performs one reconciliation cycle, then prints a final-line sentinel: + +```text +OPENSHELL_AGENT_RESULT {"status":"waiting","next_poll_seconds":900,"reason":"checks_pending"} +``` + +Supported statuses are `complete`, `waiting`, `blocked`, `transient_failure`, and +`terminal_failure`. The supervisor sleeps between `waiting` or `blocked` cycles +without keeping the harness connected, then launches a fresh harness cycle inside +the same sandbox. This keeps long-lived agents resilient to harness transport +disconnects while leaving durable state ownership to the agent domain. + +The shared runtime does not prescribe the durable state store. Gator uses GitHub +labels, comments, reviews, and checks. Other agents can use a repository branch, +issue tracker, object store, database, or another domain-specific store as long +as each cycle can reconcile from that state. + +Use `--once` or `--watch` to override the manifest default. Use +`--poll-interval <seconds>` to override the watch sleep interval. + ## Subagents The launcher injects subagent definitions under `/sandbox/payload/subagents/`. 
diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md index 37a2d7bd0..7098ba33b 100644 --- a/openshell-agents/gator/README.md +++ b/openshell-agents/gator/README.md @@ -25,7 +25,7 @@ Use `--harness codex` to select Codex explicitly. Other harness names are reject Use `--codex-bin "$(command -v codex)"` only when the host executable is compatible with the sandbox OS and architecture. -The manifest-driven launcher at `openshell-agents/run.sh` reads `agent.yaml`, which defines the agent prompt template, provider profile IDs, provider credential sources, gateway settings, skills, subagents, sandbox defaults, and harness defaults. The shared sandbox entrypoint at `openshell-agents/runtime/entrypoint.sh` dispatches to the selected harness adapter. +The manifest-driven launcher at `openshell-agents/run.sh` reads `agent.yaml`, which defines the agent prompt template, provider profile IDs, provider credential sources, gateway settings, skills, subagents, sandbox defaults, runtime mode, and harness defaults. The shared sandbox entrypoint at `openshell-agents/runtime/entrypoint.sh` starts the in-sandbox supervisor, which invokes the selected harness adapter for bounded cycles. The launcher: @@ -40,8 +40,11 @@ The launcher: - Uploads `.claude/agents/principal-engineer-reviewer.md` so the selected harness can run a deterministic independent reviewer execution through `/sandbox/payload/runtime/subagent.sh principal-engineer-reviewer < task.md`. - For `--harness codex`, optionally uploads a host Codex executable as `/sandbox/payload/runtime/harnesses/codex/codex`. - Starts the selected harness without a TTY. -- Deletes the sandbox automatically after the harness exits. Pass `--keep` to preserve it for debugging. +- Runs gator in `watch` mode by default. The sandbox stays alive while the supervisor sleeps between bounded Codex cycles, so Codex is not connected during passive PR waits. +- Deletes the sandbox automatically after the supervisor exits. 
Pass `--keep` to preserve it for debugging. The GitHub provider profile allows read-only GraphQL queries on `api.github.com/graphql` so `gh` read paths can use GraphQL when needed. Write operations remain REST-only and scoped to the two allowed repositories. Set `GATOR_CODEX_ACCESS_CREDENTIAL_KEY` or pass `--codex-access-key` if the gator Codex profile uses a credential key other than `CODEX_AUTH_ACCESS_TOKEN` for the short-lived access token. + +Use `--once` for a single reconciliation cycle. Use `--poll-interval <seconds>` to change the default 15-minute watch cadence. diff --git a/openshell-agents/gator/agent.yaml b/openshell-agents/gator/agent.yaml index 43c4fd6a2..e5e4bec44 100644 --- a/openshell-agents/gator/agent.yaml +++ b/openshell-agents/gator/agent.yaml @@ -18,6 +18,11 @@ harness: model: gpt-5.5 reasoning: high +runtime: + mode: watch + poll_interval_seconds: 900 + max_transient_failures: 5 + profile_paths: - providers diff --git a/openshell-agents/gator/prompts/gator.md b/openshell-agents/gator/prompts/gator.md index ed1d741bc..14f1f7afa 100644 --- a/openshell-agents/gator/prompts/gator.md +++ b/openshell-agents/gator/prompts/gator.md @@ -1,6 +1,7 @@ You are running inside an OpenShell sandbox as the gator gate agent. Active harness: {{HARNESS}}. +Runtime mode: {{RUN_MODE}}. Load and follow this skill exactly: @@ -11,6 +12,9 @@ Important sandbox constraints: - GitHub REST write access is scoped to NVIDIA/OpenShell and NVIDIA/OpenShell-Community. - GitHub GraphQL access is read-only. Prefer REST endpoints for write actions and use GraphQL-backed `gh` reads when useful. - Keep watching active PRs until they close, merge, or the operator stops the sandbox. +- In `watch` runtime mode, do not run passive sleep or polling loops inside Codex. Perform one bounded reconciliation cycle, then print one `OPENSHELL_AGENT_RESULT` line as the final line of output and stop. The in-sandbox supervisor will sleep and relaunch the harness for the next cycle. 
+- In `watch` runtime mode, when the next action is to keep waiting, use this exact final-line format with a reason and poll interval: `OPENSHELL_AGENT_RESULT {"status":"waiting","next_poll_seconds":{{POLL_INTERVAL_SECONDS}},"reason":"checks_pending"}`. Use `blocked` when waiting on a human/process blocker, `complete` when the issue or PR reached a terminal state, `terminal_failure` for unrecoverable errors, and `transient_failure` only when the supervisor should retry soon. +- In `once` runtime mode, run one bounded cycle unless the operator explicitly asks you to watch inline. Still print `OPENSHELL_AGENT_RESULT {"status":"complete","reason":"one_shot_complete"}` when finished. - Do not push to contributor branches unless the operator explicitly instructs you to do so. - If you receive 403 errors from the sandbox proxy, inspect the JSON response and propose a policy update to allow the requested action if the response contains a structured error message. - When the gator skill requires the `principal-engineer-reviewer` sub-agent, run a bounded independent review with `{{SUBAGENT_COMMAND}}`. Include PR metadata and full diff/file context in `task.md`, save the output, and use it as the independent reviewer result while the main gator process continues labels, comments, docs, and CI gating. 
diff --git a/openshell-agents/run.sh b/openshell-agents/run.sh index c9b59331f..dd4ce68cd 100755 --- a/openshell-agents/run.sh +++ b/openshell-agents/run.sh @@ -19,6 +19,9 @@ CODEX_PROVIDER_OVERRIDE="${GATOR_CODEX_PROVIDER:-}" CODEX_PROVIDER_PROFILE_OVERRIDE="${GATOR_CODEX_PROVIDER_PROFILE:-}" CODEX_ACCESS_KEY_OVERRIDE="${GATOR_CODEX_ACCESS_CREDENTIAL_KEY:-}" CODEX_LOCAL_BIN="${GATOR_CODEX_LOCAL_BIN:-}" +RUN_MODE_OVERRIDE="${OPENSHELL_AGENT_RUN_MODE:-}" +POLL_INTERVAL_OVERRIDE="${OPENSHELL_AGENT_POLL_INTERVAL_SECONDS:-}" +MAX_TRANSIENT_FAILURES_OVERRIDE="${OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES:-}" BACKGROUND=0 KEEP_SANDBOX=0 @@ -36,6 +39,9 @@ Options: --codex-provider NAME Override the codex-gator provider instance name --codex-access-key KEY Override the Codex access-token credential key --codex-bin PATH Upload this Codex executable into the sandbox + --once Run one bounded agent cycle + --watch Keep the sandbox alive and re-run bounded cycles + --poll-interval SECONDS Sleep duration between watch cycles --background Run sandbox create in the background and write a log --keep Keep the sandbox after the harness exits -h, --help Show this help @@ -98,6 +104,19 @@ while [[ $# -gt 0 ]]; do CODEX_LOCAL_BIN="$2" shift 2 ;; + --once) + RUN_MODE_OVERRIDE="once" + shift + ;; + --watch) + RUN_MODE_OVERRIDE="watch" + shift + ;; + --poll-interval) + [[ $# -ge 2 ]] || fail "--poll-interval requires a value" + POLL_INTERVAL_OVERRIDE="$2" + shift 2 + ;; --background) BACKGROUND=1 shift @@ -181,6 +200,11 @@ emit "BACKGROUND_LOG_DIR", manifest.dig("sandbox", "background_log_dir") || "log emit "PROMPT_TEMPLATE", manifest.fetch("prompt_template") emit_array "PROFILE_PATHS", manifest.fetch("profile_paths", []) +runtime = manifest.fetch("runtime", {}) +emit "RUNTIME_MODE", runtime.fetch("mode", "once") +emit "RUNTIME_POLL_INTERVAL_SECONDS", runtime.fetch("poll_interval_seconds", 900) +emit "RUNTIME_MAX_TRANSIENT_FAILURES", runtime.fetch("max_transient_failures", 5) + settings = 
manifest.fetch("settings", []) emit "SETTING_COUNT", settings.length settings.each_with_index do |setting, index| @@ -433,6 +457,17 @@ configure_provider_refresh() { GATEWAY="${GATEWAY_OVERRIDE:-${GATOR_GATEWAY:-$GATEWAY_DEFAULT}}" SANDBOX_NAME="${SANDBOX_NAME_OVERRIDE:-${GATOR_SANDBOX_NAME:-$SANDBOX_NAME_PREFIX-$(date +%Y%m%d%H%M%S)}}" SANDBOX_FROM="${SANDBOX_FROM_OVERRIDE:-${GATOR_SANDBOX_FROM:-$(resolve_manifest_path "$SANDBOX_FROM_DEFAULT")}}" +RUN_MODE="${RUN_MODE_OVERRIDE:-$RUNTIME_MODE}" +POLL_INTERVAL_SECONDS="${POLL_INTERVAL_OVERRIDE:-$RUNTIME_POLL_INTERVAL_SECONDS}" +MAX_TRANSIENT_FAILURES="${MAX_TRANSIENT_FAILURES_OVERRIDE:-$RUNTIME_MAX_TRANSIENT_FAILURES}" + +case "$RUN_MODE" in + once|watch) ;; + *) fail "unsupported runtime mode: $RUN_MODE" ;; +esac +[[ "$POLL_INTERVAL_SECONDS" =~ ^[0-9]+$ ]] || fail "--poll-interval must be an integer number of seconds" +[[ "$MAX_TRANSIENT_FAILURES" =~ ^[0-9]+$ ]] || fail "max_transient_failures must be an integer" +[[ "$POLL_INTERVAL_SECONDS" -gt 0 ]] || fail "--poll-interval must be greater than zero" for ((provider_index = 0; provider_index < PROVIDER_COUNT; provider_index++)); do profile_var="PROVIDER_${provider_index}_PROFILE" @@ -482,11 +517,13 @@ done SUBAGENT_COMMAND="bash /sandbox/payload/runtime/subagent.sh principal-engineer-reviewer < task.md" PROMPT_TEMPLATE_PATH="$(resolve_manifest_path "$PROMPT_TEMPLATE")" [[ -f "$PROMPT_TEMPLATE_PATH" ]] || fail "missing prompt template: $PROMPT_TEMPLATE_PATH" -ruby - "$PROMPT_TEMPLATE_PATH" "$PAYLOAD_DIR/agent-prompt.md" "$HARNESS" "$SUBAGENT_COMMAND" "$USER_PROMPT" <<'RUBY' -template_path, output_path, harness, subagent_command, user_prompt = ARGV +ruby - "$PROMPT_TEMPLATE_PATH" "$PAYLOAD_DIR/agent-prompt.md" "$HARNESS" "$SUBAGENT_COMMAND" "$RUN_MODE" "$POLL_INTERVAL_SECONDS" "$USER_PROMPT" <<'RUBY' +template_path, output_path, harness, subagent_command, run_mode, poll_interval_seconds, user_prompt = ARGV values = { "HARNESS" => harness, "SUBAGENT_COMMAND" => 
subagent_command, + "RUN_MODE" => run_mode, + "POLL_INTERVAL_SECONDS" => poll_interval_seconds, "USER_PROMPT" => user_prompt, } template = File.read(template_path) @@ -564,6 +601,9 @@ fi HARNESS_ENV_ARGS=( "OPENSHELL_AGENT_ID=$AGENT_ID" "OPENSHELL_AGENT_HARNESS=$HARNESS" + "OPENSHELL_AGENT_RUN_MODE=$RUN_MODE" + "OPENSHELL_AGENT_POLL_INTERVAL_SECONDS=$POLL_INTERVAL_SECONDS" + "OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES=$MAX_TRANSIENT_FAILURES" ) case "$HARNESS" in diff --git a/openshell-agents/runtime/entrypoint.sh b/openshell-agents/runtime/entrypoint.sh index a9dc8aa3e..c2e5b57ea 100755 --- a/openshell-agents/runtime/entrypoint.sh +++ b/openshell-agents/runtime/entrypoint.sh @@ -12,10 +12,8 @@ require_env() { require_env OPENSHELL_AGENT_HARNESS -PROMPT_FILE="${OPENSHELL_AGENT_PROMPT:-/sandbox/payload/agent-prompt.md}" -ADAPTER="/sandbox/payload/runtime/harnesses/$OPENSHELL_AGENT_HARNESS/exec.sh" +SUPERVISOR="/sandbox/payload/runtime/supervisor.sh" -[[ -f "$PROMPT_FILE" ]] || { echo "missing agent prompt: $PROMPT_FILE" >&2; exit 1; } -[[ -x "$ADAPTER" ]] || { echo "missing harness adapter: $ADAPTER" >&2; exit 1; } +[[ -x "$SUPERVISOR" ]] || { echo "missing agent supervisor: $SUPERVISOR" >&2; exit 1; } -exec bash "$ADAPTER" "$PROMPT_FILE" +exec bash "$SUPERVISOR" diff --git a/openshell-agents/runtime/supervisor.sh b/openshell-agents/runtime/supervisor.sh new file mode 100755 index 000000000..52ddb0b16 --- /dev/null +++ b/openshell-agents/runtime/supervisor.sh @@ -0,0 +1,147 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +require_env() { + local name="$1" + [[ -n "${!name:-}" ]] || { echo "missing required env: $name" >&2; exit 1; } +} + +require_env OPENSHELL_AGENT_HARNESS + +PROMPT_FILE="${OPENSHELL_AGENT_PROMPT:-/sandbox/payload/agent-prompt.md}" +ADAPTER="/sandbox/payload/runtime/harnesses/$OPENSHELL_AGENT_HARNESS/exec.sh" +RUN_MODE="${OPENSHELL_AGENT_RUN_MODE:-once}" +POLL_INTERVAL_SECONDS="${OPENSHELL_AGENT_POLL_INTERVAL_SECONDS:-900}" +MAX_TRANSIENT_FAILURES="${OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES:-5}" + +[[ -f "$PROMPT_FILE" ]] || { echo "missing agent prompt: $PROMPT_FILE" >&2; exit 1; } +[[ -x "$ADAPTER" ]] || { echo "missing harness adapter: $ADAPTER" >&2; exit 1; } + +case "$RUN_MODE" in + once|watch) ;; + *) echo "unsupported agent run mode: $RUN_MODE" >&2; exit 2 ;; +esac +[[ "$POLL_INTERVAL_SECONDS" =~ ^[0-9]+$ ]] || { echo "OPENSHELL_AGENT_POLL_INTERVAL_SECONDS must be an integer" >&2; exit 2; } +[[ "$MAX_TRANSIENT_FAILURES" =~ ^[0-9]+$ ]] || { echo "OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES must be an integer" >&2; exit 2; } +[[ "$POLL_INTERVAL_SECONDS" -gt 0 ]] || { echo "OPENSHELL_AGENT_POLL_INTERVAL_SECONDS must be greater than zero" >&2; exit 2; } + +json_string_field() { + local json="$1" + local key="$2" + printf '%s' "$json" | sed -nE "s/.*\"$key\"[[:space:]]*:[[:space:]]*\"([^\"]*)\".*/\1/p" +} + +json_number_field() { + local json="$1" + local key="$2" + printf '%s' "$json" | sed -nE "s/.*\"$key\"[[:space:]]*:[[:space:]]*([0-9]+).*/\1/p" +} + +classify_transient_failure() { + local output_file="$1" + grep -Eiq 'stream disconnected before completion|failed to connect to websocket|Reconnecting\.\.\.|Broken pipe|Connection to sandbox closed by remote host|peer closed connection without sending TLS close_notify' "$output_file" +} + +run_cycle() { + local output_file="$1" + + set +e + bash "$ADAPTER" "$PROMPT_FILE" 2>&1 | tee "$output_file" + local status=${PIPESTATUS[0]} + set -e + + return "$status" 
+} + +cycle=0 +transient_failures=0 +transient_backoff_seconds=30 + +while true; do + cycle=$((cycle + 1)) + echo "openshell-agent: starting $RUN_MODE cycle $cycle with harness $OPENSHELL_AGENT_HARNESS" >&2 + output_file="$(mktemp /tmp/openshell-agent-cycle.XXXXXX)" + + if run_cycle "$output_file"; then + harness_status=0 + else + harness_status=$? + fi + + result_line="$(grep -E '^OPENSHELL_AGENT_RESULT[[:space:]]+' "$output_file" | tail -n 1 || true)" + result_json="${result_line#OPENSHELL_AGENT_RESULT }" + + if [[ -z "$result_line" ]]; then + if [[ "$RUN_MODE" == "once" ]]; then + rm -f "$output_file" + exit "$harness_status" + fi + if [[ "$harness_status" -ne 0 ]] && classify_transient_failure "$output_file" && [[ "$transient_failures" -lt "$MAX_TRANSIENT_FAILURES" ]]; then + transient_failures=$((transient_failures + 1)) + echo "openshell-agent: transient harness failure $transient_failures/$MAX_TRANSIENT_FAILURES; retrying in ${transient_backoff_seconds}s" >&2 + rm -f "$output_file" + sleep "$transient_backoff_seconds" + transient_backoff_seconds=$((transient_backoff_seconds * 2)) + if [[ "$transient_backoff_seconds" -gt "$POLL_INTERVAL_SECONDS" ]]; then + transient_backoff_seconds="$POLL_INTERVAL_SECONDS" + fi + continue + fi + echo "openshell-agent: watch-mode harness exited without OPENSHELL_AGENT_RESULT" >&2 + rm -f "$output_file" + if [[ "$harness_status" -ne 0 ]]; then + exit "$harness_status" + fi + exit 1 + fi + + status="$(json_string_field "$result_json" status)" + reason="$(json_string_field "$result_json" reason)" + next_poll_seconds="$(json_number_field "$result_json" next_poll_seconds)" + [[ -n "$next_poll_seconds" ]] || next_poll_seconds="$POLL_INTERVAL_SECONDS" + [[ -n "$reason" ]] || reason="unspecified" + + rm -f "$output_file" + + case "$status" in + complete) + echo "openshell-agent: complete ($reason)" >&2 + exit 0 + ;; + waiting|blocked) + if [[ "$RUN_MODE" == "once" ]]; then + echo "openshell-agent: $status ($reason)" >&2 + exit 0 + fi 
+ transient_failures=0 + transient_backoff_seconds=30 + echo "openshell-agent: $status ($reason); sleeping ${next_poll_seconds}s outside harness" >&2 + sleep "$next_poll_seconds" + ;; + transient_failure) + if [[ "$transient_failures" -ge "$MAX_TRANSIENT_FAILURES" ]]; then + echo "openshell-agent: transient failure limit reached ($reason)" >&2 + exit 1 + fi + transient_failures=$((transient_failures + 1)) + echo "openshell-agent: transient failure $transient_failures/$MAX_TRANSIENT_FAILURES ($reason); retrying in ${transient_backoff_seconds}s" >&2 + sleep "$transient_backoff_seconds" + transient_backoff_seconds=$((transient_backoff_seconds * 2)) + if [[ "$transient_backoff_seconds" -gt "$POLL_INTERVAL_SECONDS" ]]; then + transient_backoff_seconds="$POLL_INTERVAL_SECONDS" + fi + ;; + terminal_failure|failed|failure) + echo "openshell-agent: terminal failure ($reason)" >&2 + exit 1 + ;; + *) + echo "openshell-agent: invalid OPENSHELL_AGENT_RESULT status: ${status:-}" >&2 + exit 1 + ;; + esac +done From 3b111f173d1d5c80a2900026987eca5a81ef8b55 Mon Sep 17 00:00:00 2001 From: John Myers Date: Fri, 5 Jun 2026 16:41:25 -0700 Subject: [PATCH 12/20] fix(agents): preserve gateway refresh state --- openshell-agents/README.md | 8 ++++++++ openshell-agents/gator/README.md | 2 ++ openshell-agents/run.sh | 29 ++++++++++++++++++++++++++--- 3 files changed, 36 insertions(+), 3 deletions(-) diff --git a/openshell-agents/README.md b/openshell-agents/README.md index 6b56cd53a..47359332c 100644 --- a/openshell-agents/README.md +++ b/openshell-agents/README.md @@ -119,6 +119,14 @@ as each cycle can reconcile from that state. Use `--once` or `--watch` to override the manifest default. Use `--poll-interval ` to override the watch sleep interval. +Refresh-backed providers are bootstrapped from manifest credential sources when +no gateway refresh state exists. Later launches preserve gateway-owned refresh +material and request a credential rotation first. 
If that rotation fails, the +launcher treats the host credential source as a repair source, replaces the +gateway refresh material, and retries rotation once. Use `--reset-refresh` to +skip the preserve-first path and intentionally replace gateway refresh material +from the host credential source before rotating. + ## Subagents The launcher injects subagent definitions under `/sandbox/payload/subagents/`. diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md index 7098ba33b..ae15c57d6 100644 --- a/openshell-agents/gator/README.md +++ b/openshell-agents/gator/README.md @@ -48,3 +48,5 @@ The GitHub provider profile allows read-only GraphQL queries on `api.github.com/ Set `GATOR_CODEX_ACCESS_CREDENTIAL_KEY` or pass `--codex-access-key` if the gator Codex profile uses a credential key other than `CODEX_AUTH_ACCESS_TOKEN` for the short-lived access token. Use `--once` for a single reconciliation cycle. Use `--poll-interval <seconds>` to change the default 15-minute watch cadence. + +The launcher preserves existing gateway-owned Codex refresh material by default so multiple gator sandboxes do not overwrite each other's refresh-token lineage from host Codex auth. If gateway rotation fails, the launcher automatically resets gateway refresh material from host Codex auth and retries once. After `codex logout && codex login`, you can also pass `--reset-refresh` to force that reset before rotation. 
diff --git a/openshell-agents/run.sh b/openshell-agents/run.sh index dd4ce68cd..83e75701c 100755 --- a/openshell-agents/run.sh +++ b/openshell-agents/run.sh @@ -22,6 +22,7 @@ CODEX_LOCAL_BIN="${GATOR_CODEX_LOCAL_BIN:-}" RUN_MODE_OVERRIDE="${OPENSHELL_AGENT_RUN_MODE:-}" POLL_INTERVAL_OVERRIDE="${OPENSHELL_AGENT_POLL_INTERVAL_SECONDS:-}" MAX_TRANSIENT_FAILURES_OVERRIDE="${OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES:-}" +RESET_REFRESH="${OPENSHELL_AGENT_RESET_REFRESH:-0}" BACKGROUND=0 KEEP_SANDBOX=0 @@ -42,6 +43,7 @@ Options: --once Run one bounded agent cycle --watch Keep the sandbox alive and re-run bounded cycles --poll-interval SECONDS Sleep duration between watch cycles + --reset-refresh Replace gateway-owned refresh material from host auth before rotating --background Run sandbox create in the background and write a log --keep Keep the sandbox after the harness exits -h, --help Show this help @@ -117,6 +119,10 @@ while [[ $# -gt 0 ]]; do POLL_INTERVAL_OVERRIDE="$2" shift 2 ;; + --reset-refresh) + RESET_REFRESH=1 + shift + ;; --background) BACKGROUND=1 shift @@ -449,9 +455,26 @@ configure_provider_refresh() { fi done - openshell_cmd "${args[@]}" >/dev/null - openshell_cmd provider refresh rotate "$provider_name" --credential-key "$credential_key" >/dev/null - echo "Configured gateway refresh for $provider_name/$credential_key." + local status_output + local rotate_output + status_output="$(openshell_cmd provider refresh status "$provider_name" --credential-key "$credential_key" 2>&1 || true)" + if [[ "$RESET_REFRESH" != "1" && "$status_output" != *"No refresh configuration found"* ]]; then + echo "Preserving existing gateway refresh state for $provider_name/$credential_key. Use --reset-refresh to replace it from host auth." + else + openshell_cmd "${args[@]}" >/dev/null + echo "Configured gateway refresh for $provider_name/$credential_key." + fi + if ! 
rotate_output="$(openshell_cmd provider refresh rotate "$provider_name" --credential-key "$credential_key" 2>&1)"; then + if [[ "$RESET_REFRESH" != "1" && "$status_output" != *"No refresh configuration found"* ]]; then + echo "Gateway refresh rotation failed; resetting $provider_name/$credential_key from host auth and retrying once." >&2 + openshell_cmd "${args[@]}" >/dev/null + openshell_cmd provider refresh rotate "$provider_name" --credential-key "$credential_key" >/dev/null + else + printf '%s\n' "$rotate_output" >&2 + return 1 + fi + fi + echo "Rotated gateway refresh credential for $provider_name/$credential_key." } GATEWAY="${GATEWAY_OVERRIDE:-${GATOR_GATEWAY:-$GATEWAY_DEFAULT}}" From 1057af2138ff36983084ef0a768a2166a4ae09ff Mon Sep 17 00:00:00 2001 From: John Myers Date: Fri, 5 Jun 2026 17:36:45 -0700 Subject: [PATCH 13/20] fix(gator): continue human response threads --- .agents/skills/gator-gate/SKILL.md | 37 ++++++++++++++++++++++++++++-- 1 file changed, 35 insertions(+), 2 deletions(-) diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index 494a88841..876e7f8b6 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -51,7 +51,21 @@ All comments posted by this skill must begin with this marker: > **gator-agent** ``` -Use one canonical gator comment per issue or PR when possible. Edit it for state summaries if practical; otherwise post a new marked comment for material transitions. +Use one canonical gator comment per issue or PR for baseline state summaries when possible. Edit it only for housekeeping updates that do not respond to new human activity. + +When gator is continuing a conversation after a human comment, review, or requested change, post a new marked comment. Do not edit an older comment for these conversational turns, because that hides the progression from PR readers. 
+ +## Human Comment Disposition + +Every substantive human comment or review after a gator request must be addressed in the next gator action. Do not silently keep the same state when an author, maintainer, or reviewer responds. + +When a human response claims that requested changes were made, re-check the latest head and publicly disposition the response in a new marked comment: + +- If the response resolves the feedback, say it is resolved and move to the next state. +- If the response does not resolve the feedback, explicitly acknowledge the response and list what remains unresolved. +- If the response is ambiguous, ask the minimal clarifying question and keep the appropriate waiting state. + +The disposition must mention the relevant human response by author or timestamp when useful, include the current head SHA for PRs, and explain the next expected action. Do not edit the canonical gator comment for this disposition; continue the thread with a new comment so PR readers can see that new activity occurred after the human response. ## Labels @@ -425,7 +439,7 @@ For validated PRs with direct user-facing UX changes, require Fern docs updates If no blocking findings remain, decide whether E2E labels are needed, then move to `gator:watch-pipeline`. -When resuming a PR already in `gator:in-review`, check whether gator review findings or maintainer review comments are still unanswered. If the PR author has pushed commits or replied after the latest feedback, re-review only the relevant changes and decide whether the feedback is resolved. +When resuming a PR already in `gator:in-review`, check whether gator review findings or maintainer review comments are still unanswered. If the PR author has pushed commits or replied after the latest feedback, re-review only the relevant changes, decide whether the feedback is resolved, and publicly disposition the author response as described in Human Comment Disposition. 
If review feedback is waiting on the PR author for more than 48 business hours, post a single author nudge. Use the latest of these timestamps as the TTL start: @@ -576,6 +590,25 @@ Docs: ` ``` +### Human Response Disposition + +Post this as a new comment after a substantive author, maintainer, or reviewer response. Do not edit an older gator comment for this case. + +```markdown +> **gator-agent** + +## Re-check After Update + +I re-evaluated latest head `<head-sha>` after <author>'s comment: "<short quote or summary>". + +Disposition: <resolved | not resolved | needs clarification>. + +Remaining items: +- <unresolved item, or "None"> + +Next state: `<gator:* label>` +``` + ### Approval Needed ```markdown From 8b835358ff4c1dc772ce68a6d1d96794da1966cc Mon Sep 17 00:00:00 2001 From: John Myers Date: Sun, 7 Jun 2026 08:55:00 -0700 Subject: [PATCH 14/20] fix(agents): keep watch supervisor retrying --- architecture/build.md | 6 +- openshell-agents/README.md | 9 +- openshell-agents/runtime/entrypoint.sh | 3 +- openshell-agents/runtime/supervisor.sh | 128 ++++++++++---- openshell-agents/runtime/supervisor_test.sh | 183 ++++++++++++++++++++ 5 files changed, 293 insertions(+), 36 deletions(-) create mode 100755 openshell-agents/runtime/supervisor_test.sh diff --git a/architecture/build.md b/architecture/build.md index 3a0c051f1..deff3c121 100644 --- a/architecture/build.md +++ b/architecture/build.md @@ -27,8 +27,10 @@ shared runtime under `openshell-agents/runtime/` provides the sandbox entrypoint harness install helpers, an in-sandbox supervisor, and harness-specific execution adapters. The supervisor supports one-shot execution and long-lived watch mode: watch mode keeps the sandbox alive but runs harnesses as bounded child cycles, -sleeping between cycles without holding a model transport connection open. 
Watch +mode retries harness transport failures and malformed cycle results with bounded +backoff until the agent reports a terminal state. Agent durable state remains +domain-specific rather than stored in the sandbox runtime. ## Linux Runtime Environments diff --git a/openshell-agents/README.md b/openshell-agents/README.md index 47359332c..22a8894d8 100644 --- a/openshell-agents/README.md +++ b/openshell-agents/README.md @@ -39,7 +39,7 @@ sections: - `harness`: default harness and per-harness settings such as model and reasoning effort. - `runtime`: in-sandbox run mode (`once` or `watch`), watch poll interval, and - transient failure retry limit. + transient failure logging threshold. - `profile_paths`: ordered directories to scan for provider profile YAML files. - `settings`: gateway settings to apply before launch. - `providers`: provider instances to create or update, credential sources, and @@ -108,8 +108,11 @@ OPENSHELL_AGENT_RESULT {"status":"waiting","next_poll_seconds":900,"reason":"che Supported statuses are `complete`, `waiting`, `blocked`, `transient_failure`, and `terminal_failure`. The supervisor sleeps between `waiting` or `blocked` cycles without keeping the harness connected, then launches a fresh harness cycle inside -the same sandbox. This keeps long-lived agents resilient to harness transport -disconnects while leaving durable state ownership to the agent domain. +the same sandbox. In `watch` mode, missing or malformed result sentinels and +harness transport failures are retried indefinitely with bounded backoff; only +`complete` and `terminal_failure` stop the supervisor. This keeps long-lived +agents resilient to upstream model errors while leaving durable state ownership +to the agent domain. The shared runtime does not prescribe the durable state store. Gator uses GitHub labels, comments, reviews, and checks. 
Other agents can use a repository branch, diff --git a/openshell-agents/runtime/entrypoint.sh b/openshell-agents/runtime/entrypoint.sh index c2e5b57ea..643b525e5 100755 --- a/openshell-agents/runtime/entrypoint.sh +++ b/openshell-agents/runtime/entrypoint.sh @@ -12,7 +12,8 @@ require_env() { require_env OPENSHELL_AGENT_HARNESS -SUPERVISOR="/sandbox/payload/runtime/supervisor.sh" +PAYLOAD_DIR="${OPENSHELL_AGENT_PAYLOAD_DIR:-/sandbox/payload}" +SUPERVISOR="$PAYLOAD_DIR/runtime/supervisor.sh" [[ -x "$SUPERVISOR" ]] || { echo "missing agent supervisor: $SUPERVISOR" >&2; exit 1; } diff --git a/openshell-agents/runtime/supervisor.sh b/openshell-agents/runtime/supervisor.sh index 52ddb0b16..de27c8aa6 100755 --- a/openshell-agents/runtime/supervisor.sh +++ b/openshell-agents/runtime/supervisor.sh @@ -12,11 +12,13 @@ require_env() { require_env OPENSHELL_AGENT_HARNESS -PROMPT_FILE="${OPENSHELL_AGENT_PROMPT:-/sandbox/payload/agent-prompt.md}" -ADAPTER="/sandbox/payload/runtime/harnesses/$OPENSHELL_AGENT_HARNESS/exec.sh" +PAYLOAD_DIR="${OPENSHELL_AGENT_PAYLOAD_DIR:-/sandbox/payload}" +PROMPT_FILE="${OPENSHELL_AGENT_PROMPT:-$PAYLOAD_DIR/agent-prompt.md}" +ADAPTER="$PAYLOAD_DIR/runtime/harnesses/$OPENSHELL_AGENT_HARNESS/exec.sh" RUN_MODE="${OPENSHELL_AGENT_RUN_MODE:-once}" POLL_INTERVAL_SECONDS="${OPENSHELL_AGENT_POLL_INTERVAL_SECONDS:-900}" MAX_TRANSIENT_FAILURES="${OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES:-5}" +MAX_SLEEP_SECONDS=86400 [[ -f "$PROMPT_FILE" ]] || { echo "missing agent prompt: $PROMPT_FILE" >&2; exit 1; } [[ -x "$ADAPTER" ]] || { echo "missing harness adapter: $ADAPTER" >&2; exit 1; } @@ -41,11 +43,76 @@ json_number_field() { printf '%s' "$json" | sed -nE "s/.*\"$key\"[[:space:]]*:[[:space:]]*([0-9]+).*/\1/p" } +valid_result_json() { + local json="$1" + + if command -v jq >/dev/null 2>&1; then + printf '%s' "$json" | jq -e 'type == "object"' >/dev/null 2>&1 + return + fi + if command -v python3 >/dev/null 2>&1; then + printf '%s' "$json" | python3 -c ' +import 
json +import sys + +try: + value = json.load(sys.stdin) +except Exception: + sys.exit(1) + +sys.exit(0 if isinstance(value, dict) else 1) +' >/dev/null 2>&1 + return + fi + return 1 +} + classify_transient_failure() { local output_file="$1" grep -Eiq 'stream disconnected before completion|failed to connect to websocket|Reconnecting\.\.\.|Broken pipe|Connection to sandbox closed by remote host|peer closed connection without sending TLS close_notify' "$output_file" } +safe_sleep_seconds() { + local value="$1" + + if [[ ! "$value" =~ ^[0-9]+$ ]] || [[ "$value" -le 0 ]]; then + printf '%s\n' "$POLL_INTERVAL_SECONDS" + return + fi + if [[ "$value" -gt "$MAX_SLEEP_SECONDS" ]]; then + printf '%s\n' "$MAX_SLEEP_SECONDS" + return + fi + printf '%s\n' "$value" +} + +retry_watch_cycle() { + local reason="$1" + transient_failures=$((transient_failures + 1)) + + if [[ "$MAX_TRANSIENT_FAILURES" -gt 0 ]]; then + if [[ $((transient_failures % MAX_TRANSIENT_FAILURES)) -eq 0 ]]; then + echo "openshell-agent: transient watch failure $transient_failures ($reason); still retrying in ${transient_backoff_seconds}s" >&2 + else + echo "openshell-agent: transient watch failure $transient_failures ($reason); retrying in ${transient_backoff_seconds}s" >&2 + fi + else + echo "openshell-agent: transient watch failure $transient_failures ($reason); retrying in ${transient_backoff_seconds}s" >&2 + fi + sleep "$transient_backoff_seconds" + transient_backoff_seconds=$((transient_backoff_seconds * 2)) + cap_transient_backoff +} + +cap_transient_backoff() { + if [[ "$transient_backoff_seconds" -gt "$POLL_INTERVAL_SECONDS" ]]; then + transient_backoff_seconds="$POLL_INTERVAL_SECONDS" + fi + if [[ "$transient_backoff_seconds" -gt "$MAX_SLEEP_SECONDS" ]]; then + transient_backoff_seconds="$MAX_SLEEP_SECONDS" + fi +} + run_cycle() { local output_file="$1" @@ -60,6 +127,7 @@ run_cycle() { cycle=0 transient_failures=0 transient_backoff_seconds=30 +cap_transient_backoff while true; do cycle=$((cycle + 1)) 
@@ -78,31 +146,34 @@ while true; do if [[ -z "$result_line" ]]; then if [[ "$RUN_MODE" == "once" ]]; then rm -f "$output_file" - exit "$harness_status" - fi - if [[ "$harness_status" -ne 0 ]] && classify_transient_failure "$output_file" && [[ "$transient_failures" -lt "$MAX_TRANSIENT_FAILURES" ]]; then - transient_failures=$((transient_failures + 1)) - echo "openshell-agent: transient harness failure $transient_failures/$MAX_TRANSIENT_FAILURES; retrying in ${transient_backoff_seconds}s" >&2 - rm -f "$output_file" - sleep "$transient_backoff_seconds" - transient_backoff_seconds=$((transient_backoff_seconds * 2)) - if [[ "$transient_backoff_seconds" -gt "$POLL_INTERVAL_SECONDS" ]]; then - transient_backoff_seconds="$POLL_INTERVAL_SECONDS" + if [[ "$harness_status" -ne 0 ]]; then + exit "$harness_status" fi - continue + exit 1 + fi + retry_reason="missing OPENSHELL_AGENT_RESULT after harness exit $harness_status" + if classify_transient_failure "$output_file"; then + retry_reason="$retry_reason; upstream transport failure detected" fi - echo "openshell-agent: watch-mode harness exited without OPENSHELL_AGENT_RESULT" >&2 rm -f "$output_file" - if [[ "$harness_status" -ne 0 ]]; then - exit "$harness_status" + retry_watch_cycle "$retry_reason" + continue + fi + + if ! 
valid_result_json "$result_json"; then + rm -f "$output_file" + if [[ "$RUN_MODE" == "once" ]]; then + echo "openshell-agent: malformed OPENSHELL_AGENT_RESULT JSON" >&2 + exit 1 fi - exit 1 + retry_watch_cycle "malformed OPENSHELL_AGENT_RESULT JSON" + continue fi status="$(json_string_field "$result_json" status)" reason="$(json_string_field "$result_json" reason)" next_poll_seconds="$(json_number_field "$result_json" next_poll_seconds)" - [[ -n "$next_poll_seconds" ]] || next_poll_seconds="$POLL_INTERVAL_SECONDS" + next_poll_seconds="$(safe_sleep_seconds "$next_poll_seconds")" [[ -n "$reason" ]] || reason="unspecified" rm -f "$output_file" @@ -123,25 +194,22 @@ while true; do sleep "$next_poll_seconds" ;; transient_failure) - if [[ "$transient_failures" -ge "$MAX_TRANSIENT_FAILURES" ]]; then - echo "openshell-agent: transient failure limit reached ($reason)" >&2 + if [[ "$RUN_MODE" == "once" ]]; then + echo "openshell-agent: transient failure ($reason)" >&2 exit 1 fi - transient_failures=$((transient_failures + 1)) - echo "openshell-agent: transient failure $transient_failures/$MAX_TRANSIENT_FAILURES ($reason); retrying in ${transient_backoff_seconds}s" >&2 - sleep "$transient_backoff_seconds" - transient_backoff_seconds=$((transient_backoff_seconds * 2)) - if [[ "$transient_backoff_seconds" -gt "$POLL_INTERVAL_SECONDS" ]]; then - transient_backoff_seconds="$POLL_INTERVAL_SECONDS" - fi + retry_watch_cycle "$reason" ;; - terminal_failure|failed|failure) + terminal_failure) echo "openshell-agent: terminal failure ($reason)" >&2 exit 1 ;; *) - echo "openshell-agent: invalid OPENSHELL_AGENT_RESULT status: ${status:-}" >&2 - exit 1 + if [[ "$RUN_MODE" == "once" ]]; then + echo "openshell-agent: invalid OPENSHELL_AGENT_RESULT status: ${status:-}" >&2 + exit 1 + fi + retry_watch_cycle "invalid OPENSHELL_AGENT_RESULT status: ${status:-}" ;; esac done diff --git a/openshell-agents/runtime/supervisor_test.sh b/openshell-agents/runtime/supervisor_test.sh new file mode 100755 
index 000000000..236107dfe --- /dev/null +++ b/openshell-agents/runtime/supervisor_test.sh @@ -0,0 +1,183 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +SUPERVISOR_UNDER_TEST="${SUPERVISOR_UNDER_TEST:-$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/supervisor.sh}" + +fail() { + printf 'not ok - %s\n' "$*" >&2 + exit 1 +} + +assert_contains() { + local file="$1" + local expected="$2" + if ! grep -Fq "$expected" "$file"; then + printf 'missing expected text: %s\n' "$expected" >&2 + printf '%s\n' '--- output ---' >&2 + sed -n '1,200p' "$file" >&2 + fail "assert_contains failed" + fi +} + +make_payload() { + local dir="$1" + local adapter_body="$2" + + mkdir -p "$dir/runtime/harnesses/test" + printf 'test prompt\n' > "$dir/agent-prompt.md" + cat > "$dir/runtime/harnesses/test/exec.sh" < "$output_file" 2>&1 + local status=$? + set -e + return "$status" +} + +test_once_requires_sentinel() { + local tmp + tmp="$(mktemp -d)" + make_payload "$tmp/payload" "exit 0" + + if run_supervisor "$tmp/payload" once "$tmp/output"; then + fail "once mode succeeded without sentinel" + fi + printf 'ok - once requires sentinel\n' +} + +test_watch_retries_missing_sentinel_until_complete() { + local tmp + tmp="$(mktemp -d)" + make_payload "$tmp/payload" ' +state_file="${OPENSHELL_AGENT_TEST_STATE:?}" +count=0 +if [[ -f "$state_file" ]]; then + count="$(cat "$state_file")" +fi +count=$((count + 1)) +printf "%s\n" "$count" > "$state_file" +if [[ "$count" -lt 3 ]]; then + printf "%s\n" "ERROR: stream disconnected before completion" >&2 + exit 1 +fi +printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\",\"reason\":\"done\"}" +' + + OPENSHELL_AGENT_TEST_STATE="$tmp/state" run_supervisor "$tmp/payload" watch "$tmp/output" + assert_contains "$tmp/output" "transient watch failure 1" + assert_contains "$tmp/output" "transient watch failure 2" + 
assert_contains "$tmp/output" "openshell-agent: complete (done)" + printf 'ok - watch retries missing sentinel until complete\n' +} + +test_watch_retries_invalid_status_until_complete() { + local tmp + tmp="$(mktemp -d)" + make_payload "$tmp/payload" ' +state_file="${OPENSHELL_AGENT_TEST_STATE:?}" +count=0 +if [[ -f "$state_file" ]]; then + count="$(cat "$state_file")" +fi +count=$((count + 1)) +printf "%s\n" "$count" > "$state_file" +if [[ "$count" -lt 2 ]]; then + printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"nonsense\",\"reason\":\"bad\"}" + exit 0 +fi +printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\",\"reason\":\"done\"}" +' + + OPENSHELL_AGENT_TEST_STATE="$tmp/state" run_supervisor "$tmp/payload" watch "$tmp/output" + assert_contains "$tmp/output" "invalid OPENSHELL_AGENT_RESULT status: nonsense" + assert_contains "$tmp/output" "openshell-agent: complete (done)" + printf 'ok - watch retries invalid status until complete\n' +} + +test_watch_retries_malformed_terminal_json_until_complete() { + local tmp + tmp="$(mktemp -d)" + make_payload "$tmp/payload" ' +state_file="${OPENSHELL_AGENT_TEST_STATE:?}" +count=0 +if [[ -f "$state_file" ]]; then + count="$(cat "$state_file")" +fi +count=$((count + 1)) +printf "%s\n" "$count" > "$state_file" +if [[ "$count" -lt 2 ]]; then + printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\"" + exit 0 +fi +printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\",\"reason\":\"done\"}" +' + + OPENSHELL_AGENT_TEST_STATE="$tmp/state" run_supervisor "$tmp/payload" watch "$tmp/output" + assert_contains "$tmp/output" "malformed OPENSHELL_AGENT_RESULT JSON" + assert_contains "$tmp/output" "openshell-agent: complete (done)" + printf 'ok - watch retries malformed terminal JSON until complete\n' +} + +test_watch_retries_failed_alias_until_complete() { + local tmp + tmp="$(mktemp -d)" + make_payload "$tmp/payload" ' +state_file="${OPENSHELL_AGENT_TEST_STATE:?}" +count=0 +if [[ -f "$state_file" ]]; then + 
count="$(cat "$state_file")" +fi +count=$((count + 1)) +printf "%s\n" "$count" > "$state_file" +if [[ "$count" -lt 2 ]]; then + printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"failed\",\"reason\":\"legacy\"}" + exit 0 +fi +printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\",\"reason\":\"done\"}" +' + + OPENSHELL_AGENT_TEST_STATE="$tmp/state" run_supervisor "$tmp/payload" watch "$tmp/output" + assert_contains "$tmp/output" "invalid OPENSHELL_AGENT_RESULT status: failed" + assert_contains "$tmp/output" "openshell-agent: complete (done)" + printf 'ok - watch retries failed alias until complete\n' +} + +test_watch_terminal_failure_exits() { + local tmp + tmp="$(mktemp -d)" + make_payload "$tmp/payload" 'printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"terminal_failure\",\"reason\":\"fatal\"}"' + + if run_supervisor "$tmp/payload" watch "$tmp/output"; then + fail "watch mode succeeded after terminal failure" + fi + assert_contains "$tmp/output" "openshell-agent: terminal failure (fatal)" + printf 'ok - watch terminal failure exits\n' +} + +test_once_requires_sentinel +test_watch_retries_missing_sentinel_until_complete +test_watch_retries_invalid_status_until_complete +test_watch_retries_malformed_terminal_json_until_complete +test_watch_retries_failed_alias_until_complete +test_watch_terminal_failure_exits From 6846b3becfd4d4490160af3efc669d888a618fca Mon Sep 17 00:00:00 2001 From: John Myers Date: Mon, 8 Jun 2026 12:43:01 -0700 Subject: [PATCH 15/20] fix(agents): use refreshed Codex credential aliases --- architecture/build.md | 5 ++++- openshell-agents/README.md | 5 +++++ openshell-agents/runtime/harnesses/codex/exec.sh | 15 +++++++++++---- 3 files changed, 20 insertions(+), 5 deletions(-) diff --git a/architecture/build.md b/architecture/build.md index deff3c121..4a6ba6349 100644 --- a/architecture/build.md +++ b/architecture/build.md @@ -30,7 +30,10 @@ watch mode keeps the sandbox alive but runs harnesses as bounded child cycles, sleeping between 
cycles without holding a model transport connection open. Watch mode retries harness transport failures and malformed cycle results with bounded backoff until the agent reports a terminal state. Agent durable state remains -domain-specific rather than stored in the sandbox runtime. +domain-specific rather than stored in the sandbox runtime. Harnesses that persist +provider-managed credentials use current-name placeholders such as +`openshell:resolve:env:GITHUB_TOKEN`, not revision-scoped placeholders, so the +sandbox proxy can resolve the latest refreshed credential during long watch runs. ## Linux Runtime Environments diff --git a/openshell-agents/README.md b/openshell-agents/README.md index 22a8894d8..4a1f25dcb 100644 --- a/openshell-agents/README.md +++ b/openshell-agents/README.md @@ -130,6 +130,11 @@ gateway refresh material, and retries rotation once. Use `--reset-refresh` to skip the preserve-first path and intentionally replace gateway refresh material from the host credential source before rotating. +Long-lived harnesses must not persist revision-scoped provider placeholders such +as `openshell:resolve:env:v123_TOKEN` into files they reuse across refreshes. +Persist the current-name alias, for example `openshell:resolve:env:TOKEN`, so the +sandbox proxy resolves the latest gateway-refreshed credential on each request. + ## Subagents The launcher injects subagent definitions under `/sandbox/payload/subagents/`. 
diff --git a/openshell-agents/runtime/harnesses/codex/exec.sh b/openshell-agents/runtime/harnesses/codex/exec.sh index af89dcf96..8143e2658 100755 --- a/openshell-agents/runtime/harnesses/codex/exec.sh +++ b/openshell-agents/runtime/harnesses/codex/exec.sh @@ -28,6 +28,13 @@ node - <<'NODE' const fs = require("fs"); const path = `${process.env.HOME}/.codex/auth.json`; const b64u = (obj) => Buffer.from(JSON.stringify(obj)).toString("base64url"); +const providerPlaceholder = (envName) => { + const value = process.env[envName]; + if (value && value.startsWith("openshell:resolve:env:")) { + return `openshell:resolve:env:${envName}`; + } + return value; +}; const now = Math.floor(Date.now() / 1000); const fallbackIdToken = [ b64u({ alg: "none", typ: "JWT" }), @@ -46,10 +53,10 @@ fs.writeFileSync(path, JSON.stringify({ auth_mode: "chatgpt", OPENAI_API_KEY: null, tokens: { - id_token: process.env.CODEX_AUTH_ID_TOKEN || fallbackIdToken, - access_token: process.env.CODEX_AUTH_ACCESS_TOKEN, - refresh_token: process.env.CODEX_AUTH_REFRESH_TOKEN || "gateway-managed-refresh-token", - account_id: process.env.CODEX_AUTH_ACCOUNT_ID, + id_token: providerPlaceholder("CODEX_AUTH_ID_TOKEN") || fallbackIdToken, + access_token: providerPlaceholder("CODEX_AUTH_ACCESS_TOKEN"), + refresh_token: providerPlaceholder("CODEX_AUTH_REFRESH_TOKEN") || "gateway-managed-refresh-token", + account_id: providerPlaceholder("CODEX_AUTH_ACCOUNT_ID"), }, last_refresh: new Date().toISOString(), }, null, 2)); From 34c571ee36f90e8048e3a7d91ca51749042d38d9 Mon Sep 17 00:00:00 2001 From: John Myers Date: Mon, 8 Jun 2026 22:10:01 -0700 Subject: [PATCH 16/20] fix(gator): avoid misleading gh auth checks --- .agents/skills/gator-gate/SKILL.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index 876e7f8b6..a50012c04 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -15,10 +15,12 @@ 
Codex and other agent harnesses should load this skill from the repository path ## Prerequisites -- The `gh` CLI must be authenticated (`gh auth status`) +- The `gh` CLI must be able to call GitHub APIs (`gh api user --jq '.login'`) - You must be in the OpenShell repository root - GitHub write permissions are required to apply labels, comment, close issues/PRs, or post `/ok to test` +Do not use `gh auth status` as the authentication health check inside provider-backed sandboxes. Scoped provider tokens may be exposed as `openshell:resolve:env:*` placeholders and `gh auth status` probes endpoints outside the gator policy, causing false "token is invalid" reports even when allowed `gh api` and `gh pr` calls succeed. Use `gh api user --jq '.login'` and a repo-scoped probe instead. + ## Authority Rules - Do not push commits to a contributor's PR branch by default. From 10bc74a722778d89641686431d0ee808814c3548 Mon Sep 17 00:00:00 2001 From: John Myers Date: Mon, 8 Jun 2026 22:23:39 -0700 Subject: [PATCH 17/20] docs(agents): remove architecture build update --- architecture/build.md | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/architecture/build.md b/architecture/build.md index 4a6ba6349..200be8b1e 100644 --- a/architecture/build.md +++ b/architecture/build.md @@ -16,24 +16,9 @@ OpenShell builds these main artifacts: | Supervisor container image | `deploy/docker/Dockerfile.supervisor` | | Helm chart | `deploy/helm/openshell` | | VM driver/runtime assets | `crates/openshell-driver-vm` | -| Agent sandbox launchers | `openshell-agents/` manifests, images, and shared runtime adapters | | Published docs site | `docs/` rendered by Fern config in `fern/` | -Sandbox community images are built outside this repository. Repository-owned -agent launchers use manifest files under `openshell-agents//` to describe -agent intent, provider profile IDs, prompt templates, skills, subagents, and -harness defaults. 
Agent directories do not own harness implementations. The -shared runtime under `openshell-agents/runtime/` provides the sandbox entrypoint, -harness install helpers, an in-sandbox supervisor, and harness-specific execution -adapters. The supervisor supports one-shot execution and long-lived watch mode: -watch mode keeps the sandbox alive but runs harnesses as bounded child cycles, -sleeping between cycles without holding a model transport connection open. Watch -mode retries harness transport failures and malformed cycle results with bounded -backoff until the agent reports a terminal state. Agent durable state remains -domain-specific rather than stored in the sandbox runtime. Harnesses that persist -provider-managed credentials use current-name placeholders such as -`openshell:resolve:env:GITHUB_TOKEN`, not revision-scoped placeholders, so the -sandbox proxy can resolve the latest refreshed credential during long watch runs. +Sandbox community images are built outside this repository. ## Linux Runtime Environments From 87b2a104cdd496259458ffb0f469190b3b755b1c Mon Sep 17 00:00:00 2001 From: John Myers Date: Mon, 8 Jun 2026 23:05:43 -0700 Subject: [PATCH 18/20] fix(gator): use REST-backed GitHub writes --- .agents/skills/gator-gate/SKILL.md | 25 +++++++++++++------ .../gator/providers/github-gator.yaml | 2 ++ 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md index a50012c04..d9482b4ff 100644 --- a/.agents/skills/gator-gate/SKILL.md +++ b/.agents/skills/gator-gate/SKILL.md @@ -21,6 +21,15 @@ Codex and other agent harnesses should load this skill from the repository path Do not use `gh auth status` as the authentication health check inside provider-backed sandboxes. 
Scoped provider tokens may be exposed as `openshell:resolve:env:*` placeholders and `gh auth status` probes endpoints outside the gator policy, causing false "token is invalid" reports even when allowed `gh api` and `gh pr` calls succeed. Use `gh api user --jq '.login'` and a repo-scoped probe instead. +Use REST-backed `gh api` for GitHub write actions inside gator sandboxes. Do not rely on `gh issue edit`, `gh pr edit`, or other high-level write commands when a REST path is available, because some of them use GraphQL mutations and gator policy allows GraphQL reads only. Do not fall back to `curl` for credentialed GitHub writes unless the active provider policy explicitly allows the `curl` binary for the same scoped endpoint. Preferred write shapes: + +```bash +jq -Rs '{body:.}' comment.md > /tmp/comment.json +gh api --method POST repos/NVIDIA/OpenShell/issues/<number>/comments --input /tmp/comment.json --jq .html_url +gh api --method POST repos/NVIDIA/OpenShell/issues/<number>/labels -f labels[]="gator:<state>" +gh api --method DELETE repos/NVIDIA/OpenShell/issues/<number>/labels/gator%3Ablocked --silent || true +``` + ## Authority Rules - Do not push commits to a contributor's PR branch by default. @@ -96,11 +105,13 @@ gh label create "gator:approval-needed" --description "Gator completed review; m When changing state, remove all existing `gator:*` labels first, then add the new one.
```bash -gh issue edit <number> --remove-label "gator:follow-up-needed" --remove-label "gator:blocked" --remove-label "gator:validated" --remove-label "gator:in-review" --remove-label "gator:watch-pipeline" --remove-label "gator:approval-needed" -gh issue edit <number> --add-label "gator:<state>" +for label in gator%3Afollow-up-needed gator%3Ablocked gator%3Avalidated gator%3Ain-review gator%3Awatch-pipeline gator%3Aapproval-needed; do + gh api --method DELETE repos/NVIDIA/OpenShell/issues/<number>/labels/$label --silent || true +done +gh api --method POST repos/NVIDIA/OpenShell/issues/<number>/labels -f labels[]="gator:<state>" ``` -Pull requests are also GitHub issues for label operations, so `gh issue edit <number>` is valid for PR labels. +Pull requests are also GitHub issues for label operations, so the REST issue label endpoints are valid for PR labels. ## Invocation Modes @@ -217,14 +228,14 @@ When not running under supervised watch mode, do not stop after a one-shot check Default live-watch cadence: -- Poll every 15 minutes for PRs in active states: `gator:in-review`, `gator:watch-pipeline`, `gator:approval-needed`, and `gator:blocked`. -- Watch PRs indefinitely across gator state transitions until they close, merge, or the operator stops the session. -- Poll every 60 minutes for issue-only `gator:follow-up-needed` or issue-only `gator:blocked` states until they progress, close, or reach a TTL threshold. +- For supervised watch mode, set `next_poll_seconds` to 900 for PRs in active states: `gator:in-review`, `gator:watch-pipeline`, `gator:approval-needed`, and `gator:blocked`. +- Watch PRs indefinitely across gator state transitions until they close, merge, or the operator stops the session. In supervised watch mode this means return a `waiting` or `blocked` result sentinel and let the supervisor sleep outside the model session.
+- For supervised watch mode, set `next_poll_seconds` to 3600 for issue-only `gator:follow-up-needed` or issue-only `gator:blocked` states until they progress, close, or reach a TTL threshold. - Stop immediately for issue-only `gator:validated` items that have no associated PR. - Do not stop PR monitoring just because the gator state changes, a human comments, or new commits arrive. Treat those as triggers to re-evaluate and continue from the new state. - Stop PR monitoring only when the PR closes, merges, the operator stops the session, or an unrecoverable process blocker prevents further agent action. -Use a concise loop summary before waiting, for example: "Watching PR #123 every 15 minutes until it closes, merges, or the session is stopped; comments, commits, and gator state changes will trigger re-evaluation and continued monitoring." +Use a concise cycle summary before returning the result sentinel, for example: "No action needed for PR #123; supervisor should recheck in 15 minutes until it closes, merges, or the session is stopped." Use 48 business hours as the default inactivity threshold for states that are waiting on a person. Business hours are Monday through Friday; do not count Saturday or Sunday. 
diff --git a/openshell-agents/gator/providers/github-gator.yaml b/openshell-agents/gator/providers/github-gator.yaml index d74e48025..654254971 100644 --- a/openshell-agents/gator/providers/github-gator.yaml +++ b/openshell-agents/gator/providers/github-gator.yaml @@ -96,6 +96,8 @@ endpoints: binaries: - /usr/bin/gh - /usr/local/bin/gh + - /usr/bin/curl + - /usr/local/bin/curl - /usr/bin/git - /usr/local/bin/git - /usr/bin/codex From c479d52cfb88e3ddf6edf9f9c625eaa9e5d902b6 Mon Sep 17 00:00:00 2001 From: John Myers Date: Tue, 9 Jun 2026 08:09:14 -0700 Subject: [PATCH 19/20] fix(agents): bake immutable agent payloads --- openshell-agents/README.md | 51 ++++++++----- openshell-agents/gator/README.md | 10 +-- openshell-agents/gator/prompts/gator.md | 2 +- openshell-agents/run.sh | 75 ++++++++++++++++++- openshell-agents/runtime/entrypoint.sh | 3 +- .../runtime/harnesses/codex/exec.sh | 6 +- .../runtime/harnesses/codex/subagent.sh | 8 +- openshell-agents/runtime/subagent.sh | 6 +- openshell-agents/runtime/supervisor.sh | 5 +- openshell-agents/runtime/supervisor_test.sh | 6 +- 10 files changed, 129 insertions(+), 43 deletions(-) diff --git a/openshell-agents/README.md b/openshell-agents/README.md index 4a1f25dcb..4f747bec3 100644 --- a/openshell-agents/README.md +++ b/openshell-agents/README.md @@ -46,7 +46,8 @@ sections: optional refresh configuration. - `skills`: files to inject into the sandbox payload. - `subagents`: subagent definitions to inject into the sandbox payload. -- `prompt_template`: prompt template rendered into `/sandbox/payload/agent-prompt.md`. +- `prompt_template`: prompt template rendered into the immutable agent payload as + `agent-prompt.md`. Manifest paths support these prefixes: @@ -72,25 +73,34 @@ Manifest paths support these prefixes: 8. Render the prompt template with runtime values such as `{{HARNESS}}`, `{{RUN_MODE}}`, `{{POLL_INTERVAL_SECONDS}}`, `{{SUBAGENT_COMMAND}}`, and `{{USER_PROMPT}}`. -9. Apply manifest-declared gateway settings. 
-10. Resolve provider profile IDs by scanning `profile_paths` in order. -11. Import each provider profile into the gateway. If an active profile already - exists, the launcher keeps going and uses it. -12. Resolve provider credentials from host commands, JSON files, or literal - manifest values. -13. Create or update each provider instance and attach every selected provider - to the sandbox. -14. Configure and rotate refresh-backed provider credentials when declared by - the manifest. -15. Run `openshell sandbox create` with the rendered payload uploaded to - `/sandbox`. -16. Inside the sandbox, run `/sandbox/payload/runtime/entrypoint.sh`. -17. The runtime entrypoint starts `/sandbox/payload/runtime/supervisor.sh`. -18. The supervisor invokes `/sandbox/payload/runtime/harnesses/<harness>/exec.sh` - as a bounded child execution. -19. Harness adapters prepare harness-local auth/config and execute the agent +9. Build a temporary Docker context that bakes the rendered payload into + `/etc/openshell/agent-payload`. +10. Apply manifest-declared gateway settings. +11. Resolve provider profile IDs by scanning `profile_paths` in order. +12. Import each provider profile into the gateway. If an active profile already + exists, the launcher keeps going and uses it. +13. Resolve provider credentials from host commands, JSON files, or literal + manifest values. +14. Create or update each provider instance and attach every selected provider + to the sandbox. +15. Configure and rotate refresh-backed provider credentials when declared by + the manifest. +16. Run `openshell sandbox create` from that temporary Dockerfile source. +17. Inside the sandbox, run `/etc/openshell/agent-payload/runtime/entrypoint.sh`. +18. The runtime entrypoint starts + `/etc/openshell/agent-payload/runtime/supervisor.sh`. +19. The supervisor invokes + `/etc/openshell/agent-payload/runtime/harnesses/<harness>/exec.sh` as a + bounded child execution. +20.
Harness adapters prepare harness-local auth/config and execute the agent prompt headlessly. +The payload directory is baked into the image under `/etc/openshell`, which the +gator filesystem policy mounts read-only for agent processes. Prompts, skills, +subagent definitions, and runtime scripts are agent guts, not workspace state. +Agents should write session artifacts, checkouts, temporary files, and future +memory records under `/sandbox` or `/tmp` instead. + ## Runtime Modes Agents can run in `once` or `watch` mode. In `once` mode the supervisor runs one @@ -137,12 +147,13 @@ sandbox proxy resolves the latest gateway-refreshed credential on each request. ## Subagents -The launcher injects subagent definitions under `/sandbox/payload/subagents/`. +The launcher injects subagent definitions under +`/etc/openshell/agent-payload/subagents/`. Prompt templates should refer to the generic command instead of a harness-specific script: ```shell -bash /sandbox/payload/runtime/subagent.sh < task.md +bash /etc/openshell/agent-payload/runtime/subagent.sh < task.md ``` The shared subagent dispatcher forwards the task to the active harness adapter. diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md index ae15c57d6..9b3718c68 100644 --- a/openshell-agents/gator/README.md +++ b/openshell-agents/gator/README.md @@ -19,7 +19,7 @@ Launch a headless sandbox agent that runs the `gator-gate` skill against OpenShe "Run gator on PR 1536 and keep watching until it closes or merges." ``` -By default the launcher uses `openshell-agents/Dockerfile.gator` as the sandbox source. Local gateways build that Dockerfile with `openshell-agents/` as the build context, which lets the image use shared harness install scripts from `runtime/` and gator-specific policy from `gator/policy.yaml`. Use `--from ` to run a prebuilt image on remote gateways. +By default the launcher uses `openshell-agents/Dockerfile.gator` as the sandbox source. 
Local gateways build that Dockerfile with `openshell-agents/` as the build context, which lets the image use shared harness install scripts from `runtime/` and gator-specific policy from `gator/policy.yaml`. The launcher bakes rendered prompts, skills, subagents, and runtime files into `/etc/openshell/agent-payload`, so `--from` must point to a local Dockerfile or directory containing a Dockerfile. Use `--harness codex` to select Codex explicitly. Other harness names are rejected until their support is added to `agent.yaml` and `openshell-agents/runtime/harnesses//`. Agent directories do not carry their own harness implementations; they provide prompt templates and optional skills or subagents for the shared runtime to inject. @@ -31,14 +31,14 @@ The launcher: - Scans `profile_paths` in manifest order and imports `providers/github-gator.yaml`. - Creates or updates the `github-gator` provider from `gh auth token`. -- Selects the requested harness and uploads the common runtime into the sandbox payload. +- Selects the requested harness and bakes the common runtime into the immutable sandbox payload. - For `--harness codex`, imports `providers/codex-gator.yaml`, creates or updates the `codex-gator` provider from `$HOME/.codex/auth.json`, and stores the refresh token as gateway-only refresh material. - For `--harness codex`, configures gateway-managed refresh for `CODEX_AUTH_ACCESS_TOKEN` and rotates it before launching the sandbox. - Enables `providers_v2_enabled`, `agent_policy_proposals_enabled`, and `proposal_approval_mode=auto` at gateway scope. - Uses the gator image policy copied to `/etc/openshell/policy.yaml`. -- Uploads the current `.agents/skills/gator-gate/SKILL.md` into the sandbox payload. -- Uploads `.claude/agents/principal-engineer-reviewer.md` so the selected harness can run a deterministic independent reviewer execution through `/sandbox/payload/runtime/subagent.sh principal-engineer-reviewer < task.md`. 
-- For `--harness codex`, optionally uploads a host Codex executable as `/sandbox/payload/runtime/harnesses/codex/codex`. +- Bakes the current `.agents/skills/gator-gate/SKILL.md` into `/etc/openshell/agent-payload`. +- Bakes `.claude/agents/principal-engineer-reviewer.md` so the selected harness can run a deterministic independent reviewer execution through `/etc/openshell/agent-payload/runtime/subagent.sh principal-engineer-reviewer < task.md`. +- For `--harness codex`, optionally bakes a host Codex executable as `/etc/openshell/agent-payload/runtime/harnesses/codex/codex`. - Starts the selected harness without a TTY. - Runs gator in `watch` mode by default. The sandbox stays alive while the supervisor sleeps between bounded Codex cycles, so Codex is not connected during passive PR waits. - Deletes the sandbox automatically after the supervisor exits. Pass `--keep` to preserve it for debugging. diff --git a/openshell-agents/gator/prompts/gator.md b/openshell-agents/gator/prompts/gator.md index 14f1f7afa..4e1a2cddd 100644 --- a/openshell-agents/gator/prompts/gator.md +++ b/openshell-agents/gator/prompts/gator.md @@ -5,7 +5,7 @@ Runtime mode: {{RUN_MODE}}. 
#######################################
# Stage a private Docker build context that bakes the agent payload into the
# sandbox image as read-only files, then repoint SANDBOX_FROM at the staged
# Dockerfile.
#
# The caller's build context is copied (minus log directories) into
# "$PAYLOAD_PARENT/build-context", the payload is copied next to it as
# "openshell-agent-payload/", the staged .dockerignore is extended so the
# payload is never ignored, and the staged Dockerfile gets a trailing
# USER root / COPY / chmod sequence followed by the final stage's original
# USER instruction (if it had one).
#
# Globals:
#   PAYLOAD_PARENT    (read)    scratch directory that receives build-context/
#   PAYLOAD_DIR       (read)    staged agent payload to embed in the image
#   PAYLOAD_IMAGE_DIR (read)    destination path of the payload in the image
#   SANDBOX_FROM      (written) set to the staged Dockerfile path
# Arguments:
#   $1 - a Dockerfile path (file name must contain "dockerfile", any case) or
#        a directory that contains a file named Dockerfile
# Outputs:
#   Diagnostics via fail() when the source cannot be used or staged.
# Returns:
#   0 on success; otherwise fail() is invoked (expected not to return).
#######################################
prepare_immutable_sandbox_source() {
  local from_source="$1"
  local dockerfile
  local context

  if [[ -f "$from_source" ]]; then
    local file_name
    file_name="$(basename "$from_source")"
    local lower_name
    lower_name="$(printf '%s\n' "$file_name" | tr '[:upper:]' '[:lower:]')"
    # "*dockerfile*" already covers names such as "build.dockerfile".
    [[ "$lower_name" == *dockerfile* ]] \
      || fail "immutable agent payload requires --from to be a Dockerfile path or directory: $from_source"
    # A Dockerfile given by path uses its parent directory as build context.
    context="$(cd "$(dirname "$from_source")" && pwd)"
    dockerfile="$context/$file_name"
  elif [[ -d "$from_source" && -f "$from_source/Dockerfile" ]]; then
    context="$(cd "$from_source" && pwd)"
    dockerfile="$context/Dockerfile"
  else
    fail "immutable agent payload requires a local Dockerfile source; --from '$from_source' cannot receive read-only agent guts"
  fi

  local build_context="$PAYLOAD_PARENT/build-context"
  mkdir -p "$build_context"
  # Copy the context through tar so log directories can be excluded. Each cd is
  # chained with && so a failed cd can never archive or unpack the wrong place.
  (
    cd "$context" \
      && tar --exclude './gator/logs' --exclude './logs' -cf - .
  ) | (
    cd "$build_context" \
      && tar -xf -
  ) || fail "failed to stage build context from: $context"

  # Always start from a clean payload directory inside the staged context.
  rm -rf "$build_context/openshell-agent-payload"
  mkdir -p "$build_context/openshell-agent-payload"
  cp -R "$PAYLOAD_DIR/." "$build_context/openshell-agent-payload/"

  # Appending with >> would write through a symlink to its target, so a
  # symlinked .dockerignore is dropped and replaced by a regular file.
  if [[ -L "$build_context/.dockerignore" ]]; then
    rm -f "$build_context/.dockerignore"
  fi

  # Negated patterns appended last re-include the payload even when earlier
  # rules in the caller's .dockerignore would have excluded it.
  {
    printf '\n# OpenShell staged immutable agent payload\n'
    printf '!openshell-agent-payload\n'
    printf '!openshell-agent-payload/**\n'
  } >> "$build_context/.dockerignore"

  local rel_dockerfile
  # Quote the prefix so glob characters in the context path ([, *, ?) are
  # matched literally rather than interpreted as a pattern.
  rel_dockerfile="${dockerfile#"$context"/}"
  local build_dockerfile="$build_context/$rel_dockerfile"
  [[ -f "$build_dockerfile" ]] || fail "failed to stage Dockerfile: $rel_dockerfile"
  [[ ! -L "$build_dockerfile" ]] || fail "staged Dockerfile must not be a symlink: $rel_dockerfile"

  # Append the payload layer to the staged Dockerfile. Instruction keywords
  # are matched case-insensitively and with any whitespace separator so the
  # final stage's USER is found and restored after the root-owned COPY/chmod.
  ruby - "$build_dockerfile" "$PAYLOAD_IMAGE_DIR" <<'RUBY'
dockerfile_path, payload_image_dir = ARGV
lines = File.readlines(dockerfile_path)
from_re = /\A\s*FROM\s/i
user_re = /\A\s*USER\s/i
final_stage_start = lines.rindex { |line| line.match?(from_re) } || 0
final_user = lines[final_stage_start..].reverse.find { |line| line.match?(user_re) }&.strip
File.open(dockerfile_path, "a") do |file|
  file.puts
  file.puts "USER root"
  file.puts "COPY openshell-agent-payload/ #{payload_image_dir}/"
  file.puts "RUN chmod -R a-w #{payload_image_dir}"
  file.puts final_user if final_user
end
RUBY

  SANDBOX_FROM="$build_dockerfile"
}
&& pwd)" SUPERVISOR="$PAYLOAD_DIR/runtime/supervisor.sh" [[ -x "$SUPERVISOR" ]] || { echo "missing agent supervisor: $SUPERVISOR" >&2; exit 1; } diff --git a/openshell-agents/runtime/harnesses/codex/exec.sh b/openshell-agents/runtime/harnesses/codex/exec.sh index 8143e2658..693aaac0b 100755 --- a/openshell-agents/runtime/harnesses/codex/exec.sh +++ b/openshell-agents/runtime/harnesses/codex/exec.sh @@ -67,8 +67,10 @@ WORK="$(mktemp -d)" cd "$WORK" CODEX_BIN="${CODEX_BIN:-codex}" -if [[ -x /sandbox/payload/runtime/harnesses/codex/codex ]]; then - CODEX_BIN=/sandbox/payload/runtime/harnesses/codex/codex +ADAPTER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PAYLOAD_DIR="$(cd "$ADAPTER_DIR/../../.." && pwd)" +if [[ -x "$PAYLOAD_DIR/runtime/harnesses/codex/codex" ]]; then + CODEX_BIN="$PAYLOAD_DIR/runtime/harnesses/codex/codex" fi CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" CODEX_REASONING="${CODEX_REASONING:-high}" diff --git a/openshell-agents/runtime/harnesses/codex/subagent.sh b/openshell-agents/runtime/harnesses/codex/subagent.sh index 119463492..cd4cb3078 100755 --- a/openshell-agents/runtime/harnesses/codex/subagent.sh +++ b/openshell-agents/runtime/harnesses/codex/subagent.sh @@ -11,15 +11,17 @@ if [[ $# -ne 1 ]]; then fi SUBAGENT_ID="$1" -SUBAGENT_PROMPT="/sandbox/payload/subagents/$SUBAGENT_ID.md" +ADAPTER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PAYLOAD_DIR="$(cd "$ADAPTER_DIR/../../.." 
&& pwd)" +SUBAGENT_PROMPT="$PAYLOAD_DIR/subagents/$SUBAGENT_ID.md" [[ -f "$SUBAGENT_PROMPT" ]] || { echo "missing subagent prompt: $SUBAGENT_PROMPT" >&2 exit 1 } CODEX_BIN="${CODEX_BIN:-codex}" -if [[ -x /sandbox/payload/runtime/harnesses/codex/codex ]]; then - CODEX_BIN=/sandbox/payload/runtime/harnesses/codex/codex +if [[ -x "$PAYLOAD_DIR/runtime/harnesses/codex/codex" ]]; then + CODEX_BIN="$PAYLOAD_DIR/runtime/harnesses/codex/codex" fi CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}" diff --git a/openshell-agents/runtime/subagent.sh b/openshell-agents/runtime/subagent.sh index f544487e6..e116b083e 100755 --- a/openshell-agents/runtime/subagent.sh +++ b/openshell-agents/runtime/subagent.sh @@ -6,14 +6,16 @@ set -euo pipefail if [[ $# -ne 1 ]]; then - echo "usage: /sandbox/payload/runtime/subagent.sh < task.md" >&2 + echo "usage: subagent.sh < task.md" >&2 exit 2 fi HARNESS="${OPENSHELL_AGENT_HARNESS:-}" [[ -n "$HARNESS" ]] || { echo "missing required env: OPENSHELL_AGENT_HARNESS" >&2; exit 1; } +RUNTIME_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PAYLOAD_DIR="$(cd "$RUNTIME_DIR/.." && pwd)" -ADAPTER="/sandbox/payload/runtime/harnesses/$HARNESS/subagent.sh" +ADAPTER="$PAYLOAD_DIR/runtime/harnesses/$HARNESS/subagent.sh" [[ -x "$ADAPTER" ]] || { echo "missing subagent adapter: $ADAPTER" >&2; exit 1; } exec bash "$ADAPTER" "$1" diff --git a/openshell-agents/runtime/supervisor.sh b/openshell-agents/runtime/supervisor.sh index de27c8aa6..914f723e7 100755 --- a/openshell-agents/runtime/supervisor.sh +++ b/openshell-agents/runtime/supervisor.sh @@ -12,8 +12,9 @@ require_env() { require_env OPENSHELL_AGENT_HARNESS -PAYLOAD_DIR="${OPENSHELL_AGENT_PAYLOAD_DIR:-/sandbox/payload}" -PROMPT_FILE="${OPENSHELL_AGENT_PROMPT:-$PAYLOAD_DIR/agent-prompt.md}" +RUNTIME_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +PAYLOAD_DIR="$(cd "$RUNTIME_DIR/.." 
&& pwd)" +PROMPT_FILE="$PAYLOAD_DIR/agent-prompt.md" ADAPTER="$PAYLOAD_DIR/runtime/harnesses/$OPENSHELL_AGENT_HARNESS/exec.sh" RUN_MODE="${OPENSHELL_AGENT_RUN_MODE:-once}" POLL_INTERVAL_SECONDS="${OPENSHELL_AGENT_POLL_INTERVAL_SECONDS:-900}" diff --git a/openshell-agents/runtime/supervisor_test.sh b/openshell-agents/runtime/supervisor_test.sh index 236107dfe..2bc07654c 100755 --- a/openshell-agents/runtime/supervisor_test.sh +++ b/openshell-agents/runtime/supervisor_test.sh @@ -29,6 +29,7 @@ make_payload() { mkdir -p "$dir/runtime/harnesses/test" printf 'test prompt\n' > "$dir/agent-prompt.md" + cp "$SUPERVISOR_UNDER_TEST" "$dir/runtime/supervisor.sh" cat > "$dir/runtime/harnesses/test/exec.sh" < "$output_file" 2>&1 + bash "$payload_dir/runtime/supervisor.sh" > "$output_file" 2>&1 local status=$? set -e return "$status" From 7c3a2eb2cc263c8b377ab1b72e93b771e91fbf49 Mon Sep 17 00:00:00 2001 From: John Myers Date: Tue, 9 Jun 2026 13:39:51 -0700 Subject: [PATCH 20/20] fix(agents): upload writable agent workspace --- openshell-agents/run.sh | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/openshell-agents/run.sh b/openshell-agents/run.sh index 7047a92d6..6f0b85bc0 100755 --- a/openshell-agents/run.sh +++ b/openshell-agents/run.sh @@ -510,13 +510,14 @@ done PAYLOAD_PARENT="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent.XXXXXX")" PAYLOAD_DIR="$PAYLOAD_PARENT/payload" +WORKSPACE_UPLOAD_DIR="$PAYLOAD_PARENT/workspace" PAYLOAD_IMAGE_DIR="/etc/openshell/agent-payload" cleanup_payload() { rm -rf "$PAYLOAD_PARENT" } trap 'cleanup_config; cleanup_payload' EXIT -mkdir -p "$PAYLOAD_DIR" +mkdir -p "$PAYLOAD_DIR" "$WORKSPACE_UPLOAD_DIR" cp -R "$SCRIPT_DIR/runtime" "$PAYLOAD_DIR/runtime" chmod +x "$PAYLOAD_DIR/runtime"/*.sh chmod +x "$PAYLOAD_DIR/runtime/harnesses/$HARNESS"/*.sh @@ -712,6 +713,7 @@ SANDBOX_CMD=( --name "$SANDBOX_NAME" --from "$SANDBOX_FROM" "${PROVIDER_ARGS[@]}" + --upload "$WORKSPACE_UPLOAD_DIR:/sandbox" --no-git-ignore --no-auto-providers --no-tty