diff --git a/.agents/skills/gator-gate/SKILL.md b/.agents/skills/gator-gate/SKILL.md new file mode 100644 index 000000000..d9482b4ff --- /dev/null +++ b/.agents/skills/gator-gate/SKILL.md @@ -0,0 +1,691 @@ +--- +name: gator-gate +description: Validate and monitor OpenShell GitHub issues and PRs using the gator:* state machine. Use when asked to triage issues/PRs for project validity, gate PRs, run gator, validate submissions, or monitor PRs toward merge readiness. +--- + +# Gator Gate + +Validate OpenShell GitHub issues and pull requests for project fit, then monitor valid PRs until they are ready for maintainer approval. + +This skill is a gating workflow. It can start from any issue or PR state, inspect the current `gator:*` label, and continue the correct next action. + +## Skill Location + +Codex and other agent harnesses should load this skill from the repository path `.agents/skills/gator-gate/SKILL.md`. After this branch is merged, the canonical GitHub location is . + +## Prerequisites + +- The `gh` CLI must be able to call GitHub APIs (`gh api user --jq '.login'`) +- You must be in the OpenShell repository root +- GitHub write permissions are required to apply labels, comment, close issues/PRs, or post `/ok to test` + +Do not use `gh auth status` as the authentication health check inside provider-backed sandboxes. Scoped provider tokens may be exposed as `openshell:resolve:env:*` placeholders and `gh auth status` probes endpoints outside the gator policy, causing false "token is invalid" reports even when allowed `gh api` and `gh pr` calls succeed. Use `gh api user --jq '.login'` and a repo-scoped probe instead. + +Use REST-backed `gh api` for GitHub write actions inside gator sandboxes. Do not rely on `gh issue edit`, `gh pr edit`, or other high-level write commands when a REST path is available, because some of them use GraphQL mutations and gator policy allows GraphQL reads only. 
Do not fall back to `curl` for credentialed GitHub writes unless the active provider policy explicitly allows the `curl` binary for the same scoped endpoint. Preferred write shapes: + +```bash +jq -Rs '{body:.}' comment.md > /tmp/comment.json +gh api --method POST repos/NVIDIA/OpenShell/issues/<number>/comments --input /tmp/comment.json --jq .html_url +gh api --method POST repos/NVIDIA/OpenShell/issues/<number>/labels -f labels[]="gator:<state>" +gh api --method DELETE repos/NVIDIA/OpenShell/issues/<number>/labels/gator%3Ablocked --silent || true +``` + +## Authority Rules + +- Do not push commits to a contributor's PR branch by default. +- You may push changes only when explicitly instructed by a GitHub comment from a maintainer or by a direct operator prompt. +- Do not post `/ok to test <sha>` unless the current GitHub user has maintainer authority. +- Code review is code-only. Do not run pre-commit, unit tests, or E2E locally as part of the initial PR review unless explicitly instructed. +- Security vulnerabilities must not be triaged through public GitHub issues. Follow `SECURITY.md`. + +Maintainer authority means one of: + +- User is in the NVIDIA `openshell-maintainers` team +- User is a CODEOWNER listed in `.github/CODEOWNERS` +- Repository permission is `admin`, `maintain`, or `write` for maintainer-only actions such as `/ok to test` + +Use these checks where needed: + +```bash +gh api user --jq '.login' +gh api repos/NVIDIA/OpenShell/collaborators/<login>/permission --jq '{permission,role_name}' +gh api orgs/NVIDIA/teams/openshell-maintainers/members --jq '.[].login' +``` + +If a permission or team-membership query fails due to API access, fall back to CODEOWNERS and repository permission where possible. If authority cannot be verified, do not perform maintainer-only actions. + +## Comment Marker + +All comments posted by this skill must begin with this marker: + +```markdown +> **gator-agent** +``` + +Use one canonical gator comment per issue or PR for baseline state summaries when possible. 
Edit it only for housekeeping updates that do not respond to new human activity. + +When gator is continuing a conversation after a human comment, review, or requested change, post a new marked comment. Do not edit an older comment for these conversational turns, because that hides the progression from PR readers. + +## Human Comment Disposition + +Every substantive human comment or review after a gator request must be addressed in the next gator action. Do not silently keep the same state when an author, maintainer, or reviewer responds. + +When a human response claims that requested changes were made, re-check the latest head and publicly disposition the response in a new marked comment: + +- If the response resolves the feedback, say it is resolved and move to the next state. +- If the response does not resolve the feedback, explicitly acknowledge the response and list what remains unresolved. +- If the response is ambiguous, ask the minimal clarifying question and keep the appropriate waiting state. + +The disposition must mention the relevant human response by author or timestamp when useful, include the current head SHA for PRs, and explain the next expected action. Do not edit the canonical gator comment for this disposition; continue the thread with a new comment so PR readers can see that new activity occurred after the human response. + +## Labels + +There must be at most one `gator:*` label on an issue or PR at any time. 
+ +| Label | Meaning | +|-------|---------| +| `gator:follow-up-needed` | Needs submitter or maintainer clarification; 48 business-hour TTL applies | +| `gator:blocked` | Process blocker prevents validation or monitoring from progressing | +| `gator:validated` | Issue is valid and ready for work; no active PR monitoring needed | +| `gator:in-review` | PR is valid and in agent review or author-feedback loop | +| `gator:watch-pipeline` | Review feedback is resolved; CI/CD monitoring is active | +| `gator:approval-needed` | Agent work is complete; maintainer approval or merge decision remains | + +If labels are missing and you have permission to create them, create them with clear descriptions. Otherwise report the missing labels to the operator. + +```bash +gh label create "gator:follow-up-needed" --description "Gator needs submitter or maintainer follow-up" --color "FBCA04" +gh label create "gator:blocked" --description "Gator is blocked by process or repository gates" --color "BFD4F2" +gh label create "gator:validated" --description "Gator validated this issue as ready for work" --color "0E8A16" +gh label create "gator:in-review" --description "Gator is reviewing or awaiting PR review feedback" --color "1D76DB" +gh label create "gator:watch-pipeline" --description "Gator is monitoring PR CI/CD status" --color "5319E7" +gh label create "gator:approval-needed" --description "Gator completed review; maintainer approval needed" --color "C5DEF5" +``` + +When changing state, remove all existing `gator:*` labels first, then add the new one. 
+ +```bash +for label in gator%3Afollow-up-needed gator%3Ablocked gator%3Avalidated gator%3Ain-review gator%3Awatch-pipeline gator%3Aapproval-needed; do + gh api --method DELETE repos/NVIDIA/OpenShell/issues/<number>/labels/$label --silent || true +done +gh api --method POST repos/NVIDIA/OpenShell/issues/<number>/labels -f labels[]="gator:<state>" +``` + +Pull requests are also GitHub issues for label operations, so the REST issue label endpoints are valid for PR labels. + +## Invocation Modes + +The user may provide: + +- A GitHub issue number +- A GitHub PR number +- Both an issue and a PR number +- No number, with an instruction to process untriaged or active gator items + +Resolve PRs and issues carefully: + +```bash +gh issue view <number> --json number,title,body,state,author,labels,comments,createdAt,updatedAt,closedAt,url +gh pr view <number> --json number,title,body,state,author,labels,comments,reviews,closingIssuesReferences,files,isDraft,mergeStateStatus,reviewDecision,headRefOid,headRefName,baseRefName,url +``` + +For a PR-only input, derive linked issues from `closingIssuesReferences`, PR body references such as `Fixes #123`, and issue comments that mention the PR. If no linked issue exists, validate the PR directly. 
+ +## State Machine + +```text +No gator label + -> gator:follow-up-needed missing why, UX path, repro, RFC/roadmap link, or author action + -> gator:blocked process blocker prevents progress + -> gator:validated issue is valid and ready for work + -> gator:in-review PR is valid and enters monitoring + -> close not planned invalid or out of project scope + +gator:follow-up-needed + -> gator:validated issue clarified and valid + -> gator:in-review PR clarified and valid + -> gator:blocked process blocker discovered + -> close not planned 48 business-hour TTL expired + +gator:blocked + -> previous intended state blocker resolved + -> stay blocked blocker still present + -> nudge responsible party blocker unchanged after 48 business hours + -> stop closed by vouch gate; wait for vouch and reopen + +gator:validated + -> stop issue is already ready for work, no new PR or comments + -> gator:in-review linked PR appears and is valid + -> re-evaluate new substantive comments or labels change scope + +gator:in-review + -> gator:watch-pipeline review feedback resolved + -> nudge PR author review feedback unanswered after 48 business hours + -> gator:follow-up-needed author action needed + -> gator:blocked draft, vouch, DCO, merge conflict, or authority blocker + +gator:watch-pipeline + -> gator:approval-needed required checks are green + -> gator:in-review new review feedback or code changes need attention + -> gator:follow-up-needed author action needed for failures + -> gator:blocked process blocker prevents test execution + +gator:approval-needed + -> stop human maintainers take over + -> nudge maintainers no maintainer action after 48 business hours + -> gator:in-review maintainer requests changes or author updates PR +``` + +## Step 1: Fetch Context + +Fetch issue, PR, comments, reviews, files, labels, and linked references. Also inspect existing gator state. 
+ +For PRs, record: + +- PR number and URL +- Head SHA from `headRefOid` +- Linked issue numbers +- Draft status +- Merge state +- Review decision +- Changed files and affected subsystems +- Existing `test:*` labels + +For issues, record: + +- Issue number and URL +- Author and author association where available +- Current labels +- Whether a linked PR exists +- Last human or maintainer comment after any gator follow-up request + +## Step 2: Recover From Current State + +If exactly one `gator:*` label exists, resume from that state in the state machine. + +If multiple `gator:*` labels exist: + +1. Treat this as label drift. +2. Read recent comments and labels to infer the most advanced safe state. +3. Comment with the correction. +4. Remove all but the chosen `gator:*` label. + +If no `gator:*` label exists, begin validation. + +## Watch Loop Rules + +Every gator state is a watch state. On each invocation, determine the current state, inspect the latest issue/PR activity, and either advance to the next state, keep waiting, or post a TTL nudge. + +When `OPENSHELL_AGENT_RUN_MODE=watch`, the OpenShell agent supervisor owns the sleep/relaunch loop. In that mode, perform exactly one reconciliation cycle, do not run `sleep 900` or an unbounded polling loop inside the harness, and finish with a single final-line result sentinel: + +```text +OPENSHELL_AGENT_RESULT {"status":"waiting","next_poll_seconds":900,"reason":"checks_pending"} +``` + +Use `status=waiting` for routine CI/PR activity waits, `status=blocked` for human or process blockers, `status=complete` for closed/merged/terminal items, `status=terminal_failure` for unrecoverable errors, and `status=transient_failure` only when the supervisor should retry soon. The supervisor will sleep and invoke the harness again with fresh GitHub state. 
+ +When not running under supervised watch mode, do not stop after a one-shot check when a PR is in an active waiting state unless the operator explicitly asks for a one-shot status check. Enter a polling loop and state the interval and stop conditions before waiting. + +Default live-watch cadence: + +- For supervised watch mode, set `next_poll_seconds` to 900 for PRs in active states: `gator:in-review`, `gator:watch-pipeline`, `gator:approval-needed`, and `gator:blocked`. +- Watch PRs indefinitely across gator state transitions until they close, merge, or the operator stops the session. In supervised watch mode this means return a `waiting` or `blocked` result sentinel and let the supervisor sleep outside the model session. +- For supervised watch mode, set `next_poll_seconds` to 3600 for issue-only `gator:follow-up-needed` or issue-only `gator:blocked` states until they progress, close, or reach a TTL threshold. +- Stop immediately for issue-only `gator:validated` items that have no associated PR. +- Do not stop PR monitoring just because the gator state changes, a human comments, or new commits arrive. Treat those as triggers to re-evaluate and continue from the new state. +- Stop PR monitoring only when the PR closes, merges, the operator stops the session, or an unrecoverable process blocker prevents further agent action. + +Use a concise cycle summary before returning the result sentinel, for example: "No action needed for PR #123; supervisor should recheck in 15 minutes until it closes, merges, or the session is stopped." + +Use 48 business hours as the default inactivity threshold for states that are waiting on a person. Business hours are Monday through Friday; do not count Saturday or Sunday. + +State-specific monitoring: + +- `gator:follow-up-needed`: wait for submitter or maintainer clarification. If no substantive response arrives after 48 business hours, close as not planned or close the PR with a TTL-expired comment. 
+- `gator:blocked`: re-check the blocker. If resolved, continue to the previous intended state. If still blocked after 48 business hours, nudge the responsible party unless the PR was auto-closed by the vouch system. +- `gator:validated`: for an issue-only item with no associated PR, stop; the issue is ready for work. If an associated PR exists or appears during a later invocation, validate the PR and move it to `gator:in-review`. If new information changes the scope, re-run validation. +- `gator:in-review`: watch for author commits, author responses, review comments, and unresolved gator findings. If feedback is addressed, move to E2E/test-label decision and then `gator:watch-pipeline`. If feedback is unanswered after 48 business hours, nudge the PR author. Continue watching after either action. +- `gator:watch-pipeline`: watch checks until green, failed, or blocked. Move to `gator:approval-needed` only when required checks are green and no review feedback remains. Continue watching after the state transition because maintainer feedback can arrive later. +- `gator:approval-needed`: watch for maintainer approval, merge, closure, new commits, author responses, or maintainer requested changes. If no maintainer action occurs after 48 business hours, nudge maintainers and CODEOWNERS. If humans request changes, move back to `gator:in-review` and continue watching author follow-up. + +When calculating a nudge TTL, use the latest relevant event for that state: + +- The first comment that entered the current state +- The most recent gator comment in the current state +- The most recent comment or review from the expected actor +- The most recent commit pushed to the PR, when waiting on code changes + +Do not post repeated nudges more often than once per 48 business hours for the same state and actor. + +## Step 3: Check Process Blockers + +Before project-validity review, check blockers. 
+ +Move to `gator:blocked` when any of these apply: + +- PR is draft and not ready for review +- PR is blocked by the vouch system or was auto-closed for lack of vouch +- DCO is missing or failing +- PR has merge conflicts or `mergeStateStatus` indicates dirty/blocked for conflict reasons +- Required `/ok to test <sha>` is needed and the current user lacks maintainer authority +- Required CI cannot run because the copy-pr mirror is missing or stale and maintainer authority is unavailable + +For auto-closed vouch-gate PRs, do not treat the proposal as invalid. Comment only if useful, then stop and wait until the author is vouched and the PR is reopened. + +For blocked open PRs, post a concise gator comment that lists the blocker and the exact next human action. On later invocations, re-check the blocker and nudge the responsible party after 48 business hours if it remains unresolved. + +## Step 4: Duplicate Detection + +For newer issues and PRs, check for duplicates before deciding validity. Duplicate detection is a project-fit input, not a substitute for human judgment. + +Search for existing issues and PRs using the title, subsystem labels, changed files, key error strings, and important feature terms: + +```bash +gh search issues --repo NVIDIA/OpenShell "<query>" --state open --json number,title,state,url,labels,updatedAt +gh search issues --repo NVIDIA/OpenShell "<query>" --state closed --json number,title,state,url,labels,updatedAt +gh search prs --repo NVIDIA/OpenShell "<query>" --state open --json number,title,state,url,labels,updatedAt +gh search prs --repo NVIDIA/OpenShell "<query>" --state closed --json number,title,state,url,labels,updatedAt +``` + +Treat items as duplicate candidates when they share the same user-visible problem, requested capability, affected subsystem, or implementation approach. Do not rely on title similarity alone. + +If a submission is an exact duplicate of an open validated issue or active PR: + +1. Comment with the matching issue or PR. +2. 
Apply `duplicate` if available. +3. Close only when the duplicate relationship is clear and no extra author-specific context is needed. + +If a submission appears related but may contain new constraints, reproduction details, or a different use case: + +1. Move to `gator:follow-up-needed`. +2. Link the duplicate candidates. +3. Ask the author to explain what is different or whether the older issue/PR covers their need. +4. Flag the candidate duplicate set for human review in the comment. + +If a PR duplicates another open PR or implements a feature already being reviewed elsewhere, move to `gator:follow-up-needed` unless a maintainer has already directed both PRs to proceed independently. + +## Step 5: Auto-Validation + +Auto-validate submissions from maintainers, but still review PR implementations. + +Auto-validation applies when the submitter is: + +- A CODEOWNER +- In `@NVIDIA/openshell-maintainers` + +For maintainer-authored issues without PRs, move to `gator:validated` unless the issue is clearly security-sensitive and belongs outside GitHub. + +For maintainer-authored PRs, move to `gator:in-review` and start PR monitoring. Auto-validation means the change is project-valid; it does not mean the implementation is merge-ready. + +## Step 6: Validate Issues and PRs + +Apply the criteria below in order. If evaluating an issue/PR pair, validate both as one submission but set each object to its appropriate current state: + +- Issue without PR: `gator:validated` +- PR with or without linked issue: `gator:in-review` +- Issue linked to a valid active PR: `gator:validated` on the issue and `gator:in-review` on the PR + +### Already Validated Issue + +If a PR is mapped to an issue that is already valid for the same work, consider the PR project-valid and enter `gator:in-review` unless the PR clearly exceeds the issue scope. 
+ +### RFCs + +For PRs that add or modify `rfc/**`, validate against `rfc/README.md` and `rfc/0000-template/README.md`: + +- RFC lives in `rfc/NNNN-short-name/README.md` +- Front matter includes `authors`, `state`, and `links` +- State is one of `draft`, `review`, `accepted`, `rejected`, `implemented`, `superseded` +- RFC has summary, motivation, non-goals, proposal, implementation plan, risks, alternatives, prior art, and open questions +- RFC is appropriate for cross-cutting, architectural, API, process, or multi-team decisions +- Small bug fixes, small single-component features, docs, dependency updates, and interface-preserving refactors should not use RFCs + +Distinguish structural validity from acceptance. A structurally valid RFC PR can enter `gator:in-review`, but implementation work should not be considered ready until the RFC is accepted or an explicit maintainer says otherwise. + +### Small Concentrated Work + +Validate small and concentrated work when it has clear motivation and one of these shapes: + +- One subsystem: gateway, CLI, supervisor, drivers, network proxy, policy, sandbox, TUI, docs, build/release +- Refactor that removes duplicate code or simplifies internals without UX or functional impact +- Logical packaging refactor, such as splitting crates or separating proto/native schema boundaries +- Test improvements for important code paths or features +- Concentrated bug fix with reproducibility steps and a clear test path +- TUI, CLI, or API quality-of-life improvement with a clear user path +- Driver improvement that makes sandbox lifecycle management easier or more efficient +- Documentation clarification, typo fix, errata, or missing documentation +- CI/CD/build/release improvement, including Snap, package, release, or test harness work + +Documentation changes from non-maintainers must not reorder ToC items, change fundamental hierarchy, or restructure docs without a clear maintainer-approved reason. 
+ +### Provider V2 and Credential Support + +Provider V2 work is a supported high-traction area, but require all of the following: + +- Clear UX path for how users configure and use the provider feature in OpenShell +- Clear statement of why the change is important +- Clear statement of who will use it +- Security boundary analysis for credential handling +- Explanation of whether secrets remain hidden from the sandbox agent + +Provider additions and updates must use providers v2 through provider profiles. Treat any new or modified legacy `ProviderDiscoverySpec` entries as a blocking review finding unless a maintainer explicitly requests the legacy path. Do not ask contributors to update both systems for compatibility; the provider profile is the source of truth for new provider network policy, credentials, discovery, and refresh metadata. + +Be skeptical of changes that expose raw credentials to agents or weaken the credential proxy model, even if the user story is clear. + +### Large or Cross-Cutting Work + +For larger changes that impact multiple subsystems, introduce major architecture changes, or touch high single-digit or double-digit file counts, require at least one: + +- Fits an existing `roadmap` issue +- Directly follows an already validated issue or PR +- Has an accepted or actively reviewed RFC for the design +- Has explicit maintainer confirmation in the issue or PR thread + +If this evidence is missing, use `gator:follow-up-needed` and ask for roadmap/RFC/linkage or maintainer clarification. 
+ +### Follow-Up Triggers + +Use `gator:follow-up-needed` when the submission: + +- Does not meet validation criteria yet +- Lacks practical demonstration of why the author is submitting it +- Lacks reproduction steps for a bug +- Lacks a clear UX path for a user-facing feature +- Supports a narrow upstream project convenience without showing why OpenShell should own it +- Suggests swapping core OpenShell components for another project's technology without a strong OpenShell-specific reason +- Introduces CLI/API/UX changes that only work for one driver implementation +- Overlaps existing work and needs reconciliation with the linked issue/PR/RFC + +When requesting follow-up, ask only for the minimal missing information needed to validate. + +### Invalid or Out of Scope + +Close as not planned or wontfix when the submission is clearly outside OpenShell's scope, duplicates a resolved decision, weakens a project invariant without acceptable rationale, or remains unvalidated after the follow-up TTL. + +Comment before closing and include a concise reason. Apply `wontfix` if appropriate and available. + +## Step 7: Follow-Up TTL + +When applying `gator:follow-up-needed`, post a comment with: + +- What information is missing +- Who needs to respond, usually the original submitter +- That the item may be closed if no author or maintainer response arrives within 48 business hours + +Business hours are Monday through Friday. Do not count Saturday or Sunday toward the 48-hour TTL. + +Any substantive comment from the original submitter or a maintainer resets the clock. Maintainers may also manually change labels; respect the latest maintainer-applied state. + +Bot comments and gator-agent comments do not reset the clock. + +If TTL expires: + +1. Comment that the TTL elapsed. +2. State that the issue or PR can be reopened or re-run through gator when the missing information is available. +3. Close the issue as not planned or close the PR. 
+ +## Step 8: PR Review Loop + +When a PR enters `gator:in-review`, run an independent code-only review. + +For PRs authored by `dependabot[bot]`, the primary gator responsibility is dependency-update validation, not normal feature review. Do a quick sanity check for suspicious changes outside expected dependency manifests or lockfiles, then ensure the full required test suite runs, including E2E, and watch for breakages caused by the update. + +Use the `principal-engineer-reviewer` sub-agent. Include: + +- PR title, body, linked issues, labels, and files +- Full diff or enough chunked diff context to review all changes +- Instruction to focus on correctness, regressions, security, maintainability, and missing tests +- Instruction to check whether direct UX changes update the Fern docs under `docs/` and navigation when needed +- Instruction not to rely on local test execution + +When running inside the `openshell-agents/gator` sandbox launcher, invoke the reviewer command specified in the sandbox prompt. Use `task.md` for the subagent input. Put the PR metadata, linked issue context, and diff/file context in `task.md`, save the reviewer output, and use it as the independent review result. The main gator process remains responsible for labels, comments, docs gates, and CI monitoring. + +Post findings as a gator comment or a GitHub PR review: + +- Use inline comments for line-specific defects +- Use a general comment for design concerns, missing tests, or summary feedback +- Do not nitpick style unless it affects maintainability or project conventions + +If findings require author changes, remain in `gator:in-review` or move to `gator:follow-up-needed` if the author must clarify the proposal before code review can continue. + +For validated PRs with direct user-facing UX changes, require Fern docs updates before moving to `gator:watch-pipeline`. 
Direct UX changes include CLI commands/flags/output, sandbox behavior visible to users, provider setup flows, gateway configuration fields, TUI screens, published API behavior, policy syntax, installation/packaging behavior, and documented workflows. Accept either relevant updates under `docs/` plus `docs/index.yml` navigation when needed, or a clear maintainer-authored explanation in the PR that docs are intentionally unnecessary. If docs are missing and no explanation exists, treat it as review feedback. + +If no blocking findings remain, decide whether E2E labels are needed, then move to `gator:watch-pipeline`. + +When resuming a PR already in `gator:in-review`, check whether gator review findings or maintainer review comments are still unanswered. If the PR author has pushed commits or replied after the latest feedback, re-review only the relevant changes, decide whether the feedback is resolved, and publicly disposition the author response as described in Human Comment Disposition. + +If review feedback is waiting on the PR author for more than 48 business hours, post a single author nudge. Use the latest of these timestamps as the TTL start: + +- The gator review comment that requested changes +- The latest maintainer review requesting changes +- The latest gator author-nudge comment +- The latest author commit or author response + +Do not move to `gator:watch-pipeline` until review feedback is addressed or explicitly waived by a maintainer. + +## Step 9: E2E and Test Label Decision + +Apply or recommend `test:*` labels based on changed files and behavior. + +Always apply or require `test:e2e` for PRs authored by `dependabot[bot]`. Dependabot PRs must run the full required test suite, including E2E, even when the dependency update appears isolated to manifests or lockfiles. 
+ +Use `test:e2e` for changes that affect: + +- Sandbox lifecycle +- Gateway/supervisor interaction +- Policy enforcement +- Network proxy behavior +- Provider credential flow +- Docker, Podman, VM, or Kubernetes driver behavior +- Release packaging that needs a runtime smoke test + +Use `test:e2e-gpu` for GPU runtime, CDI, CUDA, GPU driver, or GPU policy behavior. + +Use `test:e2e-kubernetes` for Kubernetes HA, Helm, Agent Sandbox CRDs, Kubernetes scheduling, namespace, or controller behavior when the Kubernetes-specific suite is needed. + +After applying a `test:*` label, read the bot comment that is posted by the E2E Label Help workflow and follow its instructions. + +If a mirror is missing or stale and you have maintainer authority, post: + +```text +/ok to test <sha> +``` + +The `/ok to test <sha>` comment must contain only that command. Do not include the `> **gator-agent**` marker, explanations, Markdown fences, or any other text in the same comment. + +If you do not have maintainer authority, move to `gator:blocked` and state that a maintainer must post `/ok to test <sha>`. + +## Step 10: Pipeline Watch Loop + +When in `gator:watch-pipeline`, monitor PR checks and workflow runs. + +Use: + +```bash +gh pr checks <number> +gh run list --branch <head-branch> +``` + +Required gates include at least: + +- `OpenShell / Branch Checks` +- `OpenShell / Helm Lint` +- `OpenShell / E2E` when `test:e2e` is applied +- `OpenShell / GPU E2E` when `test:e2e-gpu` is applied + +If checks are pending, wait a reasonable interval and re-check. 
+ +If checks fail: + +- Inspect failed logs with `gh run view <run-id> --log-failed` +- Determine whether the failure is PR-caused, flaky, or infrastructure-related +- If author changes are required, comment and move to `gator:in-review` or `gator:follow-up-needed` +- If maintainer action is required, move to `gator:blocked` +- If explicitly authorized to push fixes, make the minimal fix and continue watching + +When all required checks are green and no review feedback remains, move to `gator:approval-needed`. + +## Step 11: Approval Needed + +When applying `gator:approval-needed`, post a concise handoff comment: + +- Validation summary +- Review status +- CI status +- E2E labels and outcomes +- Remaining action: maintainer approval/merge decision + +Do not approve or merge unless explicitly instructed and authorized. + +When resuming an item already in `gator:approval-needed`, check whether maintainer approval has been waiting for more than 48 business hours since the latest of: + +- The first `gator:approval-needed` handoff comment +- The most recent maintainer comment or review +- The most recent gator maintainer-nudge comment + +If more than 48 business hours have elapsed, post a single nudge comment tagging `@NVIDIA/openshell-maintainers` and any relevant CODEOWNERS. For PRs, derive relevant CODEOWNERS from `.github/CODEOWNERS` and the changed files; because OpenShell has broad ownership, include the broad owner set when no more specific owner exists. + +Do not post repeated nudges more often than once per 48 business hours. If the PR is no longer green, has new review feedback, or has changed materially, move it back to `gator:in-review` instead of nudging. + +## Comment Templates + +### Follow-Up Needed + +```markdown +> **gator-agent** + +## Follow-Up Needed + +I cannot validate this submission yet because <reason>. + +Please provide <missing information>. If the original submitter or a maintainer does not respond within 48 business hours, this may be closed as not planned. 
 Weekend hours do not count toward the TTL. +``` + +### Blocked + +```markdown +> **gator-agent** + +## Blocked + +Gator is blocked by <blocker>. + +Next action: <owner and required action>. +``` + +### Validated Issue + +```markdown +> **gator-agent** + +## Validated + +This issue is valid for OpenShell because <reason>. + +Recommended next step: <next step>. +``` + +### PR Review Handoff + +```markdown +> **gator-agent** + +## PR Review Status + +Validation: <validation result> +Head SHA: `<head-sha>` + +Review findings: +- <finding> + +Docs: <docs status> + +Next state: `<gator:state>` +``` + +### Human Response Disposition + +Post this as a new comment after a substantive author, maintainer, or reviewer response. Do not edit an older gator comment for this case. + +```markdown +> **gator-agent** + +## Re-check After Update + +I re-evaluated latest head `<head-sha>` after <commenter>'s comment: "<short quote or summary>". + +Disposition: <disposition>. + +Remaining items: +- <remaining item> + +Next state: `<gator:state>` +``` + +### Approval Needed + +```markdown +> **gator-agent** + +## Maintainer Approval Needed + +Gator validation and PR monitoring are complete. + +Validation: <validation summary> +Review: <review status> +Docs: <docs status> +Checks: <CI status> +E2E: <E2E labels and outcomes> + +Human maintainer approval or merge decision is now required. +``` + +### Maintainer Nudge + +```markdown +> **gator-agent** + +## Maintainer Review Nudge + +This PR has been in `gator:approval-needed` for more than 48 business hours with no maintainer approval or merge decision. + +@NVIDIA/openshell-maintainers <relevant CODEOWNERS>, can someone review and either approve, request changes, or close this out? +``` + +### Author Nudge + +```markdown +> **gator-agent** + +## Author Follow-Up Nudge + +This PR has been in `gator:in-review` for more than 48 business hours with unresolved review feedback. + +@<author>, please respond to the review comments or push an update. If this is no longer planned, please say so and a maintainer can close it out. +``` + +### Blocker Nudge + +```markdown +> **gator-agent** + +## Blocker Follow-Up Nudge + +This item is still blocked by <blocker> after more than 48 business hours. + +Next action: <owner and required action>. 
+``` + +### Possible Duplicate + +```markdown +> **gator-agent** + +## Possible Duplicate + +This looks related to existing work: + +- : + +Please confirm whether this submission has different requirements or reproduction details. A maintainer should review the duplicate relationship before this proceeds. +``` diff --git a/openshell-agents/Dockerfile.gator b/openshell-agents/Dockerfile.gator new file mode 100644 index 000000000..03206ad16 --- /dev/null +++ b/openshell-agents/Dockerfile.gator @@ -0,0 +1,94 @@ +# syntax=docker/dockerfile:1 + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +# Gator sandbox image. +# +# This mirrors the OpenShell Community base image's core system and developer +# tooling, but keeps the initial agent surface focused on Codex + GitHub tooling +# for the gator-gate workflow. + +FROM nvcr.io/nvidia/base/ubuntu:noble-20251013 AS system + +ENV DEBIAN_FRONTEND=noninteractive \ + PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /sandbox + +# Core system dependencies copied from the community base sandbox image. +# iproute2: network namespace management (ip netns, veth pairs) +# iptables: legacy bypass detection (kept for transition) +# nftables: bypass detection; log + reject rules for direct connection diagnostics +# dnsutils: dig, nslookup +RUN apt-get update && apt-get install -y --no-install-recommends \ + ca-certificates \ + curl \ + dnsutils \ + iproute2 \ + iptables \ + nftables \ + iputils-ping \ + net-tools \ + netcat-openbsd \ + openssh-sftp-server \ + procps \ + traceroute \ + && rm -rf /var/lib/apt/lists/* + +RUN groupadd -r supervisor && useradd -r -g supervisor -s /usr/sbin/nologin supervisor && \ + groupadd -r sandbox && useradd -r -g sandbox -d /sandbox -s /bin/bash sandbox + +FROM system AS devtools + +# Node.js 22 + build toolchain. 
Keep the default apt installs aligned with the +# community base image, then add the small CLI tools gator commonly needs. +RUN curl -fsSL https://deb.nodesource.com/setup_22.x | bash - && \ + apt-get install -y --no-install-recommends \ + build-essential \ + git \ + jq \ + less \ + nodejs=22.22.1-1nodesource1 \ + ripgrep \ + vim-tiny \ + nano \ + && rm -rf /var/lib/apt/lists/* \ + && npm install -g npm@11.11.0 + +# GitHub CLI +RUN curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg \ + -o /usr/share/keyrings/githubcli-archive-keyring.gpg && \ + echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" \ + > /etc/apt/sources.list.d/github-cli.list && \ + apt-get update && apt-get install -y --no-install-recommends gh && \ + rm -rf /var/lib/apt/lists/* + +COPY runtime/harnesses/codex/install-codex.sh /usr/local/bin/install-codex.sh +ARG CODEX_VERSION=latest +RUN chmod 755 /usr/local/bin/install-codex.sh && \ + /usr/local/bin/install-codex.sh "$CODEX_VERSION" + +# Provider profiles include both /usr/bin and /usr/local/bin variants for common +# tools. Create the /usr/local/bin aliases in this image so sandbox symlink +# resolution does not warn about missing alternate paths during policy reloads. +RUN ln -sf /usr/bin/gh /usr/local/bin/gh && \ + ln -sf /usr/bin/git /usr/local/bin/git && \ + ln -sf /usr/bin/codex /usr/local/bin/codex + +FROM devtools AS final + +ENV PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin" + +RUN mkdir -p /etc/openshell +COPY gator/policy.yaml /etc/openshell/policy.yaml + +RUN printf 'export PATH="/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin"\nexport PS1="\\u@\\h:\\w\\$ "\n' \ + > /sandbox/.bashrc && \ + printf '[ -f ~/.bashrc ] && . 
~/.bashrc\n' > /sandbox/.profile && \ + chown -R sandbox:sandbox /sandbox + +USER sandbox + +ENTRYPOINT ["/bin/bash"] diff --git a/openshell-agents/README.md b/openshell-agents/README.md new file mode 100644 index 000000000..4f747bec3 --- /dev/null +++ b/openshell-agents/README.md @@ -0,0 +1,192 @@ +# OpenShell Agents + +`openshell-agents/` contains repository-owned agent launchers. An agent is a +manifest plus prompt assets that the shared launcher turns into an OpenShell +sandbox run. Agents do not own harness implementations. Harness-specific setup +and execution live in `runtime/harnesses//`. + +## Directory Layout + +```text +openshell-agents/ + run.sh # Generic manifest-driven launcher + runtime/ # Shared in-sandbox runtime + entrypoint.sh # Starts the in-sandbox supervisor + supervisor.sh # Runs bounded harness cycles in once/watch mode + subagent.sh # Generic subagent dispatcher + harnesses/ + codex/ # Codex install and execution adapter + / + agent.yaml # Agent manifest + prompts/ # Prompt templates rendered at launch + providers/ # Provider profile YAML files for this agent + policy.yaml # Optional image policy source +``` + +Agent directories should contain agent-specific intent and payloads: manifests, +prompt templates, provider profiles, policies, and references to skills or +subagents. They should not contain `harnesses/codex`, `harnesses/opencode`, or +similar runtime code. + +## Agent Manifest + +Each agent has an `agent.yaml` manifest. The launcher currently reads these +sections: + +- `id`, `display_name`, `description`: human and runtime identity. +- `sandbox`: default sandbox name prefix, gateway, source image or Dockerfile, + and background log directory. +- `harness`: default harness and per-harness settings such as model and + reasoning effort. +- `runtime`: in-sandbox run mode (`once` or `watch`), watch poll interval, and + transient failure logging threshold. +- `profile_paths`: ordered directories to scan for provider profile YAML files. 
+- `settings`: gateway settings to apply before launch. +- `providers`: provider instances to create or update, credential sources, and + optional refresh configuration. +- `skills`: files to inject into the sandbox payload. +- `subagents`: subagent definitions to inject into the sandbox payload. +- `prompt_template`: prompt template rendered into the immutable agent payload as + `agent-prompt.md`. + +Manifest paths support these prefixes: + +- `repo://path`: resolve from the repository root. +- `agent://path`: resolve from the agent directory. +- Relative paths without a prefix: resolve from the agent directory. +- Absolute paths: use as-is. + +## Launch Order + +`openshell-agents/run.sh` performs the launch in this order: + +1. Parse CLI flags and select the agent directory from `--agent`. +2. Load `agent.yaml`, select the requested harness, and reject unsupported + harness names. +3. Resolve sandbox defaults from the manifest and CLI/environment overrides. +4. Build a temporary payload directory. +5. Copy `runtime/` into the payload so every agent uses the same in-sandbox + entrypoint and harness adapters. +6. Optionally copy a host Codex binary into the shared Codex runtime path when + `--codex-bin` is supplied. +7. Copy manifest-declared skills and subagents into the payload. +8. Render the prompt template with runtime values such as `{{HARNESS}}`, + `{{RUN_MODE}}`, `{{POLL_INTERVAL_SECONDS}}`, `{{SUBAGENT_COMMAND}}`, and + `{{USER_PROMPT}}`. +9. Build a temporary Docker context that bakes the rendered payload into + `/etc/openshell/agent-payload`. +10. Apply manifest-declared gateway settings. +11. Resolve provider profile IDs by scanning `profile_paths` in order. +12. Import each provider profile into the gateway. If an active profile already + exists, the launcher keeps going and uses it. +13. Resolve provider credentials from host commands, JSON files, or literal + manifest values. +14. 
Create or update each provider instance and attach every selected provider + to the sandbox. +15. Configure and rotate refresh-backed provider credentials when declared by + the manifest. +16. Run `openshell sandbox create` from that temporary Dockerfile source. +17. Inside the sandbox, run `/etc/openshell/agent-payload/runtime/entrypoint.sh`. +18. The runtime entrypoint starts + `/etc/openshell/agent-payload/runtime/supervisor.sh`. +19. The supervisor invokes + `/etc/openshell/agent-payload/runtime/harnesses//exec.sh` as a + bounded child execution. +20. Harness adapters prepare harness-local auth/config and execute the agent + prompt headlessly. + +The payload directory is baked into the image under `/etc/openshell`, which the +gator filesystem policy mounts read-only for agent processes. Prompts, skills, +subagent definitions, and runtime scripts are agent guts, not workspace state. +Agents should write session artifacts, checkouts, temporary files, and future +memory records under `/sandbox` or `/tmp` instead. + +## Runtime Modes + +Agents can run in `once` or `watch` mode. In `once` mode the supervisor runs one +harness cycle and exits with the harness result unless the agent emits an +`OPENSHELL_AGENT_RESULT` sentinel. + +In `watch` mode the sandbox stays alive while the supervisor repeatedly runs +bounded harness cycles. The harness must not sleep or poll indefinitely. Instead, +it performs one reconciliation cycle, then prints a final-line sentinel: + +```text +OPENSHELL_AGENT_RESULT {"status":"waiting","next_poll_seconds":900,"reason":"checks_pending"} +``` + +Supported statuses are `complete`, `waiting`, `blocked`, `transient_failure`, and +`terminal_failure`. The supervisor sleeps between `waiting` or `blocked` cycles +without keeping the harness connected, then launches a fresh harness cycle inside +the same sandbox. 
In `watch` mode, missing or malformed result sentinels and +harness transport failures are retried indefinitely with bounded backoff; only +`complete` and `terminal_failure` stop the supervisor. This keeps long-lived +agents resilient to upstream model errors while leaving durable state ownership +to the agent domain. + +The shared runtime does not prescribe the durable state store. Gator uses GitHub +labels, comments, reviews, and checks. Other agents can use a repository branch, +issue tracker, object store, database, or another domain-specific store as long +as each cycle can reconcile from that state. + +Use `--once` or `--watch` to override the manifest default. Use +`--poll-interval ` to override the watch sleep interval. + +Refresh-backed providers are bootstrapped from manifest credential sources when +no gateway refresh state exists. Later launches preserve gateway-owned refresh +material and request a credential rotation first. If that rotation fails, the +launcher treats the host credential source as a repair source, replaces the +gateway refresh material, and retries rotation once. Use `--reset-refresh` to +skip the preserve-first path and intentionally replace gateway refresh material +from the host credential source before rotating. + +Long-lived harnesses must not persist revision-scoped provider placeholders such +as `openshell:resolve:env:v123_TOKEN` into files they reuse across refreshes. +Persist the current-name alias, for example `openshell:resolve:env:TOKEN`, so the +sandbox proxy resolves the latest gateway-refreshed credential on each request. + +## Subagents + +The launcher injects subagent definitions under +`/etc/openshell/agent-payload/subagents/`. +Prompt templates should refer to the generic command instead of a harness-specific +script: + +```shell +bash /etc/openshell/agent-payload/runtime/subagent.sh < task.md +``` + +The shared subagent dispatcher forwards the task to the active harness adapter. 
+For Codex, this runs a separate bounded `codex exec` invocation using the same +model and reasoning defaults as the parent harness. + +## Providers + +Listing a provider in `agent.yaml` means the provider is attached to the sandbox. +Provider profiles describe credential shape, endpoint policy, discovery metadata, +and refresh metadata. The launcher only creates provider instances and supplies +runtime credential values. + +`profile_paths` are ordered. The first profile file with the requested `id` wins. +If the same directory contains duplicate profile IDs, the launcher fails. If a +later profile path contains a profile ID that was already found, the launcher +warns that the later file is shadowed. + +## Gator Example + +`gator/` is the first manifest-driven agent. It uses: + +- `gator/agent.yaml` for the launch contract. +- `gator/prompts/gator.md` for the rendered operator prompt. +- `gator/providers/` for scoped GitHub and Codex provider profiles. +- `Dockerfile.gator` for the local sandbox image. +- `runtime/harnesses/codex/` for Codex installation and execution. + +Run it through the generic launcher: + +```shell +./openshell-agents/run.sh \ + --agent gator \ + --gateway docker-dev \ + "Run gator on PR 1536 and keep watching until it closes or merges." +``` diff --git a/openshell-agents/gator/.gitignore b/openshell-agents/gator/.gitignore new file mode 100644 index 000000000..333c1e910 --- /dev/null +++ b/openshell-agents/gator/.gitignore @@ -0,0 +1 @@ +logs/ diff --git a/openshell-agents/gator/README.md b/openshell-agents/gator/README.md new file mode 100644 index 000000000..9b3718c68 --- /dev/null +++ b/openshell-agents/gator/README.md @@ -0,0 +1,52 @@ +# Gator Agent + +Launch a headless sandbox agent that runs the `gator-gate` skill against OpenShell issues and pull requests. The default and currently only supported harness is Codex. 
+ +## Prerequisites + +- `gh` is authenticated on the host and has access to `NVIDIA/OpenShell` and `NVIDIA/OpenShell-Community`. +- For `--harness codex`, `codex login` has created `$HOME/.codex/auth.json`. +- For `--harness codex`, local Codex auth must include an access token, refresh token, and account ID. +- A local gateway is available when using the default local Dockerfile source. + +## Usage + +```shell +./openshell-agents/run.sh \ + --agent gator \ + --gateway docker-dev \ + --harness codex \ + "Run gator on PR 1536 and keep watching until it closes or merges." +``` + +By default the launcher uses `openshell-agents/Dockerfile.gator` as the sandbox source. Local gateways build that Dockerfile with `openshell-agents/` as the build context, which lets the image use shared harness install scripts from `runtime/` and gator-specific policy from `gator/policy.yaml`. The launcher bakes rendered prompts, skills, subagents, and runtime files into `/etc/openshell/agent-payload`, so `--from` must point to a local Dockerfile or directory containing a Dockerfile. + +Use `--harness codex` to select Codex explicitly. Other harness names are rejected until their support is added to `agent.yaml` and `openshell-agents/runtime/harnesses//`. Agent directories do not carry their own harness implementations; they provide prompt templates and optional skills or subagents for the shared runtime to inject. + +Use `--codex-bin "$(command -v codex)"` only when the host executable is compatible with the sandbox OS and architecture. + +The manifest-driven launcher at `openshell-agents/run.sh` reads `agent.yaml`, which defines the agent prompt template, provider profile IDs, provider credential sources, gateway settings, skills, subagents, sandbox defaults, runtime mode, and harness defaults. The shared sandbox entrypoint at `openshell-agents/runtime/entrypoint.sh` starts the in-sandbox supervisor, which invokes the selected harness adapter for bounded cycles. 
+ +The launcher: + +- Scans `profile_paths` in manifest order and imports `providers/github-gator.yaml`. +- Creates or updates the `github-gator` provider from `gh auth token`. +- Selects the requested harness and bakes the common runtime into the immutable sandbox payload. +- For `--harness codex`, imports `providers/codex-gator.yaml`, creates or updates the `codex-gator` provider from `$HOME/.codex/auth.json`, and stores the refresh token as gateway-only refresh material. +- For `--harness codex`, configures gateway-managed refresh for `CODEX_AUTH_ACCESS_TOKEN` and rotates it before launching the sandbox. +- Enables `providers_v2_enabled`, `agent_policy_proposals_enabled`, and `proposal_approval_mode=auto` at gateway scope. +- Uses the gator image policy copied to `/etc/openshell/policy.yaml`. +- Bakes the current `.agents/skills/gator-gate/SKILL.md` into `/etc/openshell/agent-payload`. +- Bakes `.claude/agents/principal-engineer-reviewer.md` so the selected harness can run a deterministic independent reviewer execution through `/etc/openshell/agent-payload/runtime/subagent.sh principal-engineer-reviewer < task.md`. +- For `--harness codex`, optionally bakes a host Codex executable as `/etc/openshell/agent-payload/runtime/harnesses/codex/codex`. +- Starts the selected harness without a TTY. +- Runs gator in `watch` mode by default. The sandbox stays alive while the supervisor sleeps between bounded Codex cycles, so Codex is not connected during passive PR waits. +- Deletes the sandbox automatically after the supervisor exits. Pass `--keep` to preserve it for debugging. + +The GitHub provider profile allows read-only GraphQL queries on `api.github.com/graphql` so `gh` read paths can use GraphQL when needed. Write operations remain REST-only and scoped to the two allowed repositories. 
+ +Set `GATOR_CODEX_ACCESS_CREDENTIAL_KEY` or pass `--codex-access-key` if the gator Codex profile uses a credential key other than `CODEX_AUTH_ACCESS_TOKEN` for the short-lived access token. + +Use `--once` for a single reconciliation cycle. Use `--poll-interval ` to change the default 15-minute watch cadence. + +The launcher preserves existing gateway-owned Codex refresh material by default so multiple gator sandboxes do not overwrite each other's refresh-token lineage from host Codex auth. If gateway rotation fails, the launcher automatically resets gateway refresh material from host Codex auth and retries once. After `codex logout && codex login`, you can also pass `--reset-refresh` to force that reset before rotation. diff --git a/openshell-agents/gator/agent.yaml b/openshell-agents/gator/agent.yaml new file mode 100644 index 000000000..e5e4bec44 --- /dev/null +++ b/openshell-agents/gator/agent.yaml @@ -0,0 +1,90 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +id: gator +display_name: Gator Gate Agent +description: Validate and monitor OpenShell GitHub issues and pull requests through the gator state machine. 
+ +sandbox: + name_prefix: gator + from: agent://../Dockerfile.gator + gateway: docker-dev + background_log_dir: logs + +harness: + default: codex + supported: + codex: + model: gpt-5.5 + reasoning: high + +runtime: + mode: watch + poll_interval_seconds: 900 + max_transient_failures: 5 + +profile_paths: + - providers + +settings: + - key: providers_v2_enabled + value: true + - key: agent_policy_proposals_enabled + value: true + - key: proposal_approval_mode + value: auto + +providers: + - id: github + name: github-gator + profile: github-gator + credential_mode: explicit + credentials: + - env: GITHUB_TOKEN + source: + kind: host_command + command: gh auth token + export: true + + - id: codex + name: codex-gator + profile: codex-gator + harness: codex + credential_mode: from_existing + credentials: + - env: CODEX_AUTH_ACCESS_TOKEN + source: + kind: file_json + path: ~/.codex/auth.json + query: tokens.access_token + export: true + - env: CODEX_AUTH_ACCOUNT_ID + source: + kind: file_json + path: ~/.codex/auth.json + query: tokens.account_id + export: true + refresh: + credential_key: CODEX_AUTH_ACCESS_TOKEN + strategy: oauth2-refresh-token + materials: + - name: client_id + value: app_EMoamEEZ73f0CkXaXp7hrann + - name: refresh_token + secret: true + source: + kind: file_json + path: ~/.codex/auth.json + query: tokens.refresh_token + +skills: + - id: gator-gate + source: repo://.agents/skills/gator-gate/SKILL.md + destination: .agents/skills/gator-gate/SKILL.md + +subagents: + - id: principal-engineer-reviewer + source: repo://.claude/agents/principal-engineer-reviewer.md + destination: subagents/principal-engineer-reviewer.md + +prompt_template: prompts/gator.md diff --git a/openshell-agents/gator/policy.yaml b/openshell-agents/gator/policy.yaml new file mode 100644 index 000000000..407baef22 --- /dev/null +++ b/openshell-agents/gator/policy.yaml @@ -0,0 +1,18 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +version: 1 + +filesystem_policy: + include_workdir: true + read_only: [/usr, /lib, /proc, /dev/urandom, /app, /etc, /var/log] + read_write: [/sandbox, /tmp, /dev/null] + +landlock: + compatibility: best_effort + +process: + run_as_user: sandbox + run_as_group: sandbox + +network_policies: {} diff --git a/openshell-agents/gator/prompts/gator.md b/openshell-agents/gator/prompts/gator.md new file mode 100644 index 000000000..4e1a2cddd --- /dev/null +++ b/openshell-agents/gator/prompts/gator.md @@ -0,0 +1,24 @@ +You are running inside an OpenShell sandbox as the gator gate agent. + +Active harness: {{HARNESS}}. +Runtime mode: {{RUN_MODE}}. + +Load and follow this skill exactly: + +/etc/openshell/agent-payload/.agents/skills/gator-gate/SKILL.md + +Important sandbox constraints: + +- GitHub REST write access is scoped to NVIDIA/OpenShell and NVIDIA/OpenShell-Community. +- GitHub GraphQL access is read-only. Prefer REST endpoints for write actions and use GraphQL-backed `gh` reads when useful. +- Keep watching active PRs until they close, merge, or the operator stops the sandbox. +- In `watch` runtime mode, do not run passive sleep or polling loops inside Codex. Perform one bounded reconciliation cycle, then print one `OPENSHELL_AGENT_RESULT` line as the final line of output and stop. The in-sandbox supervisor will sleep and relaunch the harness for the next cycle. +- In `watch` runtime mode, when the next action is to keep waiting, use this exact final-line format with a reason and poll interval: `OPENSHELL_AGENT_RESULT {"status":"waiting","next_poll_seconds":{{POLL_INTERVAL_SECONDS}},"reason":"checks_pending"}`. Use `blocked` when waiting on a human/process blocker, `complete` when the issue or PR reached a terminal state, `terminal_failure` for unrecoverable errors, and `transient_failure` only when the supervisor should retry soon. 
+- In `once` runtime mode, run one bounded cycle unless the operator explicitly asks you to watch inline. Still print `OPENSHELL_AGENT_RESULT {"status":"complete","reason":"one_shot_complete"}` when finished. +- Do not push to contributor branches unless the operator explicitly instructs you to do so. +- If you receive 403 errors from the sandbox proxy, inspect the JSON response and propose a policy update to allow the requested action if the response contains a structured error message. +- When the gator skill requires the `principal-engineer-reviewer` sub-agent, run a bounded independent review with `{{SUBAGENT_COMMAND}}`. Include PR metadata and full diff/file context in `task.md`, save the output, and use it as the independent reviewer result while the main gator process continues labels, comments, docs, and CI gating. + +Operator request: + +{{USER_PROMPT}} diff --git a/openshell-agents/gator/providers/codex-gator.yaml b/openshell-agents/gator/providers/codex-gator.yaml new file mode 100644 index 000000000..c0820e5c5 --- /dev/null +++ b/openshell-agents/gator/providers/codex-gator.yaml @@ -0,0 +1,61 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +id: codex-gator +display_name: Codex Gator +description: OpenAI Codex CLI profile for gator with gateway-managed access-token refresh +category: agent +inference_capable: true +credentials: + - name: access_token + description: Codex OAuth access token refreshed by the gateway from refresh material + env_vars: [CODEX_AUTH_ACCESS_TOKEN] + required: true + auth_style: bearer + header_name: authorization + refresh: + strategy: oauth2_refresh_token + token_url: https://auth.openai.com/oauth/token + refresh_before_seconds: 300 + max_lifetime_seconds: 3600 + material: + - name: client_id + description: Codex OAuth client ID + required: true + - name: refresh_token + description: Codex OAuth refresh token from local auth.json + required: true + secret: true + - name: account_id + description: Codex account identifier + env_vars: [CODEX_AUTH_ACCOUNT_ID] + required: true +discovery: + credentials: [access_token, account_id] +endpoints: + - host: api.openai.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: auth.openai.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: chatgpt.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: ab.chatgpt.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce + - host: files.openai.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce +binaries: [/usr/bin/codex, /usr/local/bin/codex, /usr/lib/node_modules/@openai/**] diff --git a/openshell-agents/gator/providers/github-gator.yaml b/openshell-agents/gator/providers/github-gator.yaml new file mode 100644 index 000000000..654254971 --- /dev/null +++ b/openshell-agents/gator/providers/github-gator.yaml @@ -0,0 +1,105 @@ +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# SPDX-License-Identifier: Apache-2.0 + +id: github-gator +display_name: GitHub Gator +description: Repo-scoped GitHub access for the OpenShell gator gate agent +category: source_control +credentials: + - name: GITHUB_TOKEN + description: GitHub token used by the gator gate agent + env_vars: [GITHUB_TOKEN, GH_TOKEN] + required: true + auth_style: bearer + header_name: authorization +discovery: + credentials: [GITHUB_TOKEN] +endpoints: + - host: api.github.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: { method: GET, path: /user } + - allow: + method: GET + path: /search/issues + query: + q: + any: + - "*repo:NVIDIA/OpenShell*" + - "*repo:NVIDIA/OpenShell-Community*" + - allow: { method: GET, path: /repos/NVIDIA/OpenShell } + - allow: { method: GET, path: /repos/NVIDIA/OpenShell/** } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/issues/*/comments } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell/issues/comments/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/issues/*/labels } + - allow: { method: PUT, path: /repos/NVIDIA/OpenShell/issues/*/labels } + - allow: { method: DELETE, path: /repos/NVIDIA/OpenShell/issues/*/labels/* } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell/issues/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/labels } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell/labels/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/pulls/*/comments } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/pulls/*/reviews } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/pulls/*/reviews/*/comments } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell/statuses/* } + - allow: { method: GET, path: /repos/NVIDIA/OpenShell-Community } + - allow: { method: GET, path: /repos/NVIDIA/OpenShell-Community/** } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/issues/*/comments } + - allow: { method: PATCH, path: 
/repos/NVIDIA/OpenShell-Community/issues/comments/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/issues/*/labels } + - allow: { method: PUT, path: /repos/NVIDIA/OpenShell-Community/issues/*/labels } + - allow: { method: DELETE, path: /repos/NVIDIA/OpenShell-Community/issues/*/labels/* } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell-Community/issues/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/labels } + - allow: { method: PATCH, path: /repos/NVIDIA/OpenShell-Community/labels/* } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/comments } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/reviews } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/pulls/*/reviews/*/comments } + - allow: { method: POST, path: /repos/NVIDIA/OpenShell-Community/statuses/* } + - host: api.github.com + port: 443 + path: /graphql + protocol: graphql + enforcement: enforce + rules: + - allow: + operation_type: query + - host: github.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: { method: GET, path: /NVIDIA/OpenShell } + - allow: { method: GET, path: /NVIDIA/OpenShell/** } + - allow: { method: GET, path: /NVIDIA/OpenShell.git/** } + - allow: { method: POST, path: /NVIDIA/OpenShell/**/git-upload-pack } + - allow: { method: POST, path: /NVIDIA/OpenShell.git/**/git-upload-pack } + - allow: { method: GET, path: /NVIDIA/OpenShell-Community } + - allow: { method: GET, path: /NVIDIA/OpenShell-Community/** } + - allow: { method: GET, path: /NVIDIA/OpenShell-Community.git/** } + - allow: { method: POST, path: /NVIDIA/OpenShell-Community/**/git-upload-pack } + - allow: { method: POST, path: /NVIDIA/OpenShell-Community.git/**/git-upload-pack } + - host: codeload.github.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: { method: GET, path: /NVIDIA/OpenShell/** } + - allow: { method: GET, path: 
/NVIDIA/OpenShell-Community/** } + - host: results-receiver.actions.githubusercontent.com + port: 443 + protocol: rest + enforcement: enforce + rules: + - allow: { method: GET, path: /rest/runs/** } +binaries: + - /usr/bin/gh + - /usr/local/bin/gh + - /usr/bin/curl + - /usr/local/bin/curl + - /usr/bin/git + - /usr/local/bin/git + - /usr/bin/codex + - /usr/local/bin/codex + - /usr/lib/node_modules/@openai/** diff --git a/openshell-agents/run.sh b/openshell-agents/run.sh new file mode 100755 index 000000000..6f0b85bc0 --- /dev/null +++ b/openshell-agents/run.sh @@ -0,0 +1,737 @@ +#!/usr/bin/env bash + +# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +set -euo pipefail + +SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" +ROOT_DIR="$(cd "$SCRIPT_DIR/.." && pwd)" + +OPENSHELL_BIN="${OPENSHELL_BIN:-openshell}" +AGENT_ARG="${OPENSHELL_AGENT_DIR:-}" +GATEWAY_OVERRIDE="" +SANDBOX_NAME_OVERRIDE="" +SANDBOX_FROM_OVERRIDE="" +HARNESS_OVERRIDE="${GATOR_HARNESS:-}" +GITHUB_PROVIDER_OVERRIDE="${GATOR_GITHUB_PROVIDER:-}" +CODEX_PROVIDER_OVERRIDE="${GATOR_CODEX_PROVIDER:-}" +CODEX_PROVIDER_PROFILE_OVERRIDE="${GATOR_CODEX_PROVIDER_PROFILE:-}" +CODEX_ACCESS_KEY_OVERRIDE="${GATOR_CODEX_ACCESS_CREDENTIAL_KEY:-}" +CODEX_LOCAL_BIN="${GATOR_CODEX_LOCAL_BIN:-}" +RUN_MODE_OVERRIDE="${OPENSHELL_AGENT_RUN_MODE:-}" +POLL_INTERVAL_OVERRIDE="${OPENSHELL_AGENT_POLL_INTERVAL_SECONDS:-}" +MAX_TRANSIENT_FAILURES_OVERRIDE="${OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES:-}" +RESET_REFRESH="${OPENSHELL_AGENT_RESET_REFRESH:-0}" +BACKGROUND=0 +KEEP_SANDBOX=0 + +usage() { + printf '%s\n' 'Usage: openshell-agents/run.sh --agent [options] "agent prompt"' + cat <<'EOF' + +Options: + --agent NAME|PATH Agent manifest directory or name under openshell-agents/ + --gateway NAME Gateway name to use + --name NAME Sandbox name + --from DOCKERFILE|DIR Local Dockerfile source for the sandbox image + --harness NAME 
# Report a fatal error and stop the script.
# Arguments: the message words; they are joined with single spaces.
# Outputs:   "error: <message>" on stderr.
# Exits:     always, with status 1 (never returns to the caller).
fail() {
  local message="$*"
  printf 'error: %s\n' "$message" >&2
  exit 1
}
shift + ;; + --poll-interval) + [[ $# -ge 2 ]] || fail "--poll-interval requires a value" + POLL_INTERVAL_OVERRIDE="$2" + shift 2 + ;; + --reset-refresh) + RESET_REFRESH=1 + shift + ;; + --background) + BACKGROUND=1 + shift + ;; + --keep) + KEEP_SANDBOX=1 + shift + ;; + -h|--help) + usage + exit 0 + ;; + --) + shift + break + ;; + -* ) + fail "unknown option: $1" + ;; + *) + break + ;; + esac +done + +[[ -n "$AGENT_ARG" ]] || { usage >&2; exit 2; } +[[ $# -gt 0 ]] || { usage >&2; exit 2; } +USER_PROMPT="$*" + +case "$AGENT_ARG" in + /*|*/*) + AGENT_DIR="$AGENT_ARG" + ;; + *) + AGENT_DIR="$SCRIPT_DIR/$AGENT_ARG" + ;; +esac + +[[ -d "$AGENT_DIR" ]] || fail "missing agent directory: $AGENT_DIR" +AGENT_DIR="$(cd "$AGENT_DIR" && pwd)" +MANIFEST_FILE="$AGENT_DIR/agent.yaml" +[[ -f "$MANIFEST_FILE" ]] || fail "missing agent manifest: $MANIFEST_FILE" + +require_cmd ruby +require_cmd "$OPENSHELL_BIN" + +CONFIG_FILE="$(mktemp "${TMPDIR:-/tmp}/openshell-agent-config.XXXXXX")" +cleanup_config() { + rm -f "$CONFIG_FILE" +} +trap cleanup_config EXIT + +ruby -ryaml -rshellwords - "$MANIFEST_FILE" "$HARNESS_OVERRIDE" >"$CONFIG_FILE" <<'RUBY' +manifest = YAML.load_file(ARGV[0]) || {} +harness = ARGV[1].to_s.empty? ? 
manifest.dig("harness", "default").to_s : ARGV[1].to_s +supported = manifest.dig("harness", "supported") || {} +abort "unsupported harness: #{harness} (supported: #{supported.keys.join(', ')})" unless supported.key?(harness) + +def sh(value) + Shellwords.escape(value.to_s) +end + +def emit(name, value) + puts "#{name}=#{sh(value)}" +end + +def emit_array(name, values) + puts "#{name}=(#{values.map { |value| sh(value) }.join(' ')})" +end + +harness_config = supported[harness] || {} +emit "AGENT_ID", manifest.fetch("id") +emit "AGENT_DISPLAY_NAME", manifest.fetch("display_name", manifest.fetch("id")) +emit "HARNESS", harness +emit "HARNESS_MODEL", harness_config.fetch("model", "") +emit "HARNESS_REASONING", harness_config.fetch("reasoning", "") +emit "SANDBOX_NAME_PREFIX", manifest.dig("sandbox", "name_prefix") || manifest.fetch("id") +emit "SANDBOX_FROM_DEFAULT", manifest.dig("sandbox", "from") || "agent://." +emit "GATEWAY_DEFAULT", manifest.dig("sandbox", "gateway") || "docker-dev" +emit "BACKGROUND_LOG_DIR", manifest.dig("sandbox", "background_log_dir") || "logs" +emit "PROMPT_TEMPLATE", manifest.fetch("prompt_template") +emit_array "PROFILE_PATHS", manifest.fetch("profile_paths", []) + +runtime = manifest.fetch("runtime", {}) +emit "RUNTIME_MODE", runtime.fetch("mode", "once") +emit "RUNTIME_POLL_INTERVAL_SECONDS", runtime.fetch("poll_interval_seconds", 900) +emit "RUNTIME_MAX_TRANSIENT_FAILURES", runtime.fetch("max_transient_failures", 5) + +settings = manifest.fetch("settings", []) +emit "SETTING_COUNT", settings.length +settings.each_with_index do |setting, index| + emit "SETTING_#{index}_KEY", setting.fetch("key") + emit "SETTING_#{index}_VALUE", setting.fetch("value") +end + +providers = manifest.fetch("providers", []).select do |provider| + provider["harness"].nil? 
|| provider["harness"] == harness +end +emit "PROVIDER_COUNT", providers.length +providers.each_with_index do |provider, index| + emit "PROVIDER_#{index}_ID", provider.fetch("id") + emit "PROVIDER_#{index}_NAME", provider.fetch("name") + emit "PROVIDER_#{index}_PROFILE", provider.fetch("profile") + emit "PROVIDER_#{index}_CREDENTIAL_MODE", provider.fetch("credential_mode", "explicit") + credentials = provider.fetch("credentials", []) + emit "PROVIDER_#{index}_CREDENTIAL_COUNT", credentials.length + credentials.each_with_index do |credential, credential_index| + source = credential.fetch("source", {}) + prefix = "PROVIDER_#{index}_CREDENTIAL_#{credential_index}" + emit "#{prefix}_ENV", credential.fetch("env") + emit "#{prefix}_EXPORT", credential.fetch("export", true) + emit "#{prefix}_KIND", source.fetch("kind", "value") + emit "#{prefix}_COMMAND", source.fetch("command", "") + emit "#{prefix}_PATH", source.fetch("path", "") + emit "#{prefix}_QUERY", source.fetch("query", "") + emit "#{prefix}_VALUE", source.fetch("value", "") + end + + refresh = provider["refresh"] || {} + emit "PROVIDER_#{index}_REFRESH_ENABLED", refresh.empty? ? "false" : "true" + emit "PROVIDER_#{index}_REFRESH_CREDENTIAL_KEY", refresh.fetch("credential_key", "") + emit "PROVIDER_#{index}_REFRESH_STRATEGY", refresh.fetch("strategy", "") + materials = refresh.fetch("materials", []) + emit "PROVIDER_#{index}_REFRESH_MATERIAL_COUNT", materials.length + materials.each_with_index do |material, material_index| + source = material.fetch("source", {}) + prefix = "PROVIDER_#{index}_REFRESH_MATERIAL_#{material_index}" + emit "#{prefix}_NAME", material.fetch("name") + emit "#{prefix}_SECRET", material.fetch("secret", false) + emit "#{prefix}_KIND", source.fetch("kind", material.key?("value") ? 
# Turn a manifest path reference into a filesystem path.
#   repo://X   -> $ROOT_DIR/X
#   agent://X  -> $AGENT_DIR/X
#   /X         -> returned unchanged
#   anything else is taken relative to $AGENT_DIR
# Globals:   ROOT_DIR, AGENT_DIR (read only in the branch that needs them)
# Arguments: $1 - path reference from the manifest
# Outputs:   the resolved path on stdout, without a trailing newline
resolve_manifest_path() {
  local spec="$1"
  local base
  local remainder="$spec"

  if [[ "$spec" == repo://* ]]; then
    base="$ROOT_DIR"
    remainder="${spec#repo://}"
  elif [[ "$spec" == agent://* ]]; then
    base="$AGENT_DIR"
    remainder="${spec#agent://}"
  elif [[ "$spec" == /* ]]; then
    # Absolute paths are passed through untouched.
    printf '%s' "$spec"
    return
  else
    base="$AGENT_DIR"
  fi

  printf '%s/%s' "$base" "$remainder"
}
#######################################
# Resolve one credential / refresh-material value from its manifest source.
# Arguments:
#   $1 - source kind: host_command | file_json | value
#   $2 - command line to run on the host        (host_command only)
#   $3 - path to a JSON file, "~" is expanded   (file_json only)
#   $4 - dot-separated lookup path, e.g. "tokens.access_token" or
#        "items.0.id"; numeric segments index into arrays (file_json only)
#   $5 - literal value                          (value only)
# Outputs:
#   The resolved value on stdout. Diagnostics go to stderr and never include
#   the credential file's content.
# Returns:
#   Non-zero (via fail / the failing child) for an unknown kind, a missing
#   file, invalid JSON, or a lookup path that does not exist.
#######################################
resolve_source_value() {
  local kind="$1"
  local command_value="$2"
  local path_value="$3"
  local query_value="$4"
  local literal_value="$5"

  case "$kind" in
    host_command)
      # NOTE: runs manifest-supplied text in a login shell; manifests must be
      # trusted input.
      bash -lc "$command_value"
      ;;
    file_json)
      local expanded_path
      expanded_path="$(expand_home_path "$path_value")"
      [[ -f "$expanded_path" ]] || fail "missing credential file: $expanded_path"
      ruby -rjson - "$expanded_path" "$query_value" <<'RUBY'
path, query = ARGV

begin
  value = JSON.parse(File.read(path))
rescue JSON::ParserError
  # The parser message can quote file content, so it is deliberately dropped.
  abort "invalid JSON in credential file: #{path}"
end

# Walk the document one path segment at a time.
query.split(".").each do |part|
  value =
    case value
    when Hash
      value.fetch(part) { abort "missing key '#{part}' in #{path} (query: #{query})" }
    when Array
      unless part.match?(/\A\d+\z/) && part.to_i < value.length
        abort "invalid array index '#{part}' in #{path} (query: #{query})"
      end
      value[part.to_i]
    else
      abort "cannot look up '#{part}' inside a scalar in #{path} (query: #{query})"
    end
end
print value.to_s
RUBY
      ;;
    value)
      printf '%s' "$literal_value"
      ;;
    *)
      fail "unsupported credential source kind: $kind"
      ;;
  esac
}
"$credential_key" 2>&1 || true)" + if [[ "$RESET_REFRESH" != "1" && "$status_output" != *"No refresh configuration found"* ]]; then + echo "Preserving existing gateway refresh state for $provider_name/$credential_key. Use --reset-refresh to replace it from host auth." + else + openshell_cmd "${args[@]}" >/dev/null + echo "Configured gateway refresh for $provider_name/$credential_key." + fi + if ! rotate_output="$(openshell_cmd provider refresh rotate "$provider_name" --credential-key "$credential_key" 2>&1)"; then + if [[ "$RESET_REFRESH" != "1" && "$status_output" != *"No refresh configuration found"* ]]; then + echo "Gateway refresh rotation failed; resetting $provider_name/$credential_key from host auth and retrying once." >&2 + openshell_cmd "${args[@]}" >/dev/null + openshell_cmd provider refresh rotate "$provider_name" --credential-key "$credential_key" >/dev/null + else + printf '%s\n' "$rotate_output" >&2 + return 1 + fi + fi + echo "Rotated gateway refresh credential for $provider_name/$credential_key." 
+} + +GATEWAY="${GATEWAY_OVERRIDE:-${GATOR_GATEWAY:-$GATEWAY_DEFAULT}}" +SANDBOX_NAME="${SANDBOX_NAME_OVERRIDE:-${GATOR_SANDBOX_NAME:-$SANDBOX_NAME_PREFIX-$(date +%Y%m%d%H%M%S)}}" +SANDBOX_FROM="${SANDBOX_FROM_OVERRIDE:-${GATOR_SANDBOX_FROM:-$(resolve_manifest_path "$SANDBOX_FROM_DEFAULT")}}" +RUN_MODE="${RUN_MODE_OVERRIDE:-$RUNTIME_MODE}" +POLL_INTERVAL_SECONDS="${POLL_INTERVAL_OVERRIDE:-$RUNTIME_POLL_INTERVAL_SECONDS}" +MAX_TRANSIENT_FAILURES="${MAX_TRANSIENT_FAILURES_OVERRIDE:-$RUNTIME_MAX_TRANSIENT_FAILURES}" + +case "$RUN_MODE" in + once|watch) ;; + *) fail "unsupported runtime mode: $RUN_MODE" ;; +esac +[[ "$POLL_INTERVAL_SECONDS" =~ ^[0-9]+$ ]] || fail "--poll-interval must be an integer number of seconds" +[[ "$MAX_TRANSIENT_FAILURES" =~ ^[0-9]+$ ]] || fail "max_transient_failures must be an integer" +[[ "$POLL_INTERVAL_SECONDS" -gt 0 ]] || fail "--poll-interval must be greater than zero" + +for ((provider_index = 0; provider_index < PROVIDER_COUNT; provider_index++)); do + profile_var="PROVIDER_${provider_index}_PROFILE" + name_var="PROVIDER_${provider_index}_NAME" + refresh_key_var="PROVIDER_${provider_index}_REFRESH_CREDENTIAL_KEY" + case "${!profile_var}" in + github-gator) + [[ -z "$GITHUB_PROVIDER_OVERRIDE" ]] || set_var "$name_var" "$GITHUB_PROVIDER_OVERRIDE" + ;; + codex-gator) + [[ -z "$CODEX_PROVIDER_OVERRIDE" ]] || set_var "$name_var" "$CODEX_PROVIDER_OVERRIDE" + [[ -z "$CODEX_PROVIDER_PROFILE_OVERRIDE" ]] || set_var "$profile_var" "$CODEX_PROVIDER_PROFILE_OVERRIDE" + [[ -z "$CODEX_ACCESS_KEY_OVERRIDE" ]] || set_var "$refresh_key_var" "$CODEX_ACCESS_KEY_OVERRIDE" + ;; + esac +done + +PAYLOAD_PARENT="$(mktemp -d "${TMPDIR:-/tmp}/openshell-agent.XXXXXX")" +PAYLOAD_DIR="$PAYLOAD_PARENT/payload" +WORKSPACE_UPLOAD_DIR="$PAYLOAD_PARENT/workspace" +PAYLOAD_IMAGE_DIR="/etc/openshell/agent-payload" +cleanup_payload() { + rm -rf "$PAYLOAD_PARENT" +} +trap 'cleanup_config; cleanup_payload' EXIT + +mkdir -p "$PAYLOAD_DIR" "$WORKSPACE_UPLOAD_DIR" +cp -R 
#######################################
# Stage a private Docker build context that bakes the agent payload into the
# sandbox image as read-only files, then point SANDBOX_FROM at the staged
# Dockerfile.
#
# The original context is copied (minus log directories) into
# $PAYLOAD_PARENT/build-context, the payload is added under
# openshell-agent-payload/, .dockerignore is extended so the payload is never
# ignored, and the staged Dockerfile gets a trailer that copies the payload to
# $PAYLOAD_IMAGE_DIR as root, strips write permission, and restores the final
# stage's USER instruction when one exists.
#
# Globals:
#   PAYLOAD_PARENT, PAYLOAD_DIR, PAYLOAD_IMAGE_DIR (read)
#   SANDBOX_FROM (written)
# Arguments:
#   $1 - a Dockerfile path, or a directory containing a file named Dockerfile
# Returns:
#   Exits through fail when the source is not a usable local Dockerfile.
#######################################
prepare_immutable_sandbox_source() {
  local source="$1"
  local dockerfile
  local context

  if [[ -f "$source" ]]; then
    local lower_name
    lower_name="$(basename "$source" | tr '[:upper:]' '[:lower:]')"
    # "*dockerfile*" also matches names such as "gator.dockerfile".
    [[ "$lower_name" == *dockerfile* ]] || fail "immutable agent payload requires --from to be a Dockerfile path or directory: $source"
    context="$(cd "$(dirname "$source")" && pwd)"
    dockerfile="$context/$(basename "$source")"
  elif [[ -d "$source" && -f "$source/Dockerfile" ]]; then
    context="$(cd "$source" && pwd)"
    dockerfile="$context/Dockerfile"
  else
    fail "immutable agent payload requires a local Dockerfile source; --from '$source' cannot receive read-only agent guts"
  fi

  # Copy the context so the caller's tree is never modified.
  local build_context="$PAYLOAD_PARENT/build-context"
  mkdir -p "$build_context"
  (
    cd "$context"
    tar --exclude './gator/logs' --exclude './logs' -cf - .
  ) | (
    cd "$build_context"
    tar -xf -
  )

  rm -rf "$build_context/openshell-agent-payload"
  mkdir -p "$build_context/openshell-agent-payload"
  cp -R "$PAYLOAD_DIR/." "$build_context/openshell-agent-payload/"

  # Never append through a symlink that could point outside the staged context.
  if [[ -L "$build_context/.dockerignore" ]]; then
    rm -f "$build_context/.dockerignore"
  fi

  {
    printf '\n# OpenShell staged immutable agent payload\n'
    printf '!openshell-agent-payload\n'
    printf '!openshell-agent-payload/**\n'
  } >> "$build_context/.dockerignore"

  # Quote the prefix so glob characters in the context path ("[", "*", "?")
  # are matched literally instead of as a pattern.
  local rel_dockerfile
  rel_dockerfile=${dockerfile#"$context"/}
  local build_dockerfile="$build_context/$rel_dockerfile"
  [[ -f "$build_dockerfile" ]] || fail "failed to stage Dockerfile: $rel_dockerfile"
  [[ ! -L "$build_dockerfile" ]] || fail "staged Dockerfile must not be a symlink: $rel_dockerfile"

  ruby - "$build_dockerfile" "$PAYLOAD_IMAGE_DIR" <<'RUBY'
dockerfile_path, payload_image_dir = ARGV
lines = File.readlines(dockerfile_path)
# Dockerfile instructions are case-insensitive, so match FROM/USER in any case.
final_stage_start = lines.rindex { |line| line.match?(/\A\s*FROM[ \t]+\S/i) } || 0
final_user = lines[final_stage_start..].reverse.find { |line| line.match?(/\A\s*USER[ \t]+\S/i) }&.strip
File.open(dockerfile_path, "a") do |file|
  file.puts
  file.puts "USER root"
  file.puts "COPY openshell-agent-payload/ #{payload_image_dir}/"
  file.puts "RUN chmod -R a-w #{payload_image_dir}"
  # Hand control back to the user the final stage selected, if any.
  file.puts final_user if final_user
end
RUBY

  SANDBOX_FROM="$build_dockerfile"
}
# Assemble the `openshell sandbox create` command line.
#
# Optional argument groups (PROVIDER_ARGS, KEEP_ARGS) may be empty. They are
# expanded with the ${arr[@]+"${arr[@]}"} idiom because bash releases before
# 4.4 treat "${arr[@]}" on an empty array as an unbound variable under
# `set -u`, which would abort the script whenever --keep is given or the
# manifest declares no providers.

# The sandbox is removed after the harness exits unless --keep was requested.
KEEP_ARGS=()
if [[ "$KEEP_SANDBOX" != "1" ]]; then
  KEEP_ARGS+=(--no-keep)
fi

# Environment handed to the in-sandbox entrypoint.
HARNESS_ENV_ARGS=(
  "OPENSHELL_AGENT_ID=$AGENT_ID"
  "OPENSHELL_AGENT_HARNESS=$HARNESS"
  "OPENSHELL_AGENT_RUN_MODE=$RUN_MODE"
  "OPENSHELL_AGENT_POLL_INTERVAL_SECONDS=$POLL_INTERVAL_SECONDS"
  "OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES=$MAX_TRANSIENT_FAILURES"
)

case "$HARNESS" in
  codex)
    # Caller-provided CODEX_* values win over the manifest defaults.
    HARNESS_ENV_ARGS+=(
      "CODEX_MODEL=${CODEX_MODEL:-$HARNESS_MODEL}"
      "CODEX_REASONING=${CODEX_REASONING:-$HARNESS_REASONING}"
    )
    ;;
esac

SANDBOX_CMD=(
  env -u OPENSHELL_SANDBOX_POLICY
  "$OPENSHELL_BIN" --gateway "$GATEWAY" sandbox create
  --name "$SANDBOX_NAME"
  --from "$SANDBOX_FROM"
  ${PROVIDER_ARGS[@]+"${PROVIDER_ARGS[@]}"}
  --upload "$WORKSPACE_UPLOAD_DIR:/sandbox"
  --no-git-ignore
  --no-auto-providers
  --no-tty
  ${KEEP_ARGS[@]+"${KEEP_ARGS[@]}"}
  -- env "${HARNESS_ENV_ARGS[@]}" bash "$PAYLOAD_IMAGE_DIR/runtime/entrypoint.sh"
)

echo "Launching $AGENT_DISPLAY_NAME sandbox '$SANDBOX_NAME' on gateway '$GATEWAY'..."
# Abort unless the named environment variable is set to a non-empty value.
# Arguments: $1 - name of the variable to check (looked up indirectly)
# Outputs:   "missing required env: NAME" on stderr when the check fails
# Exits:     with status 1 when the variable is unset or empty
require_env() {
  local var_name="$1"

  if [[ -z "${!var_name:-}" ]]; then
    echo "missing required env: $var_name" >&2
    exit 1
  fi
}
# SPDX-License-Identifier: Apache-2.0

# Codex harness adapter: writes a Codex auth.json from the provider-supplied
# environment, then replaces this process with `codex exec` running the prompt
# stored in the file given as $1.
#
# Required env: CODEX_AUTH_ACCESS_TOKEN, CODEX_AUTH_ACCOUNT_ID, GITHUB_TOKEN
# Optional env: CODEX_AUTH_ID_TOKEN, CODEX_AUTH_REFRESH_TOKEN, CODEX_BIN,
#               CODEX_MODEL, CODEX_REASONING

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "usage: exec.sh <prompt-file>" >&2
  exit 2
fi

# Abort unless the named environment variable is set and non-empty.
require_env() {
  local name="$1"
  [[ -n "${!name:-}" ]] || { echo "missing required env: $name" >&2; exit 1; }
}

require_env CODEX_AUTH_ACCESS_TOKEN
require_env CODEX_AUTH_ACCOUNT_ID
require_env GITHUB_TOKEN

PROMPT_FILE="$1"
# Fail early: an unreadable file would otherwise expand to an empty prompt in
# the final exec instead of stopping the script.
[[ -r "$PROMPT_FILE" ]] || { echo "missing prompt file: $PROMPT_FILE" >&2; exit 1; }

export GH_TOKEN="$GITHUB_TOKEN"
export HOME=/sandbox/home

mkdir -p "$HOME/.codex"
node - <<'NODE'
const fs = require("fs");
const path = `${process.env.HOME}/.codex/auth.json`;
const b64u = (obj) => Buffer.from(JSON.stringify(obj)).toString("base64url");
// Values that are provider placeholders are rewritten to the canonical
// placeholder for their own variable name; anything else passes through.
const providerPlaceholder = (envName) => {
  const value = process.env[envName];
  if (value && value.startsWith("openshell:resolve:env:")) {
    return `openshell:resolve:env:${envName}`;
  }
  return value;
};
const now = Math.floor(Date.now() / 1000);
// Unsigned stand-in used only when no CODEX_AUTH_ID_TOKEN is provided.
const fallbackIdToken = [
  b64u({ alg: "none", typ: "JWT" }),
  b64u({
    iss: "https://auth.openai.com",
    aud: "codex",
    sub: "openshell-agent",
    email: "agent@openshell.local",
    iat: now,
    exp: now + 3600,
  }),
  "placeholder",
].join(".");

// Create the file owner-only from the start rather than relying on a later chmod.
fs.writeFileSync(path, JSON.stringify({
  auth_mode: "chatgpt",
  OPENAI_API_KEY: null,
  tokens: {
    id_token: providerPlaceholder("CODEX_AUTH_ID_TOKEN") || fallbackIdToken,
    access_token: providerPlaceholder("CODEX_AUTH_ACCESS_TOKEN"),
    refresh_token: providerPlaceholder("CODEX_AUTH_REFRESH_TOKEN") || "gateway-managed-refresh-token",
    account_id: providerPlaceholder("CODEX_AUTH_ACCOUNT_ID"),
  },
  last_refresh: new Date().toISOString(),
}, null, 2), { mode: 0o600 });
NODE
# The mode option only applies on creation; tighten a pre-existing file too.
chmod 600 "$HOME/.codex/auth.json"

# Run from a fresh scratch directory. It cannot be removed by a trap because
# the script ends in exec.
WORK="$(mktemp -d)"
cd "$WORK"

CODEX_BIN="${CODEX_BIN:-codex}"
ADAPTER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PAYLOAD_DIR="$(cd "$ADAPTER_DIR/../../.." && pwd)"
# A Codex binary shipped inside the payload takes precedence.
if [[ -x "$PAYLOAD_DIR/runtime/harnesses/codex/codex" ]]; then
  CODEX_BIN="$PAYLOAD_DIR/runtime/harnesses/codex/codex"
fi
CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}"
CODEX_REASONING="${CODEX_REASONING:-high}"

CODEX_EXEC_ARGS=(
  exec
  --skip-git-repo-check
  --sandbox danger-full-access
  --ephemeral
)

# Probe the help text once and match in-shell. Piping into `grep -q` under
# pipefail can report failure when grep exits early and the writer receives
# SIGPIPE, which would silently drop a supported flag.
codex_exec_help="$("$CODEX_BIN" exec --help 2>/dev/null || true)"
for optional_flag in --ignore-user-config --ignore-rules; do
  if [[ "$codex_exec_help" == *"$optional_flag"* ]]; then
    CODEX_EXEC_ARGS+=("$optional_flag")
  fi
done

exec "$CODEX_BIN" "${CODEX_EXEC_ARGS[@]}" \
  -c "model=\"${CODEX_MODEL}\"" \
  -c "model_reasoning_effort=\"${CODEX_REASONING}\"" \
  "$(<"$PROMPT_FILE")"
# SPDX-License-Identifier: Apache-2.0

# Codex sub-agent adapter: combines the sub-agent definition
# ($PAYLOAD_DIR/subagents/<subagent-id>.md) with the task read from stdin and
# replaces this process with `codex exec` reading that prompt from stdin.
#
# Optional env: CODEX_BIN, CODEX_MODEL, CODEX_REASONING

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "usage: subagent.sh <subagent-id> < task.md" >&2
  exit 2
fi

SUBAGENT_ID="$1"
ADAPTER_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PAYLOAD_DIR="$(cd "$ADAPTER_DIR/../../.." && pwd)"
SUBAGENT_PROMPT="$PAYLOAD_DIR/subagents/$SUBAGENT_ID.md"
[[ -f "$SUBAGENT_PROMPT" ]] || {
  echo "missing subagent prompt: $SUBAGENT_PROMPT" >&2
  exit 1
}

CODEX_BIN="${CODEX_BIN:-codex}"
# A Codex binary shipped inside the payload takes precedence.
if [[ -x "$PAYLOAD_DIR/runtime/harnesses/codex/codex" ]]; then
  CODEX_BIN="$PAYLOAD_DIR/runtime/harnesses/codex/codex"
fi

CODEX_MODEL="${CODEX_MODEL:-gpt-5.5}"
CODEX_REASONING="${CODEX_REASONING:-high}"

TASK_FILE="$(mktemp)"
PROMPT_FILE="$(mktemp)"
cleanup() {
  rm -f "$TASK_FILE" "$PROMPT_FILE"
}
# Covers every early exit between here and the final exec.
trap cleanup EXIT

# Read the whole task from stdin before assembling the prompt.
cat >"$TASK_FILE"

{
  printf '%s\n\n' "You are running as the $SUBAGENT_ID sub-agent inside an OpenShell sandbox."
  printf '%s\n\n' 'Follow this agent definition exactly:'
  cat "$SUBAGENT_PROMPT"
  printf '\n%s\n\n' 'Task:'
  cat "$TASK_FILE"
} >"$PROMPT_FILE"

CODEX_EXEC_ARGS=(
  exec
  --skip-git-repo-check
  --sandbox danger-full-access
  --ephemeral
)

# Probe the help text once and match in-shell. Piping into `grep -q` under
# pipefail can report failure when grep exits early and the writer receives
# SIGPIPE, which would silently drop a supported flag.
codex_exec_help="$("$CODEX_BIN" exec --help 2>/dev/null || true)"
for optional_flag in --ignore-user-config --ignore-rules; do
  if [[ "$codex_exec_help" == *"$optional_flag"* ]]; then
    CODEX_EXEC_ARGS+=("$optional_flag")
  fi
done

# exec replaces this shell, so the EXIT trap would never run. Keep the prompt
# reachable through fd 3, delete the temp files now, then hand fd 3 to Codex
# as stdin.
exec 3<"$PROMPT_FILE"
cleanup
trap - EXIT

exec "$CODEX_BIN" "${CODEX_EXEC_ARGS[@]}" \
  -c "model=\"${CODEX_MODEL}\"" \
  -c "model_reasoning_effort=\"${CODEX_REASONING}\"" \
  - <&3 3<&-
# SPDX-License-Identifier: Apache-2.0

# Harness-neutral sub-agent launcher: forwards the sub-agent id ($1) and stdin
# to the subagent.sh adapter of the harness named by OPENSHELL_AGENT_HARNESS.

set -euo pipefail

if [[ $# -ne 1 ]]; then
  echo "usage: subagent.sh <subagent-id> < task.md" >&2
  exit 2
fi

HARNESS="${OPENSHELL_AGENT_HARNESS:-}"
[[ -n "$HARNESS" ]] || { echo "missing required env: OPENSHELL_AGENT_HARNESS" >&2; exit 1; }

# This script lives in <payload>/runtime; adapters live under
# <payload>/runtime/harnesses/<harness>/.
RUNTIME_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PAYLOAD_DIR="$(cd "$RUNTIME_DIR/.." && pwd)"

ADAPTER="$PAYLOAD_DIR/runtime/harnesses/$HARNESS/subagent.sh"
[[ -x "$ADAPTER" ]] || { echo "missing subagent adapter: $ADAPTER" >&2; exit 1; }

# stdin (the task text) is inherited by the adapter.
exec bash "$ADAPTER" "$1"
#######################################
# Extract a top-level string field from a JSON object.
#
# A real JSON parser (jq, else python3) is used when one is installed, so a
# nested object that repeats the key cannot shadow the top-level value. The
# regex extraction is kept as a last resort for hosts with neither tool, and
# for input the parser rejects.
# Arguments:
#   $1 - JSON text (expected to be a single object)
#   $2 - field name
# Outputs:
#   The field's value followed by a newline when it exists and is a string;
#   nothing otherwise.
# Returns:
#   0 in every parser path, so callers running under `set -e` are not aborted
#   by a missing field.
#######################################
json_string_field() {
  local json="$1"
  local key="$2"
  local value

  if command -v jq >/dev/null 2>&1; then
    if value="$(printf '%s' "$json" \
      | jq -r --arg key "$key" 'if type == "object" then (.[$key] | strings) else empty end' 2>/dev/null)"; then
      [[ -z "$value" ]] || printf '%s\n' "$value"
      return 0
    fi
  elif command -v python3 >/dev/null 2>&1; then
    if value="$(printf '%s' "$json" | python3 -c '
import json
import sys

try:
    value = json.load(sys.stdin)
except Exception:
    sys.exit(1)
if not isinstance(value, dict):
    sys.exit(1)
field = value.get(sys.argv[1])
if isinstance(field, str):
    sys.stdout.write(field + "\n")
' "$key" 2>/dev/null)"; then
      [[ -z "$value" ]] || printf '%s\n' "$value"
      return 0
    fi
  fi

  # Fallback: best-effort regex match (takes the last occurrence of the key and
  # does not understand escaped quotes).
  printf '%s' "$json" | sed -nE "s/.*\"$key\"[[:space:]]*:[[:space:]]*\"([^\"]*)\".*/\1/p"
}
# Report whether captured harness output contains a known transport-level
# error message (matched case-insensitively).
# Arguments: $1 - file holding the captured harness output
# Returns:   0 when one of the known messages is present, non-zero otherwise
classify_transient_failure() {
  local output_file="$1"
  grep -Eiq 'stream disconnected before completion|failed to connect to websocket|Reconnecting\.\.\.|Broken pipe|Connection to sandbox closed by remote host|peer closed connection without sending TLS close_notify' "$output_file"
}

# Turn a requested sleep duration into a safe number of seconds.
#
# Globals:   POLL_INTERVAL_SECONDS (read), MAX_SLEEP_SECONDS (read)
# Arguments: $1 - requested duration; may be empty or non-numeric
# Outputs:   POLL_INTERVAL_SECONDS when the request is missing, non-numeric,
#            or zero; MAX_SLEEP_SECONDS when the request exceeds that cap;
#            otherwise the request itself in plain decimal form
safe_sleep_seconds() {
  local value="$1"

  if [[ ! "$value" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$POLL_INTERVAL_SECONDS"
    return
  fi

  # Strip leading zeros: bash arithmetic reads a leading 0 as octal, so "08"
  # would raise an error and "0100000" would be compared as 32768 and slip
  # past the cap even though sleep treats it as 100000 seconds.
  value="${value#"${value%%[!0]*}"}"
  if [[ -z "$value" ]]; then
    # The request was zero (possibly written with several digits).
    printf '%s\n' "$POLL_INTERVAL_SECONDS"
    return
  fi

  # More than 18 digits cannot be compared without overflowing 64-bit
  # arithmetic; such a request is above any sensible cap anyway.
  if [[ "${#value}" -gt 18 || "$value" -gt "$MAX_SLEEP_SECONDS" ]]; then
    printf '%s\n' "$MAX_SLEEP_SECONDS"
    return
  fi
  printf '%s\n' "$value"
}

# Record one more transient failure, log it, sleep for the current backoff,
# then double the backoff (bounded by cap_transient_backoff).
#
# MAX_TRANSIENT_FAILURES never stops the retries here; it only switches the
# log wording to "still retrying" on every multiple of that count.
#
# Globals:   transient_failures (incremented), transient_backoff_seconds
#            (doubled, then capped), MAX_TRANSIENT_FAILURES (read)
# Arguments: $1 - human-readable reason included in the log line
retry_watch_cycle() {
  local reason="$1"
  local verb="retrying"

  transient_failures=$((transient_failures + 1))

  # The modulo is only evaluated when the limit is positive, which avoids a
  # division by zero when MAX_TRANSIENT_FAILURES is 0.
  if [[ "$MAX_TRANSIENT_FAILURES" -gt 0 ]] \
    && [[ $((transient_failures % MAX_TRANSIENT_FAILURES)) -eq 0 ]]; then
    verb="still retrying"
  fi
  echo "openshell-agent: transient watch failure $transient_failures ($reason); $verb in ${transient_backoff_seconds}s" >&2

  sleep "$transient_backoff_seconds"
  transient_backoff_seconds=$((transient_backoff_seconds * 2))
  cap_transient_backoff
}

# Clamp the retry backoff so it never exceeds the poll interval or the
# absolute sleep ceiling.
# Globals: transient_backoff_seconds (possibly lowered),
#          POLL_INTERVAL_SECONDS (read), MAX_SLEEP_SECONDS (read)
cap_transient_backoff() {
  local limit
  for limit in "$POLL_INTERVAL_SECONDS" "$MAX_SLEEP_SECONDS"; do
    if [[ "$transient_backoff_seconds" -gt "$limit" ]]; then
      transient_backoff_seconds="$limit"
    fi
  done
}

# Run the harness adapter once, mirroring its combined stdout/stderr to this
# script's stdout and to a capture file.
#
# Globals:   ADAPTER (read), PROMPT_FILE (read)
# Arguments: $1 - file that receives a copy of the adapter output
# Returns:   the adapter's own exit status (not tee's)
run_cycle() {
  local output_file="$1"
  local status

  # errexit is suspended so a failing adapter cannot abort the supervisor
  # before its status has been read from PIPESTATUS.
  set +e
  bash "$ADAPTER" "$PROMPT_FILE" 2>&1 | tee "$output_file"
  status=${PIPESTATUS[0]}
  set -e

  return "$status"
}
# ---------------------------------------------------------------------------
# Supervisor top-level flow.
#
# Resolves the payload layout, validates configuration, then runs the harness
# adapter in cycles. Each cycle must print a line of the form
#   OPENSHELL_AGENT_RESULT {json object}
# whose "status" decides what happens next:
#   complete          - exit 0
#   waiting, blocked  - exit 0 in once mode; sleep and poll again in watch mode
#   transient_failure - exit 1 in once mode; back off and retry in watch mode
#   terminal_failure  - exit 1
#   anything else     - exit 1 in once mode; back off and retry in watch mode
# ---------------------------------------------------------------------------
require_env OPENSHELL_AGENT_HARNESS

RUNTIME_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PAYLOAD_DIR="$(cd "$RUNTIME_DIR/.." && pwd)"
PROMPT_FILE="$PAYLOAD_DIR/agent-prompt.md"
ADAPTER="$PAYLOAD_DIR/runtime/harnesses/$OPENSHELL_AGENT_HARNESS/exec.sh"
RUN_MODE="${OPENSHELL_AGENT_RUN_MODE:-once}"
POLL_INTERVAL_SECONDS="${OPENSHELL_AGENT_POLL_INTERVAL_SECONDS:-900}"
MAX_TRANSIENT_FAILURES="${OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES:-5}"
MAX_SLEEP_SECONDS=86400

[[ -f "$PROMPT_FILE" ]] || { echo "missing agent prompt: $PROMPT_FILE" >&2; exit 1; }
[[ -x "$ADAPTER" ]] || { echo "missing harness adapter: $ADAPTER" >&2; exit 1; }

case "$RUN_MODE" in
  once|watch) ;;
  *) echo "unsupported agent run mode: $RUN_MODE" >&2; exit 2 ;;
esac
[[ "$POLL_INTERVAL_SECONDS" =~ ^[0-9]+$ ]] || { echo "OPENSHELL_AGENT_POLL_INTERVAL_SECONDS must be an integer" >&2; exit 2; }
[[ "$MAX_TRANSIENT_FAILURES" =~ ^[0-9]+$ ]] || { echo "OPENSHELL_AGENT_MAX_TRANSIENT_FAILURES must be an integer" >&2; exit 2; }
# Force base 10: a zero-padded setting such as "0900" would otherwise be read
# as (invalid) octal by the numeric comparisons below and in the helpers.
POLL_INTERVAL_SECONDS=$((10#$POLL_INTERVAL_SECONDS))
MAX_TRANSIENT_FAILURES=$((10#$MAX_TRANSIENT_FAILURES))
[[ "$POLL_INTERVAL_SECONDS" -gt 0 ]] || { echo "OPENSHELL_AGENT_POLL_INTERVAL_SECONDS must be greater than zero" >&2; exit 2; }

# Remove the current capture file on every exit path, including an abort
# from set -e, so interrupted runs do not leave files behind in /tmp.
output_file=""
cleanup_output_file() {
  if [[ -n "$output_file" ]]; then
    rm -f -- "$output_file"
  fi
}
trap cleanup_output_file EXIT

cycle=0
transient_failures=0
transient_backoff_seconds=30
cap_transient_backoff

while true; do
  cycle=$((cycle + 1))
  echo "openshell-agent: starting $RUN_MODE cycle $cycle with harness $OPENSHELL_AGENT_HARNESS" >&2
  output_file="$(mktemp /tmp/openshell-agent-cycle.XXXXXX)"

  harness_status=0
  run_cycle "$output_file" || harness_status=$?

  # Only the last sentinel line counts. The marker may be followed by any
  # run of whitespace (the grep accepts tabs and repeated spaces), so strip
  # the marker first and then all leading whitespace rather than exactly one
  # space.
  result_line="$(grep -E '^OPENSHELL_AGENT_RESULT[[:space:]]+' "$output_file" | tail -n 1 || true)"
  result_json="${result_line#OPENSHELL_AGENT_RESULT}"
  result_json="${result_json#"${result_json%%[![:space:]]*}"}"

  if [[ -z "$result_line" ]]; then
    if [[ "$RUN_MODE" == "once" ]]; then
      rm -f "$output_file"
      # Say why the run failed; a harness that exits 0 without a result
      # would otherwise end the supervisor with a silent status 1.
      echo "openshell-agent: missing OPENSHELL_AGENT_RESULT after harness exit $harness_status" >&2
      if [[ "$harness_status" -ne 0 ]]; then
        exit "$harness_status"
      fi
      exit 1
    fi
    retry_reason="missing OPENSHELL_AGENT_RESULT after harness exit $harness_status"
    if classify_transient_failure "$output_file"; then
      retry_reason="$retry_reason; upstream transport failure detected"
    fi
    rm -f "$output_file"
    retry_watch_cycle "$retry_reason"
    continue
  fi

  if ! valid_result_json "$result_json"; then
    rm -f "$output_file"
    if [[ "$RUN_MODE" == "once" ]]; then
      echo "openshell-agent: malformed OPENSHELL_AGENT_RESULT JSON" >&2
      exit 1
    fi
    retry_watch_cycle "malformed OPENSHELL_AGENT_RESULT JSON"
    continue
  fi

  status="$(json_string_field "$result_json" status)"
  reason="$(json_string_field "$result_json" reason)"
  next_poll_seconds="$(json_number_field "$result_json" next_poll_seconds)"
  next_poll_seconds="$(safe_sleep_seconds "$next_poll_seconds")"
  [[ -n "$reason" ]] || reason="unspecified"

  rm -f "$output_file"

  case "$status" in
    complete)
      echo "openshell-agent: complete ($reason)" >&2
      exit 0
      ;;
    waiting|blocked)
      if [[ "$RUN_MODE" == "once" ]]; then
        echo "openshell-agent: $status ($reason)" >&2
        exit 0
      fi
      # A well-formed answer ends any failure streak.
      transient_failures=0
      transient_backoff_seconds=30
      echo "openshell-agent: $status ($reason); sleeping ${next_poll_seconds}s outside harness" >&2
      sleep "$next_poll_seconds"
      ;;
    transient_failure)
      if [[ "$RUN_MODE" == "once" ]]; then
        echo "openshell-agent: transient failure ($reason)" >&2
        exit 1
      fi
      retry_watch_cycle "$reason"
      ;;
    terminal_failure)
      echo "openshell-agent: terminal failure ($reason)" >&2
      exit 1
      ;;
    *)
      if [[ "$RUN_MODE" == "once" ]]; then
        echo "openshell-agent: invalid OPENSHELL_AGENT_RESULT status: ${status:-}" >&2
        exit 1
      fi
      retry_watch_cycle "invalid OPENSHELL_AGENT_RESULT status: ${status:-}"
      ;;
  esac
done
# Report a test failure in TAP style and stop the test run.
# Arguments: $@ - words of the failure description (joined with spaces)
# Outputs:   "not ok - DESCRIPTION" on stderr
# Returns:   never returns; exits with status 1
fail() {
  printf 'not ok - %s\n' "$*" >&2
  exit 1
}

# Assert that a file contains a literal piece of text.
#
# On a miss the expected text and the first 200 lines of the file are written
# to stderr before the run is failed.
#
# Arguments: $1 - file to search
#            $2 - fixed string that must occur somewhere in the file
assert_contains() {
  local file="$1"
  local expected="$2"

  # "--" ends option parsing so expected text that starts with "-" is
  # searched for instead of being read as a grep option.
  if grep -Fq -- "$expected" "$file"; then
    return 0
  fi

  printf 'missing expected text: %s\n' "$expected" >&2
  printf '%s\n' '--- output ---' >&2
  sed -n '1,200p' -- "$file" >&2
  fail "assert_contains failed"
}
# Once mode must fail when the harness never prints an OPENSHELL_AGENT_RESULT
# line, even though the harness itself exits 0.
test_once_requires_sentinel() {
  local tmp
  tmp="$(mktemp -d)"
  make_payload "$tmp/payload" "exit 0"

  if run_supervisor "$tmp/payload" once "$tmp/output"; then
    fail "once mode succeeded without sentinel"
  fi

  # Scratch data is removed only on success; fail exits first, so a failing
  # run keeps its files for inspection.
  rm -rf -- "$tmp"
  printf 'ok - once requires sentinel\n'
}

# Watch mode must keep retrying while the harness dies without a sentinel
# (two failed cycles here) and finish once a "complete" result arrives.
test_watch_retries_missing_sentinel_until_complete() {
  local tmp
  tmp="$(mktemp -d)"
  make_payload "$tmp/payload" '
state_file="${OPENSHELL_AGENT_TEST_STATE:?}"
count=0
if [[ -f "$state_file" ]]; then
  count="$(cat "$state_file")"
fi
count=$((count + 1))
printf "%s\n" "$count" > "$state_file"
if [[ "$count" -lt 3 ]]; then
  printf "%s\n" "ERROR: stream disconnected before completion" >&2
  exit 1
fi
printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\",\"reason\":\"done\"}"
'

  # Checked explicitly: as a bare statement a non-zero exit would abort the
  # whole script through set -e without a "not ok" line or the captured
  # supervisor output.
  if ! OPENSHELL_AGENT_TEST_STATE="$tmp/state" run_supervisor "$tmp/payload" watch "$tmp/output"; then
    sed -n '1,200p' "$tmp/output" >&2
    fail "watch mode did not complete after missing sentinel"
  fi
  assert_contains "$tmp/output" "transient watch failure 1"
  assert_contains "$tmp/output" "transient watch failure 2"
  assert_contains "$tmp/output" "openshell-agent: complete (done)"

  rm -rf -- "$tmp"
  printf 'ok - watch retries missing sentinel until complete\n'
}

# Watch mode must treat an unknown status value as retryable and finish once
# a "complete" result arrives.
test_watch_retries_invalid_status_until_complete() {
  local tmp
  tmp="$(mktemp -d)"
  make_payload "$tmp/payload" '
state_file="${OPENSHELL_AGENT_TEST_STATE:?}"
count=0
if [[ -f "$state_file" ]]; then
  count="$(cat "$state_file")"
fi
count=$((count + 1))
printf "%s\n" "$count" > "$state_file"
if [[ "$count" -lt 2 ]]; then
  printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"nonsense\",\"reason\":\"bad\"}"
  exit 0
fi
printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\",\"reason\":\"done\"}"
'

  if ! OPENSHELL_AGENT_TEST_STATE="$tmp/state" run_supervisor "$tmp/payload" watch "$tmp/output"; then
    sed -n '1,200p' "$tmp/output" >&2
    fail "watch mode did not complete after invalid status"
  fi
  assert_contains "$tmp/output" "invalid OPENSHELL_AGENT_RESULT status: nonsense"
  assert_contains "$tmp/output" "openshell-agent: complete (done)"

  rm -rf -- "$tmp"
  printf 'ok - watch retries invalid status until complete\n'
}
# Watch mode must treat a truncated result object as retryable and finish
# once a well-formed "complete" result arrives.
test_watch_retries_malformed_terminal_json_until_complete() {
  local tmp
  tmp="$(mktemp -d)"
  make_payload "$tmp/payload" '
state_file="${OPENSHELL_AGENT_TEST_STATE:?}"
count=0
if [[ -f "$state_file" ]]; then
  count="$(cat "$state_file")"
fi
count=$((count + 1))
printf "%s\n" "$count" > "$state_file"
if [[ "$count" -lt 2 ]]; then
  printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\""
  exit 0
fi
printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\",\"reason\":\"done\"}"
'

  # Checked explicitly: as a bare statement a non-zero exit would abort the
  # whole script through set -e without a "not ok" line or the captured
  # supervisor output.
  if ! OPENSHELL_AGENT_TEST_STATE="$tmp/state" run_supervisor "$tmp/payload" watch "$tmp/output"; then
    sed -n '1,200p' "$tmp/output" >&2
    fail "watch mode did not complete after malformed JSON"
  fi
  assert_contains "$tmp/output" "malformed OPENSHELL_AGENT_RESULT JSON"
  assert_contains "$tmp/output" "openshell-agent: complete (done)"

  # Scratch data is removed only on success; fail exits first, so a failing
  # run keeps its files for inspection.
  rm -rf -- "$tmp"
  printf 'ok - watch retries malformed terminal JSON until complete\n'
}

# The status "failed" is not an accepted alias: watch mode must report it as
# invalid, retry, and finish once a "complete" result arrives.
test_watch_retries_failed_alias_until_complete() {
  local tmp
  tmp="$(mktemp -d)"
  make_payload "$tmp/payload" '
state_file="${OPENSHELL_AGENT_TEST_STATE:?}"
count=0
if [[ -f "$state_file" ]]; then
  count="$(cat "$state_file")"
fi
count=$((count + 1))
printf "%s\n" "$count" > "$state_file"
if [[ "$count" -lt 2 ]]; then
  printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"failed\",\"reason\":\"legacy\"}"
  exit 0
fi
printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"complete\",\"reason\":\"done\"}"
'

  if ! OPENSHELL_AGENT_TEST_STATE="$tmp/state" run_supervisor "$tmp/payload" watch "$tmp/output"; then
    sed -n '1,200p' "$tmp/output" >&2
    fail "watch mode did not complete after failed alias"
  fi
  assert_contains "$tmp/output" "invalid OPENSHELL_AGENT_RESULT status: failed"
  assert_contains "$tmp/output" "openshell-agent: complete (done)"

  rm -rf -- "$tmp"
  printf 'ok - watch retries failed alias until complete\n'
}

# A terminal_failure result must end watch mode with a non-zero exit instead
# of being retried.
test_watch_terminal_failure_exits() {
  local tmp
  tmp="$(mktemp -d)"
  make_payload "$tmp/payload" 'printf "%s\n" "OPENSHELL_AGENT_RESULT {\"status\":\"terminal_failure\",\"reason\":\"fatal\"}"'

  if run_supervisor "$tmp/payload" watch "$tmp/output"; then
    fail "watch mode succeeded after terminal failure"
  fi
  assert_contains "$tmp/output" "openshell-agent: terminal failure (fatal)"

  rm -rf -- "$tmp"
  printf 'ok - watch terminal failure exits\n'
}
"openshell-agent: terminal failure (fatal)" + printf 'ok - watch terminal failure exits\n' +} + +test_once_requires_sentinel +test_watch_retries_missing_sentinel_until_complete +test_watch_retries_invalid_status_until_complete +test_watch_retries_malformed_terminal_json_until_complete +test_watch_retries_failed_alias_until_complete +test_watch_terminal_failure_exits diff --git a/providers/codex.yaml b/providers/codex.yaml index 7edd86a97..5396333bd 100644 --- a/providers/codex.yaml +++ b/providers/codex.yaml @@ -45,4 +45,9 @@ endpoints: protocol: rest access: read-write enforcement: enforce + - host: files.openai.com + port: 443 + protocol: rest + access: read-write + enforcement: enforce binaries: [/usr/bin/codex, /usr/local/bin/codex, /usr/lib/node_modules/@openai/**]