Add sandboxed agent PR exploration (#2604)

2026-06-01 03:14:35 +07:00 · 2026-05-26 15:52:42 +08:00 · 2026-05-26 15:52:42 +08:00 · b5bf28060b
commit b5bf28060b
parent ceb636aa1b
5 changed files with 1960 additions and 0 deletions
--- a/.github/actionlint.yaml
+++ b/.github/actionlint.yaml
@ -0,0 +1,8 @@
+paths:  # actionlint per-path overrides
+  .github/workflows/*.lock.yml:  # applies only to *.lock.yml workflow files
+    ignore:  # patterns matched against actionlint's reported error messages
+      - 'shellcheck reported issue in this script: SC2016:.+'  # suppress SC2016 findings
+      - 'shellcheck reported issue in this script: SC2086:.+'  # suppress SC2086 findings
+self-hosted-runner:
+  labels:  # custom runner labels actionlint should accept in runs-on
+    - agent-pr-explore  # label used by agent-pr-explore-sandbox.yml
--- a/.github/scripts/agent-pr-explore-local.sh
+++ b/.github/scripts/agent-pr-explore-local.sh
@ -0,0 +1,105 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+usage() {
+  cat <<'USAGE'
+Usage:
+  .github/scripts/agent-pr-explore-local.sh <pr-number>
+
+Runs the Docker-isolated PR exploration path from a local or self-hosted
+machine, without relying on a GitHub Actions workflow being present on main.
+
+Required on the host:
+  docker, gh, jq, node/npm, expect-cli@0.1.3
+
+Optional environment:
+  BASE_REPO=nexu-io/open-design
+  RUNNER_TEMP=/tmp/od-agent-pr-explore-local
+  OD_EXPECT_TIMEOUT_SECONDS=1200
+  OD_SANDBOX_CPUS=4
+  OD_SANDBOX_MEMORY=8g
+  OD_ALLOW_NPX_EXPECT_CLI=1
+  OD_TRACE_R2_UPLOAD=1
+  R2_ACCOUNT_ID=...
+  R2_ACCESS_KEY_ID=...
+  R2_SECRET_ACCESS_KEY=...
+  R2_BUCKET=...
+  R2_PUBLIC_ORIGIN=https://...
+USAGE
+}
+
+# -h/--help: print usage and exit successfully before any other checks.
+if [ "${1:-}" = "-h" ] || [ "${1:-}" = "--help" ]; then
+  usage
+  exit 0
+fi
+
+# PR number: first argument, falling back to $PR_NUMBER. Must be all digits.
+pr_number="${1:-${PR_NUMBER:-}}"
+if ! [[ "$pr_number" =~ ^[0-9]+$ ]]; then
+  usage >&2
+  exit 2
+fi
+
+base_repo="${BASE_REPO:-nexu-io/open-design}"
+runner_temp="${RUNNER_TEMP:-/tmp/od-agent-pr-explore-local}"
+
+# Resolve the sandbox runner relative to this script instead of the current
+# directory, so the wrapper also works when invoked from outside the repo root.
+# NOTE(review): the sandbox script is not visible here; confirm it does not
+# itself rely on the working directory being the repository root.
+script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+sandbox_script="$script_dir/agent-pr-explore-sandbox.sh"
+if [ ! -x "$sandbox_script" ]; then
+  echo "::error::Sandbox runner not found or not executable: $sandbox_script" >&2
+  exit 1
+fi
+
+# Fail early, with a clear message, if a required host tool is missing.
+for command_name in docker gh jq node; do
+  if ! command -v "$command_name" >/dev/null 2>&1; then
+    echo "::error::$command_name is required on the mini/local runner" >&2
+    exit 1
+  fi
+done
+
+# Use the caller's GH_TOKEN when provided; otherwise borrow the token from the
+# local gh login so the sandbox runner receives one through its environment.
+if [ -z "${GH_TOKEN:-}" ]; then
+  if ! GH_TOKEN="$(gh auth token 2>/dev/null)"; then
+    echo "::error::GH_TOKEN is not set and gh auth token failed. Run gh auth login or export GH_TOKEN." >&2
+    exit 1
+  fi
+  export GH_TOKEN
+fi
+
+# expect-cli must be preinstalled unless the operator explicitly opts into the
+# npx fallback with OD_ALLOW_NPX_EXPECT_CLI=1.
+if ! command -v expect-cli >/dev/null 2>&1 && [ "${OD_ALLOW_NPX_EXPECT_CLI:-0}" != "1" ]; then
+  echo "::error::expect-cli is not installed. Install it on the mini with: npm install -g expect-cli@${OD_EXPECT_CLI_VERSION:-0.1.3}" >&2
+  echo "        For a one-off smoke run, set OD_ALLOW_NPX_EXPECT_CLI=1 to use the pinned npx fallback." >&2
+  exit 1
+fi
+
+mkdir -p "$runner_temp"
+
+# One API call so every field describes the same snapshot of the PR.
+pr_json="$(gh pr view "$pr_number" --repo "$base_repo" --json state,isDraft,headRefOid,baseRefOid,headRepositoryOwner,headRepository)"
+state="$(jq -r '.state' <<<"$pr_json")"
+draft="$(jq -r '.isDraft' <<<"$pr_json")"
+head_sha="$(jq -r '.headRefOid' <<<"$pr_json")"
+base_sha="$(jq -r '.baseRefOid' <<<"$pr_json")"
+head_repo="$(jq -r '.headRepositoryOwner.login + "/" + .headRepository.name' <<<"$pr_json")"
+
+if [ "$state" != "OPEN" ]; then
+  echo "::error::Refusing to explore PR $pr_number because state is $state." >&2
+  exit 1
+fi
+if [ "$draft" != "false" ]; then
+  echo "::error::Refusing to explore draft PR $pr_number." >&2
+  exit 1
+fi
+if ! [[ "$head_sha" =~ ^[0-9a-f]{40}$ && "$base_sha" =~ ^[0-9a-f]{40}$ ]]; then
+  echo "::error::Invalid PR SHA metadata for PR $pr_number." >&2
+  exit 1
+fi
+# When the head repository fields are null (for example a deleted fork), the
+# jq concatenation above does not produce an owner/name pair. Reject that here
+# instead of handing a malformed HEAD_REPO to the sandbox runner.
+if ! [[ "$head_repo" =~ ^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$ ]]; then
+  echo "::error::PR $pr_number has no usable head repository (got '$head_repo')." >&2
+  exit 1
+fi
+
+echo "Running agent PR exploration locally"
+echo "  PR:        $base_repo#$pr_number"
+echo "  Head:      $head_repo@$head_sha"
+echo "  Base SHA:  $base_sha"
+echo "  Temp root: $runner_temp"
+
+# Capture the sandbox exit status instead of letting `set -e` abort here, so
+# the artifact location is still printed when the run fails.
+sandbox_status=0
+PR_NUMBER="$pr_number" \
+HEAD_SHA="$head_sha" \
+HEAD_REPO="$head_repo" \
+BASE_REPO="$base_repo" \
+BASE_SHA="$base_sha" \
+RUNNER_TEMP="$runner_temp" \
+GH_TOKEN="$GH_TOKEN" \
+"$sandbox_script" || sandbox_status=$?
+
+if [ "$sandbox_status" -ne 0 ]; then
+  echo "::error::Sandbox runner exited with status $sandbox_status." >&2
+fi
+
+echo
+echo "Artifacts:"
+echo "  $runner_temp/agent-pr-explore-sandbox/artifacts"
+
+# Propagate the sandbox result to the caller.
+exit "$sandbox_status"
--- a/.github/scripts/agent-pr-explore-sandbox.sh
+++ b/.github/scripts/agent-pr-explore-sandbox.sh
--- a/.github/workflows/agent-pr-explore-sandbox.yml
+++ b/.github/workflows/agent-pr-explore-sandbox.yml
@ -0,0 +1,235 @@
+name: agent-pr-explore-sandbox
+
+# Trusted-orchestrator workflow for PR exploration. It intentionally uses
+# pull_request_target so the workflow file and runner script come from the
+# protected base branch. The PR head is never checked out on the host runner;
+# the sandbox script fetches and executes it inside Docker.
+on:
+  pull_request_target:
+    types: [opened, synchronize, reopened, ready_for_review]
+    paths:
+      - "apps/web/**"
+      - "package.json"
+      - "pnpm-lock.yaml"
+      - "pnpm-workspace.yaml"
+      - ".github/workflows/agent-pr-explore-sandbox.yml"
+      - ".github/scripts/agent-pr-explore-sandbox.sh"
+  workflow_dispatch:
+    inputs:
+      pr_number:
+        description: Pull request number to explore.
+        required: true
+        type: string
+
+permissions:
+  contents: read
+  issues: write  # the comment step calls the issues comments API
+  pull-requests: write  # the comment step calls the pulls comments API
+
+concurrency:
+  group: agent-pr-explore-sandbox-${{ github.event.pull_request.number || inputs.pr_number }}  # one group per PR number
+  cancel-in-progress: true  # a newer run for the same PR cancels the older one
+
+jobs:
+  sandbox:
+    name: Sandbox PR runtime
+    if: ${{ github.event_name == 'workflow_dispatch' || !github.event.pull_request.draft }}  # manual dispatch always starts; PR events skip drafts
+    runs-on: [self-hosted, agent-pr-explore]
+    environment: agent-pr-explore  # approval gate when the environment has required reviewers
+    timeout-minutes: 45
+
+    steps:
+      - name: Checkout trusted base scripts
+        uses: actions/checkout@v6.0.2
+        with:
+          ref: ${{ github.event.pull_request.base.sha || github.sha }}  # base commit for PR events, github.sha for manual dispatch; never the PR head
+          persist-credentials: false  # do not keep the token in the checkout's git config
+
+      - name: Resolve PR metadata
+        id: pr
+        shell: bash
+        env:
+          GH_TOKEN: ${{ github.token }}
+          EVENT_PR_NUMBER: ${{ github.event.pull_request.number || inputs.pr_number }}
+        run: |
+          set -euo pipefail
+          # Resolves the PR to explore, refuses closed/draft/malformed PRs, and
+          # publishes number, author, head_sha, head_repo and base_sha as step
+          # outputs for the following steps.
+          if ! [[ "$EVENT_PR_NUMBER" =~ ^[0-9]+$ ]]; then
+            echo "::error::Invalid PR number: $EVENT_PR_NUMBER"
+            exit 1
+          fi
+          if ! command -v jq >/dev/null 2>&1; then
+            echo "::error::jq is required on the agent-pr-explore runner."
+            exit 1
+          fi
+
+          # Fetch every field in one API call (as agent-pr-explore-local.sh does)
+          # so they all describe the same snapshot of the PR, even if it is
+          # pushed to or edited while this step runs.
+          pr_json="$(gh pr view "$EVENT_PR_NUMBER" --repo "$GITHUB_REPOSITORY" \
+            --json state,isDraft,author,headRefOid,baseRefOid,headRepositoryOwner,headRepository)"
+          state="$(jq -r '.state' <<<"$pr_json")"
+          draft="$(jq -r '.isDraft' <<<"$pr_json")"
+          author="$(jq -r '.author.login' <<<"$pr_json")"
+          head_sha="$(jq -r '.headRefOid' <<<"$pr_json")"
+          head_repo="$(jq -r '.headRepositoryOwner.login + "/" + .headRepository.name' <<<"$pr_json")"
+          base_sha="$(jq -r '.baseRefOid' <<<"$pr_json")"
+
+          if [ "$state" != "OPEN" ]; then
+            echo "::error::Refusing to explore PR $EVENT_PR_NUMBER because state is $state."
+            exit 1
+          fi
+          if [ "$draft" != "false" ]; then
+            echo "::error::Refusing to explore draft PR $EVENT_PR_NUMBER."
+            exit 1
+          fi
+          if ! [[ "$head_sha" =~ ^[0-9a-f]{40}$ && "$base_sha" =~ ^[0-9a-f]{40}$ ]]; then
+            echo "::error::Invalid PR SHA metadata."
+            exit 1
+          fi
+          # Null head repository fields (for example a deleted fork) do not
+          # produce an owner/name pair; reject them before they reach the
+          # sandbox script or $GITHUB_OUTPUT.
+          if ! [[ "$head_repo" =~ ^[A-Za-z0-9_.-]+/[A-Za-z0-9_.-]+$ ]]; then
+            echo "::error::PR $EVENT_PR_NUMBER has no usable head repository (got '$head_repo')."
+            exit 1
+          fi
+
+          {
+            echo "number=$EVENT_PR_NUMBER"
+            echo "author=$author"
+            echo "head_sha=$head_sha"
+            echo "head_repo=$head_repo"
+            echo "base_sha=$base_sha"
+          } >> "$GITHUB_OUTPUT"
+
+      - name: Run expect against Docker-isolated PR app
+        shell: bash
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+          HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
+          HEAD_REPO: ${{ steps.pr.outputs.head_repo }}
+          BASE_REPO: ${{ github.repository }}
+          BASE_SHA: ${{ steps.pr.outputs.base_sha }}
+          OD_SANDBOX_CPUS: "4"
+          OD_SANDBOX_MEMORY: "8g"
+          OD_SANDBOX_READY_TIMEOUT_SECONDS: "900"
+          OD_EXPECT_TIMEOUT_SECONDS: "1200"
+          OD_EXPECT_CONTEXT_MAX_BYTES: "120000"
+          OD_TRACE_R2_UPLOAD: "1"  # turns on the host-side R2 artifact upload described in the spec
+          R2_ACCOUNT_ID: ${{ secrets.R2_ACCOUNT_ID }}  # R2 values are for the host script; the spec says they are never passed into Docker
+          R2_ACCESS_KEY_ID: ${{ secrets.R2_ACCESS_KEY_ID }}
+          R2_SECRET_ACCESS_KEY: ${{ secrets.R2_SECRET_ACCESS_KEY }}
+          R2_BUCKET: ${{ secrets.R2_BUCKET }}
+          R2_PUBLIC_ORIGIN: ${{ vars.R2_PUBLIC_ORIGIN }}  # a variable, not a secret
+        run: .github/scripts/agent-pr-explore-sandbox.sh  # script from the trusted base checkout
+
+      - name: Upload sandbox artifacts
+        if: always()  # still upload whatever exists when an earlier step failed
+        uses: actions/upload-artifact@v7
+        with:
+          name: agent-pr-explore-sandbox-${{ steps.pr.outputs.number }}-${{ steps.pr.outputs.head_sha }}
+          path: ${{ runner.temp }}/agent-pr-explore-sandbox/artifacts/  # same directory the comment step reads the report from
+          if-no-files-found: warn  # an empty directory produces a warning, not a failure
+          retention-days: 7
+
+      - name: Comment exploration report
+        if: always()
+        shell: bash
+        env:
+          GH_TOKEN: ${{ github.token }}
+          PR_NUMBER: ${{ steps.pr.outputs.number }}
+          PR_AUTHOR: ${{ steps.pr.outputs.author }}
+          HEAD_SHA: ${{ steps.pr.outputs.head_sha }}
+        run: |
+          set -euo pipefail
+          # Publishes the sandbox report as one sticky PR comment (created once,
+          # then updated in place via a hidden marker). For the configured
+          # authors it also posts or updates one inline review comment anchored
+          # to the first added line of the PR diff.
+          report="$RUNNER_TEMP/agent-pr-explore-sandbox/artifacts/agent-pr-exploration-report.md"
+          if [ ! -s "$report" ]; then
+            echo "No agent exploration report was produced; skipping PR comment."
+            exit 0
+          fi
+
+          # All scratch files live in one private directory that is removed on
+          # every exit path, so nothing accumulates on the self-hosted runner.
+          work_dir="$(mktemp -d)"
+          trap 'rm -rf -- "$work_dir"' EXIT
+
+          marker="<!-- agent-pr-explore-sandbox:${PR_NUMBER} -->"
+          body_file="$work_dir/comment-body.md"
+          {
+            cat "$report"
+            echo
+            echo "$marker"
+          } > "$body_file"
+
+          # Newest bot comment carrying the marker, if any.
+          comment_id="$(
+            gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" --paginate \
+              --jq ".[] | select(.user.type == \"Bot\" and (.body | contains(\"$marker\"))) | .id" \
+              | tail -n 1
+          )"
+          body="$(cat "$body_file")"
+          if [ -n "$comment_id" ]; then
+            gh api "repos/$GITHUB_REPOSITORY/issues/comments/$comment_id" -X PATCH -f body="$body" --silent
+          else
+            gh api "repos/$GITHUB_REPOSITORY/issues/$PR_NUMBER/comments" -f body="$body" --silent
+          fi
+
+          # Lowercase with tr rather than ${PR_AUTHOR,,}: that expansion needs
+          # bash 4+, and the stock bash on macOS runners is 3.2.
+          author_lower="$(printf '%s' "$PR_AUTHOR" | tr '[:upper:]' '[:lower:]')"
+          case "$author_lower" in
+            nettee|mrcfps|alchemistklk|siri-ray)
+              ;;
+            *)
+              echo "PR author $PR_AUTHOR is not configured for inline agent reports; skipping inline review comment."
+              exit 0
+              ;;
+          esac
+
+          files_json="$work_dir/pr-files.json"
+          gh api --paginate --slurp "repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/files" > "$files_json"
+          inline_target="$(
+            FILES_JSON="$files_json" node <<'NODE'
+          const fs = require("node:fs");
+          const pages = JSON.parse(fs.readFileSync(process.env.FILES_JSON, "utf8"));
+          const files = pages.flat();
+
+          // Returns the new-file line number of the first added line in the
+          // file's patch, or null when the patch is missing or adds nothing.
+          function firstAddedLine(file) {
+            if (typeof file.patch !== "string") return null;
+            let newLine = null;
+            for (const line of file.patch.split("\n")) {
+              const hunk = /^@@ -\d+(?:,\d+)? \+(\d+)(?:,\d+)? @@/.exec(line);
+              if (hunk) {
+                newLine = Number(hunk[1]);
+                continue;
+              }
+              // Ignore anything before the first hunk header.
+              if (newLine == null) continue;
+              // "\ No newline at end of file" is diff metadata, not a file
+              // line; counting it would shift the anchor off the diff.
+              if (line.startsWith("\\")) continue;
+              // Inside a hunk every "+" line is an added line, including
+              // content that itself begins with "++".
+              if (line.startsWith("+")) return newLine;
+              // Context lines advance the new-file counter; removed lines do not.
+              if (!line.startsWith("-")) newLine += 1;
+            }
+            return null;
+          }
+
+          for (const file of files) {
+            if (!file || file.status === "removed") continue;
+            const line = firstAddedLine(file);
+            if (line != null) {
+              process.stdout.write(JSON.stringify({ path: file.filename, line }));
+              process.exit(0);
+            }
+          }
+          NODE
+          )"
+          if [ -z "$inline_target" ]; then
+            echo "No added diff line was available for an inline agent report; skipping inline review comment."
+            exit 0
+          fi
+
+          inline_path="$(node -e 'const target = JSON.parse(process.argv[1]); process.stdout.write(target.path)' "$inline_target")"
+          inline_line="$(node -e 'const target = JSON.parse(process.argv[1]); process.stdout.write(String(target.line))' "$inline_target")"
+          inline_marker="<!-- agent-pr-explore-inline:${PR_NUMBER} -->"
+          inline_body_file="$work_dir/inline-body.md"
+          {
+            cat "$report"
+            echo
+            echo "$inline_marker"
+          } > "$inline_body_file"
+          inline_body="$(cat "$inline_body_file")"
+
+          # Newest bot review comment carrying the inline marker, if any.
+          inline_comment_id="$(
+            gh api "repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/comments" --paginate \
+              --jq ".[] | select(.user.type == \"Bot\" and (.body | contains(\"$inline_marker\"))) | .id" \
+              | tail -n 1
+          )"
+          if [ -n "$inline_comment_id" ]; then
+            gh api "repos/$GITHUB_REPOSITORY/pulls/comments/$inline_comment_id" -X PATCH -f body="$inline_body" --silent
+          else
+            gh api "repos/$GITHUB_REPOSITORY/pulls/$PR_NUMBER/comments" \
+              -f body="$inline_body" \
+              -f commit_id="$HEAD_SHA" \
+              -f path="$inline_path" \
+              -F line="$inline_line" \
+              -f side=RIGHT \
+              --silent
+          fi
--- a/specs/change/20260522-pr-explore-agent/spec.md
+++ b/specs/change/20260522-pr-explore-agent/spec.md
@ -0,0 +1,338 @@
+---
+id: 20260522-pr-explore-agent
+name: PR Explore Agent - Advisory Web E2E
+status: designed
+created: '2026-05-22'
+---
+
+## Overview
+
+### Problem Statement
+
+PR throughput is outpacing the maintainer pool's review bandwidth,
+especially for the half of review that asks "does this PR's claimed
+browser behavior actually work in a running app?"
+
+Existing deterministic E2E/visual checks cover predefined scenarios.
+They do not read a PR body, infer the riskiest changed behavior, and
+probe that behavior in a running app. This proposal adds an advisory
+agent lane for that manual reviewer task.
+
+### Goal
+
+Add a per-PR advisory, manually-approved agent that:
+
+- reads the PR body and diff;
+- boots the PR's `apps/web` runtime inside Docker;
+- uses host-side `expect-cli` to explore a small number of
+  diff-implied browser cases;
+- captures Playwright trace/video/screenshot artifacts;
+- posts a reviewer-ready PR report with trace, verdict, concrete
+  evidence, and E2E coverage suggestions.
+
+The agent does not gate merge, replace deterministic E2E, or replace
+the visual-regression workflows.
+
+## Scope
+
+P1 is intentionally web-only.
+
+In scope:
+
+- PRs touching `apps/web/**`.
+- Root workspace inputs that can affect the web runtime:
+  `package.json`, `pnpm-lock.yaml`, `pnpm-workspace.yaml`.
+- The sandbox workflow/script themselves:
+  `.github/workflows/agent-pr-explore-sandbox.yml` and
+  `.github/scripts/agent-pr-explore-sandbox.sh`.
+- Manual workflow dispatch for a specific PR number.
+- Advisory output only.
+
+Out of scope for P1:
+
+- `apps/landing-page/**` and its content sources. The landing page is
+  a separate Astro runtime and must not be reported as verified by the
+  `apps/web` sandbox. A follow-up should add a separate landing-page
+  boot path or a two-pass surface router.
+- `apps/daemon/src/**`, `packages/contracts/**`, and `od` CLI
+  verification. The browser explorer cannot prove CLI/API contract
+  behavior.
+- The older `gh-aw` workflow path and STEP marker extractor. This PR
+  deploys the self-hosted Docker sandbox path only.
+- Auto-fix or patch-suggesting behavior.
+- Merge-blocking enforcement.
+
+## Security Model
+
+The workflow uses `pull_request_target` only as a trusted orchestrator.
+The workflow file and shell runner come from the protected base branch;
+the PR head is fetched by exact commit SHA inside Docker.
+
+The Docker sandbox receives:
+
+- no repo/org secrets;
+- no model credentials;
+- no host `$HOME`;
+- no `.ssh`, `.config`, `.codex`, or Claude/Codex credential files;
+- no Docker socket.
+
+Host-side `expect-cli` may use the operator's model/OAuth credentials,
+but it receives only PR metadata, diff/body context, and the sandboxed
+localhost URL. It must not run arbitrary host shell commands or expose
+host files to untrusted PR code.
+
+The workflow is environment-gated through GitHub's native
+`agent-pr-explore` environment. Because the job declares this
+environment, every run waits for a configured required reviewer to
+click Approve in the PR checks UI before any step, including the
+expensive/sensitive sandbox run, starts on the self-hosted runner.
+
+## Runtime
+
+### Workflow
+
+`.github/workflows/agent-pr-explore-sandbox.yml` is the trusted
+orchestrator.
+
+It:
+
+1. Triggers on matching `pull_request_target` events or manual
+   `workflow_dispatch`.
+2. Checks out trusted base scripts only.
+3. Resolves PR number, author, head SHA, head repo, and base SHA.
+4. Runs `.github/scripts/agent-pr-explore-sandbox.sh` on a
+   self-hosted runner labeled `agent-pr-explore`.
+5. Uploads artifacts.
+6. Posts or updates one sticky PR comment.
+7. For Looper-authored/managed users (`nettee`, `mrcfps`,
+   `alchemistklk`, `Siri-Ray`), also posts or updates one inline review
+   comment anchored to the first added line in the current diff.
+
+### Sandbox Runner
+
+`.github/scripts/agent-pr-explore-sandbox.sh`:
+
+1. Validates PR metadata and required host tools.
+2. Builds PR context from GitHub API/diff data.
+3. Selects a small fixture when the diff maps to known web UI state.
+4. Starts a Docker container from `node:24-bookworm`.
+5. Fetches the PR head SHA inside Docker.
+6. Installs dependencies using a host-mounted pnpm store cache.
+7. Builds daemon/tools-dev, then boots:
+
+   ```bash
+   pnpm tools-dev run web \
+     --namespace "agent-pr-<number>-<sha8>" \
+     --daemon-port 17456 \
+     --web-port 17573
+   ```
+
+8. Publishes the container web proxy to a host localhost port.
+9. Runs host-side `expect-cli` against that URL.
+10. Records smoke Playwright artifacts and writes the final report.
+
+The workflow concurrency key is PR-number scoped with
+`cancel-in-progress: true`, so a new push cancels older pending/running
+exploration for the same PR.
+
+## Deterministic Verifiers
+
+Some changes are build/deploy behavior rather than browser interaction.
+P1 includes a deterministic verifier for Vercel/static-export changes:
+
+- `vercel.json`
+- `apps/web/next.config.ts`
+- `apps/web/tests/runtime/app-route-export.test.ts`
+
+For those PRs, when no browser-observable files are touched, the runner
+skips browser exploration and executes this inside the Docker checkout:
+
+```bash
+rm -rf apps/web/out apps/web/.next
+OD_WEB_OUTPUT_MODE=server sh -c 'OD_WEB_OUTPUT_MODE= pnpm --filter @open-design/web build && test -d apps/web/out'
+test -f apps/web/out/index.html
+```
+
+The deterministic result becomes the advisory verdict. The runner still
+boots the app and captures smoke artifacts for reviewer debugging.
+
+## Report Contract
+
+The canonical reviewer-facing artifact is
+`agent-pr-exploration-report.md`.
+
+It always has this shape:
+
+```markdown
+## 🤖 Agent PR Exploration Report
+
+### 🎬 Trace
+
+[Open Playwright trace](...)
+
+### ✅ Verdict: Pass
+
+...
+
+### 🧪 What Was Verified / Cases Tested
+
+...
+
+### 🔍 Concrete Evidence
+
+...
+
+### 🧱 E2E Coverage to Sediment
+
+...
+```
+
+The trace section is first. The report must describe what actually ran;
+it must not use "dry run" wording for real workflow output.
+
+For normal browser exploration, the prompt asks the agent to return only
+the markdown fragment below the trace section. The runner prepends the
+real trace link after artifact upload. For deterministic verifier paths,
+the shell script writes the same report structure directly.
+
+The "E2E Coverage to Sediment" section is required. It should state
+which deterministic test, fixture, mock, or CI smoke should be added if
+the exploratory run found a repeatable behavior worth preserving.
+
+## Artifacts
+
+Artifacts are written under
+`$RUNNER_TEMP/agent-pr-explore-sandbox/artifacts/`.
+
+Important files:
+
+- `agent-pr-exploration-report.md`
+- `expect.log`
+- `expect-exit-code.txt`
+- `deterministic-verifier.log` when selected
+- `playwright-smoke-trace.zip`
+- `playwright-smoke-session.webm`
+- `playwright-initial.png`
+- `playwright-final.png`
+- `playwright-trace-viewer.md`
+- `playwright-trace-viewer.txt`
+- `playwright-recording-summary.json`
+- `docker.log`
+- `sandbox.log`
+
+The Playwright recording is a post-run smoke/debug artifact, not the
+source of truth for the verdict. It overlays a reviewer HUD identifying
+the fixture/verifier being replayed.
+
+The recorder waits for a visible document plus a short UI-settle delay.
+It intentionally does not use Playwright `networkidle`, because Open
+Design keeps background connections active and `networkidle` creates
+misleading trace timeout steps.
+
+## R2 Trace Upload
+
+When `OD_TRACE_R2_UPLOAD=1`, the host runner uploads selected artifacts
+to Cloudflare R2 using these environment values:
+
+- `R2_ACCOUNT_ID`
+- `R2_ACCESS_KEY_ID`
+- `R2_SECRET_ACCESS_KEY`
+- `R2_BUCKET`
+- `R2_PUBLIC_ORIGIN`
+
+R2 credentials stay on the host and are never passed into Docker.
+
+The default object prefix is:
+
+```text
+agent-pr-explore/pr-<number>/<head-sha>/
+```
+
+The R2 bucket's CORS policy must allow browser fetches from the origin
+`https://trace.playwright.dev` (or from `*`). Otherwise the trace zip
+may be publicly reachable but the hosted trace viewer still cannot
+load it.
+
+## Operator Runbook
+
+### GitHub Setup
+
+Required repository/environment setup:
+
+- `agent-pr-explore` environment exists.
+- Environment required reviewers are configured.
+- A self-hosted runner is available with labels:
+  - `self-hosted`
+  - `agent-pr-explore`
+- Environment secrets:
+  - `R2_ACCOUNT_ID`
+  - `R2_ACCESS_KEY_ID`
+  - `R2_SECRET_ACCESS_KEY`
+  - `R2_BUCKET`
+- Environment variable:
+  - `R2_PUBLIC_ORIGIN`
+
+### Mini / Runner Host
+
+Install host prerequisites:
+
+```bash
+gh auth login
+npm install -g expect-cli@0.1.3
+command -v jq
+docker info
+```
+
+The runner does not use mutable `expect-cli@latest` by default. If
+`expect-cli` is not preinstalled, operators must either install the
+pinned version above or explicitly set `OD_ALLOW_NPX_EXPECT_CLI=1` for
+a one-off smoke run using the pinned npx fallback.
+
+### Manual Local Smoke
+
+Before or after merging, the Mac mini can run the same sandbox path
+manually:
+
+```bash
+git clone git@github.com:nexu-io/open-design.git
+cd open-design
+git fetch origin pull/2604/head:agent-pr-explore-sandbox
+git checkout agent-pr-explore-sandbox
+RUNNER_TEMP=/tmp/od-agent-pr-explore-local \
+  OD_EXPECT_TIMEOUT_SECONDS=1200 \
+  .github/scripts/agent-pr-explore-local.sh <open-pr-number>
+```
+
+## Rollout
+
+P1 is a controlled rollout:
+
+1. Merge the sandbox workflow and scripts.
+2. Register the mini as the dedicated `agent-pr-explore` runner.
+3. Trigger `agent-pr-explore-sandbox` manually for 3-5 open PRs.
+4. Verify runner stability, R2 trace links, sticky comments, and Looper
+   inline comment behavior.
+5. Expand reviewer approval volume only after the reports are useful and
+   no isolation incidents occur.
+
+Landing-page exploration, CLI/API exploration, deeper gh-aw integration,
+and multi-surface routing are separate follow-up work.
+
+## Success Criteria
+
+- Maintainer can approve and run an exploration job manually.
+- The PR code runs only inside Docker.
+- No host credential is mounted or forwarded into Docker.
+- The final report starts with a clickable Playwright trace link when
+  R2 upload is configured.
+- The report includes an explicit E2E coverage-sedimentation section.
+- Looper authors receive an inline review comment thread in addition to
+  the sticky top-level report.
+- Non-Looper authors receive only the sticky top-level report.
+- `expect-cli` non-zero exits preserve artifacts and are advisory, while
+  sandbox/bootstrap failures fail the job.
+
+## References
+
+- GitHub Actions secrets and fork PRs:
+  https://docs.github.com/en/actions/how-tos/security-for-github-actions/security-guides/using-secrets-in-github-actions
+- Playwright Trace Viewer:
+  https://trace.playwright.dev/