#!/usr/bin/env bash
# Preflight for the agent PR exploration sandbox: abort early when a required
# environment variable is absent, a PR identifier is malformed, or a host tool
# is not installed. Failures are reported as `::error::` lines, exit status 1.
set -euo pipefail

required_env=(
  PR_NUMBER
  HEAD_SHA
  HEAD_REPO
  BASE_REPO
  BASE_SHA
  RUNNER_TEMP
  GH_TOKEN
)

# Indirect expansion looks each variable up by name; unset and empty are both
# treated as missing.
for env_name in "${required_env[@]}"; do
  [[ -n "${!env_name:-}" ]] || {
    echo "::error::$env_name is required"
    exit 1
  }
done

# PR number must be purely numeric.
[[ "$PR_NUMBER" =~ ^[0-9]+$ ]] || {
  echo "::error::Invalid PR_NUMBER: $PR_NUMBER"
  exit 1
}

# Both SHAs must be full 40-character lowercase hex object names.
for sha in "$HEAD_SHA" "$BASE_SHA"; do
  [[ "$sha" =~ ^[0-9a-f]{40}$ ]] || {
    echo "::error::HEAD_SHA and BASE_SHA must be full commit SHAs"
    exit 1
  }
done

# Repositories must at least contain a slash (owner/name).
if ! [[ "$HEAD_REPO" == */* && "$BASE_REPO" == */* ]]; then
  echo "::error::HEAD_REPO and BASE_REPO must be owner/name"
  exit 1
fi

# Host tools the rest of the script shells out to.
for tool in docker gh; do
  command -v "$tool" >/dev/null 2>&1 || {
    echo "::error::$tool is required on the agent-pr-explore runner"
    exit 1
  }
done

# Per-job scratch area; everything this run produces lives under $artifacts.
root="$RUNNER_TEMP/agent-pr-explore-sandbox"
artifacts="$root/artifacts"

# Persist the pnpm store outside RUNNER_TEMP (which the Actions runner wipes
# per job) so dependencies are reused across runs instead of being fully
# re-downloaded every time -- the self-hosted runner's network to the npm
# registry is as unreliable as its docker.io access. Content-addressed, so
# sharing across PRs is safe; override with OD_SANDBOX_PNPM_STORE if needed.
pnpm_store="${OD_SANDBOX_PNPM_STORE:-$HOME/.cache/agent-pr-explore/pnpm-store}"

# Artifact files and directories written during the run, all under $artifacts.
context_file="$artifacts/pr-context.md"
trimmed_context_file="$artifacts/pr-context-trimmed.md"
changed_files_file="$artifacts/changed-files.txt"
fixture_instructions_file="$artifacts/fixture-instructions.md"
agent_report_file="$artifacts/agent-report.md"
playwright_video_dir="$artifacts/playwright-video"

# Start from an empty sandbox root. `:?` aborts instead of expanding to an
# empty path should $root ever be unset or blank.
rm -rf -- "${root:?}"
mkdir -p "$artifacts" "$pnpm_store" "$playwright_video_dir"

# Container identity and resource limits; each OD_SANDBOX_* variable overrides
# the default shown here.
container_name="od-agent-pr-${PR_NUMBER}-${HEAD_SHA:0:12}"
image="${OD_SANDBOX_IMAGE:-node:24-bookworm}"
container_web_port=17573
container_daemon_port=17456
container_proxy_port=17574

# Default host port is 20000 + (PR number mod 20000), i.e. 20000-39999.
# `10#` forces base 10: PR_NUMBER is only validated as digits, and bash
# arithmetic would otherwise read a zero-padded value ("010") as octal or
# reject it outright ("08").
host_web_port="${OD_SANDBOX_WEB_PORT:-$((20000 + (10#$PR_NUMBER % 20000)))}"
base_url="http://127.0.0.1:${host_web_port}"
cpus="${OD_SANDBOX_CPUS:-4}"
memory="${OD_SANDBOX_MEMORY:-8g}"

expect_timeout_seconds="${OD_EXPECT_TIMEOUT_SECONDS:-1200}"
expect_cli_version="${OD_EXPECT_CLI_VERSION:-0.1.3}"

# ACP agent backend expect-cli drives. expect-cli defaults to Claude Code, which
# is not installed on this runner; we use Codex (authenticated via the runner's
# CODEX_HOME). Set OD_EXPECT_AGENT="" to fall back to expect-cli's default.
expect_agent="${OD_EXPECT_AGENT-codex}" expect_agent_args="" [ -n "$expect_agent" ] && expect_agent_args="-a $expect_agent" context_max_bytes="${OD_EXPECT_CONTEXT_MAX_BYTES:-120000}" file_patch_max_chars="${OD_EXPECT_FILE_PATCH_MAX_CHARS:-8000}" ready_timeout_seconds="${OD_SANDBOX_READY_TIMEOUT_SECONDS:-900}" ready_attempts=$((ready_timeout_seconds / 2)) if [ "$ready_attempts" -lt 1 ]; then ready_attempts=1 fi app_surface_touched=false browser_exploration_needed=false agent_fixture="none" deterministic_verifier="none" expect_url="$base_url" is_app_surface_path() { case "$1" in apps/web/*|package.json|pnpm-lock.yaml|pnpm-workspace.yaml|turbo.json|vite.config.*|tsconfig.json) return 0 ;; *) return 1 ;; esac } is_browser_exploration_path() { case "$1" in apps/web/src/*|apps/web/app/*|apps/web/public/*|apps/web/styles/*) return 0 ;; *) return 1 ;; esac } select_deterministic_verifier() { local requested="${OD_DETERMINISTIC_VERIFIER:-auto}" if [ "$requested" != "auto" ]; then echo "$requested" return fi local touches_static_export=false while IFS= read -r changed_path; do case "$changed_path" in vercel.json|apps/web/next.config.ts|apps/web/tests/runtime/app-route-export.test.ts) touches_static_export=true ;; esac done < "$changed_files_file" if [ "$touches_static_export" = "true" ]; then echo "web-static-export" else echo "none" fi } select_agent_fixture() { local requested="${OD_AGENT_FIXTURE:-auto}" if [ "$requested" != "auto" ]; then echo "$requested" return fi if [ "$app_surface_touched" != "true" ]; then echo "none" return fi while IFS= read -r changed_path; do case "$changed_path" in apps/web/src/components/AssistantMessage.tsx|apps/web/src/components/ChatPane.tsx|apps/web/src/components/ProjectView.tsx) echo "assistant-message-plugin-action" return ;; apps/web/src/components/EntryShell.tsx|apps/web/src/App.tsx) echo "home-onboarding" return ;; apps/web/src/components/FileViewer.tsx|apps/web/src/components/FileWorkspace.tsx) echo "project-preview-artifact" return 
;; esac done < "$changed_files_file" echo "none" } write_fixture_instructions() { local fixture="$1" local url="$2" case "$fixture" in assistant-message-plugin-action) cat > "$fixture_instructions_file" < "$fixture_instructions_file" < "$fixture_instructions_file" < "$fixture_instructions_file" < { const created = await request("POST", "/api/projects", { id: projectId, name: `Agent fixture PR ${prNumber}`, skillId: null, designSystemId: null, pendingPrompt: null, metadata: { kind: "prototype", fixture: "assistant-message-plugin-action" }, }); const conversationId = created.conversationId; if (!conversationId) throw new Error("project create response did not include conversationId"); await uploadFile("generated-plugin/open-design.json", JSON.stringify({ "$schema": "https://open-design.ai/schemas/plugin.v1.json", specVersion: "1.0.0", name: `agent-fixture-plugin-${prNumber}`, title: "Agent Fixture Plugin", version: "0.1.0", description: "Fixture plugin used by PR agent exploration.", license: "MIT", tags: ["fixture", "plugin-authoring"], compat: { agentSkills: [{ path: "./SKILL.md" }] }, od: { kind: "skill", taskKind: "new-generation", mode: "prototype", scenario: "plugin-authoring", surface: "web", useCase: { query: "Use the agent fixture plugin." 
}, context: { skills: [{ path: "./SKILL.md" }], atoms: ["file-write"] }, pipeline: { stages: [{ id: "generate", atoms: ["file-write"] }] }, capabilities: ["prompt:inject", "fs:write"], }, }, null, 2)); await uploadFile( "generated-plugin/SKILL.md", "# Agent Fixture Plugin\n\nA small seeded plugin folder for PR agent exploration.\n", ); const now = Date.now(); await request( "PUT", `/api/projects/${encodeURIComponent(projectId)}/conversations/${encodeURIComponent(conversationId)}/messages/u-fixture`, { role: "user", content: "Create a small Open Design plugin.", createdAt: now - 2000, }, ); await request( "PUT", `/api/projects/${encodeURIComponent(projectId)}/conversations/${encodeURIComponent(conversationId)}/messages/a-fixture`, { role: "assistant", content: "The plugin is ready to add to My plugins: generated-plugin/open-design.json", runStatus: "succeeded", producedFiles: [ { name: "generated-plugin/open-design.json", path: "generated-plugin/open-design.json", size: 100, mtime: now - 1000, kind: "code", mime: "application/json", }, { name: "generated-plugin/SKILL.md", path: "generated-plugin/SKILL.md", size: 80, mtime: now - 1000, kind: "text", mime: "text/markdown", }, ], events: [ { kind: "tool_use", id: "write-manifest", name: "Write", input: { path: "generated-plugin/open-design.json" } }, { kind: "tool_result", toolUseId: "write-manifest", content: "ok", isError: false }, ], createdAt: now - 1000, startedAt: now - 1500, endedAt: now - 1000, }, ); const targetUrl = `${baseUrl}/projects/${encodeURIComponent(projectId)}/conversations/${encodeURIComponent(conversationId)}`; const fixture = { id: "assistant-message-plugin-action", projectId, conversationId, targetUrl, }; fs.writeFileSync(path.join(artifacts, "fixture.json"), JSON.stringify(fixture, null, 2)); process.stdout.write(targetUrl); })().catch((error) => { console.error(error instanceof Error ? 
error.stack || error.message : String(error)); process.exit(1); }); NODE )" expect_url="$seed_output" ;; home-onboarding) expect_url="$base_url/onboarding" cat > "$artifacts/fixture.json" < "$artifacts/fixture.json" < false)) { await button.click().catch(() => undefined); } } } async function settlePageForRecording(page) { await page.locator("body").waitFor({ state: "visible", timeout: 10_000 }); await page.evaluate(() => document.fonts?.ready?.then?.(() => undefined)).catch(() => undefined); await page.waitForTimeout(750); } function recordingTitle() { if (deterministicVerifier === "web-static-export") return "VERIFIER - STATIC EXPORT"; if (fixture === "assistant-message-plugin-action") return "SMOKE - ASSISTANT MESSAGE"; if (fixture === "home-onboarding") return "SMOKE - HOME VIEW"; if (fixture === "project-preview-artifact") return "SMOKE - PROJECT PREVIEW"; return "SMOKE - APP REACHABILITY"; } function deterministicExitCode() { try { return fs.readFileSync(path.join(artifacts, "deterministic-verifier-exit-code.txt"), "utf8").trim(); } catch { return ""; } } async function updateRecordingHud(page, subtitle, lines) { await page.evaluate(({ title, subtitle, lines }) => { const id = "__od_agent_recording_hud"; let root = document.getElementById(id); if (!root) { root = document.createElement("aside"); root.id = id; Object.assign(root.style, { position: "fixed", top: "20px", right: "20px", zIndex: "2147483647", width: "360px", maxWidth: "calc(100vw - 40px)", padding: "14px 16px", borderRadius: "10px", background: "rgba(12, 18, 28, 0.94)", color: "#e5edf7", boxShadow: "0 18px 42px rgba(15, 23, 42, 0.32)", fontFamily: "ui-monospace, SFMono-Regular, Menlo, Consolas, monospace", fontSize: "13px", lineHeight: "1.45", pointerEvents: "none", textAlign: "left", }); document.documentElement.appendChild(root); } root.replaceChildren(); const heading = document.createElement("div"); heading.style.fontWeight = "700"; heading.style.letterSpacing = "0"; heading.style.marginBottom 
= "3px"; heading.textContent = title; root.appendChild(heading); const sub = document.createElement("div"); sub.style.color = "#9fb0c7"; sub.style.fontStyle = "italic"; sub.style.marginBottom = "10px"; sub.textContent = subtitle; root.appendChild(sub); const list = document.createElement("div"); for (const line of lines) { const item = document.createElement("div"); item.style.marginTop = "4px"; item.style.color = line.startsWith("DONE") ? "#86efac" : "#93c5fd"; item.textContent = `${line.startsWith("DONE") ? "OK" : "->"} ${line}`; list.appendChild(item); } root.appendChild(list); }, { title: recordingTitle(), subtitle, lines }); await page.waitForTimeout(250).catch(() => undefined); } async function exerciseFixture(page) { await page.goto(targetUrl, { waitUntil: "domcontentloaded", timeout: 45_000 }); await updateRecordingHud(page, "post-run replay for reviewer artifacts", [ `Loaded ${targetUrl}`, `Fixture: ${fixture}`, `Verifier: ${deterministicVerifier}`, ]).catch(() => undefined); await dismissStartupDialogs(page); await updateRecordingHud(page, "stabilize the selected surface", [ "Startup dialogs handled", "Waiting for visible document", "Allowing UI to settle briefly", ]).catch(() => undefined); await settlePageForRecording(page); await page.screenshot({ path: path.join(artifacts, "playwright-initial.png"), fullPage: true }).catch(() => undefined); if (fixture === "assistant-message-plugin-action") { await updateRecordingHud(page, "exercise fixture action", [ "Locate generated-plugin assistant message", "Click install action if visible", "Watch status feedback", ]).catch(() => undefined); await page.getByText("generated-plugin").first().waitFor({ state: "visible", timeout: 20_000 }); const installButton = page.getByTestId("assistant-plugin-install-generated-plugin").first(); if (await installButton.isVisible({ timeout: 5_000 }).catch(() => false)) { await installButton.click(); await page.getByRole("status").filter({ hasText: /Installed|Added|OK|failure/i 
}).first() .waitFor({ state: "visible", timeout: 20_000 }) .catch(() => undefined); } } else if (fixture === "home-onboarding") { await updateRecordingHud(page, "confirm home entry surface", [ "Skip onboarding if present", "Confirm primary actions are visible", ]).catch(() => undefined); await page.getByRole("button").first().waitFor({ state: "visible", timeout: 10_000 }).catch(() => undefined); } else if (deterministicVerifier !== "none") { const status = deterministicExitCode(); await updateRecordingHud(page, "deterministic verifier summary", [ `Verifier selected: ${deterministicVerifier}`, `Verifier exit code: ${status || "missing"}`, "DONE Smoke recording captured after verifier", ]).catch(() => undefined); } else { await updateRecordingHud(page, "reachability-only smoke", [ "No browser fixture selected", "DONE App surface loaded", ]).catch(() => undefined); } await updateRecordingHud(page, "artifact capture complete", [ "DONE Initial screenshot saved", "DONE Final screenshot saved", "DONE Trace and video will be written", ]).catch(() => undefined); await page.screenshot({ path: path.join(artifacts, "playwright-final.png"), fullPage: true }).catch(() => undefined); } function traceViewerUrl() { const traceZipUrl = tracePublicTraceUrl || ( tracePublicBaseUrl ? new URL("playwright-smoke-trace.zip", tracePublicBaseUrl.endsWith("/") ? tracePublicBaseUrl : `${tracePublicBaseUrl}/`).href : "" ); return traceZipUrl ? `https://trace.playwright.dev/?trace=${encodeURIComponent(traceZipUrl)}` : ""; } function writeTraceViewerFiles(viewerUrl) { const tracePath = path.join(artifacts, "playwright-smoke-trace.zip"); const localCommand = `npx playwright show-trace "${tracePath}"`; const markdown = viewerUrl ? 
[ "# Playwright Trace", "", `[Open trace in Playwright Trace Viewer](${viewerUrl})`, "", "If the hosted artifact URL expires or requires authentication, use the local command instead:", "", "```bash", localCommand, "```", "", ].join("\n") : [ "# Playwright Trace", "", "No public trace URL was configured for this run, so trace.playwright.dev cannot fetch the zip directly.", "", "Open it locally with:", "", "```bash", localCommand, "```", "", "To generate a one-click trace link in future runs, upload `playwright-smoke-trace.zip` somewhere browser-readable and set `OD_TRACE_PUBLIC_BASE_URL` to that artifact directory, or set `OD_TRACE_PUBLIC_TRACE_URL` to the zip URL.", "", ].join("\n"); fs.writeFileSync(path.join(artifacts, "playwright-trace-viewer.md"), markdown); fs.writeFileSync(path.join(artifacts, "playwright-trace-viewer.txt"), viewerUrl || localCommand); } (async () => { const { chromium } = loadPlaywright(); ensurePlaywrightBrowserCache(); fs.mkdirSync(videoDir, { recursive: true }); const browser = await chromium.launch({ headless: true }); const context = await browser.newContext({ recordVideo: { dir: videoDir, size: { width: 1280, height: 800 } }, viewport: { width: 1280, height: 800 }, }); await context.tracing.start({ screenshots: true, snapshots: true, sources: false }); const page = await context.newPage(); const viewerUrl = traceViewerUrl(); const summary = { fixture, targetUrl, kind: "post-run-smoke-recording", hud: true, ok: false, video: null, trace: "playwright-smoke-trace.zip", traceViewerUrl: viewerUrl || null, }; try { await exerciseFixture(page); summary.ok = true; } finally { await context.tracing.stop({ path: path.join(artifacts, "playwright-smoke-trace.zip") }).catch(() => undefined); await context.close(); await browser.close(); } const videos = fs.readdirSync(videoDir).filter((name) => name.endsWith(".webm")); if (videos.length > 0) { const source = path.join(videoDir, videos[0]); fs.copyFileSync(source, path.join(artifacts, 
"playwright-smoke-session.webm")); summary.video = "playwright-smoke-session.webm"; } writeTraceViewerFiles(viewerUrl); fs.writeFileSync(path.join(artifacts, "playwright-recording-summary.json"), JSON.stringify(summary, null, 2)); })().catch((error) => { fs.writeFileSync( path.join(artifacts, "playwright-recording-error.log"), error instanceof Error ? `${error.stack || error.message}\n` : `${String(error)}\n`, ); process.exit(0); }); NODE } publish_trace_artifacts_to_r2() { if [ "${OD_TRACE_R2_UPLOAD:-0}" != "1" ]; then return 0 fi ARTIFACTS="$artifacts" \ PR_NUMBER="$PR_NUMBER" \ HEAD_SHA="$HEAD_SHA" \ R2_PREFIX="${OD_TRACE_R2_PREFIX:-}" \ R2_BUCKET="${R2_BUCKET:-${CLOUDFLARE_R2_RELEASES_BUCKET:-}}" \ R2_PUBLIC_ORIGIN="${R2_PUBLIC_ORIGIN:-${CLOUDFLARE_R2_RELEASES_PUBLIC_ORIGIN:-}}" \ R2_ACCESS_KEY_ID="${R2_ACCESS_KEY_ID:-${CLOUDFLARE_R2_RELEASES_AK:-}}" \ R2_SECRET_ACCESS_KEY="${R2_SECRET_ACCESS_KEY:-${CLOUDFLARE_R2_RELEASES_SK:-}}" \ R2_ENDPOINT="${R2_ENDPOINT:-${CLOUDFLARE_R2_RELEASES_URL:-}}" \ R2_ACCOUNT_ID="${R2_ACCOUNT_ID:-}" \ node <<'NODE' const crypto = require("node:crypto"); const fs = require("node:fs"); const path = require("node:path"); const artifacts = process.env.ARTIFACTS; const prNumber = process.env.PR_NUMBER; const headSha = process.env.HEAD_SHA; const bucket = process.env.R2_BUCKET; const publicOrigin = (process.env.R2_PUBLIC_ORIGIN || "").replace(/\/+$/, ""); const accessKeyId = process.env.R2_ACCESS_KEY_ID; const secretAccessKey = process.env.R2_SECRET_ACCESS_KEY; const endpoint = (process.env.R2_ENDPOINT || endpointFromAccountId(process.env.R2_ACCOUNT_ID) || "").replace(/\/+$/, ""); const prefix = (process.env.R2_PREFIX || `agent-pr-explore/pr-${prNumber}/${headSha}`).replace(/^\/+|\/+$/g, ""); function endpointFromAccountId(accountId) { return accountId ? 
`https://${accountId}.r2.cloudflarestorage.com` : ""; } function requireConfig() { const missing = []; for (const [name, value] of Object.entries({ R2_BUCKET: bucket, R2_PUBLIC_ORIGIN: publicOrigin, R2_ACCESS_KEY_ID: accessKeyId, R2_SECRET_ACCESS_KEY: secretAccessKey, R2_ENDPOINT: endpoint })) { if (!value) missing.push(name); } if (missing.length > 0) { throw new Error(`Missing R2 config for trace upload: ${missing.join(", ")}`); } } function hmac(key, value) { return crypto.createHmac("sha256", key).update(value).digest(); } function sha256Hex(value) { return crypto.createHash("sha256").update(value).digest("hex"); } function encodeKey(key) { return key.split("/").map(encodeURIComponent).join("/"); } function publicUrl(key) { return `${publicOrigin}/${encodeKey(key)}`; } function traceViewerUrl(traceUrl) { return `https://trace.playwright.dev/?trace=${encodeURIComponent(traceUrl)}`; } function writeTraceViewerFiles(viewerUrl, traceUrl) { const tracePath = path.join(artifacts, "playwright-smoke-trace.zip"); const localCommand = `npx playwright show-trace "${tracePath}"`; const markdown = [ "# Playwright Trace", "", `[Open trace in Playwright Trace Viewer](${viewerUrl})`, "", `Trace zip: ${traceUrl}`, "", "If the hosted artifact URL expires or requires authentication, use the local command instead:", "", "```bash", localCommand, "```", "", ].join("\n"); fs.writeFileSync(path.join(artifacts, "playwright-trace-viewer.md"), markdown); fs.writeFileSync(path.join(artifacts, "playwright-trace-viewer.txt"), `${viewerUrl}\n`); } async function putObject(filePath, key, contentType, cacheControl) { const body = fs.readFileSync(filePath); const payloadHash = sha256Hex(body); const now = new Date(); const amzDate = now.toISOString().replace(/[:-]|\.\d{3}/g, ""); const dateStamp = amzDate.slice(0, 8); const region = "auto"; const service = "s3"; const target = new URL(`${endpoint}/${encodeURIComponent(bucket)}/${encodeKey(key)}`); const headers = { "cache-control": 
cacheControl, "content-type": contentType, host: target.host, "x-amz-content-sha256": payloadHash, "x-amz-date": amzDate, }; const signedHeaderNames = Object.keys(headers).sort(); const canonicalHeaders = signedHeaderNames.map((name) => `${name}:${headers[name]}\n`).join(""); const canonicalRequest = [ "PUT", target.pathname, "", canonicalHeaders, signedHeaderNames.join(";"), payloadHash, ].join("\n"); const credentialScope = `${dateStamp}/${region}/${service}/aws4_request`; const stringToSign = [ "AWS4-HMAC-SHA256", amzDate, credentialScope, sha256Hex(canonicalRequest), ].join("\n"); const signingKey = hmac(hmac(hmac(hmac(`AWS4${secretAccessKey}`, dateStamp), region), service), "aws4_request"); const signature = crypto.createHmac("sha256", signingKey).update(stringToSign).digest("hex"); const authorization = `AWS4-HMAC-SHA256 Credential=${accessKeyId}/${credentialScope}, SignedHeaders=${signedHeaderNames.join(";")}, Signature=${signature}`; const response = await fetch(target, { method: "PUT", headers: { ...headers, authorization }, body, }); if (!response.ok) { throw new Error(`R2 PUT ${key} failed with HTTP ${response.status}: ${(await response.text()).slice(0, 500)}`); } } (async () => { requireConfig(); const files = [ ["playwright-smoke-trace.zip", "application/zip", "public, max-age=604800"], ["playwright-smoke-session.webm", "video/webm", "public, max-age=604800"], ["playwright-initial.png", "image/png", "public, max-age=604800"], ["playwright-final.png", "image/png", "public, max-age=604800"], ["expect.log", "text/plain; charset=utf-8", "public, max-age=604800"], ]; const uploaded = {}; for (const [name, contentType, cacheControl] of files) { const filePath = path.join(artifacts, name); if (!fs.existsSync(filePath)) continue; const key = `${prefix}/${name}`; await putObject(filePath, key, contentType, cacheControl); uploaded[name] = { key, url: publicUrl(key) }; } if (!uploaded["playwright-smoke-trace.zip"]) { throw new Error("playwright-smoke-trace.zip 
was not found; cannot create trace viewer URL"); } const viewerUrl = traceViewerUrl(uploaded["playwright-smoke-trace.zip"].url); writeTraceViewerFiles(viewerUrl, uploaded["playwright-smoke-trace.zip"].url); const summaryPath = path.join(artifacts, "playwright-recording-summary.json"); const summary = fs.existsSync(summaryPath) ? JSON.parse(fs.readFileSync(summaryPath, "utf8")) : {}; summary.traceViewerUrl = viewerUrl; summary.r2 = { prefix, uploaded }; fs.writeFileSync(summaryPath, `${JSON.stringify(summary, null, 2)}\n`); fs.writeFileSync(path.join(artifacts, "r2-upload-summary.json"), `${JSON.stringify({ prefix, uploaded, traceViewerUrl: viewerUrl }, null, 2)}\n`); })().catch((error) => { fs.writeFileSync( path.join(artifacts, "r2-upload-error.log"), error instanceof Error ? `${error.stack || error.message}\n` : `${String(error)}\n`, ); process.exit(0); }); NODE if [ -f "$artifacts/r2-upload-error.log" ]; then echo "::warning::R2 trace upload failed; see $artifacts/r2-upload-error.log" elif [ -f "$artifacts/r2-upload-summary.json" ]; then echo "Published Playwright trace artifacts to R2" fi } write_agent_report_artifact() { local trace_text="" if [ -f "$artifacts/playwright-trace-viewer.txt" ]; then trace_text="$(head -n 1 "$artifacts/playwright-trace-viewer.txt" || true)" fi { echo "## ๐Ÿค– Agent PR Exploration Report" echo echo "### ๐ŸŽฌ Trace" echo if [[ "$trace_text" == http* ]]; then echo "[Open Playwright trace]($trace_text)" elif [ -n "$trace_text" ]; then echo "No browser-readable trace URL was configured for this run." echo echo "Open the trace locally with:" echo echo '```bash' echo "$trace_text" echo '```' else echo "Trace artifact was not generated for this run." fi echo if [ -s "$agent_report_file" ]; then # The agent wrote its clean Markdown report to this file directly. cat "$agent_report_file" else echo "### โš ๏ธ Verdict: Inconclusive" echo echo "The agent did not write a final report (it may have hit the run" echo "timeout before finishing). 
See the run log artifact / \`expect.log\` for details." fi } > "$artifacts/agent-pr-exploration-report.md" } cleanup() { docker rm -f "$container_name" >/dev/null 2>&1 || true } trap cleanup EXIT cleanup cat > "$artifacts/manifest.json" < "$changed_files_file" while IFS= read -r changed_path; do if is_app_surface_path "$changed_path"; then app_surface_touched=true fi if is_browser_exploration_path "$changed_path"; then browser_exploration_needed=true fi done < "$changed_files_file" echo "$app_surface_touched" > "$artifacts/app-surface-touched.txt" echo "$browser_exploration_needed" > "$artifacts/browser-exploration-needed.txt" agent_fixture="$(select_agent_fixture)" echo "$agent_fixture" > "$artifacts/agent-fixture.txt" deterministic_verifier="$(select_deterministic_verifier)" echo "$deterministic_verifier" > "$artifacts/deterministic-verifier.txt" { echo "# PR #$PR_NUMBER context" echo echo "Base repo: $BASE_REPO" echo "Head repo: $HEAD_REPO" echo "Base SHA: $BASE_SHA" echo "Head SHA: $HEAD_SHA" echo echo "## PR body" gh pr view "$PR_NUMBER" --repo "$BASE_REPO" --json title,body --jq '"# " + .title + "\n\n" + (.body // "")' echo echo "## Changed files" cat "$changed_files_file" echo echo "## Text patches" gh api --paginate "repos/${BASE_REPO}/pulls/${PR_NUMBER}/files" --jq \ '.[] | "### " + .filename + " (" + .status + ", +" + (.additions | tostring) + "/-" + (.deletions | tostring) + ")\n```diff\n" + (if .patch == null then "[binary or generated patch omitted]" else (.patch[0:'"$file_patch_max_chars"'] + (if (.patch | length) > '"$file_patch_max_chars"' then "\n[patch truncated]" else "" end)) end) + "\n```\n"' } > "$context_file" head -c "$context_max_bytes" "$context_file" > "$trimmed_context_file" if [ "$(wc -c < "$context_file" | tr -d " ")" -gt "$context_max_bytes" ]; then { echo echo echo "[context truncated at ${context_max_bytes} bytes for expect prompt]" } >> "$trimmed_context_file" fi # Use the locally cached image when present. 
The self-hosted runner's # network to docker.io is unreliable, and the base image is referenced by # a tag we treat as stable for the duration of a run, so don't pay for (or # fail on) a pull when the image is already available. Only pull when it is # missing; refreshing the cached image is a separate, explicit operation. if docker image inspect "$image" >/dev/null 2>&1; then echo "Using locally cached image $image (skipping pull)." else docker pull "$image" fi # --- Fetch PR source on the trusted host; hand it to the container read-only --- # The runner's bandwidth to github.com is throttled across every transport # (HTTPS / SSH / codeload / API all ~30-90 KB/s), so a from-scratch fetch of this # ~200MB repo is impractical per run. Keep a persistent local mirror and fetch # only the PR's delta into it over SSH (the one transport that is not RST'd). The # PR head is taken from the BASE repo's refs/pull//head so fork PRs work too, # and the read-only deploy key stays on the trusted host -- it is never exposed to # the untrusted PR code, which only ever sees the checked-out files inside Docker. mirror="${OD_SANDBOX_REPO_MIRROR:-$HOME/.cache/agent-pr-explore/open-design.git}" git_ssh_key="${OD_SANDBOX_GIT_SSH_KEY:-$HOME/.ssh/od_agent_deploy}" pr_src="$root/pr-src" export GIT_SSH_COMMAND="ssh -i $git_ssh_key -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new -o ConnectTimeout=20" if [ ! -d "$mirror" ]; then echo "::error::Repo mirror $mirror is missing on the runner. 
Seed it once with:" echo "::error:: git clone --bare --depth=1 --single-branch --branch main git@github.com:${BASE_REPO}.git $mirror" exit 1 fi pr_fetched= for fetch_attempt in 1 2 3; do if git --git-dir="$mirror" fetch --no-tags --depth=1 origin \ "+refs/pull/${PR_NUMBER}/head:refs/pull/${PR_NUMBER}/head"; then pr_fetched=1 break fi echo "PR source fetch failed; retrying (${fetch_attempt}/3)" sleep $((fetch_attempt * 5)) done [ -n "$pr_fetched" ] || { echo "::error::Failed to fetch PR #${PR_NUMBER} source over SSH after 3 attempts."; exit 1; } fetched_sha="$(git --git-dir="$mirror" rev-parse "refs/pull/${PR_NUMBER}/head")" if [ "$fetched_sha" != "$HEAD_SHA" ]; then echo "::error::Fetched PR head $fetched_sha does not match expected $HEAD_SHA" exit 1 fi rm -rf "$pr_src" mkdir -p "$pr_src" git -C "$pr_src" init -q git -C "$pr_src" fetch --no-tags --depth=1 "$mirror" "$HEAD_SHA" git -C "$pr_src" checkout -q --detach FETCH_HEAD unset GIT_SSH_COMMAND docker run -d \ --name "$container_name" \ --cpus "$cpus" \ --memory "$memory" \ --pids-limit 1024 \ --cap-drop ALL \ --security-opt no-new-privileges \ --tmpfs /tmp:rw,nosuid,nodev,size=2g \ --publish "127.0.0.1:${host_web_port}:${container_proxy_port}" \ --mount "type=bind,src=$artifacts,dst=/artifacts" \ --mount "type=bind,src=$pnpm_store,dst=/pnpm-store" \ --mount "type=bind,src=$pr_src,dst=/pr-src,readonly" \ --env "PR_NUMBER=$PR_NUMBER" \ --env "HEAD_SHA=$HEAD_SHA" \ --env "HEAD_REPO=$HEAD_REPO" \ --env "BASE_REPO=$BASE_REPO" \ --env "BASE_SHA=$BASE_SHA" \ --env "OD_ALLOWED_ORIGINS=$base_url" \ --env "OD_DETERMINISTIC_VERIFIER=$deterministic_verifier" \ --env "CI=true" \ --env "PLAYWRIGHT_HTML_OPEN=never" \ "$image" \ bash -lc ' set -euo pipefail # PR source was fetched on the trusted host and mounted read-only at # /pr-src; copy it into a writable workdir. The sandbox needs (and has) no # github network access of its own. mkdir -p /work/repo cp -a /pr-src/. 
/work/repo/ cd /work/repo git rev-parse HEAD | tee /artifacts/checked-out-sha.txt test "$(git rev-parse HEAD)" = "${HEAD_SHA}" corepack enable corepack prepare pnpm@10.33.2 --activate pnpm config set store-dir /pnpm-store # The runner direct network to npmjs / nodejs.org / github releases is # throttled or reset by GFW, which stalls package downloads (~20 KB/s) and # breaks native-module installs: node-gyp headers (nodejs.org), and the # better-sqlite3 / electron binaries (github releases). Route everything # through the China npm mirror, which is fast and complete. Integrity is # still verified against the lockfile, so the mirror only changes transport. export npm_config_registry="https://registry.npmmirror.com" export npm_config_disturl="https://npmmirror.com/mirrors/node" export npm_config_electron_mirror="https://npmmirror.com/mirrors/electron/" export npm_config_electron_builder_binaries_mirror="https://npmmirror.com/mirrors/electron-builder-binaries/" export npm_config_better_sqlite3_binary_host_mirror="https://npmmirror.com/mirrors/better-sqlite3" export PLAYWRIGHT_DOWNLOAD_HOST="https://npmmirror.com/mirrors/playwright" { echo "== install ==" pnpm install --frozen-lockfile echo "== prebuild ==" pnpm --filter @open-design/daemon build pnpm --filter @open-design/tools-dev build if [ "${OD_DETERMINISTIC_VERIFIER}" = "web-static-export" ]; then echo "== deterministic verifier: web-static-export ==" set +e ( set -euo pipefail rm -rf apps/web/out apps/web/.next OD_WEB_OUTPUT_MODE=server sh -lc '"'"'OD_WEB_OUTPUT_MODE= pnpm --filter @open-design/web build && test -d apps/web/out'"'"' test -f apps/web/out/index.html ) > /artifacts/deterministic-verifier.log 2>&1 verifier_status=$? 
set -e echo "$verifier_status" > /artifacts/deterministic-verifier-exit-code.txt if [ "$verifier_status" -eq 0 ]; then echo "deterministic verifier passed" else echo "deterministic verifier failed with status $verifier_status" fi fi echo "== boot web ==" pnpm tools-dev run web \ --namespace "agent-pr-${PR_NUMBER}-${HEAD_SHA:0:8}" \ --daemon-port '"$container_daemon_port"' \ --web-port '"$container_web_port"' \ > /artifacts/dev-server.log 2>&1 & echo $! > /artifacts/dev-server.pid for i in $(seq 1 90); do if curl -sf "http://127.0.0.1:'"$container_web_port"'" >/dev/null; then echo "ready" > /artifacts/ready echo "Dev server ready after ${i} attempt(s)" break fi sleep 2 done test -f /artifacts/ready node -e " const net = require(\"node:net\"); const targetPort = Number('"$container_web_port"'); const proxyPort = Number('"$container_proxy_port"'); const server = net.createServer((client) => { const upstream = net.connect(targetPort, \"127.0.0.1\"); client.pipe(upstream); upstream.pipe(client); upstream.on(\"error\", () => client.destroy()); client.on(\"error\", () => upstream.destroy()); }); server.listen(proxyPort, \"0.0.0.0\", () => { console.log(\"Proxy ready at 0.0.0.0:\" + proxyPort + \" -> 127.0.0.1:\" + targetPort); }); " > /artifacts/proxy.log 2>&1 & echo $! 
> /artifacts/proxy.pid tail -f /artifacts/dev-server.log } 2>&1 | tee /artifacts/sandbox.log ' for i in $(seq 1 "$ready_attempts"); do if [ "$(docker inspect -f '{{.State.Running}}' "$container_name" 2>/dev/null || echo false)" != "true" ]; then echo "::error::Sandbox container exited before dev server became reachable" docker logs "$container_name" > "$artifacts/docker.log" 2>&1 || true exit 1 fi if curl -sf "$base_url" >/dev/null; then echo "Sandbox dev server reachable at $base_url" break fi if [ "$i" = "$ready_attempts" ]; then echo "::error::Sandbox dev server did not become reachable at $base_url within ${ready_timeout_seconds}s" docker logs "$container_name" > "$artifacts/docker.log" 2>&1 || true exit 1 fi sleep 2 done seed_agent_fixture "$agent_fixture" if [ "$deterministic_verifier" = "web-static-export" ] && [ "$browser_exploration_needed" != "true" ]; then verifier_status="$(cat "$artifacts/deterministic-verifier-exit-code.txt" 2>/dev/null || echo 1)" if [ "$verifier_status" = "0" ]; then cat > "$agent_report_file" < "$agent_report_file" < "$artifacts/expect-exit-code.txt" record_playwright_artifacts || true publish_trace_artifacts_to_r2 || true write_agent_report_artifact docker logs "$container_name" > "$artifacts/docker.log" 2>&1 || true exit 0 fi if [ "$app_surface_touched" != "true" ]; then cat > "$agent_report_file" < "$artifacts/docker.log" 2>&1 || true exit 0 fi expect_prompt="$(cat </dev/null 2>&1; then expect_command=(expect-cli tui --ci $expect_agent_args --timeout "$((expect_timeout_seconds * 1000))" -u "$expect_url") elif [ "${OD_ALLOW_NPX_EXPECT_CLI:-0}" = "1" ] && command -v npx >/dev/null 2>&1; then expect_command=(npx -y "expect-cli@${expect_cli_version}" tui --ci $expect_agent_args --timeout "$((expect_timeout_seconds * 1000))" -u "$expect_url") else echo "::error::expect-cli is required on the agent-pr-explore runner. Install expect-cli@${expect_cli_version}, or set OD_ALLOW_NPX_EXPECT_CLI=1 to use the pinned npx fallback." 
exit 1 fi if command -v timeout >/dev/null 2>&1; then set +e timeout "$expect_timeout_seconds" "${expect_command[@]}" -m "$expect_prompt" -y 2>&1 | tee "$artifacts/expect.log" expect_status=${PIPESTATUS[0]} set -e else set +e "${expect_command[@]}" -m "$expect_prompt" -y 2>&1 | tee "$artifacts/expect.log" expect_status=${PIPESTATUS[0]} set -e fi echo "$expect_status" > "$artifacts/expect-exit-code.txt" if [ "$expect_status" -ne 0 ]; then echo "::warning::expect-cli exited with status $expect_status; preserving advisory artifacts" fi record_playwright_artifacts || true publish_trace_artifacts_to_r2 || true write_agent_report_artifact docker logs "$container_name" > "$artifacts/docker.log" 2>&1 || true # Persist the report + trace pointer to a stable host dir so dry/validation runs # (skip_comment) can be inspected without downloading the slow, large workflow # artifact. Overwrites per PR; the big trace zip stays on R2 only. report_persist_dir="${OD_SANDBOX_REPORT_DIR:-$HOME/.cache/agent-pr-explore/reports}/pr-${PR_NUMBER}" mkdir -p "$report_persist_dir" 2>/dev/null || true cp -f "$artifacts/agent-pr-exploration-report.md" "$report_persist_dir/report.md" 2>/dev/null || true cp -f "$artifacts/agent-report.md" "$report_persist_dir/agent-report.md" 2>/dev/null || true cp -f "$artifacts/expect.log" "$report_persist_dir/expect.log" 2>/dev/null || true cp -f "$artifacts/playwright-trace-viewer.txt" "$report_persist_dir/trace-url.txt" 2>/dev/null || true echo "Report persisted on runner: $report_persist_dir"