mirror of
https://github.com/nexu-io/open-design.git
synced 2026-05-31 19:04:39 +07:00
ci: clean agent report (write-to-file) + slim artifacts/uploads (#3116)
* ci: clean agent report (write-to-file) + slim artifacts/uploads

  Four related cleanups to the agent PR exploration output:

  1. Clean report. The PR comment / report.md was assembled by dumping the entire verbose expect.log (ACP init logs, "Git failed" warnings, the ~24KB echoed prompt, ANSI codes, progress checklist) under the trace header -- ~28KB of noise. Instead, instruct the agent to write its final Markdown report to a file via its file-write tool, and have the runner read that file directly. Verified: Codex writes a clean report to the given absolute path. Falls back to an inconclusive note if the agent did not finish.

  2. Drop duplicate trace/video. The script copied playwright-smoke-trace.zip -> playwright-trace.zip (a ~28MB legacy duplicate) and the webm likewise, and uploaded both to R2. Keep only the canonical smoke-named artifacts.

  3. Slim the GitHub artifact. The trace zips and videos are already on R2; exclude *.zip / *.webm from the uploaded artifact so it drops from ~56MB to <1MB (report + logs only).

  4. Persist report on the runner. Copy the report / agent-report / expect.log / trace URL to a stable host dir ($HOME/.cache/agent-pr-explore/reports/pr-<n>) so dry runs (skip_comment) can be inspected without downloading the artifact.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

* ci: address review — keep advisory reports + recursive artifact excludes

  Review findings on the report/artifact cleanup:

  1. Regression fix: the non-app-surface and deterministic-verifier branches write their pre-baked advisory report (Inconclusive / Pass / Fail) and never run the agent, so they don't produce agent-report.md. After switching write_agent_report_artifact to read only agent-report.md they fell through to the "agent did not write a final report" fallback, dropping the real advisory (and mis-reporting on .github-only PRs like this one). Fix: those branches now write their advisory directly to $agent_report_file — single source of truth for the report body.

  2. Recursive artifact excludes: the source Playwright recording lives at artifacts/playwright-video/<uuid>.webm; non-recursive !*.webm / !*.zip didn't match the subdirectory. Use **/*.zip and **/*.webm so the slim actually holds.

  3. Drop the now-dangling summary.legacyTrace field (the legacy trace copy is no longer produced), matching the legacyVideo removal.

  Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

---------

Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
ee1eab77c6
commit
bf61a39cb5
2 changed files with 29 additions and 21 deletions
43
.github/scripts/agent-pr-explore-sandbox.sh
vendored
43
.github/scripts/agent-pr-explore-sandbox.sh
vendored
|
|
@ -52,6 +52,7 @@ context_file="$artifacts/pr-context.md"
|
|||
trimmed_context_file="$artifacts/pr-context-trimmed.md"
|
||||
changed_files_file="$artifacts/changed-files.txt"
|
||||
fixture_instructions_file="$artifacts/fixture-instructions.md"
|
||||
agent_report_file="$artifacts/agent-report.md"
|
||||
playwright_video_dir="$artifacts/playwright-video"
|
||||
rm -rf "$root"
|
||||
mkdir -p "$artifacts" "$pnpm_store" "$playwright_video_dir"
|
||||
|
|
@ -674,7 +675,6 @@ function writeTraceViewerFiles(viewerUrl) {
|
|||
ok: false,
|
||||
video: null,
|
||||
trace: "playwright-smoke-trace.zip",
|
||||
legacyTrace: "playwright-trace.zip",
|
||||
traceViewerUrl: viewerUrl || null,
|
||||
};
|
||||
|
||||
|
|
@ -686,19 +686,11 @@ function writeTraceViewerFiles(viewerUrl) {
|
|||
await context.close();
|
||||
await browser.close();
|
||||
}
|
||||
const smokeTrace = path.join(artifacts, "playwright-smoke-trace.zip");
|
||||
if (fs.existsSync(smokeTrace)) {
|
||||
fs.copyFileSync(smokeTrace, path.join(artifacts, "playwright-trace.zip"));
|
||||
}
|
||||
|
||||
const videos = fs.readdirSync(videoDir).filter((name) => name.endsWith(".webm"));
|
||||
if (videos.length > 0) {
|
||||
const source = path.join(videoDir, videos[0]);
|
||||
const stable = path.join(artifacts, "playwright-smoke-session.webm");
|
||||
fs.copyFileSync(source, stable);
|
||||
fs.copyFileSync(source, path.join(artifacts, "playwright-session.webm"));
|
||||
fs.copyFileSync(source, path.join(artifacts, "playwright-smoke-session.webm"));
|
||||
summary.video = "playwright-smoke-session.webm";
|
||||
summary.legacyVideo = "playwright-session.webm";
|
||||
}
|
||||
writeTraceViewerFiles(viewerUrl);
|
||||
fs.writeFileSync(path.join(artifacts, "playwright-recording-summary.json"), JSON.stringify(summary, null, 2));
|
||||
|
|
@ -847,9 +839,7 @@ async function putObject(filePath, key, contentType, cacheControl) {
|
|||
requireConfig();
|
||||
const files = [
|
||||
["playwright-smoke-trace.zip", "application/zip", "public, max-age=604800"],
|
||||
["playwright-trace.zip", "application/zip", "public, max-age=604800"],
|
||||
["playwright-smoke-session.webm", "video/webm", "public, max-age=604800"],
|
||||
["playwright-session.webm", "video/webm", "public, max-age=604800"],
|
||||
["playwright-initial.png", "image/png", "public, max-age=604800"],
|
||||
["playwright-final.png", "image/png", "public, max-age=604800"],
|
||||
["expect.log", "text/plain; charset=utf-8", "public, max-age=604800"],
|
||||
|
|
@ -913,12 +903,14 @@ write_agent_report_artifact() {
|
|||
echo "Trace artifact was not generated for this run."
|
||||
fi
|
||||
echo
|
||||
if [ -f "$artifacts/expect.log" ]; then
|
||||
cat "$artifacts/expect.log"
|
||||
if [ -s "$agent_report_file" ]; then
|
||||
# The agent wrote its clean Markdown report to this file directly.
|
||||
cat "$agent_report_file"
|
||||
else
|
||||
echo "### ⚠️ Verdict: Inconclusive"
|
||||
echo
|
||||
echo "The runner did not produce an exploration report."
|
||||
echo "The agent did not write a final report (it may have hit the run"
|
||||
echo "timeout before finishing). See the run log artifact / \`expect.log\` for details."
|
||||
fi
|
||||
} > "$artifacts/agent-pr-exploration-report.md"
|
||||
}
|
||||
|
|
@ -1181,7 +1173,7 @@ seed_agent_fixture "$agent_fixture"
|
|||
if [ "$deterministic_verifier" = "web-static-export" ] && [ "$browser_exploration_needed" != "true" ]; then
|
||||
verifier_status="$(cat "$artifacts/deterministic-verifier-exit-code.txt" 2>/dev/null || echo 1)"
|
||||
if [ "$verifier_status" = "0" ]; then
|
||||
cat > "$artifacts/expect.log" <<REPORT
|
||||
cat > "$agent_report_file" <<REPORT
|
||||
### ✅ Verdict: Pass
|
||||
|
||||
This PR changes the web deployment/static-export path rather than an interactive user flow. The agent therefore used the deterministic Docker verifier instead of inventing browser interaction cases that would not exercise the changed behavior.
|
||||
|
|
@ -1215,7 +1207,7 @@ Observed result:
|
|||
- A dedicated CI smoke for the Vercel/static-export command would make this regression class easier to catch without requiring agent exploration.
|
||||
REPORT
|
||||
else
|
||||
cat > "$artifacts/expect.log" <<REPORT
|
||||
cat > "$agent_report_file" <<REPORT
|
||||
### ❌ Verdict: Fail
|
||||
|
||||
The deterministic static-export verifier failed. Because this PR changes build/deploy output rather than an interactive browser flow, browser exploration would not be a useful substitute for the failing build-level signal.
|
||||
|
|
@ -1244,7 +1236,7 @@ REPORT
|
|||
fi
|
||||
|
||||
if [ "$app_surface_touched" != "true" ]; then
|
||||
cat > "$artifacts/expect.log" <<REPORT
|
||||
cat > "$agent_report_file" <<REPORT
|
||||
### ⚪ Verdict: Inconclusive
|
||||
|
||||
This PR does not touch a path that the browser explorer can map to app UI/runtime behavior, so the run avoided inventing a broad app audit.
|
||||
|
|
@ -1263,7 +1255,7 @@ This PR does not touch a path that the browser explorer can map to app UI/runtim
|
|||
|
||||
- None from this PR diff. Add deterministic checks when a future PR changes app/runtime behavior.
|
||||
REPORT
|
||||
echo "No app/runtime surface touched; wrote inconclusive advisory report to $artifacts/expect.log"
|
||||
echo "No app/runtime surface touched; wrote inconclusive advisory report to $agent_report_file"
|
||||
record_playwright_artifacts || true
|
||||
publish_trace_artifacts_to_r2 || true
|
||||
write_agent_report_artifact
|
||||
|
|
@ -1295,7 +1287,7 @@ Keep this as a fast exploratory pass:
|
|||
|
||||
CRITICAL -- finish and submit promptly: the runner aborts this turn with NO report if you produce no output for about 3 minutes. Do not plan or attempt more steps than you will actually complete. As soon as you have verified 2-3 cases (or hit a blocker), stop exploring and emit the COMPLETE Markdown report below as your final message in a single turn. Never leave planned steps pending, retry silently, or run "just one more" check once you have enough to write the verdict.
|
||||
|
||||
Return a reviewer-ready Markdown report fragment. Do not include the top-level title or trace section; the runner prepends the real trace link after artifacts are published.
|
||||
Write your final report as a reviewer-ready Markdown fragment to the file ${agent_report_file} using your file-write tool, as your final action. Do not print the report to stdout -- only write the file, then stop. Do not include the top-level title or trace section; the runner prepends the real trace link after artifacts are published.
|
||||
|
||||
Use this structure and keep the writing concrete:
|
||||
|
||||
|
|
@ -1369,3 +1361,14 @@ publish_trace_artifacts_to_r2 || true
|
|||
write_agent_report_artifact
|
||||
|
||||
docker logs "$container_name" > "$artifacts/docker.log" 2>&1 || true
|
||||
|
||||
# Persist the report + trace pointer to a stable host dir so dry/validation runs
|
||||
# (skip_comment) can be inspected without downloading the slow, large workflow
|
||||
# artifact. Overwrites per PR; the big trace zip stays on R2 only.
|
||||
report_persist_dir="${OD_SANDBOX_REPORT_DIR:-$HOME/.cache/agent-pr-explore/reports}/pr-${PR_NUMBER}"
|
||||
mkdir -p "$report_persist_dir" 2>/dev/null || true
|
||||
cp -f "$artifacts/agent-pr-exploration-report.md" "$report_persist_dir/report.md" 2>/dev/null || true
|
||||
cp -f "$artifacts/agent-report.md" "$report_persist_dir/agent-report.md" 2>/dev/null || true
|
||||
cp -f "$artifacts/expect.log" "$report_persist_dir/expect.log" 2>/dev/null || true
|
||||
cp -f "$artifacts/playwright-trace-viewer.txt" "$report_persist_dir/trace-url.txt" 2>/dev/null || true
|
||||
echo "Report persisted on runner: $report_persist_dir"
|
||||
|
|
|
|||
|
|
@ -127,7 +127,12 @@ jobs:
|
|||
uses: actions/upload-artifact@v7
|
||||
with:
|
||||
name: agent-pr-explore-sandbox-${{ steps.pr.outputs.number }}-${{ steps.pr.outputs.head_sha }}
|
||||
path: ${{ runner.temp }}/agent-pr-explore-sandbox/artifacts/
|
||||
# The trace zips (~28MB) and videos are already published to R2; keep
|
||||
# them out of the GitHub artifact so it stays small (report + logs).
|
||||
path: |
|
||||
${{ runner.temp }}/agent-pr-explore-sandbox/artifacts/
|
||||
!${{ runner.temp }}/agent-pr-explore-sandbox/artifacts/**/*.zip
|
||||
!${{ runner.temp }}/agent-pr-explore-sandbox/artifacts/**/*.webm
|
||||
if-no-files-found: warn
|
||||
retention-days: 7
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue