#!/usr/bin/env bash
#
# Provision / repair the self-hosted agent-pr-explore runner.
#
# The runner that powers `.github/workflows/agent-pr-explore-sandbox.yml` is a
# self-hosted macOS host. Several pieces of its setup are layered on top of the
# base toolchain and are easy to lose on a rebuild (most importantly the
# codex-acp pin -- see below). This script makes that layer reproducible and
# idempotent: run it on the runner, any time, to bring it back to a working
# state. It never prints or embeds secrets (the only key material it prints is
# the PUBLIC half of the deploy key).
#
# Run as the runner user (e.g. `mashu`) on the runner host:
#   bash provision-agent-pr-explore-runner.sh
#
# Environment overrides: BASE_REPO, CODEX_MODEL, ACP_VERSION, ACP_ARCH_PKG,
# NPM_MIRROR, DEPLOY_KEY, OD_SANDBOX_REPO_MIRROR (see the config section).
#
# ─────────────────────────────────────────────────────────────────────────────
# MANUAL prerequisites this script does NOT do (one-time, need a human/secret):
#
#  A. Base toolchain (user-local, no sudo). Install once into ~/agent-pr-explore-bin
#     + ~/.npm-global if missing: docker CLI, colima, lima, node, npm, gh,
#     expect-cli. Then start colima (give the VM real resources):
#       colima start --runtime docker --cpu 8 --memory 13 --disk 80 \
#         --vm-type=vz --mount-type=virtiofs --network-address=false
#     (Playwright Chromium for the host user is auto-installed by the sandbox
#     script's fallback on first run.)
#
#  B. Codex ChatGPT login (interactive OAuth, cannot be scripted):
#       codex login            # complete ChatGPT auth in a browser
#     On a headless box, log in on a workstation and copy ~/.codex/auth.json here.
#     This script verifies login status and warns if absent.
#
#  C. Register the read-only deploy key (printed by this script) on the repo:
#       gh api repos/${BASE_REPO}/keys -X POST -f title='agent-pr-explore runner' \
#         -f key="$(cat ~/.ssh/od_agent_deploy.pub)" -F read_only=true
#     (Needs repo admin. Required so the host can SSH-fetch PR source — the one
#     git transport GFW does not reset.)
#
#  D. Register + service-install the GitHub Actions runner (token-based):
#       ./config.sh --url https://github.com/${BASE_REPO} --token RUNNER_REGISTRATION_TOKEN \
#         --labels self-hosted,agent-pr-explore --name macmini-agent-pr-explore
#     then install it as a launchd service so it survives reboot.
# ─────────────────────────────────────────────────────────────────────────────

# No `-e` on purpose: every step reports its own failure through warn() and the
# script carries on to the next step instead of aborting half-way.
set -uo pipefail

# --- config (override via env) -----------------------------------------------
BASE_REPO="${BASE_REPO:-nexu-io/open-design}"
CODEX_MODEL="${CODEX_MODEL:-gpt-5.4}"
ACP_VERSION="${ACP_VERSION:-0.15.0}"
ACP_ARCH_PKG="${ACP_ARCH_PKG:-@zed-industries/codex-acp-darwin-arm64}"  # match the runner arch
NPM_MIRROR="${NPM_MIRROR:-https://registry.npmmirror.com}"
DEPLOY_KEY="${DEPLOY_KEY:-$HOME/.ssh/od_agent_deploy}"
MIRROR_DIR="${OD_SANDBOX_REPO_MIRROR:-$HOME/.cache/agent-pr-explore/open-design.git}"
TOOLS_DIR="$HOME/agent-pr-explore-tools"

export PATH="$TOOLS_DIR/lima-2.1.1/bin:$HOME/agent-pr-explore-bin:$HOME/.npm-global/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"

# --- output helpers ----------------------------------------------------------
# ok MSG...   : green check mark + message.
# warn MSG... : yellow warning sign + message.
# step MSG... : bold section header.
ok()   { printf ' \033[32m✔\033[0m %s\n' "$*"; }
warn() { printf ' \033[33m⚠\033[0m %s\n' "$*"; }
step() { printf '\n\033[1m== %s ==\033[0m\n' "$*"; }

# acp_version_of ZED_DIR
# Prints the "version" field of ZED_DIR/codex-acp/package.json, or nothing when
# the file is missing or has no such field.
acp_version_of() {
  sed -n 's/.*"version": *"\([^"]*\)".*/\1/p' "$1/codex-acp/package.json" 2>/dev/null | head -1
}

# --- 0. sanity: base tools present -------------------------------------------
step "0. base toolchain"
missing=0
for c in node npm docker expect-cli; do
  if command -v "$c" >/dev/null 2>&1; then
    ok "$c: $(command -v "$c")"
  else
    warn "$c MISSING — see manual step A"
    missing=1
  fi
done
if [ "$missing" = 1 ]; then
  warn "install the missing base tools first (manual step A), then re-run."
fi

# --- 1. codex CLI ------------------------------------------------------------
step "1. codex CLI"
if command -v codex >/dev/null 2>&1; then
  ok "codex present: $(codex --version 2>&1 | head -1)"
else
  warn "installing @openai/codex via mirror…"
  if npm_config_registry="$NPM_MIRROR" npm install -g @openai/codex >/dev/null 2>&1; then
    ok "codex installed: $(codex --version 2>&1 | head -1)"
  else
    warn "codex install FAILED"
  fi
fi

# --- 2. codex model pin (ChatGPT account rejects -codex / gpt-5 models) -------
step "2. codex model pin -> $CODEX_MODEL"
mkdir -p "$HOME/.codex"
cfg="$HOME/.codex/config.toml"
touch "$cfg"
model_line="model = \"$CODEX_MODEL\""
if grep -q "^model *= *\"$CODEX_MODEL\"" "$cfg"; then
  ok "config.toml already pins model = \"$CODEX_MODEL\""
elif grep -q '^model *=' "$cfg"; then
  # Replace just the model line in place; leave any other settings intact.
  # Success is only reported when the rewrite actually landed.
  if tmp="$(mktemp)" && sed "s|^model *=.*|$model_line|" "$cfg" > "$tmp" && mv "$tmp" "$cfg"; then
    ok "updated model -> \"$CODEX_MODEL\" (other config.toml settings preserved)"
  else
    [ -n "${tmp:-}" ] && rm -f -- "$tmp"
    warn "could not update the model line in $cfg"
  fi
else
  # Insert at the TOP of the file rather than appending: in TOML, a key that
  # follows a [table] header belongs to that table, so an appended line would
  # stop being the top-level `model` as soon as config.toml contains any table.
  if tmp="$(mktemp)" && { printf '%s\n' "$model_line"; cat "$cfg"; } > "$tmp" && mv "$tmp" "$cfg"; then
    ok "added model = \"$CODEX_MODEL\" at the top of config.toml"
  else
    [ -n "${tmp:-}" ] && rm -f -- "$tmp"
    warn "could not add the model line to $cfg"
  fi
fi

# --- 3. codex login (verify only; interactive — manual step B) ---------------
step "3. codex login (ChatGPT OAuth)"
# Run the status command once and inspect both its exit status and its text.
# A bare case-insensitive match on "logged in" would also match
# "Not logged in", so the negative phrase is excluded explicitly.
login_out="$(codex login status 2>&1)"
login_rc=$?
if [ "$login_rc" -eq 0 ] &&
  grep -qi 'logged in' <<<"$login_out" &&
  ! grep -qi 'not logged in' <<<"$login_out"; then
  ok "$(head -1 <<<"$login_out")"
else
  warn "codex NOT logged in — run 'codex login' (manual step B) or copy ~/.codex/auth.json here."
fi

# --- 4. codex-acp pin (CRITICAL: expect-cli bundles 0.10 which is incompatible
#        with ChatGPT-account auth; reinstalling expect-cli reverts this). -----
step "4. codex-acp pin -> $ACP_VERSION (the fragile one)"
zed="$(npm root -g 2>/dev/null)/expect-cli/node_modules/@zed-industries"
arch_dir="$(basename "$ACP_ARCH_PKG")"
cur="$(acp_version_of "$zed")"
if [ "$cur" = "$ACP_VERSION" ]; then
  ok "codex-acp already $ACP_VERSION"
elif [ -d "$zed" ]; then
  warn "codex-acp is '$cur' — pinning to $ACP_VERSION"
  if tmp="$(mktemp -d)"; then
    # Fetch both tarballs (generic package + arch-specific binary package).
    ( cd "$tmp" && npm_config_registry="$NPM_MIRROR" npm pack \
        "@zed-industries/codex-acp@$ACP_VERSION" "$ACP_ARCH_PKG@$ACP_VERSION" >/dev/null 2>&1 )
    for dir in codex-acp "$arch_dir"; do
      # npm pack names scoped tarballs <scope>-<name>-<version>.tgz; match on
      # the "<name>-<version>.tgz" suffix.
      tgz=""
      for candidate in "$tmp"/*"${dir}-${ACP_VERSION}.tgz"; do
        if [ -f "$candidate" ]; then
          tgz="$candidate"
          break
        fi
      done
      if [ -z "$tgz" ]; then
        warn "tarball for $dir not fetched"
        continue
      fi
      mkdir -p "$tmp/x_$dir"
      # Only wipe the installed copy once the replacement is known to be
      # extracted; otherwise a bad tarball would leave an empty package behind.
      if ! tar -xzf "$tgz" -C "$tmp/x_$dir" || [ ! -d "$tmp/x_$dir/package" ]; then
        warn "tarball for $dir could not be extracted — leaving $zed/$dir untouched"
        continue
      fi
      mkdir -p "$zed/$dir"
      if ! { rm -rf "${zed:?}/${dir:?}"/* && cp -a "$tmp/x_$dir/package/." "$zed/$dir/"; }; then
        warn "could not install $dir into $zed/$dir"
      fi
    done
    # Binaries may lose their exec bit on extraction; absence of bin/ is fine.
    chmod +x "$zed/$arch_dir/bin/"* 2>/dev/null || true
    rm -rf -- "$tmp"
  else
    warn "mktemp failed — cannot fetch codex-acp tarballs"
  fi
  now="$(acp_version_of "$zed")"
  if [ "$now" = "$ACP_VERSION" ]; then
    ok "codex-acp now $ACP_VERSION"
  else
    warn "codex-acp pin FAILED (still $now)"
  fi
else
  warn "expect-cli not found at $zed — install expect-cli first (manual step A)."
fi

# --- 5. deploy key (generate if missing; registration is manual step C) ------
step "5. SSH deploy key"
if [ -f "$DEPLOY_KEY" ]; then
  ok "deploy key present: $DEPLOY_KEY"
else
  # On a fresh-rebuild host ~/.ssh often does not exist yet; create it first so
  # ssh-keygen doesn't fail with "No such file or directory".
  mkdir -p "$(dirname "$DEPLOY_KEY")" && chmod 700 "$(dirname "$DEPLOY_KEY")" 2>/dev/null || true
  if ssh-keygen -t ed25519 -N "" -C "agent-pr-explore-deploy@$(hostname)" -f "$DEPLOY_KEY" >/dev/null; then
    ok "generated $DEPLOY_KEY"
  else
    warn "ssh-keygen failed — deploy key NOT created; mirror bootstrap will not work until fixed."
  fi
fi
if [ -f "$DEPLOY_KEY.pub" ]; then
  warn "ensure this pubkey is a READ-ONLY deploy key on $BASE_REPO (manual step C):"
  echo " $(cat "$DEPLOY_KEY.pub")"
fi

# --- 6. base repo git mirror (so per-PR fetches are small deltas) ------------
step "6. git mirror"
# git hands GIT_SSH_COMMAND to a shell, so shell-quote the key path; for an
# ordinary path without special characters this yields the path unchanged.
printf -v deploy_key_q '%q' "$DEPLOY_KEY"
export GIT_SSH_COMMAND="ssh -i $deploy_key_q -o IdentitiesOnly=yes -o StrictHostKeyChecking=accept-new -o ConnectTimeout=20"
if [ -d "$MIRROR_DIR" ] && git --git-dir="$MIRROR_DIR" rev-parse HEAD >/dev/null 2>&1; then
  ok "mirror present ($(du -sh "$MIRROR_DIR" 2>/dev/null | cut -f1)); refreshing main…"
  if git --git-dir="$MIRROR_DIR" fetch --no-tags --depth=1 origin main >/dev/null 2>&1; then
    ok "main refreshed"
  else
    warn "mirror refresh failed (network?)"
  fi
else
  mkdir -p "$(dirname "$MIRROR_DIR")"
  warn "seeding mirror (one-time, ~150MB over SSH)…"
  if git clone --bare --depth=1 --single-branch --branch main \
    "git@github.com:${BASE_REPO}.git" "$MIRROR_DIR" >/dev/null 2>&1; then
    ok "mirror seeded"
  else
    warn "mirror clone FAILED (deploy key registered? network?)"
  fi
fi
mkdir -p "$HOME/.cache/agent-pr-explore/pnpm-store" "$HOME/.cache/agent-pr-explore/reports"
ok "pnpm-store + reports dirs ready"

# --- 7. base image refresh helper + weekly cron ------------------------------
step "7. sandbox image refresh helper + cron"
mkdir -p "$TOOLS_DIR"
# Quoted delimiter: the helper is written verbatim, nothing is expanded here.
cat > "$TOOLS_DIR/refresh-sandbox-image.sh" <<'RSH'
#!/usr/bin/env bash
# Best-effort refresh of the sandbox base image. The sandbox script skips
# `docker pull` when the image is cached (the runner's docker.io access is
# flaky), so this is the decoupled refresh path; it never fails the host.
set -uo pipefail
export PATH="$HOME/agent-pr-explore-tools/lima-2.1.1/bin:$HOME/agent-pr-explore-bin:$HOME/.npm-global/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
image="${OD_SANDBOX_IMAGE:-node:24-bookworm}"
ts() { date "+%Y-%m-%dT%H:%M:%S%z"; }
echo "[$(ts)] refresh start: $image"
colima status >/dev/null 2>&1 || { echo "[$(ts)] colima down; skip"; exit 0; }
before="$(docker image inspect --format '{{.Id}}' "$image" 2>/dev/null || echo none)"
if docker pull "$image"; then
  after="$(docker image inspect --format '{{.Id}}' "$image" 2>/dev/null || echo none)"
  [ "$before" != "$after" ] && { echo "[$(ts)] refreshed $before -> $after"; docker image prune -f >/dev/null 2>&1 || true; } || echo "[$(ts)] up to date"
else
  echo "[$(ts)] pull failed (registry unreachable?); keeping cached $before"
fi
echo "[$(ts)] done"
RSH
chmod +x "$TOOLS_DIR/refresh-sandbox-image.sh"
ok "wrote $TOOLS_DIR/refresh-sandbox-image.sh"

# Sundays at 04:17; output is appended to a log next to the helper.
cron_line="17 4 * * 0 $TOOLS_DIR/refresh-sandbox-image.sh >> $TOOLS_DIR/image-refresh.log 2>&1"
if crontab -l 2>/dev/null | grep -qF "refresh-sandbox-image.sh"; then
  ok "weekly refresh cron already installed"
elif {
  # Keep whatever crontab already exists (none is fine) and add our entry.
  crontab -l 2>/dev/null
  echo "# agent-pr-explore weekly base-image refresh"
  echo "$cron_line"
} | crontab -; then
  ok "installed weekly refresh cron"
else
  warn "could not install weekly refresh cron (crontab rejected the update)"
fi

# --- 8. readiness self-check helper ------------------------------------------
step "8. readiness self-check helper"
# NOTE(review): check 4 in the helper greps the combined stdout+stderr of
# `codex exec` for READY_OK. If codex echoes the prompt in its progress output,
# that would match even without a model reply — confirm against the codex
# version in use.
cat > "$HOME/check-agent-ready.sh" <<'CHK'
#!/usr/bin/env bash
# Quick readiness check: VPN reaches chatgpt backend + Codex responds.
export PATH="$HOME/agent-pr-explore-tools/lima-2.1.1/bin:$HOME/agent-pr-explore-bin:$HOME/.npm-global/bin:/opt/homebrew/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
ok=1
echo "1. chatgpt backend: $(curl -sS -m 15 -o /dev/null -w '%{http_code}' https://chatgpt.com/backend-api/ 2>/dev/null || echo FAIL) (403/200 = reachable)"
echo "2. codex model: $(grep '^model' "$HOME/.codex/config.toml" 2>/dev/null)"
echo "3. codex-acp: $(cat "$(npm root -g)/expect-cli/node_modules/@zed-industries/codex-acp/package.json" 2>/dev/null | sed -n 's/.*"version": *"\([^"]*\)".*/\1/p' | head -1)"
out="$(perl -e 'alarm shift; exec @ARGV' 90 codex exec --skip-git-repo-check 'reply with exactly READY_OK' 2>&1)"
if printf '%s' "$out" | grep -q READY_OK; then echo "4. codex: ✅ responds"; else echo "4. codex: ❌ no response"; ok=0; fi
[ "$ok" = 1 ] && echo "==> READY ✅" || echo "==> NOT READY ❌"
CHK
chmod +x "$HOME/check-agent-ready.sh"
ok "wrote ~/check-agent-ready.sh"

step "done — run ~/check-agent-ready.sh after VPN/login to confirm"