diff --git a/mocks/README.md b/mocks/README.md index d73d5b545..7cb2bb414 100644 --- a/mocks/README.md +++ b/mocks/README.md @@ -35,10 +35,10 @@ bash mocks/scripts/fetch-recordings.sh export PATH="$PWD/mocks/bin:$PATH" # Pick any recording to play back (8-char prefix OK): -export SYNCLO_EXPLORE_MOCK_TRACE=04097377 +export OD_MOCKS_TRACE=04097377 # Speed up replay (skip inter-event sleeps): -export SYNCLO_EXPLORE_MOCK_NO_DELAY=1 +export OD_MOCKS_NO_DELAY=1 # Now anything that spawns opencode/claude/codex gets the recording: echo "any prompt body" | opencode run @@ -154,12 +154,12 @@ Driven by env vars, in priority order: | Env | Behavior | |---|---| -| `SYNCLO_EXPLORE_MOCK_TRACE=` | Always play this trace. 8-char prefix OK. | -| `SYNCLO_EXPLORE_MOCK_BY_PROMPT_HASH=1` + stdin prompt | Deterministic by `sha256(prompt) % len(all)`. Same prompt → same trace. Useful for "stable answer per question" tests. | -| `SYNCLO_EXPLORE_MOCK_POOL=` | Random within the tag pool. Examples: `agent:claude`, `skill:agent-browser`, `outcome:failed`. | -| `SYNCLO_EXPLORE_MOCK_SEED=` | Makes "random" picks reproducible across runs. | -| `SYNCLO_EXPLORE_MOCK_NO_DELAY=1` | Skip inter-event waits. | -| `SYNCLO_EXPLORE_MOCK_RECORDINGS_DIR=` | Override the recordings dir. | +| `OD_MOCKS_TRACE=` | Always play this trace. 8-char prefix OK. | +| `OD_MOCKS_BY_PROMPT_HASH=1` + stdin prompt | Deterministic by `sha256(prompt) % len(all)`. Same prompt → same trace. Useful for "stable answer per question" tests. | +| `OD_MOCKS_POOL=` | Random within the tag pool. Examples: `agent:claude`, `skill:agent-browser`, `outcome:failed`. | +| `OD_MOCKS_SEED=` | Makes "random" picks reproducible across runs. | +| `OD_MOCKS_NO_DELAY=1` | Skip inter-event waits. | +| `OD_MOCKS_RECORDINGS_DIR=` | Override the recordings dir. | If none are set, a uniformly random recording is played each invocation. @@ -243,9 +243,8 @@ User-specific data has been scrubbed from every recording: The anonymizer is idempotent. 
Tool input/output payloads (HTML, code, etc.) are preserved verbatim — they're templated UI without cell-level -PII; if a future audit finds otherwise, add specific scrubs in -`apps/daemon/src/mocks/anonymize.ts` (in the synclo-explore source) and -re-run. +PII; if a future audit finds otherwise, add specific scrubs in the +harvester repo (see "Adding more recordings" below) and re-run. --- @@ -257,28 +256,17 @@ GitHub Action does. This means a stray `mocks/scripts/...` invocation can't corrupt prod data, and every new recording lands in a PR diff for review first. -### Step 1 — produce the .jsonl from your raw trace +### Step 1 — produce an anonymized .jsonl -The exporter that produced the current 179-trace set lives in -[nexu-io/agent-pr-explore](https://github.com/nexu-io/agent-pr-explore) -under `cli/src/local/orchestrator/langfuse-import.ts`: +The harvester that produced the current 179-trace set lives in a +separate repo, [nexu-io/agent-pr-explore][harvester]. See its README +for how to authenticate against your trace store, filter by skill / +agent / outcome, and anonymize the result. -```bash -cd ~/Documents/agent-pr-explore -export LANGFUSE_BASE_URL=https://us.cloud.langfuse.com -export LANGFUSE_PUBLIC_KEY=pk-lf-... -export LANGFUSE_SECRET_KEY=sk-lf-... +The output is one `.jsonl` file per recording; copy that +into a scratch dir of your choice, then continue with step 2. -# Examples: -synclo-explore local langfuse-import \ - --tag skill:data-report --limit 30 - -synclo-explore local langfuse-import \ - --min-tool-calls 8 --min-turns-in-session 3 --limit 50 - -# Anonymize + write to a temp dir: -synclo-explore local recordings anonymize --out-dir /tmp/new-recordings -``` +[harvester]: https://github.com/nexu-io/agent-pr-explore ### Step 2 — stage in this repo, open a PR @@ -328,7 +316,7 @@ can't race on the manifest. If you absolutely need to push from your laptop (e.g. 
backfilling an old trace the Action somehow lost), set -`SYNCLO_OD_MOCKS_I_KNOW_WHAT_IM_DOING=1` and run `upload-to-r2.mjs` with +`OD_MOCKS_ALLOW_LOCAL_UPLOAD=1` and run `upload-to-r2.mjs` with your own wrangler login. Not recommended; consider opening a PR instead. --- @@ -348,8 +336,8 @@ it('parses an opencode session with 4 tool calls into 4 UI events', async () => env: { ...process.env, PATH: `${MOCK_BIN}:${process.env.PATH}`, - SYNCLO_EXPLORE_MOCK_TRACE: '06a9324a', // 4-tool claude session - SYNCLO_EXPLORE_MOCK_NO_DELAY: '1', + OD_MOCKS_TRACE: '06a9324a', // 4-tool claude session + OD_MOCKS_NO_DELAY: '1', }, stdio: ['pipe', 'pipe', 'pipe'], }); @@ -364,8 +352,8 @@ it('parses an opencode session with 4 tool calls into 4 UI events', async () => ```bash # See what claude's 17-tool "delete v2" session emits to OD: export PATH=$(git rev-parse --show-toplevel)/mocks/bin:$PATH -export SYNCLO_EXPLORE_MOCK_TRACE=04097377 -export SYNCLO_EXPLORE_MOCK_NO_DELAY=1 +export OD_MOCKS_TRACE=04097377 +export OD_MOCKS_NO_DELAY=1 echo "anything" | claude -p --output-format=stream-json | jq .type | uniq -c ``` @@ -435,6 +423,6 @@ telemetry when they installed the desktop client. The anonymizer removed user-identifying paths and project UUIDs before checking in. If you find a recording that includes content that should be redacted, -delete the file (`rm mocks/recordings/.jsonl`) and regenerate the -index (`jq` will skip missing entries; for a fresh index, rerun the -exporter from synclo-explore). +open a PR removing it from `mocks/recordings-staging/` (or, if already +synced, file an issue — manifest regeneration after a delete needs to +run against R2 manually and is not automated yet). diff --git a/mocks/bin/claude b/mocks/bin/claude index bbae25a53..8b22e4f1e 100755 --- a/mocks/bin/claude +++ b/mocks/bin/claude @@ -2,7 +2,7 @@ # Mock wrapper for claude — runs the bundled mock-agent in this agent's # stdout protocol. 
The wrapper accepts (and ignores) any flags the real # CLI would take; the mock doesn't honor model selection, tool gates, or -# permission modes — recording selection is via SYNCLO_EXPLORE_MOCK_* +# permission modes — recording selection is via OD_MOCKS_* # env vars (see ../README.md). set -euo pipefail HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" diff --git a/mocks/bin/codex b/mocks/bin/codex index 4fef07f0e..7a1cfed1c 100755 --- a/mocks/bin/codex +++ b/mocks/bin/codex @@ -2,7 +2,7 @@ # Mock wrapper for codex — runs the bundled mock-agent in this agent's # stdout protocol. The wrapper accepts (and ignores) any flags the real # CLI would take; the mock doesn't honor model selection, tool gates, or -# permission modes — recording selection is via SYNCLO_EXPLORE_MOCK_* +# permission modes — recording selection is via OD_MOCKS_* # env vars (see ../README.md). set -euo pipefail HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" diff --git a/mocks/bin/deepseek b/mocks/bin/deepseek index e58d75970..d18f2f86d 100755 --- a/mocks/bin/deepseek +++ b/mocks/bin/deepseek @@ -2,7 +2,7 @@ # Mock wrapper for deepseek — runs the bundled mock-agent in this agent's # stdout protocol. The wrapper accepts (and ignores) any flags the real # CLI would take; the mock doesn't honor model selection, tool gates, or -# permission modes — recording selection is via SYNCLO_EXPLORE_MOCK_* +# permission modes — recording selection is via OD_MOCKS_* # env vars (see ../README.md). set -euo pipefail HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" diff --git a/mocks/bin/grok b/mocks/bin/grok index df87898e6..becd0e970 100755 --- a/mocks/bin/grok +++ b/mocks/bin/grok @@ -2,7 +2,7 @@ # Mock wrapper for grok — runs the bundled mock-agent in this agent's # stdout protocol. 
The wrapper accepts (and ignores) any flags the real # CLI would take; the mock doesn't honor model selection, tool gates, or -# permission modes — recording selection is via SYNCLO_EXPLORE_MOCK_* +# permission modes — recording selection is via OD_MOCKS_* # env vars (see ../README.md). set -euo pipefail HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" diff --git a/mocks/bin/opencode b/mocks/bin/opencode index 672e85a27..3fdfabec3 100755 --- a/mocks/bin/opencode +++ b/mocks/bin/opencode @@ -2,7 +2,7 @@ # Mock wrapper for opencode — runs the bundled mock-agent in this agent's # stdout protocol. The wrapper accepts (and ignores) any flags the real # CLI would take; the mock doesn't honor model selection, tool gates, or -# permission modes — recording selection is via SYNCLO_EXPLORE_MOCK_* +# permission modes — recording selection is via OD_MOCKS_* # env vars (see ../README.md). set -euo pipefail HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" diff --git a/mocks/bin/qwen b/mocks/bin/qwen index bfa5efb3c..132bafc36 100755 --- a/mocks/bin/qwen +++ b/mocks/bin/qwen @@ -2,7 +2,7 @@ # Mock wrapper for qwen — runs the bundled mock-agent in this agent's # stdout protocol. The wrapper accepts (and ignores) any flags the real # CLI would take; the mock doesn't honor model selection, tool gates, or -# permission modes — recording selection is via SYNCLO_EXPLORE_MOCK_* +# permission modes — recording selection is via OD_MOCKS_* # env vars (see ../README.md). set -euo pipefail HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" diff --git a/mocks/lib/recording-picker.mjs b/mocks/lib/recording-picker.mjs index e9aff5f43..262b8255d 100644 --- a/mocks/lib/recording-picker.mjs +++ b/mocks/lib/recording-picker.mjs @@ -1,13 +1,13 @@ // Pick which recording to play back, driven by env vars. // // Priority order: -// 1. SYNCLO_EXPLORE_MOCK_TRACE → fixed trace id (or prefix) -// 2. SYNCLO_EXPLORE_MOCK_BY_PROMPT_HASH=1 → hash(prompt) → trace -// 3. 
SYNCLO_EXPLORE_MOCK_POOL= → random within tag pool +// 1. OD_MOCKS_TRACE → fixed trace id (or prefix) +// 2. OD_MOCKS_BY_PROMPT_HASH=1 → hash(prompt) → trace +// 3. OD_MOCKS_POOL= → random within tag pool // 4. (default) → random across all // -// SYNCLO_EXPLORE_MOCK_SEED gives reproducible "random" selection. -// SYNCLO_EXPLORE_MOCK_RECORDINGS_DIR overrides the default recordings dir +// OD_MOCKS_SEED gives reproducible "random" selection. +// OD_MOCKS_RECORDINGS_DIR overrides the default recordings dir // (defaults to ../recordings/ relative to this file). import { readdir, readFile, stat } from 'node:fs/promises'; @@ -17,7 +17,7 @@ import { fileURLToPath } from 'node:url'; const HERE = dirname(fileURLToPath(import.meta.url)); export const DEFAULT_RECORDINGS_DIR = - process.env.SYNCLO_EXPLORE_MOCK_RECORDINGS_DIR || + process.env.OD_MOCKS_RECORDINGS_DIR || join(HERE, '..', 'recordings'); async function listRecordings(dir) { @@ -54,20 +54,20 @@ export async function pickRecording({ prompt } = {}) { if (all.length === 0) return null; // 1. fixed - const fixed = process.env.SYNCLO_EXPLORE_MOCK_TRACE; + const fixed = process.env.OD_MOCKS_TRACE; if (fixed) { const hit = all.find(id => id === fixed) ?? all.find(id => id.startsWith(fixed)); if (hit) return { traceId: hit, path: join(dir, `${hit}.jsonl`), method: 'fixed' }; } // 2. prompt-hash - if (process.env.SYNCLO_EXPLORE_MOCK_BY_PROMPT_HASH === '1' && prompt) { + if (process.env.OD_MOCKS_BY_PROMPT_HASH === '1' && prompt) { const picked = pickRandom(all, prompt); if (picked) return { traceId: picked, path: join(dir, `${picked}.jsonl`), method: 'hash' }; } // 3. 
pool by tag - const pool = process.env.SYNCLO_EXPLORE_MOCK_POOL; + const pool = process.env.OD_MOCKS_POOL; if (pool) { const candidates = []; for (const id of all) { @@ -83,13 +83,13 @@ export async function pickRecording({ prompt } = {}) { } } if (candidates.length > 0) { - const picked = pickRandom(candidates, process.env.SYNCLO_EXPLORE_MOCK_SEED); + const picked = pickRandom(candidates, process.env.OD_MOCKS_SEED); if (picked) return { traceId: picked, path: join(dir, `${picked}.jsonl`), method: 'pool', pool }; } } // 4. random - const picked = pickRandom(all, process.env.SYNCLO_EXPLORE_MOCK_SEED); + const picked = pickRandom(all, process.env.OD_MOCKS_SEED); if (!picked) return null; return { traceId: picked, path: join(dir, `${picked}.jsonl`), method: 'random' }; } diff --git a/mocks/mock-agent.mjs b/mocks/mock-agent.mjs index 155b81818..cd6a3800d 100755 --- a/mocks/mock-agent.mjs +++ b/mocks/mock-agent.mjs @@ -37,13 +37,13 @@ function parseArgs(argv) { // Anything left is a positional — used by vela subcommand dispatch. opts.positionals.push(a); } - if (process.env.SYNCLO_EXPLORE_MOCK_NO_DELAY === '1') opts.noDelay = true; + if (process.env.OD_MOCKS_NO_DELAY === '1') opts.noDelay = true; // Fall through to REPORT_FILE env when --report-file wasn't supplied. - // Harnesses that spawn us (e.g. synclo-explore's orchestrator at - // nexu-io/agent-pr-explore) set REPORT_FILE as env but expect the - // agent to write there autonomously — real opencode/claude do via - // their Write tool, but the mock needs to project the recording's - // final assistant text to that path so the harness sees a report. + // Some harnesses (e.g. the agent-pr-explore orchestrator) set + // REPORT_FILE as env but expect the agent to write there + // autonomously — real opencode/claude do via their Write tool, but + // the mock needs to project the recording's final assistant text to + // that path so the harness sees a report. 
if (!opts.reportFile && process.env.REPORT_FILE) { opts.reportFile = process.env.REPORT_FILE; } @@ -89,7 +89,7 @@ async function main() { // ACP agents read JSON-RPC messages off stdin one line at a time, so the // bulk-prompt buffering logic below doesn't apply — pickRecording sees no - // prompt for hash-mode (use SYNCLO_EXPLORE_MOCK_TRACE or _POOL instead). + // prompt for hash-mode (use OD_MOCKS_TRACE or OD_MOCKS_POOL instead). const ACP_AGENTS = new Set(['devin', 'hermes', 'kilo', 'kimi', 'kiro', 'vibe', 'vela']); const isAcp = ACP_AGENTS.has(opts.as); const prompt = isAcp ? '' : await readStdinIfPiped(); @@ -103,7 +103,7 @@ async function main() { ' bash mocks/scripts/fetch-recordings.sh # all 179 (~30s, 4.5MB)\n' + ' bash mocks/scripts/fetch-recordings.sh --agent claude # subset\n' + '\n' + - 'Or set SYNCLO_EXPLORE_MOCK_RECORDINGS_DIR if you stashed them elsewhere.\n', + 'Or set OD_MOCKS_RECORDINGS_DIR if you stashed them elsewhere.\n', ); process.exit(3); } diff --git a/mocks/scripts/smoke-test.sh b/mocks/scripts/smoke-test.sh index ad4f9cc64..9f4074d47 100755 --- a/mocks/scripts/smoke-test.sh +++ b/mocks/scripts/smoke-test.sh @@ -13,7 +13,7 @@ set -euo pipefail HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" MOCKS="$(cd "$HERE/.." && pwd -P)" -TRACE_ID="${SYNCLO_EXPLORE_MOCK_SMOKE_TRACE:-04097377}" # the 17-tool claude session +TRACE_ID="${OD_MOCKS_SMOKE_TRACE:-04097377}" # the 17-tool claude session # Ensure recordings are on disk — the corpus is hosted on R2 and fetched # on demand. If nothing's been pulled yet (or only a few are), run the @@ -25,8 +25,8 @@ if ! 
ls "$MOCKS/recordings"/*.jsonl >/dev/null 2>&1; then fi export PATH="$MOCKS/bin:$PATH" -export SYNCLO_EXPLORE_MOCK_TRACE="$TRACE_ID" -export SYNCLO_EXPLORE_MOCK_NO_DELAY=1 +export OD_MOCKS_TRACE="$TRACE_ID" +export OD_MOCKS_NO_DELAY=1 failed=0 pass() { printf ' \033[32m✓\033[0m %s\n' "$1"; } diff --git a/mocks/scripts/upload-to-r2.mjs b/mocks/scripts/upload-to-r2.mjs index 262935228..14b9db1a1 100755 --- a/mocks/scripts/upload-to-r2.mjs +++ b/mocks/scripts/upload-to-r2.mjs @@ -10,7 +10,7 @@ * the CLOUDFLARE_R2_MOCKS_AK / _SK repo secrets). R2_S3_ENDPOINT must * also be set. If you need to test the upload path locally, configure * those env vars yourself AND set - * env SYNCLO_OD_MOCKS_I_KNOW_WHAT_IM_DOING=1 to bypass the safety gate. + * env OD_MOCKS_ALLOW_LOCAL_UPLOAD=1 to bypass the safety gate. * * Why not wrangler: wrangler 4.x calls /memberships before any R2 * action, which requires user:read scope. R2 "Object Read & Write" @@ -48,11 +48,11 @@ const CONCURRENCY = 4; function checkEnv() { const isCi = process.env.GITHUB_ACTIONS === 'true'; - const hasOverride = process.env.SYNCLO_OD_MOCKS_I_KNOW_WHAT_IM_DOING === '1'; + const hasOverride = process.env.OD_MOCKS_ALLOW_LOCAL_UPLOAD === '1'; if (!isCi && !hasOverride) { console.error('✗ upload-to-r2.mjs is intended for the GitHub Action.'); console.error(' To upload from your laptop you must explicitly opt-in:'); - console.error(' SYNCLO_OD_MOCKS_I_KNOW_WHAT_IM_DOING=1 node mocks/scripts/upload-to-r2.mjs'); + console.error(' OD_MOCKS_ALLOW_LOCAL_UPLOAD=1 node mocks/scripts/upload-to-r2.mjs'); process.exit(2); } for (const k of ['AWS_ACCESS_KEY_ID', 'AWS_SECRET_ACCESS_KEY', 'R2_S3_ENDPOINT']) {