diff --git a/AGENTS.md b/AGENTS.md index 08e752deb..f93912c69 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -14,7 +14,7 @@ This file is the single source of truth for agents entering this repository. Rea ## Workspace directories - Workspace packages come from `pnpm-workspace.yaml`: `apps/*`, `packages/*`, `tools/*`, and `e2e`. -- Top-level content directories: `skills/` (functional skills the agent invokes mid-task — utilities, briefs, packagers; see `skills/AGENTS.md`), `design-templates/` (rendering catalogue: decks, prototypes, image/video/audio templates; see `design-templates/AGENTS.md` and `specs/current/skills-and-design-templates.md`), `design-systems/` (brand `DESIGN.md` files), `craft/` (universal brand-agnostic craft rules a skill can opt into via `od.craft.requires`). +- Top-level content directories: `skills/` (functional skills the agent invokes mid-task — utilities, briefs, packagers; see `skills/AGENTS.md`), `design-templates/` (rendering catalogue: decks, prototypes, image/video/audio templates; see `design-templates/AGENTS.md` and `specs/current/skills-and-design-templates.md`), `design-systems/` (brand `DESIGN.md` files), `craft/` (universal brand-agnostic craft rules a skill can opt into via `od.craft.requires`), `mocks/` (replay-based mock CLIs for `opencode`/`claude`/`codex`/`gemini`/`cursor-agent`/`deepseek`/`qwen`/`grok`, the ACP family `devin`/`hermes`/`kilo`/`kimi`/`kiro`/`vibe`, and the AMR `vela` CLI (login + models + ACP), built from anonymized Langfuse traces — PATH-overlay drop-in for tests and self-validation; see `mocks/README.md`). - `apps/web` is the Next.js 16 App Router + React 18 web runtime; do not restore `apps/nextjs`. - `apps/daemon` is the local privileged daemon and `od` bin. It owns `/api/*`, agent spawning, skills, design systems, artifacts, and static serving. - `apps/desktop` is the Electron shell; it discovers the web URL through sidecar IPC. 
@@ -167,6 +167,7 @@ root `pnpm tools-pr` script without a new explicit maintainer decision. ## Validation strategy - After package, workspace, or command-entry changes, run `pnpm install` so workspace links and generated dist entries stay fresh. +- For agent-stream / parser changes (`apps/daemon/src/claude-stream.ts`, `json-event-stream.ts`, `qoder-stream.ts`, etc.), replay a recorded session through the mock CLIs in `mocks/` to verify event shapes round-trip without burning provider budget. PATH-overlay activation: `export PATH="$PWD/mocks/bin:$PATH" OD_MOCKS_TRACE=<8-char-id> OD_MOCKS_NO_DELAY=1`. See `mocks/README.md` for the trace catalog and selection knobs. - Treat every `pnpm-lock.yaml` change as requiring a Nix pnpm deps hash refresh check. `nix/pnpm-deps.nix` is a generated lock artifact; use `pnpm nix:update-hash` only when intentionally maintaining Nix packaging, then re-run `nix flake check --print-build-logs --keep-going`. Contributors without Nix can rely on the PR `Validate workspace` gate, which now uploads or auto-applies the generated hash-only fix when possible. - Before marking regular work ready, run at least `pnpm guard` and `pnpm typecheck`, plus the package-scoped tests/builds that match the files changed. Do not use or add root `pnpm test`/`pnpm build` aliases. - For local web runtime loops, prefer `pnpm tools-dev run web --daemon-port --web-port `. diff --git a/apps/daemon/tests/mocks-golden.test.ts b/apps/daemon/tests/mocks-golden.test.ts new file mode 100644 index 000000000..ee36c5010 --- /dev/null +++ b/apps/daemon/tests/mocks-golden.test.ts @@ -0,0 +1,113 @@ +// Golden daemon-event snapshots — addresses the regression-signal point +// from review on #3241: smoke-testing that mocks RUN catches only crashes +// or protocol-level garbage; it does NOT catch a parser change that +// semantically reshapes the events the daemon emits to the UI. 
+// +// This test replays representative recordings through the actual daemon +// stream handlers and asserts the emitted event sequence matches a +// committed `mocks/golden/<trace>.events.json`. A parser tweak that +// drops a tool_result, changes a usage shape, or renames an event type +// fails this test loudly. +// +// Update flow when a parser change is INTENTIONAL: +// MOCKS_GOLDEN_UPDATE=1 pnpm --filter @open-design/daemon test mocks-golden +// then `git diff mocks/golden/` and commit the new shapes. +// +// Auto-skips when the recording corpus hasn't been fetched yet (see +// `mocks/scripts/fetch-recordings.sh`); CI that exercises this test must +// fetch first. + +import { describe, it, expect } from 'vitest'; +import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs'; +import { spawnSync } from 'node:child_process'; +import { dirname, join } from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { createClaudeStreamHandler } from '../src/claude-stream.js'; +import { createJsonEventStreamHandler } from '../src/json-event-stream.js'; + +const HERE = dirname(fileURLToPath(import.meta.url)); +const REPO = join(HERE, '../../..'); +const MOCK_AGENT = join(REPO, 'mocks/mock-agent.mjs'); +const GOLDEN_DIR = join(REPO, 'mocks/golden'); +const RECORDINGS_DIR = join(REPO, 'mocks/recordings'); + +// Median-tool-count successful traces per agent (selected from manifest +// 2026-05-29). Each one's `<trace>.jsonl` lives in `mocks/recordings/` after +// `bash mocks/scripts/fetch-recordings.sh`. +const CASES: Array<{ agent: 'claude' | 'codex' | 'opencode'; trace: string }> = [ + { agent: 'claude', trace: '314d6833-0377-4ac4-ba11-2b8d7eca5511' }, + { agent: 'codex', trace: 'dcdff3b3-cd39-4dcd-be83-372830a29639' }, + { agent: 'opencode', trace: '9a9522ec-575f-432f-aeed-efc491e900aa' }, +]; + +// Replace per-spawn-volatile fields with stable sentinels so the +// snapshot stays diffable across runs. 
Currently only `sessionId` — +// claude's mock emits a fresh UUID every spawn. Opencode/codex carry +// the recording's own session/thread id so they're already stable. +function normalizeVolatile(events: unknown[]): unknown[] { + return events.map(e => { + if (!e || typeof e !== 'object') return e; + const out: Record<string, unknown> = { ...(e as Record<string, unknown>) }; + if ('sessionId' in out) out.sessionId = ''; + return out; + }); +} + +// Replays `trace` through the mock CLI as `agent` and returns the normalized daemon events. +function runMockAndCollectEvents(agent: string, trace: string): unknown[] { + // Force no-delay so the spawn returns quickly + deterministically. + const proc = spawnSync( + process.execPath, + [MOCK_AGENT, '--as', agent, '--no-delay'], + { + env: { ...process.env, OD_MOCKS_TRACE: trace, OD_MOCKS_NO_DELAY: '1' }, + input: 'golden-test-prompt', + encoding: 'utf-8', + timeout: 30_000, + maxBuffer: 50 * 1024 * 1024, + }, + ); + if (proc.error || proc.status !== 0) { + // `error` is set on spawn failure or timeout; stderr may be null when the spawn itself failed. + const detail = proc.error?.message ?? (proc.stderr ?? '').slice(0, 500); + throw new Error(`mock-agent --as ${agent} exit ${proc.status}: ${detail}`); + } + + const events: unknown[] = []; + const sink = (e: unknown) => events.push(e); + const handler = + agent === 'claude' + ? 
createClaudeStreamHandler(sink) + : createJsonEventStreamHandler(agent, sink); + handler.feed(proc.stdout); + return normalizeVolatile(events); +} + +const recordingsAvailable = + existsSync(RECORDINGS_DIR) && + CASES.every(c => existsSync(join(RECORDINGS_DIR, `${c.trace}.jsonl`))); + +describe.skipIf(!recordingsAvailable)( + 'mocks goldens — daemon event shape regression', + () => { + for (const { agent, trace } of CASES) { + it(`${agent} ${trace.slice(0, 8)}`, () => { + const events = runMockAndCollectEvents(agent, trace); + const goldenPath = join(GOLDEN_DIR, `${trace}.events.json`); + + if (process.env.MOCKS_GOLDEN_UPDATE === '1') { + mkdirSync(GOLDEN_DIR, { recursive: true }); + writeFileSync( + goldenPath, + JSON.stringify({ agent, trace, events }, null, 2) + '\n', + ); + return; + } + + const golden = JSON.parse(readFileSync(goldenPath, 'utf-8')); + expect({ agent, trace, events }).toEqual(golden); + }); + } + }, +); diff --git a/docs/MOCKS-CONTRACT-CHECK.md b/docs/MOCKS-CONTRACT-CHECK.md new file mode 100644 index 000000000..395bbfec0 --- /dev/null +++ b/docs/MOCKS-CONTRACT-CHECK.md @@ -0,0 +1,108 @@ +# Real-CLI contract check + +The replay mocks under `mocks/` impersonate real agent CLIs by emitting +recorded traces in each CLI's native protocol. They're great for parser +regression coverage but they can silently drift away from the real CLI +when: + +- An agent CLI ships a new event `type` that the mock doesn't know about. +- A field gets renamed (`sessionID` → `sessionId`) and the mock keeps + emitting the old name. OD's parser may have been updated to accept + both, so smoke tests stay green, but new fields aren't surfaced. +- A protocol version bump changes the shape of `usage` / tool calls / + init blocks. + +The contract check is the periodic ritual that catches that drift. + +## Scope + +It is **not** a CI gate. The check: + +- Costs real LLM tokens (a few cents per agent per run). 
+- Requires the real CLI installed + authenticated locally or on a + maintainer-controlled runner. +- Wants a human to eyeball the output, not a regex. + +Treat it like a maintenance task — monthly is fine, ad-hoc whenever a +relevant CLI publishes a release note about output-format changes. + +## How to run + +```bash +bash mocks/scripts/contract-check.sh claude +bash mocks/scripts/contract-check.sh codex +bash mocks/scripts/contract-check.sh opencode +``` + +The script: + +1. Resolves the real CLI binary (ignoring the `mocks/bin/` PATH overlay). +2. Sends a fixed deterministic prompt: *"List the entries of the current + working directory and tell me how many JSON files are present."* +3. Runs the same prompt through the mock CLI. +4. Prints a side-by-side distribution of top-level event `type` + values from both. +5. Leaves both raw JSONL outputs in `/tmp` for you to `diff`. + +## What to look for + +Compare the two `type` distributions. Acceptable differences: + +- Counts vary slightly (mock plays a single recorded trace, real CLI + may take a different number of turns for the same prompt). +- Mock emits a superset of the real CLI's event types — the recordings + span historical CLI versions. + +**Red flags**: + +- Real CLI emits a `type` value the mock never produces → the mock + needs a new event handler in `mocks/lib/format-<agent>.mjs`. +- Real CLI's event uses different field names than the mock → either + the real CLI changed and the parser may already be out of sync, or + the mock is drifting toward an internal convention. +- Mock crashes / emits nothing → the agent's `--no-delay` path is + broken. + +## Suggested cadence + +No fixed schedule, no automated cron — the check is human-driven: + +- **On real-CLI release**: when Anthropic / OpenAI / OpenCode publishes + a release whose notes mention "output format" / "JSON" / "stream" / + "events" / "API", run the affected agent's check. This is the + highest-signal trigger. 
+- **Before a parser refactor**: lock the contract before touching + `apps/daemon/src/claude-stream.ts` / `json-event-stream.ts`, so a + post-refactor failure means "I broke the parser" rather than "the + real CLI had already drifted and the old parser was silently tolerating it". +- **Ad-hoc**: if something feels off — UI suddenly missing a tool call, + duplicate events, unfamiliar field names in logs — a contract check + is the fast first step. + +Putting this on a cron would burn LLM tokens every run with no human +review of the output, defeating the point. The check is an artifact a +maintainer reads, not a CI gate. + +## Future improvements + +The current script only compares top-level `type` distributions +because a deeper structural diff is hard to do without a schema. +Possible follow-ups: + +1. **JSON-shape schema per agent** — generate a JSON Schema from the + mock formatters' output, run a validator against real-CLI output, + report violations with field paths. +2. **Recorded-then-replayed delta** — capture the real CLI's output + for the fixed prompt, save under `mocks/contracts/<agent>.golden.jsonl`, + then in CI replay that golden through the daemon parser and assert + no parser errors. Cheaper than calling the LLM every CI run but + only catches *parser* drift, not *CLI* drift. + +Neither is implemented today. + +## Related + +- `mocks/scripts/contract-check.sh` — the script itself. +- `apps/daemon/tests/mocks-golden.test.ts` — daemon-event golden + snapshots (catches parser regressions against the mocks, complementary + to this check which catches mock-vs-real drift). 
diff --git a/mocks/README.md b/mocks/README.md new file mode 100644 index 000000000..605efae23 --- /dev/null +++ b/mocks/README.md @@ -0,0 +1,475 @@ +# `mocks/` — replay-based mock CLIs for OD's supported agents + +A drop-in replacement for the real agent CLIs (`claude`, `opencode`, +`codex`, `gemini`, `cursor-agent`, `deepseek`, `qwen`, `grok`, the +ACP family `devin` / `hermes` / `kilo` / `kimi` / `kiro` / `vibe`, and +the AMR `vela` CLI) that replays pre-recorded sessions in each CLI's +native protocol — stdout streaming for most, JSON-RPC over stdio for +ACP and AMR. **Zero LLM tokens.** + +Used by: + +- **E2E tests** in `apps/daemon/tests/` — run the full chat-server + pipeline against a known agent trace, assert UI events / artifacts. +- **Local self-tests during development** — iterate on `chat-routes.ts`, + `claude-stream.ts`, `json-event-stream.ts` parser changes without + burning provider budget. +- **Demo / onboarding** — show what a 17-tool `claude` editing session + looks like end-to-end, offline. +- **Regression harness** — replay the same trace before and after a + charter / parser change; diff the events the daemon surfaces. + +The recordings are anonymized exports from open-design's Langfuse +project (179 traces across 9 agents and 5+ skills as of this commit). + +--- + +## tl;dr + +```bash +# First-time setup — pull the recording corpus from R2 (~30s, 4.5MB): +bash mocks/scripts/fetch-recordings.sh +# Subsequent runs hit the local cache (sha256-verified, instant). 
+ +# Make the mock CLIs override the real ones for this shell: +export PATH="$PWD/mocks/bin:$PATH" + +# Pick any recording to play back (8-char prefix OK): +export OD_MOCKS_TRACE=04097377 + +# Speed up replay (skip inter-event sleeps): +export OD_MOCKS_NO_DELAY=1 + +# Now anything that spawns opencode/claude/codex gets the recording: +echo "any prompt body" | opencode run +echo "any prompt" | claude -p --output-format=stream-json +echo "any prompt" | codex exec +``` + +The mock binaries are bash wrappers that exec +`node mocks/mock-agent.mjs --as `. Anything fed to stdin is +discarded by the renderer but used by the recording picker (see hash +mode below). + +## Recordings live on R2, not in this repo + +The 179-recording corpus (~4.5 MB) is hosted on Cloudflare R2 at +`open-design-mocks` and fetched **on demand** — `pnpm install` does NOT +pull them, and the repo stays small. Recordings only land in +`mocks/recordings/` when: + +1. You run `bash mocks/scripts/fetch-recordings.sh` directly, OR +2. `bash mocks/scripts/smoke-test.sh` runs and the dir is empty (auto- + fetch fallback), OR +3. A mock binary spawn finds no data — it errors with a pointer at the + fetch script (no silent failure). + +This is by design: contributors who don't touch agent code don't pay +the fetch cost. CI jobs that DO touch agent code (`apps/daemon/tests/` +parser changes, etc.) run the fetch as a quick pre-step and cache +`mocks/recordings/` between runs. + +```bash +# Fetch everything (parallel, sha256-verified, idempotent): +bash mocks/scripts/fetch-recordings.sh + +# Fetch a subset: +bash mocks/scripts/fetch-recordings.sh --agent claude # 57 claude traces +bash mocks/scripts/fetch-recordings.sh --outcome failed # 35 failed-path traces +bash mocks/scripts/fetch-recordings.sh --skill agent-browser + +# Override cache location (e.g. 
share across multiple OD checkouts): +OD_MOCKS_CACHE_DIR=~/.cache/od-mocks bash mocks/scripts/fetch-recordings.sh +``` + +Manifest at `mocks/manifest.json` is the committed source of truth — +it lists every recording's `trace_id`, `sha256`, `bytes`, `agent`, +`outcome`, `skills`, `multi_turn`, plus histograms over the corpus. +Tooling reads this; you don't have to. + +### Provenance per recording + +Beyond identity (`trace_id`, `sha256`), each manifest entry carries +fixture-trust signals so consumers can decide whether the recording +is still meaningful as the real CLIs evolve: + +| Field | Meaning | +|---|---| +| `captured_at` | ISO 8601 timestamp of the original session — populated for all 179 current entries | +| `cli_version` | The CLI version the trace was captured against (e.g. `"claude-code 1.0.65"`) — populated only on traces the harvester writes it to, null otherwise | +| `protocol_version` | Stream-format version (`"claude-stream-json/v1"`, `"opencode/json-event-stream"`) — populated by harvester | +| `anonymization_version` | Which anonymizer pass scrubbed the recording — populated by harvester | + +For now every field except `captured_at` is null on most of the existing 179 entries — the harvester in +[nexu-io/agent-pr-explore][harvester] is the next thing to teach to +write them. Once a recording's `cli_version` falls behind the actual +CLI by more than one minor version, treat it as a candidate for +re-harvest. + +### Golden daemon-event snapshots + +`mocks/golden/<trace_id>.events.json` holds the exact event sequence the +OD daemon emits when fed each (mock CLI → handler) pipeline. Diffed +on every `pnpm --filter @open-design/daemon test` run by +`apps/daemon/tests/mocks-golden.test.ts` (which auto-skips until the recordings have been fetched). + +A parser refactor that semantically changes events (drops a field, +renames `sessionId`, stops emitting `turn_end`) fails the diff loudly. 
+After an intentional parser change, regenerate: + +```bash +MOCKS_GOLDEN_UPDATE=1 pnpm --filter @open-design/daemon test mocks-golden +git diff mocks/golden/ # eyeball the new shapes +git add mocks/golden/ && git commit -m "mocks: refresh goldens for " +``` + +Per-spawn volatile fields (currently just claude's generated +`sessionId`) are stripped to `""` so the snapshot stays +stable. See `mocks/golden/README.md` for the coverage rationale. + +### Real-CLI contract check + +The mocks catch parser regressions against the recordings; they do +**not** catch the recordings themselves drifting away from the live +agent CLIs. For that, `mocks/scripts/contract-check.sh` spawns a real +CLI alongside the mock with a fixed prompt and prints a side-by-side +event-type distribution. + +This is human-driven and costs real LLM tokens — run on a real-CLI +release or before a parser refactor, not on a cron. Full doc: +[`docs/MOCKS-CONTRACT-CHECK.md`](../docs/MOCKS-CONTRACT-CHECK.md). + +--- + +## What gets emitted + +Each renderer matches the EXACT event shapes the OD daemon expects, as +verified line-by-line against the parsers in `apps/daemon/src/`: + +| CLI | OD streamFormat | Parser source | +|---|---|---| +| `opencode` | `json-event-stream` (opencode kind) | `json-event-stream.ts:handleOpenCodeEvent` | +| `codex` | `json-event-stream` (codex kind) | `json-event-stream.ts:handleCodexEvent` | +| `claude` | `claude-stream-json` | `claude-stream.ts:createClaudeStreamHandler` | +| `gemini` | `json-event-stream` (gemini kind) | `json-event-stream.ts:handleGeminiEvent` | +| `cursor-agent` | `json-event-stream` (cursor-agent kind) | `json-event-stream.ts:handleCursorEvent` | +| `deepseek` `qwen` `grok` | `plain` | `server.ts` (raw stdout = final assistant text) | +| `devin` `hermes` `kilo` `kimi` `kiro` `vibe` | `acp-json-rpc` | `acp.ts:attachAcpSession` | +| `vela` (AMR) | `acp-json-rpc` + `login` / `models` subcommands | `runtimes/defs/amr.ts` + 
`apps/daemon/tests/fixtures/fake-vela.mjs` (sibling stub) | + +> **Note on `gemini` and `cursor-agent`**: OD's parsers for these two +> agents do NOT recognize tool-call events — only init / assistant text / +> usage. The renderers therefore emit ONLY the final assistant text wrapped +> in the expected init/text/usage envelope. Tool calls present in the +> source recording are silently dropped (which matches the real CLI's UI +> behavior — these agents don't surface tools in OD's chat view). + +> **Note on ACP agents** (`devin` / `hermes` / `kilo` / `kimi` / `kiro` / +> `vibe`): These do NOT stream stdout — they speak JSON-RPC v2 over stdio. +> OD's daemon sends `initialize` → `session/new` → (optional `session/set_model`) +> → `session/prompt`; the mock responds in order, streams text via +> `session/update` notifications carrying `agent_message_chunk` parts, +> then responds to the prompt request with usage stats. Tool calls +> aren't part of the ACP protocol on this path (tools surface via MCP or +> other side channels), so they're dropped from playback. + +> **Note on `vela` (AMR)**: vela is the bin OD's AMR runtime spawns. It +> extends the generic ACP shape with `agentCapabilities` + `models` +> blocks in `initialize` / `session/new`, plus a **strict set_model gate** +> — `session/prompt` is rejected with -32602 until `session/set_model` +> (or `session/set_config_option`) has been called for the current +> sessionId, mirroring real vela 0.0.1 contract. +> +> vela also has two non-ACP subcommands: +> +> - `vela login` → writes `~/.amr/config.json` with a fake profile so +> OD's daemon login route + `AmrLoginPill` poller see the same on-disk +> projection production produces. +> - `vela models` → prints the production-shaped `public_model_* vela` +> catalog. 
+> +> Error injection envs (kept in sync with +> `apps/daemon/tests/fixtures/fake-vela.mjs`): +> `FAKE_VELA_SESSION_NEW_ERROR` / `FAKE_VELA_SET_MODEL_ERROR` / +> `FAKE_VELA_PROMPT_ERROR` / `FAKE_VELA_LOGIN_FAIL` / +> `FAKE_VELA_REQUIRE_SET_MODEL=0`. + +For the formats that surface tool calls (see the notes above), each tool call from the recording is rendered with the original input +arguments and tool output. The agents' assistant text is rendered as +the final message. + +--- + +## Recording selection + +Driven by env vars. The first three rows pick the recording, in priority order; the remaining rows are modifiers: + +| Env | Behavior | +|---|---| +| `OD_MOCKS_TRACE=<id>` | Always play this trace. 8-char prefix OK. | +| `OD_MOCKS_BY_PROMPT_HASH=1` + stdin prompt | Deterministic by `sha256(prompt) % len(all)`. Same prompt → same trace. Useful for "stable answer per question" tests. | +| `OD_MOCKS_POOL=<tag>` | Random within the tag pool. Examples: `agent:claude`, `skill:agent-browser`, `outcome:failed`. | +| `OD_MOCKS_SEED=<n>` | Makes "random" picks reproducible across runs. | +| `OD_MOCKS_NO_DELAY=1` | Skip inter-event waits. | +| `OD_MOCKS_RECORDINGS_DIR=<path>` | Override the recordings dir. | + +If none of the selection variables are set, a uniformly random recording is played each invocation. + +The mock binary announces the picked trace id on stderr: + +``` +[mock-opencode] picked 04097377… via fixed +``` + +This line is invisible to OD's stdout parser but useful for "wait, why +did my test get the FAQ-fix trace?" debugging. + +--- + +## Recording catalog + +The recordings live as one JSONL file per Langfuse trace under +`recordings/`. 
Each file starts with a `meta` event carrying: + +```json +{ + "type": "meta", + "source": {"provider": "langfuse", "trace_id": "...", "project_id": "..."}, + "agent": "claude" | "codex" | "opencode" | "gemini" | "cursor-agent" | "qwen" | "copilot" | "deepseek" | "antigravity", + "model": "...", + "outcome": "succeeded" | "failed" | "errored" | "interrupted", + "duration_ms": 33620, + "tool_call_count": 17, + "error_count": 0, + "total_tokens": 12345, + "tags": ["agent:claude", "skill:agent-browser", "open-design", ...], + "user_input": "...", + "session_id": "..." +} +``` + +Subsequent events are `tool_call`, `tool_result`, and `report` (the +final assistant text). + +### Indexed metadata + +`mocks/manifest.json` is a flat manifest with one entry per recording +plus histograms over all recordings, committed to the repo. It's also +mirrored to R2 alongside the .jsonl files so consumers can fetch the +current catalog without cloning. Query with `jq`: + +```bash +# All multi-turn claude sessions about HTML editing +jq '.entries[] | select(.agent=="claude" and .multi_turn==true)' \ + mocks/manifest.json | head -50 + +# Failed codex traces (negative-path tests) +jq '.entries[] | select(.agent=="codex" and .outcome=="failed") | .trace_id' \ + mocks/manifest.json + +# Agent-browser skill, sorted by tool count desc +jq '[.entries[] | select(.skills | index("agent-browser"))] | sort_by(-.tool_count)' \ + mocks/manifest.json +``` + +### Headline stats (current dataset) + +| Dimension | Distribution | +|---|---| +| Agents | claude 57 · opencode 41 · codex 38 · gemini 25 · cursor-agent 11 · qwen/copilot/deepseek 2 each · antigravity 1 | +| Outcomes | succeeded 144 · failed 35 | +| Skills | default 71 · ad-creative 50 · algorithmic-art 30 · agent-browser 22 · video-hyperframes 2 · magazine-web-ppt / brainstorming / data-report / penpot-flutter 1 each | +| Multi-turn | 124 traces tied to a session with ≥2 turns | +| Artifact | 18 traces produce `` output | + +--- + +## 
Anonymization + +User-specific data has been scrubbed from every recording: + +- `/Users//…`, `/home//…`, `C:\Users\\…` + → `${HOME}/…` / `%USERPROFILE%\…` +- Project UUIDs → stable `proj-001`, `proj-002`, … per recording +- meta tag `project:` rewritten too + +The anonymizer is idempotent. Tool input/output payloads (HTML, code, +etc.) are preserved verbatim — they're templated UI without cell-level +PII; if a future audit finds otherwise, add specific scrubs in the +harvester repo (see "Adding more recordings" below) and re-run. + +--- + +## Adding more recordings + +Local maintainer flow — the .jsonl never enters the repo. Only the +manifest delta (≈200 B per entry) gets committed. + +### Step 1 — produce an anonymized .jsonl + +The harvester that produced the current 179-trace set lives in a +separate repo, [nexu-io/agent-pr-explore][harvester]. See its README +for how to authenticate against your trace store, filter by skill / +agent / outcome, and anonymize the result. Output is one +`.jsonl` file per recording. + +[harvester]: https://github.com/nexu-io/agent-pr-explore + +### Step 2 — one-shot upload + manifest update + +```bash +# prereq, once: wrangler login (OAuth, no token to manage) +bash mocks/scripts/upload-recording.sh /path/to/.jsonl +``` + +The script validates the file, prints the manifest entry it will add, +uploads the .jsonl to R2, rewrites `mocks/manifest.json` locally, then +uploads the updated manifest to R2 too (so consumers see the new entry +without waiting for the next git push). + +### Step 3 — commit the manifest delta + +```bash +git add mocks/manifest.json +git commit -m "mocks: add recording " +git push # or open a PR — your call +``` + +The only thing in the commit is a ~200-byte JSON edit listing the new +entry's `trace_id`, `sha256`, `bytes`, `agent`, `outcome`, `skills`, +etc. The .jsonl itself stays in R2. + +### Trust model + +- **R2 write is wrangler-OAuth gated.** Maintainers do `wrangler login` + once. 
The bucket is on the powerformer Cloudflare account (pinned in + the script). No long-lived tokens in repo secrets, no Action to + hijack — just account access. +- **Repo stays small forever.** No .jsonl files ever land in git; the + manifest grows by ~200 B per recording. +- **Read stays public.** Anyone can fetch via the r2.dev URL — see + [Recordings live on R2, not in this repo](#recordings-live-on-r2-not-in-this-repo). + +### Removing a recording + +```bash +# 1. delete from R2 +export CLOUDFLARE_ACCOUNT_ID=64ad4569ffd912432d6b86d5656484c4 +wrangler r2 object delete open-design-mocks/recordings/v1/.jsonl --remote +# 2. drop the entry from manifest.json (edit by hand, or use `jq`) +# 3. re-upload manifest +wrangler r2 object put open-design-mocks/recordings/v1/manifest.json \ + --file mocks/manifest.json --remote +# 4. git add mocks/manifest.json && git commit && git push +``` + +There's no automation for delete because (a) it's rare and (b) you +want a human to think about whether removing a recording would +invalidate any test fixtures that pin it via `OD_MOCKS_TRACE=`. + +--- + +## Usage from OD's test code + +### From a test (Vitest / Jest) + +```ts +import { spawn } from 'node:child_process'; +import { join } from 'node:path'; + +const MOCK_BIN = join(__dirname, '../../mocks/bin'); + +it('parses an opencode session with 4 tool calls into 4 UI events', async () => { + const child = spawn('opencode', ['run'], { + env: { + ...process.env, + PATH: `${MOCK_BIN}:${process.env.PATH}`, + OD_MOCKS_TRACE: '06a9324a', // 4-tool claude session + OD_MOCKS_NO_DELAY: '1', + }, + stdio: ['pipe', 'pipe', 'pipe'], + }); + child.stdin.write('test prompt'); + child.stdin.end(); + // ... 
assert events parsed from child.stdout +}); +``` + +### From a manual playback + +```bash +# See what claude's 17-tool "delete v2" session emits to OD: +export PATH=$(git rev-parse --show-toplevel)/mocks/bin:$PATH +export OD_MOCKS_TRACE=04097377 +export OD_MOCKS_NO_DELAY=1 +echo "anything" | claude -p --output-format=stream-json | jq .type | uniq -c +``` + +--- + +## Files + +``` +mocks/ +├── README.md ← you are here +├── mock-agent.mjs ← entry; routes --as to format renderer +├── lib/ +│ ├── recording-picker.mjs ← env-driven trace selection +│ ├── format-opencode.mjs ← matches handleOpenCodeEvent +│ ├── format-codex.mjs ← matches handleCodexEvent +│ ├── format-claude.mjs ← matches createClaudeStreamHandler +│ ├── format-gemini.mjs ← matches handleGeminiEvent +│ ├── format-cursor-agent.mjs ← matches handleCursorEvent +│ ├── format-acp.mjs ← JSON-RPC server matching attachAcpSession +│ ├── format-vela.mjs ← AMR vela: ACP + models block + set_model gate +│ ├── vela-subcommands.mjs ← `vela login` + `vela models` handlers +│ └── format-plain.mjs ← raw stdout (deepseek/qwen/grok) +├── bin/ +│ ├── opencode claude codex +│ ├── gemini cursor-agent +│ ├── deepseek qwen grok +│ ├── devin hermes kilo kimi kiro vibe +│ └── vela ← 15 bash wrappers, PATH-overlay +├── manifest.json ← committed: 179 entries' metadata + sha256 + provenance + R2 storage hints +├── golden/ ← committed: daemon-event regression snapshots +│ ├── README.md +│ └── *.events.json ← 3 representative traces (claude/codex/opencode) +├── scripts/ +│ ├── smoke-test.sh ← 21 checks; auto-fetches recordings if empty +│ ├── fetch-recordings.sh ← pull from R2 (parallel, sha256-verified, idempotent) +│ ├── upload-recording.sh ← maintainer-local: validate + wrangler put + manifest update +│ ├── contract-check.sh ← real-CLI vs mock protocol drift check (manual) +│ └── lib/ +│ └── manifest-utils.mjs ← shared sha256 / meta-parse / manifest-rebuild logic +└── recordings/ ← populated at runtime, gitignored .jsonl + └── 
.gitignore ← recordings come via fetch +``` + +No external dependencies. Pure node:`fs`/`crypto`/`child_process`. Works +under any Node ≥18. + +--- + +## Limitations + +- `copilot`, `qoder`, `pi` (the niche `copilot-stream-json` / + `qoder-stream-json` / `pi-rpc` formats) are recorded but not yet + rendered as their native protocols — they fall back to the plain + renderer for now. If you need them, add a `format-.mjs` + following the same pattern as `format-codex.mjs`; the parsers are + in `apps/daemon/src/{copilot-stream,qoder-stream}.ts` and the pi-rpc + handler inside `apps/daemon/src/server.ts`. +- The mock does not honor CLI flags that change semantics (`--model`, + `--permission-mode`, `--allowed-tools`). They're silently ignored. + +--- + +## Provenance / safety + +All recordings come from open-design's own Langfuse project (the +`open-design` project under the `powerformer` org). Users opted into +telemetry when they installed the desktop client. The anonymizer +removed user-identifying paths and project UUIDs before checking in. + +If you find a recording that includes content that should be redacted, +follow the [Removing a recording](#removing-a-recording) flow above. diff --git a/mocks/bin/claude b/mocks/bin/claude new file mode 100755 index 000000000..8b22e4f1e --- /dev/null +++ b/mocks/bin/claude @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Mock wrapper for claude — runs the bundled mock-agent in this agent's +# stdout protocol. The wrapper accepts (and ignores) any flags the real +# CLI would take; the mock doesn't honor model selection, tool gates, or +# permission modes — recording selection is via OD_MOCKS_* +# env vars (see ../README.md). 
+set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as claude "$@" diff --git a/mocks/bin/codex b/mocks/bin/codex new file mode 100755 index 000000000..7a1cfed1c --- /dev/null +++ b/mocks/bin/codex @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Mock wrapper for codex — runs the bundled mock-agent in this agent's +# stdout protocol. The wrapper accepts (and ignores) any flags the real +# CLI would take; the mock doesn't honor model selection, tool gates, or +# permission modes — recording selection is via OD_MOCKS_* +# env vars (see ../README.md). +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as codex "$@" diff --git a/mocks/bin/cursor-agent b/mocks/bin/cursor-agent new file mode 100755 index 000000000..f1c764d7e --- /dev/null +++ b/mocks/bin/cursor-agent @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as cursor-agent "$@" diff --git a/mocks/bin/deepseek b/mocks/bin/deepseek new file mode 100755 index 000000000..d18f2f86d --- /dev/null +++ b/mocks/bin/deepseek @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Mock wrapper for deepseek — runs the bundled mock-agent in this agent's +# stdout protocol. The wrapper accepts (and ignores) any flags the real +# CLI would take; the mock doesn't honor model selection, tool gates, or +# permission modes — recording selection is via OD_MOCKS_* +# env vars (see ../README.md). 
+set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as deepseek "$@" diff --git a/mocks/bin/devin b/mocks/bin/devin new file mode 100755 index 000000000..28545a799 --- /dev/null +++ b/mocks/bin/devin @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as devin "$@" diff --git a/mocks/bin/gemini b/mocks/bin/gemini new file mode 100755 index 000000000..8cca67dbe --- /dev/null +++ b/mocks/bin/gemini @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as gemini "$@" diff --git a/mocks/bin/grok b/mocks/bin/grok new file mode 100755 index 000000000..becd0e970 --- /dev/null +++ b/mocks/bin/grok @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Mock wrapper for grok — runs the bundled mock-agent in this agent's +# stdout protocol. The wrapper accepts (and ignores) any flags the real +# CLI would take; the mock doesn't honor model selection, tool gates, or +# permission modes — recording selection is via OD_MOCKS_* +# env vars (see ../README.md). 
+set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as grok "$@" diff --git a/mocks/bin/hermes b/mocks/bin/hermes new file mode 100755 index 000000000..ae273eba1 --- /dev/null +++ b/mocks/bin/hermes @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as hermes "$@" diff --git a/mocks/bin/kilo b/mocks/bin/kilo new file mode 100755 index 000000000..9d4b3aad1 --- /dev/null +++ b/mocks/bin/kilo @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as kilo "$@" diff --git a/mocks/bin/kimi b/mocks/bin/kimi new file mode 100755 index 000000000..d9464a79e --- /dev/null +++ b/mocks/bin/kimi @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as kimi "$@" diff --git a/mocks/bin/kiro b/mocks/bin/kiro new file mode 100755 index 000000000..d733cae4d --- /dev/null +++ b/mocks/bin/kiro @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as kiro "$@" diff --git a/mocks/bin/kiro-cli b/mocks/bin/kiro-cli new file mode 100755 index 000000000..3f6a55199 --- /dev/null +++ b/mocks/bin/kiro-cli @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +# Alias wrapper for kiro-cli — the primary bin name OD resolves for the +# kiro RuntimeAgentDef (bin: 'kiro-cli'). +# Delegates to the same mock as mocks/bin/kiro. 
+set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as kiro "$@" diff --git a/mocks/bin/opencode b/mocks/bin/opencode new file mode 100755 index 000000000..3fdfabec3 --- /dev/null +++ b/mocks/bin/opencode @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Mock wrapper for opencode — runs the bundled mock-agent in this agent's +# stdout protocol. The wrapper accepts (and ignores) any flags the real +# CLI would take; the mock doesn't honor model selection, tool gates, or +# permission modes — recording selection is via OD_MOCKS_* +# env vars (see ../README.md). +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as opencode "$@" diff --git a/mocks/bin/opencode-cli b/mocks/bin/opencode-cli new file mode 100755 index 000000000..216d64425 --- /dev/null +++ b/mocks/bin/opencode-cli @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +# Alias wrapper for opencode-cli — the primary bin name OD resolves for +# the opencode RuntimeAgentDef (bin: 'opencode-cli', fallbackBins: ['opencode']). +# Delegates to the same mock as mocks/bin/opencode. +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as opencode "$@" diff --git a/mocks/bin/qwen b/mocks/bin/qwen new file mode 100755 index 000000000..132bafc36 --- /dev/null +++ b/mocks/bin/qwen @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# Mock wrapper for qwen — runs the bundled mock-agent in this agent's +# stdout protocol. The wrapper accepts (and ignores) any flags the real +# CLI would take; the mock doesn't honor model selection, tool gates, or +# permission modes — recording selection is via OD_MOCKS_* +# env vars (see ../README.md). 
+set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as qwen "$@" diff --git a/mocks/bin/vela b/mocks/bin/vela new file mode 100755 index 000000000..b43af65c8 --- /dev/null +++ b/mocks/bin/vela @@ -0,0 +1,8 @@ +#!/usr/bin/env bash +# vela mock CLI — dispatches by first argv: +# vela login → write ~/.amr/config.json (fake credentials) +# vela models → print public model catalog +# vela agent run --runtime opencode → ACP JSON-RPC server (default) +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as vela "$@" diff --git a/mocks/bin/vibe b/mocks/bin/vibe new file mode 100755 index 000000000..1ada0ee77 --- /dev/null +++ b/mocks/bin/vibe @@ -0,0 +1,4 @@ +#!/usr/bin/env bash +set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as vibe "$@" diff --git a/mocks/bin/vibe-acp b/mocks/bin/vibe-acp new file mode 100755 index 000000000..5a465205d --- /dev/null +++ b/mocks/bin/vibe-acp @@ -0,0 +1,7 @@ +#!/usr/bin/env bash +# Alias wrapper for vibe-acp — the primary bin name OD resolves for the +# vibe RuntimeAgentDef (bin: 'vibe-acp'). +# Delegates to the same mock as mocks/bin/vibe. 
+set -euo pipefail +HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)" +exec node "$HERE/../mock-agent.mjs" --as vibe "$@" diff --git a/mocks/golden/314d6833-0377-4ac4-ba11-2b8d7eca5511.events.json b/mocks/golden/314d6833-0377-4ac4-ba11-2b8d7eca5511.events.json new file mode 100644 index 000000000..88e764715 --- /dev/null +++ b/mocks/golden/314d6833-0377-4ac4-ba11-2b8d7eca5511.events.json @@ -0,0 +1,114 @@ +{ + "agent": "claude", + "trace": "314d6833-0377-4ac4-ba11-2b8d7eca5511", + "events": [ + { + "type": "status", + "label": "initializing", + "model": null, + "sessionId": "" + }, + { + "type": "tool_use", + "id": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01W4ChHzyM5SX3pnfNPzvvQe", + "name": "Read", + "input": { + "file_path": "${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html", + "offset": 100, + "limit": 40 + } + }, + { + "type": "turn_end", + "stopReason": "tool_use" + }, + { + "type": "tool_result", + "toolUseId": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01W4ChHzyM5SX3pnfNPzvvQe", + "content": "100\t .led-grid .led { width: 100%; aspect-ratio: 1 / 1; max-height: 22px; min-height: 12px;\n101\t display: grid; place-items: center; font-size: 8px; font-weight: 700; }\n102\t .led.up { background: rgba(0,255,102,0.18); color: var(--green); border: 1px solid rgba(0,255,102,0.55); box-shadow: inset 0 0 6px rgba(0,255,102,0.35); }\n103\t .led.dn { background: rgba(255,51,68,0.16); color: var(--red); border: 1px solid rgba(255,51,68,0.5); box-shadow: inset 0 0 6px rgba(255,51,68,0.3); }\n104\t .led.flat { background: rgba(112,112,112,0.12); color: var(--muted); border: 1px solid var(--line-strong); }\n105\t .led-grid .net { font-size: 10px; color: var(--fg-strong); font-weight: 700; text-align: right; letter-spacing: 0.04em; }\n106\t .led-grid .net.up { color: var(--green); }\n107\t .led-grid .net.dn { color: var(--red); }\n108\t .led-legend { display: flex; gap: 12px; 
margin-top: 8px; font-size: 9px; color: var(--muted); letter-spacing: 0.08em; }\n109\t .led-legend .lg { display: flex; align-items: center; gap: 4px; }\n110\t .led-legend i { width: 10px; height: 10px; }\n111\t .led-legend i.up { background: rgba(0,255,102,0.45); border: 1px solid var(--green); }\n112\t .led-legend i.dn { background: rgba(255,51,68,0.4); border: 1px solid var(--red); }\n113\t .led-legend i.flat { background: rgba(112,112,112,0.2); border: 1px solid var(--muted); }\n114\t\n115\t /* MOD 2: Activity log */\n116\t .log-list { font-size: 10px; line-height: 1.5; letter-spacing: 0.02em; }\n117\t .log-row { display: grid; grid-template-columns: 56px 38px 1fr auto; gap: 8px; padding: 4px 0; border-bottom: 1px dotted var(--line); }\n118\t .log-row:last-child { border-bottom: 0; }\n119\t .log-time { color: var(--dim); }\n120\t .log-type { color: var(--amber); font-weight: 700; letter-spacing: 0.06em; }\n121\t .log-type.cl { color: var(--green); }\n122\t .log-type.cr { color: var(--cyan); }\n123\t .log-msg { color: var(--fg); white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }\n124\t .log-d { font-weight: 700; }\n125\t .log-d.up { color: var(--green); }\n126\t .log-d.dn { color: var(--red); }\n127\t\n128\t /* MOD 3: ASCII heatmap */\n129\t .heat-wrap { font-family: var(--font-mono); font-size: 10px; line-height: 1.55; color: var(--fg); }\n130\t .heat-wrap .heat-head, .heat-wrap .heat-row { display: grid; grid-template-columns: 36px repeat(4, 1fr); gap: 6px; }\n131\t .heat-wrap .heat-head { color: var(--muted); font-size: 9px; letter-spacing: 0.1em; padding-bottom: 4px; border-bottom: 1px dotted var(--line-strong); margin-bottom: 4px; }\n132\t .heat-wrap .heat-row { padding: 3px 0; }\n133\t .heat-wrap .heat-row .m { color: var(--muted); font-weight: 600; letter-spacing: 0.06em; }\n134\t .heat-cell { padding: 2px 4px; text-align: center; font-weight: 700; letter-spacing: 0.04em; }\n135\t .heat-cell.h0 { background: #0f0f0f; color: var(--dim); 
}\n136\t .heat-cell.h1 { background: rgba(255,176,0,0.12); color: var(--amber-dim); }\n137\t .heat-cell.h2 { background: rgba(255,176,0,0.28); color: var(--amber); }\n138\t .heat-cell.h3 { background: rgba(255,176,0,0.5); color: #fff; }\n139\t .heat-cell.h4 { background: rgba(255,176,0,0.78); color: #050505; }", + "isError": false + }, + { + "type": "tool_use", + "id": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01SifooERKXmRaxihtaFixFU", + "name": "Grep", + "input": { + "pattern": "kpi-cell::before|kpi-cell\\s*\\{|\\.kpi-strip|\\.kpi-cell", + "path": "${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html", + "output_mode": "content", + "-n": true + } + }, + { + "type": "turn_end", + "stopReason": "tool_use" + }, + { + "type": "tool_result", + "toolUseId": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01SifooERKXmRaxihtaFixFU", + "content": "55: .kpi-strip { background: var(--bg-2); border-bottom: 1px solid var(--line); display: grid; grid-template-columns: repeat(4, 1fr); }\n56: .kpi-cell { padding: 14px 22px; border-right: 1px solid var(--line); position: relative; }\n57: .kpi-cell:last-child { border-right: 0; }\n58: .kpi-cell::before { content:''; position: absolute; left: 0; top: 12px; bottom: 12px; width: 3px; }\n59: .kpi-cell.cust-create::before { background: var(--cyan); }\n60: .kpi-cell.cust-close::before { background: var(--green); }\n61: .kpi-cell.int-create::before { background: var(--amber); }\n62: .kpi-cell.int-close::before { background: var(--magenta); }", + "isError": false + }, + { + "type": "tool_use", + "id": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_0163uAuJyfBhuHWtxXoSwi3N", + "name": "Edit", + "input": { + "file_path": "${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html", + "old_string": " .kpi-cell::before { content:''; position: absolute; left: 0; top: 12px; bottom: 12px; width: 
3px; }", + "new_string": " .kpi-cell::before { content:''; position: absolute; left: 0; top: 14px; bottom: 14px; width: 3px; }" + } + }, + { + "type": "turn_end", + "stopReason": "tool_use" + }, + { + "type": "tool_result", + "toolUseId": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_0163uAuJyfBhuHWtxXoSwi3N", + "content": "The file ${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html has been updated successfully. (file state is current in your context — no need to Read it back)", + "isError": false + }, + { + "type": "tool_use", + "id": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01QdvPxPh8Qk9Ch4bgAgGZxT", + "name": "Grep", + "input": { + "-n": true, + "output_mode": "content", + "path": "${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html", + "pattern": "topbar|\\.bar\\s" + } + }, + { + "type": "turn_end", + "stopReason": "tool_use" + }, + { + "type": "tool_result", + "toolUseId": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01QdvPxPh8Qk9Ch4bgAgGZxT", + "content": "40: /* topbar */\n41: .topbar { background: var(--bg-2); border-bottom: 1px solid var(--line); display: grid;\n44: .topbar .lbl { color: var(--muted); }\n45: .topbar .v { color: var(--amber); }\n46: .topbar .v.up { color: var(--green); }\n47: .topbar .v.dn { color: var(--red); }\n48: .topbar .sep { color: var(--dim); }\n214:
", + "isError": false + }, + { + "type": "text_delta", + "delta": "让我看一下 KPI 色条的当前样式定位。让我找 KPI 色条样式的定义。找到了。当前 `top: 12px; bottom: 12px` — KPI cell 高度是 78px(来自 stage grid `32px 78px 1fr 44px` 的第二行),padding 是 `14px 22px`。\n\n色条 `top:12 / bottom:12` 意味着色条上下各留 12px,色条本身高度 = 78 - 12 - 12 = 54px。\n\n但 KPI cell 的 padding 是 `14px 22px`(上下 14px),意味着内容区从 14px 开始。色条 `top:12px` 比内容区上沿(14px)高 2px,bottom 同理。这看起来挺合理啊...\n\n等等,可能是用户感觉\"统一往下偏移几像素\"是因为色条上端比 KPI 数字(在 padding 14px 后开始)更高 2px,但下端比 KPI 文字下边缘更深 — 视觉上整条色条的中点跟内容的中点错开了。\n\n让我让色条跟 KPI 内容(padding 14 上下)完全对齐:`top:14px / bottom:14px`。改完。色条上下从 12px 改成 14px,跟 KPI cell 的 padding(14px)完全对齐 — 现在色条的上下端正好齐内容区的上下沿,不再有 2px 错位。\n\n但你说\"统一往下偏移几个像素\" — 还有一种可能是色条整体被往下推了。让我再看一下是不是有别的样式干扰。topbar 32px 高度正常。color bar 现在 `top:14 / bottom:14` 应该对齐到 KPI 内容区的上下沿了。\n\n刷新 ref-b-tickets-v3.html 看看现在对齐了没?如果还是觉得偏移,告诉我具体是色条整体往下移、还是上下不对称,我再精调。" + }, + { + "type": "turn_end", + "stopReason": "end_turn" + }, + { + "type": "usage", + "usage": { + "input_tokens": 0, + "output_tokens": 1594, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0 + }, + "costUsd": 0, + "durationMs": 40885, + "stopReason": "end_turn" + } + ] +} diff --git a/mocks/golden/9a9522ec-575f-432f-aeed-efc491e900aa.events.json b/mocks/golden/9a9522ec-575f-432f-aeed-efc491e900aa.events.json new file mode 100644 index 000000000..df8bc6834 --- /dev/null +++ b/mocks/golden/9a9522ec-575f-432f-aeed-efc491e900aa.events.json @@ -0,0 +1,187 @@ +{ + "agent": "opencode", + "trace": "9a9522ec-575f-432f-aeed-efc491e900aa", + "events": [ + { + "type": "status", + "label": "running" + }, + { + "type": "tool_use", + "id": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_9F86gxR0AA5Ea5dwfJQA0062", + "name": "read", + "input": { + "filePath": "%USERPROFILE%\\AppData\\Roaming\\Open Design\\namespaces\\release-stable-win\\data\\projects\\proj-001\\evidguard-pitch-deck.html" + } + }, + { + "type": "tool_result", + "toolUseId": 
"9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_9F86gxR0AA5Ea5dwfJQA0062", + "content": "%USERPROFILE%\\AppData\\Roaming\\Open Design\\namespaces\\release-stable-win\\data\\projects\\proj-001\\evidguard-pitch-deck.html\nfile\n\n1: \n2: \n3: \n4: \n5: \n6: \n7: EvidGuard · Pitch deck\n8: \n\n\n
\n
\n
\n
QueueCare
\n

Doctor queues without appointment slots.

\n

Patients choose a live doctor queue, receive a token, track how many people are ahead, and check in only when it is worth leaving.

\n
\n
\n
Patient firstThe core promise is a held queue position, not a fixed time that clinics cannot honor.
\n
Live queue stateEvery doctor card carries patients ahead, expected call, token availability, and clinic notes.
\n
Native adaptationiOS uses compact tab navigation; Android uses Material-style top actions, bottom nav, and a floating queue shortcut.
\n
\n
\n\n
\n
\n

iOS flow

\n

Discovery, doctor queue detail, and active token are separate app screens inside the iPhone frame.

\n
\n
\n
\n

Find queues

Open
\n \n
\n
\n

Join queue

Open
\n \n
\n
\n

Track token

Open
\n \n
\n
\n
\n\n
\n
\n

Android flow

\n

The same product adapts to Android density, action placement, and Material-style controls.

\n
\n
\n
\n

Find queues

Open
\n \n
\n
\n

Join queue

Open
\n \n
\n
\n

Track token

Open
\n \n
\n
\n
\n
\n\n\n" + }, + { + "type": "usage", + "usage": { + "input_tokens": 0, + "output_tokens": 1982076, + "cached_read_tokens": 0 + } + } + ] +} diff --git a/mocks/lib/format-acp.mjs b/mocks/lib/format-acp.mjs new file mode 100644 index 000000000..ca590608b --- /dev/null +++ b/mocks/lib/format-acp.mjs @@ -0,0 +1,204 @@ +// OD-faithful ACP (agent-client-protocol) renderer. +// +// Unlike the streaming formats (opencode/codex/claude/gemini/cursor-agent), +// ACP is a real JSON-RPC server over stdio. The mock has to: +// +// 1. Listen on stdin for newline-delimited JSON-RPC messages from OD. +// 2. Respond to `initialize` (id=1) with the protocol version. +// 3. Respond to `session/new` (id=2) with a synthetic sessionId. +// 4. Optionally respond to `session/set_model` (id=3) with {}. +// 5. When `session/prompt` (id=N) arrives, push a series of +// `session/update` NOTIFICATIONS carrying agent_message_chunk +// content from the recording, then respond to the prompt request +// with a final usage stats result. +// 6. Stay alive until OD closes stdin or the request completes. +// +// Verified against the spec implementation at +// apps/daemon/src/acp.ts (attachAcpSession + ACP_PROTOCOL_VERSION). +// +// Tool events from the recording are dropped — OD's ACP parser only +// recognizes thought_chunk + message_chunk. Tool-use surfaces in the +// real ACP agents (Hermes/Kimi/Kilo/Kiro/Vibe/Devin) come through +// MCP or other side channels, not the stdio protocol. 
+ +import { writeFile } from 'node:fs/promises'; + +const PROTOCOL_VERSION = 1; +const sleep = ms => new Promise(r => setTimeout(r, ms)); + +function writeRpcResult(out, id, result) { + out.write(JSON.stringify({ jsonrpc: '2.0', id, result }) + '\n'); +} + +function writeNotification(out, method, params) { + out.write(JSON.stringify({ jsonrpc: '2.0', method, params }) + '\n'); +} + +/** Split a long text into approximately N-char chunks at word boundaries + * so OD's UI sees a streamed message rather than one giant burst. */ +function chunkText(text, targetChunkSize = 400) { + if (text.length <= targetChunkSize) return [text]; + const chunks = []; + let pos = 0; + while (pos < text.length) { + let end = Math.min(text.length, pos + targetChunkSize); + if (end < text.length) { + // Round to the next whitespace so we don't split mid-word + const ws = text.indexOf(' ', end); + if (ws !== -1 && ws - end < 200) end = ws + 1; + } + chunks.push(text.slice(pos, end)); + pos = end; + } + return chunks; +} + +/** + * Run the ACP server on stdin/stdout. Returns a promise that resolves + * when the prompt round-trip completes. + */ +export async function runAcpServer(events, opts = {}) { + const out = opts.emit ?? process.stdout; + const writeFn = typeof out === 'function' ? out : s => out.write(s); + const writeLine = obj => writeFn(JSON.stringify(obj) + '\n'); + + const meta = events.find(e => e.type === 'meta'); + const sessionId = opts.sessionId ?? `mock-acp-${Date.now()}`; + const reportEvent = events.find(e => e.type === 'report'); + const reportText = reportEvent?.content ?? ''; + + // Collect any thought_chunks from tool_call inputs that look like + // "I should..." style reasoning. For our recordings, the recording + // doesn't separate thinking from tool calls — so we just stream the + // final report as message chunks. Future enhancement could split out + // thinking-style text. + void meta; + + // Buffer for incoming JSON-RPC lines. 
+ let buf = ''; + let aborted = false; + + // Track which request we owe a response to. + let lastPromptId = null; + + // Chunks generator — used after session/prompt arrives. + const startPromptResponse = async (id) => { + lastPromptId = id; + const chunks = chunkText(reportText, 400); + for (const chunk of chunks) { + if (aborted) break; + if (!opts.noDelay) await sleep(50); + writeLine({ + jsonrpc: '2.0', + method: 'session/update', + params: { + sessionId, + update: { + sessionUpdate: 'agent_message_chunk', + content: { type: 'text', text: chunk }, + }, + }, + }); + } + // Final prompt response carries usage stats. ACP parser reads: + // formatUsage({inputTokens, outputTokens, cachedReadTokens, totalTokens}) + writeRpcResult(out, id, { + stopReason: 'end_of_turn', + usage: { + inputTokens: 0, + outputTokens: meta?.total_tokens ?? 0, + cachedReadTokens: 0, + totalTokens: meta?.total_tokens ?? 0, + }, + }); + + if (opts.reportFile) await writeFile(opts.reportFile, reportText).catch(() => {}); + }; + + // Dispatch incoming RPC messages. + const handleRpc = async (obj) => { + if (!obj || typeof obj !== 'object') return; + const { id, method } = obj; + if (method === 'initialize') { + writeRpcResult(out, id, { + protocolVersion: PROTOCOL_VERSION, + capabilities: { + loadSession: false, + // Tool calls aren't supported via stdio in this mock — + // matches the actual ACP agents on OD's side. + tools: false, + }, + }); + return; + } + if (method === 'session/new') { + writeRpcResult(out, id, { + sessionId, + configOptions: [ + { + configId: 'model', + category: 'model', + currentValue: meta?.model ?? 'default', + values: [meta?.model ?? 'default'], + }, + ], + }); + return; + } + if (method === 'session/set_model' || method === 'session/set_config_option') { + writeRpcResult(out, id, {}); + return; + } + if (method === 'session/prompt') { + // Fire-and-forget the chunk streaming; the prompt request gets + // responded to once the last chunk has been streamed. 
+ void startPromptResponse(id); + return; + } + if (method === 'session/cancel') { + aborted = true; + // OD doesn't expect a response to cancel notifications. + return; + } + // Any other method: respond with an empty result to keep the parser happy. + if (id !== undefined && id !== null) { + writeRpcResult(out, id, {}); + } + }; + + // Resolves when stdin ends or errors, on SIGINT/SIGTERM, or on the no-prompt timeout below. + return new Promise((resolve) => { + process.stdin.setEncoding('utf-8'); + process.stdin.on('data', async (data) => { + buf += data; + let nl; + while ((nl = buf.indexOf('\n')) !== -1) { + const line = buf.slice(0, nl).trim(); + buf = buf.slice(nl + 1); + if (!line) continue; + let obj; + try { obj = JSON.parse(line); } catch { continue; } + await handleRpc(obj); + } + }); + process.stdin.on('end', () => { + // OD closed stdin (normally after our prompt response) — graceful shutdown. + resolve(); + }); + process.stdin.on('error', () => resolve()); + + const onSigint = () => { aborted = true; resolve(); }; + process.once('SIGINT', onSigint); + process.once('SIGTERM', onSigint); + + // Safety timeout — if OD never sends `session/prompt` within 30s, + // bail out so we don't hang in CI. .unref() keeps this pending timer + // from holding the process open by itself. + setTimeout(() => { + if (lastPromptId === null) { + process.stderr.write('[mock-acp] no prompt received within 30s; exiting\n'); + resolve(); + } + }, 30_000).unref(); + }); +} diff --git a/mocks/lib/format-claude.mjs b/mocks/lib/format-claude.mjs new file mode 100644 index 000000000..0ae0353f3 --- /dev/null +++ b/mocks/lib/format-claude.mjs @@ -0,0 +1,92 @@ +// OD-faithful claude-stream-json renderer. Matches OD's +// `claude-stream.ts:createClaudeStreamHandler` parser. +// +// Each tool call lives in its own assistant message wrapper (the +// "finalized blocks" path — simpler than stream_event deltas, identical +// semantics). 
+ +import { writeFile } from 'node:fs/promises'; +import { randomUUID } from 'node:crypto'; + +const sleep = ms => new Promise(r => setTimeout(r, ms)); + +export async function renderAsClaude(events, opts = {}) { + const emit = opts.emit ?? (s => process.stdout.write(s)); + const maxSleep = opts.maxSleepMs ?? 3000; + const meta = events.find(e => e.type === 'meta'); + const sessionId = opts.sessionId ?? randomUUID(); + + emit(JSON.stringify({ + type: 'system', + subtype: 'init', + model: meta?.model ?? null, + session_id: sessionId, + }) + '\n'); + + const results = new Map(); + for (const e of events) if (e.type === 'tool_result') results.set(e.obs_id, e); + + let lastT = 0; + for (const e of events) { + if (e.type === 'meta' || e.type === 'stdout' || e.type === 'tool_result') continue; + const t = typeof e.t_ms === 'number' ? e.t_ms : undefined; + if (!opts.noDelay && t !== undefined) { + const delta = Math.min(maxSleep, Math.max(0, t - lastT)); + if (delta > 0) await sleep(delta); + lastT = t; + } + if (e.type === 'tool_call') { + const result = results.get(e.obs_id); + const messageId = `msg_${randomUUID().replace(/-/g, '').slice(0, 24)}`; + emit(JSON.stringify({ + type: 'assistant', + message: { + id: messageId, + role: 'assistant', + content: [{ + type: 'tool_use', id: e.obs_id, name: e.name, input: e.input ?? {}, + }], + stop_reason: 'tool_use', + }, + }) + '\n'); + emit(JSON.stringify({ + type: 'user', + message: { + role: 'user', + content: [{ + type: 'tool_result', + tool_use_id: e.obs_id, + content: result?.output ?? 
'', + is_error: result?.status === 'error', + }], + }, + }) + '\n'); + } else if (e.type === 'report') { + const messageId = `msg_${randomUUID().replace(/-/g, '').slice(0, 24)}`; + emit(JSON.stringify({ + type: 'assistant', + message: { + id: messageId, + role: 'assistant', + content: [{ type: 'text', text: e.content }], + stop_reason: 'end_turn', + }, + }) + '\n'); + if (opts.reportFile) await writeFile(opts.reportFile, e.content).catch(() => {}); + } + } + + emit(JSON.stringify({ + type: 'result', + subtype: 'success', + usage: { + input_tokens: 0, + output_tokens: meta?.total_tokens ?? 0, + cache_creation_input_tokens: 0, + cache_read_input_tokens: 0, + }, + total_cost_usd: 0, + duration_ms: meta?.duration_ms ?? 0, + stop_reason: 'end_turn', + }) + '\n'); +} diff --git a/mocks/lib/format-codex.mjs b/mocks/lib/format-codex.mjs new file mode 100644 index 000000000..09362087c --- /dev/null +++ b/mocks/lib/format-codex.mjs @@ -0,0 +1,92 @@ +// OD-faithful codex renderer. Emits the shape OD's +// `json-event-stream.ts:handleCodexEvent` parser accepts. +// Codex parser only knows command_execution (Bash) + agent_message — +// non-Bash tools are translated to a shell-equivalent command. + +import { writeFile } from 'node:fs/promises'; + +const sleep = ms => new Promise(r => setTimeout(r, ms)); + +function commandFor(name, input) { + if (!input || typeof input !== 'object') return `# ${name}`; + const o = input; + switch (name) { + case 'Bash': + return typeof o.command === 'string' ? o.command : '# Bash'; + case 'Read': { + const fp = typeof o.file_path === 'string' ? o.file_path : ''; + const o2 = typeof o.offset === 'number' ? ` # offset=${o.offset}` : ''; + const lim = typeof o.limit === 'number' ? ` limit=${o.limit}` : ''; + return `cat "${fp}"${o2}${lim}`; + } + case 'Grep': { + const pat = typeof o.pattern === 'string' ? o.pattern : ''; + const path = typeof o.path === 'string' ? 
o.path : '.'; + return `grep -n "${pat.replace(/"/g, '\\"')}" "${path}"`; + } + case 'Glob': { + const pat = typeof o.pattern === 'string' ? o.pattern : ''; + return `find . -path "${pat}" -print`; + } + case 'Edit': + case 'Write': + return `# ${name} ${typeof o.file_path === 'string' ? o.file_path : ''}`; + default: + return `# ${name} ${JSON.stringify(o).slice(0, 200)}`; + } +} + +export async function renderAsCodex(events, opts = {}) { + const emit = opts.emit ?? (s => process.stdout.write(s)); + const maxSleep = opts.maxSleepMs ?? 3000; + const meta = events.find(e => e.type === 'meta'); + + const results = new Map(); + for (const e of events) if (e.type === 'tool_result') results.set(e.obs_id, e); + + emit(JSON.stringify({ type: 'thread.started' }) + '\n'); + emit(JSON.stringify({ type: 'turn.started' }) + '\n'); + + let lastT = 0; + let agentMessageCount = 0; + for (const e of events) { + if (e.type === 'meta' || e.type === 'stdout' || e.type === 'tool_result') continue; + const t = typeof e.t_ms === 'number' ? e.t_ms : undefined; + if (!opts.noDelay && t !== undefined) { + const delta = Math.min(maxSleep, Math.max(0, t - lastT)); + if (delta > 0) await sleep(delta); + lastT = t; + } + if (e.type === 'tool_call') { + const result = results.get(e.obs_id); + const command = commandFor(e.name, e.input); + emit(JSON.stringify({ + type: 'item.started', + item: { type: 'command_execution', id: e.obs_id, command }, + }) + '\n'); + emit(JSON.stringify({ + type: 'item.completed', + item: { + type: 'command_execution', + id: e.obs_id, + command, + aggregated_output: result?.output ?? '', + exit_code: result?.status === 'error' ? 1 : 0, + status: result?.status === 'error' ? 
'failed' : 'completed', + }, + }) + '\n'); + } else if (e.type === 'report') { + agentMessageCount += 1; + emit(JSON.stringify({ + type: 'item.completed', + item: { type: 'agent_message', id: `mock-msg-${agentMessageCount}`, text: e.content }, + }) + '\n'); + if (opts.reportFile) await writeFile(opts.reportFile, e.content).catch(() => {}); + } + } + + emit(JSON.stringify({ + type: 'turn.completed', + usage: { input_tokens: 0, output_tokens: meta?.total_tokens ?? 0, cached_input_tokens: 0 }, + }) + '\n'); +} diff --git a/mocks/lib/format-cursor-agent.mjs b/mocks/lib/format-cursor-agent.mjs new file mode 100644 index 000000000..1a110bf30 --- /dev/null +++ b/mocks/lib/format-cursor-agent.mjs @@ -0,0 +1,63 @@ +// OD-faithful cursor-agent renderer. +// +// Matches the JSONL shape OD's `json-event-stream.ts:handleCursorEvent` +// parser accepts: +// {"type":"system","subtype":"init","model":"..."} +// {"type":"assistant","message":{ +// "content":[{"type":"text","text":"..."}, ...]}, +// "timestamp_ms": ...} +// {"type":"result","usage":{"inputTokens","outputTokens","cacheReadTokens","cacheWriteTokens"}, +// "duration_ms": ...} +// +// Cursor's parser handles delta-vs-replacement detection itself: when a +// later text block STARTS WITH the prior accumulated text, the parser +// strips the prefix and emits only the delta. So we can either emit a +// single message containing the full text, OR emit progressive chunks +// where each chunk is the cumulative-so-far. We use the single-message +// form — simplest, no risk of state desync. +// +// Like gemini, the cursor-agent parser does NOT recognize tool events. +// Tool calls in the recording are ignored; only the final assistant +// text is rendered. + +import { writeFile } from 'node:fs/promises'; + +const sleep = ms => new Promise(r => setTimeout(r, ms)); + +export async function renderAsCursorAgent(events, opts = {}) { + const emit = opts.emit ?? (s => process.stdout.write(s)); + const maxSleep = opts.maxSleepMs ?? 
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

/**
 * Replay a recording as an `init` / `message` / `result` JSONL envelope.
 * Only `report` events are rendered; every other event type (tool calls
 * included) is ignored.
 *
 * @param {Array<object>} events Recording events.
 * @param {object} [opts]
 * @param {(line: string) => unknown} [opts.emit] Sink for each serialized
 *   line; defaults to writing to `process.stdout`.
 * @param {number} [opts.maxSleepMs=2000] Cap for the single start-up pause.
 * @param {boolean} [opts.noDelay] When truthy, skip the start-up pause.
 * @param {string} [opts.reportFile] When set, each report's content is also
 *   written to this path (best effort; write errors are ignored).
 * @returns {Promise<void>}
 */
export async function renderAsGemini(events, opts = {}) {
  const write = opts.emit ?? (s => process.stdout.write(s));
  const emitJson = payload => write(JSON.stringify(payload) + '\n');
  const sleepCap = opts.maxSleepMs ?? 2000;
  const meta = events.find(ev => ev.type === 'meta');

  emitJson({
    type: 'init',
    model: meta?.model ?? 'gemini-2.5-pro',
  });

  // One short pause (at most 200 ms) between the init line and the text.
  if (!opts.noDelay) await sleep(Math.min(sleepCap, 200));

  // Each report becomes one complete assistant message, in recording order.
  const reports = events.filter(ev => ev.type === 'report');
  for (const report of reports) {
    emitJson({
      type: 'message',
      role: 'assistant',
      content: report.content,
    });
    if (opts.reportFile) await writeFile(opts.reportFile, report.content).catch(() => {});
  }

  // Closing stats line; token and duration figures come from the meta event.
  emitJson({
    type: 'result',
    stats: {
      input_tokens: 0,
      output_tokens: meta?.total_tokens ?? 0,
      cached: 0,
      duration_ms: meta?.duration_ms ?? 0,
    },
  });
}
const sleep = ms => new Promise(r => setTimeout(r, ms));

/**
 * Replay a recording as raw text: the content of every `report` event is
 * written verbatim, each terminated by a newline.
 *
 * @param {Array<object>} events Recording events.
 * @param {object} [opts]
 * @param {(text: string) => unknown} [opts.emit] Sink for the output;
 *   defaults to writing to `process.stdout`.
 * @param {number} [opts.maxSleepMs=2000] Cap for the pause before each report.
 * @param {boolean} [opts.noDelay] When truthy, skip the pauses.
 * @param {boolean} [opts.includeToolTrace] When truthy and the recording
 *   contains at least one `tool_call`, a single newline is emitted first.
 * @param {string} [opts.reportFile] When set, each report's content is also
 *   written to this path (best effort; write errors are ignored).
 * @returns {Promise<void>}
 */
export async function renderAsPlain(events, opts = {}) {
  const emit = opts.emit ?? (s => process.stdout.write(s));
  const maxSleep = opts.maxSleepMs ?? 2000;

  // Only the presence of a tool call matters here, so `some` replaces the
  // previously materialised (and otherwise unused) filtered array.
  if (opts.includeToolTrace && events.some(e => e.type === 'tool_call')) {
    emit(`\n`);
  }

  for (const e of events) {
    if (e.type !== 'report') continue;

    // A report whose content is missing or not a string used to throw on
    // `.endsWith`; the sibling renderers tolerate that, so treat it as an
    // empty response instead of crashing the mock.
    const text = typeof e.content === 'string' ? e.content : '';

    if (!opts.noDelay) await sleep(Math.min(maxSleep, 200));
    emit(text);
    if (!text.endsWith('\n')) emit('\n');
    if (opts.reportFile) await writeFile(opts.reportFile, text).catch(() => {});
  }
}
const PROTOCOL_VERSION = 1;
const sleep = ms => new Promise(resolve => setTimeout(resolve, ms));

// Model catalogue advertised in the `initialize` and `session/new` replies.
const DEFAULT_AVAILABLE_MODELS = [
  { modelId: 'openai/gpt-5.4-mini', name: 'gpt-5.4-mini' },
  { modelId: 'anthropic/claude-3.7-sonnet', name: 'claude-3.7-sonnet' },
  { modelId: 'deepseek-v3.2', name: 'deepseek-v3.2' },
  { modelId: 'glm-5.1', name: 'glm-5.1' },
];

/**
 * Write a successful JSON-RPC 2.0 response as one newline-terminated line.
 *
 * @param {{ write(line: string): unknown }} out Destination stream.
 * @param {number|string|null} id Id of the request being answered.
 * @param {unknown} result Result payload.
 */
function writeRpcResult(out, id, result) {
  const frame = { jsonrpc: '2.0', id, result };
  out.write(`${JSON.stringify(frame)}\n`);
}

/**
 * Write a JSON-RPC 2.0 error response as one newline-terminated line.
 *
 * @param {{ write(line: string): unknown }} out Destination stream.
 * @param {number|string|null} id Id of the request being answered.
 * @param {string} message Error message.
 * @param {number} [code=-32603] JSON-RPC error code.
 */
function writeRpcError(out, id, message, code = -32603) {
  const frame = { jsonrpc: '2.0', id, error: { code, message } };
  out.write(`${JSON.stringify(frame)}\n`);
}

/**
 * Write a JSON-RPC 2.0 notification (a message without an id) as one
 * newline-terminated line.
 *
 * @param {{ write(line: string): unknown }} out Destination stream.
 * @param {string} method Notification method name.
 * @param {unknown} params Notification parameters.
 */
function writeNotification(out, method, params) {
  const frame = { jsonrpc: '2.0', method, params };
  out.write(`${JSON.stringify(frame)}\n`);
}
/**
 * Split `text` into chunks of roughly `target` UTF-16 code units.
 *
 * A cut is pushed forward to just after the next space when that space is
 * fewer than 200 code units past the nominal cut, so words usually stay
 * whole. When no such space exists the cut is a hard one; it is nudged by
 * one code unit if it would otherwise separate a surrogate pair, so no
 * chunk ends or starts with half of an astral character.
 *
 * @param {string} text Text to split.
 * @param {number} [target=400] Nominal chunk size; values below 1 (or NaN)
 *   are treated as 1 so the scan always advances.
 * @returns {string[]} Chunks whose concatenation equals `text`.
 */
function chunkText(text, target = 400) {
  // A size below 1 would leave `pos` unchanged on a hard cut and loop forever.
  const size = target >= 1 ? Math.floor(target) : 1;
  if (text.length <= size) return [text];

  const chunks = [];
  let pos = 0;
  while (pos < text.length) {
    let end = Math.min(text.length, pos + size);
    if (end < text.length) {
      const ws = text.indexOf(' ', end);
      if (ws !== -1 && ws - end < 200) {
        end = ws + 1;
      } else {
        // Hard cut: keep a high/low surrogate pair together.
        const before = text.charCodeAt(end - 1);
        const after = text.charCodeAt(end);
        const splitsPair =
          before >= 0xd800 && before <= 0xdbff && after >= 0xdc00 && after <= 0xdfff;
        if (splitsPair) end += 1;
      }
    }
    chunks.push(text.slice(pos, end));
    pos = end;
  }
  return chunks;
}

/**
 * Run the mock vela ACP (JSON-RPC over stdio) server: read newline-delimited
 * requests from `process.stdin` and answer them on the output sink.
 *
 * Handled methods: `initialize`, `session/new`, `session/set_model`,
 * `session/set_config_option`, `session/prompt`, `session/cancel`. Any other
 * request that carries an id gets a -32601 error. Unless
 * `FAKE_VELA_REQUIRE_SET_MODEL` is `'0'`, `session/prompt` is rejected until
 * `session/set_model` or `session/set_config_option` was called for that
 * session id.
 *
 * The returned promise resolves when stdin ends or errors (after any
 * in-flight prompt response has finished writing), on SIGINT/SIGTERM, or
 * after 30 seconds if no `session/prompt` has arrived by then. It does NOT
 * resolve merely because a prompt round-trip completed.
 *
 * @param {Array<object>} events Recording events; the first `report` event
 *   supplies the assistant text and the `meta` event the token count.
 * @param {object} [opts]
 * @param {{ write(line: string): unknown } | ((line: string) => unknown)} [opts.emit]
 *   Output sink. Either a stream-like object with `write()` or a bare
 *   function, as the sibling renderers accept. Defaults to `process.stdout`.
 * @param {string} [opts.sessionId] Session id returned by `session/new`.
 * @param {boolean} [opts.noDelay] When truthy, skip the 30 ms pause between
 *   text chunks.
 * @param {string} [opts.reportFile] When set, the assistant text is also
 *   written to this path (best effort; write errors are ignored).
 * @returns {Promise<void>}
 */
export async function runVelaAcpServer(events, opts = {}) {
  // The sibling renderers take `emit` as a function; accept that shape too
  // instead of failing with "out.write is not a function".
  const sink = opts.emit ?? process.stdout;
  const out = typeof sink === 'function' ? { write: sink } : sink;

  const meta = events.find(e => e.type === 'meta');
  const reportEvent = events.find(e => e.type === 'report');
  const reportText = process.env.FAKE_VELA_TEXT ?? reportEvent?.content ?? '';
  const thoughtText = process.env.FAKE_VELA_THOUGHT ?? '';

  const sessionId = opts.sessionId ?? process.env.FAKE_VELA_SESSION_ID ?? `fake-vela-session-${Date.now()}`;
  const strict = process.env.FAKE_VELA_REQUIRE_SET_MODEL !== '0';
  const SESSION_NEW_ERROR = process.env.FAKE_VELA_SESSION_NEW_ERROR ?? '';
  const SET_MODEL_ERROR = process.env.FAKE_VELA_SET_MODEL_ERROR ?? '';
  const PROMPT_ERROR = process.env.FAKE_VELA_PROMPT_ERROR ?? '';

  let currentModelId = null;
  const sessionsWithModel = new Set();
  const pendingPrompts = new Set(); // prompt responses still being streamed
  let buf = '';
  let aborted = false;
  let promptSeen = false;

  // Streams the optional thought, the chunked report text, then the final
  // result for one `session/prompt` request.
  const startPromptResponse = async (id, sid) => {
    if (thoughtText) {
      writeNotification(out, 'session/update', {
        sessionId: sid,
        update: {
          sessionUpdate: 'agent_thought_chunk',
          content: { type: 'text', text: thoughtText },
        },
      });
    }
    const chunks = chunkText(reportText, 400);
    for (const chunk of chunks) {
      if (aborted) break;
      if (!opts.noDelay) await sleep(30);
      writeNotification(out, 'session/update', {
        sessionId: sid,
        update: {
          sessionUpdate: 'agent_message_chunk',
          content: { type: 'text', text: chunk },
        },
      });
    }
    // NOTE(review): a cancelled turn still reports `end_turn`; confirm
    // whether the daemon expects a distinct stop reason after session/cancel.
    writeRpcResult(out, id, {
      stopReason: 'end_turn',
      usage: {
        inputTokens: 12,
        outputTokens: meta?.total_tokens ?? 7,
        totalTokens: (meta?.total_tokens ?? 7) + 12,
      },
    });
    if (opts.reportFile) await writeFile(opts.reportFile, reportText).catch(() => {});
  };

  const handleRpc = (obj) => {
    if (!obj || typeof obj !== 'object') return;
    const { id, method, params } = obj;
    switch (method) {
      case 'initialize':
        writeRpcResult(out, id, {
          protocolVersion: PROTOCOL_VERSION,
          agentCapabilities: { promptCapabilities: { embeddedContext: false } },
          models: {
            currentModelId,
            availableModels: DEFAULT_AVAILABLE_MODELS,
          },
        });
        return;
      case 'session/new': {
        if (SESSION_NEW_ERROR) {
          writeRpcError(out, id, SESSION_NEW_ERROR);
          return;
        }
        writeRpcResult(out, id, {
          sessionId,
          models: { currentModelId, availableModels: DEFAULT_AVAILABLE_MODELS },
        });
        return;
      }
      case 'session/set_model': {
        if (SET_MODEL_ERROR) {
          writeRpcError(out, id, SET_MODEL_ERROR, -32099);
          return;
        }
        const next = typeof params?.modelId === 'string' ? params.modelId.trim() : '';
        const sid = typeof params?.sessionId === 'string' ? params.sessionId : sessionId;
        if (next) currentModelId = next;
        sessionsWithModel.add(sid);
        writeRpcResult(out, id, {});
        return;
      }
      case 'session/set_config_option': {
        // Counts as model selection for the strict set_model gate.
        const sid = typeof params?.sessionId === 'string' ? params.sessionId : sessionId;
        sessionsWithModel.add(sid);
        writeRpcResult(out, id, {});
        return;
      }
      case 'session/prompt': {
        // Any prompt, even a rejected one, disarms the no-prompt watchdog.
        promptSeen = true;
        if (PROMPT_ERROR) {
          writeRpcError(out, id, PROMPT_ERROR, -32602);
          return;
        }
        const sid = typeof params?.sessionId === 'string' ? params.sessionId : sessionId;
        if (strict && !sessionsWithModel.has(sid)) {
          writeRpcError(out, id, 'session/set_model must be called before session/prompt', -32602);
          return;
        }
        // A cancel applies to the turn it interrupted only; without this
        // reset every later prompt would stream no text at all.
        aborted = false;
        const task = startPromptResponse(id, sid)
          .catch((err) => {
            const reason = err instanceof Error ? err.message : String(err);
            process.stderr.write(`[mock-vela] prompt response failed: ${reason}\n`);
          })
          .finally(() => pendingPrompts.delete(task));
        pendingPrompts.add(task);
        return;
      }
      case 'session/cancel':
        aborted = true;
        return;
      default:
        if (id !== undefined && id !== null) {
          writeRpcError(out, id, `unknown method: ${method}`, -32601);
        }
    }
  };

  return new Promise((resolve) => {
    // Let any in-flight prompt response finish writing before reporting done.
    const finish = () => {
      Promise.allSettled([...pendingPrompts]).then(() => resolve());
    };

    process.stdin.setEncoding('utf-8');
    process.stdin.on('data', (data) => {
      buf += data;
      let nl;
      while ((nl = buf.indexOf('\n')) !== -1) {
        const line = buf.slice(0, nl).trim();
        buf = buf.slice(nl + 1);
        if (!line) continue;
        let obj;
        try { obj = JSON.parse(line); } catch { continue; }
        handleRpc(obj);
      }
    });
    process.stdin.on('end', finish);
    process.stdin.on('error', finish);

    const onSig = () => { aborted = true; resolve(); };
    process.once('SIGINT', onSig);
    process.once('SIGTERM', onSig);

    // 30s watchdog for a client that never sends a prompt. It is unref'd so
    // it cannot keep a finished process alive, and it stands down once a
    // prompt has been seen so a long session is not cut short.
    setTimeout(() => {
      if (promptSeen) return;
      process.stderr.write('[mock-vela] no prompt received within 30s; exiting\n');
      resolve();
    }, 30_000).unref();
  });
}
/**
 * List the trace ids (file names without `.jsonl`) found in `dir`, sorted.
 * An unreadable or missing directory yields an empty list.
 *
 * @param {string} dir
 * @returns {Promise<string[]>}
 */
async function listRecordings(dir) {
  try {
    return (await readdir(dir))
      .filter(f => f.endsWith('.jsonl'))
      .map(f => f.replace(/\.jsonl$/, ''))
      .sort();
  } catch {
    return [];
  }
}

/**
 * Parse the first line of `<dir>/<traceId>.jsonl` as JSON.
 *
 * @param {string} dir
 * @param {string} traceId
 * @returns {Promise<any|null>} The parsed first line, or `null` when the
 *   file cannot be read or that line is not valid JSON.
 */
async function readMeta(dir, traceId) {
  try {
    const txt = await readFile(join(dir, `${traceId}.jsonl`), 'utf-8');
    const firstLine = txt.split('\n', 1)[0];
    return JSON.parse(firstLine);
  } catch {
    return null;
  }
}

/**
 * Pick one element of `arr`: uniformly at random without a seed, or
 * deterministically from the SHA-256 of `seed` when one is given.
 *
 * @param {any[]} arr
 * @param {string} [seed]
 * @returns {any|null} `null` when `arr` is empty.
 */
function pickRandom(arr, seed) {
  if (arr.length === 0) return null;
  if (!seed) return arr[Math.floor(Math.random() * arr.length)];
  const h = parseInt(createHash('sha256').update(String(seed)).digest('hex').slice(0, 12), 16);
  return arr[h % arr.length];
}

/**
 * Decide whether a recording's metadata matches an `OD_MOCKS_POOL` value.
 *
 * `outcome:X` and `agent:X` compare against `meta.outcome` / `meta.agent`;
 * `skill:X` looks for the exact tag `skill:X`. Regardless of the prefix, the
 * whole pool string also matches as an exact tag, as the agent name, or as
 * a substring of any string tag.
 *
 * @param {any} meta Parsed first line of a recording.
 * @param {string} pool Raw `OD_MOCKS_POOL` value.
 * @returns {boolean}
 */
function matchesPool(meta, pool) {
  // Tolerate a missing or malformed `tags` field rather than throwing.
  const tags = Array.isArray(meta.tags) ? meta.tags : [];
  const sep = pool.indexOf(':');
  if (sep >= 0) {
    const dim = pool.slice(0, sep);
    const value = pool.slice(sep + 1);
    if (dim === 'outcome' && meta.outcome === value) return true;
    if (dim === 'agent' && meta.agent === value) return true;
    if (dim === 'skill' && tags.includes(`skill:${value}`)) return true;
  }
  return (
    tags.includes(pool) ||
    meta.agent === pool ||
    tags.some(t => typeof t === 'string' && t.includes(pool))
  );
}

/**
 * Choose which recording to replay, driven by environment variables.
 *
 * Priority: `OD_MOCKS_TRACE` (exact id, then id prefix), then
 * `OD_MOCKS_BY_PROMPT_HASH=1` (hash of `prompt`), then `OD_MOCKS_POOL`
 * (random within the matching recordings), then random across all.
 * `OD_MOCKS_SEED` makes the pool and global picks reproducible.
 *
 * @param {object} [args]
 * @param {string} [args.prompt] Prompt text used for hash-based selection.
 * @returns {Promise<{traceId: string, path: string, method: string, pool?: string}|null>}
 *   `null` only when no recordings exist and neither `OD_MOCKS_TRACE` nor
 *   `OD_MOCKS_POOL` is set.
 * @throws {Error} When `OD_MOCKS_TRACE` or `OD_MOCKS_POOL` is set but
 *   matches nothing, including when the recordings directory is empty.
 */
export async function pickRecording({ prompt } = {}) {
  const dir = DEFAULT_RECORDINGS_DIR;
  const all = await listRecordings(dir);
  const pathOf = id => join(dir, `${id}.jsonl`);

  // 1. fixed: an explicit trace must resolve or fail loudly. This runs
  //    before any empty-corpus shortcut so that an unfetched corpus reaches
  //    the error below (which tells the user to fetch it) instead of
  //    silently returning null.
  const fixed = process.env.OD_MOCKS_TRACE;
  if (fixed) {
    const hit = all.find(id => id === fixed) ?? all.find(id => id.startsWith(fixed));
    if (hit) return { traceId: hit, path: pathOf(hit), method: 'fixed' };
    throw new Error(
      `OD_MOCKS_TRACE="${fixed}" set but no matching recording in ${dir}. ` +
      `8-char id prefix is supported; check spelling and that the corpus has been fetched ` +
      `(\`bash mocks/scripts/fetch-recordings.sh\`).`
    );
  }

  // 2. prompt-hash: the same prompt always maps to the same recording.
  if (process.env.OD_MOCKS_BY_PROMPT_HASH === '1' && prompt) {
    const picked = pickRandom(all, prompt);
    if (picked) return { traceId: picked, path: pathOf(picked), method: 'hash' };
  }

  // 3. pool: same policy as the fixed trace; a pool that matches nothing
  //    is an error, never a silent fall-through to the global random pick.
  const pool = process.env.OD_MOCKS_POOL;
  if (pool) {
    // The metadata reads are independent, so issue them together;
    // Promise.all keeps the results aligned with `all`.
    const metas = await Promise.all(all.map(id => readMeta(dir, id)));
    const candidates = all.filter((id, i) => metas[i] && matchesPool(metas[i], pool));
    if (candidates.length === 0) {
      throw new Error(
        `OD_MOCKS_POOL="${pool}" matched no recordings in ${dir}. ` +
        `Supported shapes: agent:<name>, skill:<name>, outcome:<status>, ` +
        `or any tag substring. Check candidates with ` +
        `\`jq '[.entries[] | {agent, outcome, skills}] | unique' mocks/manifest.json\`.`,
      );
    }
    const picked = pickRandom(candidates, process.env.OD_MOCKS_SEED);
    return { traceId: picked, path: pathOf(picked), method: 'pool', pool };
  }

  // 4. random across everything; null when there is nothing to pick from.
  const picked = pickRandom(all, process.env.OD_MOCKS_SEED);
  if (!picked) return null;
  return { traceId: picked, path: pathOf(picked), method: 'random' };
}

/**
 * Parse JSONL text into an array of values, skipping blank lines.
 *
 * @param {string} text Raw file content.
 * @param {string} label Name used to prefix parse errors (e.g. the path).
 * @returns {any[]}
 * @throws {SyntaxError} With `<label>:<line number>` context when a line is
 *   not valid JSON.
 */
function parseJsonLines(text, label) {
  const records = [];
  const lines = text.split('\n');
  for (let i = 0; i < lines.length; i += 1) {
    // Trim so CRLF line endings and whitespace-only lines are skipped
    // rather than handed to JSON.parse.
    const line = lines[i].trim();
    if (!line) continue;
    try {
      records.push(JSON.parse(line));
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      throw new SyntaxError(`${label}:${i + 1}: ${reason}`);
    }
  }
  return records;
}

/**
 * Read a `.jsonl` recording and return its events in file order.
 *
 * @param {string} path Path to the recording.
 * @returns {Promise<any[]>}
 * @throws {SyntaxError} When a line is not valid JSON (message carries the
 *   path and line number). File-system errors from `readFile` propagate.
 */
export async function readRecording(path) {
  const text = await readFile(path, 'utf-8');
  return parseJsonLines(text, path);
}
// Default output of `vela models`: one model per line.
const DEFAULT_MODELS_STDOUT = [
  'public_model_deepseek_v3_2 vela',
  'public_model_deepseek_v4_flash vela',
  'public_model_deepseek_v4_pro vela',
  'public_model_gemini_2_5_flash vela',
  'public_model_gemini_3_1_flash_lite_preview vela',
  'public_model_gemini_3_1_pro_preview vela',
  'public_model_gpt_5_4 vela',
  'public_model_gpt_5_4_mini vela',
  'public_model_glm_5 vela',
  'public_model_glm_5_1 vela',
  'public_model_gpt_image_2 vela',
  'public_model_kimi_k2_6 vela',
  'public_model_minimax_m2_7 vela',
  'public_model_qwen3_235b_a22b vela',
  'public_model_seedance_2 vela',
].join('\n');

/**
 * `vela login`: write a fake profile to `~/.amr/config.json`, print a
 * success line and exit 0, with no interactive approval step.
 *
 * NOTE(review): the file is rewritten with a single profile, so any other
 * profiles already stored in it are dropped. Confirm that is intended.
 *
 * Environment:
 *   VELA_PROFILE                profile slot to populate (prod|test|local);
 *                               unknown values fall back to prod with a warning
 *   FAKE_VELA_LOGIN_DELAY_MS    delay before the write, in milliseconds
 *   FAKE_VELA_LOGIN_USER_EMAIL  email written into the profile
 *   FAKE_VELA_LOGIN_USER_PLAN   plan written into the profile
 *   FAKE_VELA_LOGIN_FAIL        when set, its value is printed to stderr and
 *                               the process exits 1 without writing anything
 *
 * The process exits from inside the write callback so that the final
 * message is flushed before exit even when the stream is asynchronous.
 *
 * @returns {Promise<void>}
 */
export async function runVelaLogin() {
  if (process.env.FAKE_VELA_LOGIN_FAIL) {
    // Exit only once stderr has accepted the message, and return so nothing
    // below runs while that write is pending.
    process.stderr.write(`${process.env.FAKE_VELA_LOGIN_FAIL}\n`, () => process.exit(1));
    return;
  }
  const allowed = new Set(['prod', 'test', 'local']);
  const requested = (process.env.VELA_PROFILE || 'prod').trim() || 'prod';
  const profile = allowed.has(requested) ? requested : 'prod';
  if (!allowed.has(requested)) {
    process.stderr.write(`[mock-vela] unknown profile ${requested}; defaulting to prod\n`);
  }
  const delayMs = Number(process.env.FAKE_VELA_LOGIN_DELAY_MS) || 0;
  const userEmail = process.env.FAKE_VELA_LOGIN_USER_EMAIL || 'fake-user@example.com';
  const userPlan = process.env.FAKE_VELA_LOGIN_USER_PLAN || 'free';

  const write = () => {
    const file = join(homedir(), '.amr', 'config.json');
    mkdirSync(dirname(file), { recursive: true });
    const payload = {
      profiles: {
        [profile]: {
          // Placeholder credentials; not real keys.
          controlKey: 'fake-control-key-0000000000000000000000',
          runtimeKey: 'fake-runtime-key-0000000000000000000000',
          apiUrl: profile === 'local' ? 'http://localhost:18080' : '',
          linkUrl: profile === 'local' ? 'http://localhost:18081' : '',
          user: {
            id: 'fake-user-id',
            email: userEmail,
            name: 'Fake User',
            plan: userPlan,
          },
        },
      },
    };
    writeFileSync(file, JSON.stringify(payload, null, 2), 'utf8');
    // Exit from the callback so the success line is not lost on exit.
    process.stdout.write(`Login successful for ${userEmail}.\n`, () => process.exit(0));
  };

  if (delayMs > 0) setTimeout(write, delayMs);
  else write();
}
/**
 * `vela models`: print the model catalog, one model per line, then exit 0.
 *
 * The text comes from the `FAKE_VELA_MODELS` environment variable when it
 * is set and non-empty (newline-separated lines), otherwise from
 * `DEFAULT_MODELS_STDOUT`. A trailing newline is always appended.
 *
 * The process exits from inside the write callback so that the catalog is
 * flushed before exit even when stdout is asynchronous; exiting right after
 * the write can truncate the output.
 */
export function runVelaModels() {
  const out = process.env.FAKE_VELA_MODELS || DEFAULT_MODELS_STDOUT;
  process.stdout.write(`${out}\n`, () => process.exit(0));
}
"8ba97c1072f46d5145fc776724c51a55cd130bfe60acb47dfaf8f6ba6bdeaf2b", + "bytes": 46321, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 17, + "duration_ms": 111204, + "skills": [ + "agent-browser" + ], + "session_id": "70959fe1-a613-4b0f-a120-49762a4e1a26", + "user_input_preview": "давай удалим 2ю версию", + "multi_turn": false, + "captured_at": "2026-05-28T15:48:32.349Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "0425c974-4002-4e7e-93d4-fc9ab026ef84", + "sha256": "1fbfd42758c450b39be952317656c5146696cf639dc2b0f06af5fc4a0dd35bb0", + "bytes": 1253, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 19525, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "can we try, to make it in to a motion graphic?", + "multi_turn": true, + "captured_at": "2026-05-28T15:49:17.045Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "04a7f865-1dad-452f-8fe5-c408196dd347", + "sha256": "e839ffb55e9608adacc3013901fc8fe053540e80a66826fc8322df68dfbc4453", + "bytes": 4316, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 155003, + "skills": [ + "default" + ], + "session_id": "ef1df4e9-1bb0-4e81-a16d-72846d90348a", + "user_input_preview": "[form answers — task-type] - What should I build?: Prototype - Target platform: iOS app (iPhone 15 Pro) - Who is this for?: (skipped) - Brand context: Use the Royal Violet spec provided in the brief [", + "multi_turn": true, + "captured_at": "2026-05-28T18:35:28.248Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "0580c870-0310-44dd-9036-4c79074ce7cb", + "sha256": "b1b7fa5e36288ca4c80e86416f26fd58097881082c31d3a1d1a50ae942726121", + "bytes": 16302, + "agent": "opencode", + "model": 
null, + "outcome": "succeeded", + "tool_count": 7, + "duration_ms": 45421, + "skills": [ + "default" + ], + "session_id": "929f2ab5-4584-4875-8c35-cd2f83df64cf", + "user_input_preview": "On the very bottom you have to instances of Google Maps. Remove the Google Maps on the right hand side. And center: Find Us Carriagetown Neighborhood 328 N Grand Traverse St Flint, MI 48503 Hours", + "multi_turn": false, + "captured_at": "2026-05-28T16:34:54.901Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "06a9324a-a117-405a-980f-a0c730e0a3b5", + "sha256": "5963a3e4d44c0929e4d758b066c484249712be0de6fdb1e67ba236d4beda2830", + "bytes": 8880, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 4, + "duration_ms": 27250, + "skills": [ + "default" + ], + "session_id": "364e2ac1-815c-4dcb-9412-24d260631481", + "user_input_preview": "P7界面,验证闭环: 15项评审清单中去掉所有的✅", + "multi_turn": true, + "captured_at": "2026-05-28T15:44:36.263Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "072ae827-f4b6-42e3-93e2-6c2c3a157366", + "sha256": "b2111665528421889767a0043e9d4f2c1e7d52de3fb6b7429f6008c65edf23c5", + "bytes": 8467, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 12, + "duration_ms": 34759, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "no no remove the latest image I gave you, not a good one, we need to work with the earlier ones and designs and assets etc", + "multi_turn": true, + "captured_at": "2026-05-28T15:50:01.872Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "07560066-0da6-46d5-859e-0bf0cc268f47", + "sha256": "18b499c73d90f5a4c2b42857aa871de0a2c588c8a71dcd6f4d65a8af7be083ce", + "bytes": 62326, + "agent": "opencode", + "model": "default", + "outcome": "succeeded", + "tool_count": 
20, + "duration_ms": 316248, + "skills": [ + "default" + ], + "session_id": "7f1484bf-9553-4c41-b83c-84f74d6ee6d8", + "user_input_preview": "Continue the remaining unfinished tasks from the previous run. Do not redo completed work. Focus only on these unfinished todos: 1. [in_progress] Audit current slide layouts for edge bleed and croppe", + "multi_turn": false, + "captured_at": "2026-05-28T16:28:31.545Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "0a06cb0c-4a3c-4200-8a73-d1d4f192cc39", + "sha256": "2e9b953bffea0600ef034827d300bb61404a537cbca89ae74163d2385acaf1e6", + "bytes": 13881, + "agent": "claude", + "model": "opus", + "outcome": "succeeded", + "tool_count": 4, + "duration_ms": 51062, + "skills": [ + "agent-browser" + ], + "session_id": "df5bbbff-38e1-4030-892e-38ce96e7a529", + "user_input_preview": "We need to remove all \"+n mores\" from plan details and workout picker details. The coach needs to see the full workout plan of the user", + "multi_turn": false, + "captured_at": "2026-05-28T15:47:55.705Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "0cee30c9-04b4-484f-ad39-192c2198d37d", + "sha256": "9bd179a4462d33599466beab69e797cf8132333b3f9f08c3136f4402ad95b78b", + "bytes": 851, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 77580, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": "Let's shorten \"Every county sends vehicle counts and miles traveled into a LightGBM ensemble; predictions return as stacked emissions you can explore by year\" too \"Every county sends vehicle counts an", + "multi_turn": true, + "captured_at": "2026-05-28T15:59:31.540Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "0cfd3766-40a1-48e9-b3a5-d420bf5551b8", + 
"sha256": "215f05be726bb46bf53712d756544fffb44e9a29f0fea75a2d25e9341ea02aae", + "bytes": 36929, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 7, + "duration_ms": 101998, + "skills": [ + "default" + ], + "session_id": "b591fc28-d2fb-41e6-b5bb-ac9d0d5edf86", + "user_input_preview": "check and fix the line breaks as per the thumb rules, like \"for\", \"and\" , conjunted twowords with \"-\" hyphens. etc.,", + "multi_turn": false, + "captured_at": "2026-05-28T16:27:12.008Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "0fd0e45d-9e38-46c8-b8f7-3cb0244ac428", + "sha256": "d08011601d1bbb7e671a6a94ae50f12e06755f2325d433dcf74e05a3a19bbe6a", + "bytes": 5122, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 27849, + "skills": [ + "default" + ], + "session_id": "ef1df4e9-1bb0-4e81-a16d-72846d90348a", + "user_input_preview": "# 📱 TASKORA — COMPLETE OPEN DESIGN UI MASTER DOCUMENT # 🎯 MAIN DESIGN GOAL Design style must feel like: ✅ Smart productivity workspace ✅ Interactive timeline dashboard ✅ Floating modern utility s", + "multi_turn": true, + "captured_at": "2026-05-28T18:33:57.359Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "108c20c0-4fbb-47b8-b9f2-625aaf394f88", + "sha256": "ac1038309f122857213417515df301b94fb51938b4e88cfaa85ca439d8b9bd23", + "bytes": 4132, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 137817, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": null, + "multi_turn": true, + "captured_at": "2026-05-28T16:12:46.683Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "10f9c7ea-4d4c-4993-9914-df1680d9abbf", + "sha256": 
"df02e9458edbae5721ba7c30a303bdf63eff74d68f959a9f3b11fd2efecd52c1", + "bytes": 534, + "agent": "cursor-agent", + "model": "auto", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 693, + "skills": [ + "default" + ], + "session_id": "40e92b93-443d-4000-a03c-a0d490807da8", + "user_input_preview": "i need to work on the prototypes pf the following designs", + "multi_turn": false, + "captured_at": "2026-05-28T16:31:31.342Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "1151b2f9-634e-471b-9479-9df5fddbe9bc", + "sha256": "ba407318b2259c10a562ea01251468e611ad6ce94e4c4d9e6c848b6b1561479b", + "bytes": 4567, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 142587, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": "Great work. Note that on mobile, it's still really hard to read. Suggest that you make Vehicle Sourcetypes menu minimizable, and minimized by default when opening from mobile (but open by default on", + "multi_turn": true, + "captured_at": "2026-05-28T15:29:22.841Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "11705d3e-a233-4829-9f2b-340c93c43da5", + "sha256": "e03e95542ca57b036c44513e846e978d9b1d90a6be0d804f9e123b9dba0e907f", + "bytes": 4133, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 127446, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": "Thank you. Literally can't see the halo at all, no matter how much I adjust . Is the halo somehow black? it looks like the coastline borders don't show up or show up black perhaps. 
Also, in the \"What", + "multi_turn": true, + "captured_at": "2026-05-28T15:52:55.793Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "12d88deb-dfca-4ff8-b773-e39cce1e387d", + "sha256": "afe8a423474c7d16b215c077f233d722a752e3179ca8958f503a61f2b29b62c4", + "bytes": 1119, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 44423, + "skills": [ + "algorithmic-art" + ], + "session_id": "73dd5a51-118c-45e6-aa7a-680ab6795510", + "user_input_preview": "I only have Gemini and Claude, I do not use OpenAI", + "multi_turn": true, + "captured_at": "2026-05-28T15:29:37.225Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "131b0c5c-c55c-413d-a706-04cbedef29a5", + "sha256": "95c3786bce5e1a83e20e27d0a0b860946e82081ac3a23447342d86ff9d7f1a5c", + "bytes": 48746, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 15, + "duration_ms": 246725, + "skills": [ + "magazine-web-ppt" + ], + "session_id": "d18d4153-d55e-43fa-9d5a-c9007e156d62", + "user_input_preview": "继续", + "multi_turn": false, + "captured_at": "2026-05-28T16:31:36.649Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "13903bf5-abd3-4502-8843-df44a19e9386", + "sha256": "e4059649ef737ee9d954e5eb045f24f6dbe1c3581f8972e0656f385ab439e3a1", + "bytes": 2427, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 103267, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": null, + "multi_turn": true, + "captured_at": "2026-05-28T15:13:41.745Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "139bebed-aebb-4901-85fb-1f9081a98758", + "sha256": 
"02fd7636860cb8b1a051c9d0690d74957852684b0bd9c236ccb60a5e5705dd1a", + "bytes": 24757, + "agent": "codex", + "model": null, + "outcome": "failed", + "tool_count": 18, + "duration_ms": 441360, + "skills": [ + "default" + ], + "session_id": "976333ac-966e-49bc-a5e7-d6c903bd1a01", + "user_input_preview": null, + "multi_turn": true, + "captured_at": "2026-05-28T16:25:42.211Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "15a7afd9-ff65-4ae3-a787-c474ea31073a", + "sha256": "3ea01016ee4c4ca216bc533700a08ce56d5e3485d7b89e92cef4a8b551d96cd4", + "bytes": 40958, + "agent": "opencode", + "model": null, + "outcome": "succeeded", + "tool_count": 10, + "duration_ms": 144873, + "skills": [ + "default" + ], + "session_id": "b95f8261-731b-4fdc-8a16-86b489a7a917", + "user_input_preview": "crea un post para columna 2: **1. Post estático Manifiesto** Texto grande centrado sobre fondo negro: \"Tú dominas tu conocimiento. Nosotros lo convertimos en ventas.\" Pie: Escale It Group | Performa", + "multi_turn": false, + "captured_at": "2026-05-28T16:14:13.859Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "15ccb8f8-177b-4db4-b8bb-2998d4c32881", + "sha256": "3ff2aaeb5f7d78ec637b1fb8b7725f92098e22ef17fb3ad1804b85cd76faacf0", + "bytes": 548, + "agent": "gemini", + "model": "gemini-3-pro-preview", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 32460, + "skills": [ + "default" + ], + "session_id": "1ee7c118-e305-4914-a459-10d56e321b4a", + "user_input_preview": "continue please", + "multi_turn": false, + "captured_at": "2026-05-28T16:31:19.675Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "15f00160-3d85-4bf3-b469-2b51ea86761f", + "sha256": "ad144855539f1ee2e06f003d3bf3457092a13c29b1227d58aad4d49a7be41f99", + "bytes": 54800, + "agent": "opencode", + "model": "deepseek/deepseek-v4-pro", + "outcome": 
"succeeded", + "tool_count": 17, + "duration_ms": 407242, + "skills": [ + "brainstorming" + ], + "session_id": "3020ab35-cff7-4601-86fa-2a83d67571c4", + "user_input_preview": "usa @brainstorming para mejorar el diseño de la app pero usa como referencia el documento adjunto que es el como funciona mi app real , para que tengas una guia y no inventar cualquier cosa", + "multi_turn": false, + "captured_at": "2026-05-28T16:22:56.614Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "169fa62e-6000-41da-871a-83bbee11baab", + "sha256": "dccb3d617a7f5ac75921b9f1477419c3279c1fb804f6bb207d06ee7f36c4b0c5", + "bytes": 15169, + "agent": "opencode", + "model": "opencode-go/kimi-k2.5", + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 23348, + "skills": [ + "default" + ], + "session_id": "32b36014-fc99-4881-8592-e1885a2ac4f6", + "user_input_preview": "design md nya udh oke ga dengan color dan style design yang sekarang", + "multi_turn": false, + "captured_at": "2026-05-28T16:22:19.925Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "17d864af-b648-40c1-8cb0-753d1702beec", + "sha256": "4ffabd0e99ac86241e65b8f50ed656b812b46ddc28521db0555f28f7a670d375", + "bytes": 85299, + "agent": "claude", + "model": "default", + "outcome": "succeeded", + "tool_count": 50, + "duration_ms": 944824, + "skills": [ + "ad-creative" + ], + "session_id": "8fbb6ad4-4c3d-4745-956c-502b09af5e2f", + "user_input_preview": "continua", + "multi_turn": true, + "captured_at": "2026-05-28T01:42:07.143Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "18f2b6fd-cff3-4bb3-bd8f-dd6b35f1501e", + "sha256": "55cdd3253175fee49d84f6edb5a833a1c4eadad80be880f1935d019d3ecbad56", + "bytes": 55144, + "agent": "opencode", + "model": "opencode/big-pickle", + "outcome": "succeeded", + "tool_count": 13, + "duration_ms": 157374, + "skills": [ + 
"default" + ], + "session_id": "a557cd88-6582-4c49-a4f3-46015bd43f1b", + "user_input_preview": "[form answers — task-type] - What should I build?: Prototype - Who is this for?: me, local use - Brand context: Pick a direction for me [value: pick_direction] - Roughly how much?: 1 page, spa - Any i", + "multi_turn": false, + "captured_at": "2026-05-28T16:29:52.095Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "1a35cbef-bbda-4ea9-899e-313da2b594fe", + "sha256": "9e71677acc04a77be5a6ebe499564872b48a49a6276dcf39b94529da9e36d563", + "bytes": 31213, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 11, + "duration_ms": 123249, + "skills": [ + "default" + ], + "session_id": "c64c0d19-9e6c-465c-ad34-94e17827c04f", + "user_input_preview": "同意,继续", + "multi_turn": true, + "captured_at": "2026-05-28T16:20:49.469Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "1aa82807-305b-4c6e-8818-951a31bd84d9", + "sha256": "5671d7e030a0eae1788b47cf2fbeeaa51ed97ae40cd5d532f52833558d537842", + "bytes": 75800, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "succeeded", + "tool_count": 33, + "duration_ms": 757099, + "skills": [ + "default" + ], + "session_id": "02af3152-9a28-4441-a613-216830bc642e", + "user_input_preview": "[form answers — discovery] - 目标平台: Desktop web - 复盘类型: A 股交易复盘 - 页面规模: 多个独立页面 - 特别要求: (skipped)", + "multi_turn": false, + "captured_at": "2026-05-28T16:19:28.936Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "1ad1412e-07bd-4672-90dd-7ce258faa8c0", + "sha256": "99202c4ddb090ce14936292dee6c61e05e41a0f2d2b496ca0e5b77d47edc299f", + "bytes": 24033, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 10, + "duration_ms": 93562, + "skills": [ + "ad-creative" + ], + "session_id": "4baaa2cb-68cb-4897-941f-39db5f3f835b", + 
"user_input_preview": null, + "multi_turn": true, + "captured_at": "2026-05-26T04:33:02.705Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "1b622172-97ba-4e47-9eb2-7868eb2fc59b", + "sha256": "e0d9614962b5e7bc8ad8800f90be28ec99ba6a5bea813d99de7c79d8a91f616a", + "bytes": 928, + "agent": "qwen", + "model": "qwen3-coder-plus", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 168728, + "skills": [ + "ad-creative" + ], + "session_id": "0a51c2ff-bc6c-4d0e-bdb7-ae8bfaf4fe00", + "user_input_preview": "[form answers — discovery] - ¿Para quién son estos iconos?: (skipped) - ¿Qué estilo visual prefieres?: Microsoft Fluent — contornos limpios, geometría precisa, stroke-based [value: microsoft] - ¿Tamañ", + "multi_turn": true, + "captured_at": "2026-05-27T23:34:51.002Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "1c572030-146b-4054-b2fe-7161b1b168f0", + "sha256": "a0781927ee3905eed67d50b8edb898b38972679bbab462a352cce1e35bbcd892", + "bytes": 2972, + "agent": "opencode", + "model": "default", + "outcome": "succeeded", + "tool_count": 3, + "duration_ms": 13445, + "skills": [ + "ad-creative" + ], + "session_id": "179fc5ef-f34b-4160-88d8-a430e6ce96f8", + "user_input_preview": "i need landing", + "multi_turn": false, + "captured_at": "2026-05-27T17:56:46.640Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "1de734bc-84ae-4e35-a83d-bcf175cea0e2", + "sha256": "b3cab0330cdf3126ad726476929dbc27022faaf9df63b5bed387850f13898b9c", + "bytes": 9498, + "agent": "opencode", + "model": "opencode-go/qwen3.7-max", + "outcome": "succeeded", + "tool_count": 3, + "duration_ms": 26876, + "skills": [ + "algorithmic-art" + ], + "session_id": "6f4bc605-2293-47f4-babe-3248a051fab1", + "user_input_preview": "Make the edges rotate randomly", + "multi_turn": true, + "captured_at": "2026-05-28T16:03:05.908Z", + 
"cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "1eec2c85-589f-4f8a-9f1f-17c0d0b517c8", + "sha256": "22a081d3a8eb570bd3648c69d6a9cd22197e95d8004ce4ea6f20a35c3b3442bf", + "bytes": 124556, + "agent": "codex", + "model": "default", + "outcome": "failed", + "tool_count": 26, + "duration_ms": 360819, + "skills": [ + "default" + ], + "session_id": "afeeb0bb-5b98-465e-8e9f-e2a19c9124c3", + "user_input_preview": "[form answers — discovery] - Qual alvo principal?: Responsive web, Desktop web - O que você quer que eu melhore agora?: Transformar em versão mais premium - Escopo: Editar só a home atual - Algo impor", + "multi_turn": false, + "captured_at": "2026-05-28T16:19:16.560Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "2053209f-ca46-430c-a142-fc4a76191cea", + "sha256": "28477c54192e86f58422abb0c4a9862530dc333eb14cf781b1b208786c47e065", + "bytes": 35942, + "agent": "opencode", + "model": null, + "outcome": "succeeded", + "tool_count": 16, + "duration_ms": 75930, + "skills": [ + "default" + ], + "session_id": "df080b61-929d-43ee-9cbe-47b51215c953", + "user_input_preview": "terapkan di semua page dan logo di buat yang proper pada penempatannya", + "multi_turn": true, + "captured_at": "2026-05-28T16:24:37.134Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "21747360-31fa-46b3-9615-46140386a0d1", + "sha256": "7f3c1296298f2f6b088aea97590fbdc0c1e90451d09184ac37919da2d7264857", + "bytes": 48560, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 52, + "duration_ms": 698178, + "skills": [ + "ad-creative" + ], + "session_id": "8fbb6ad4-4c3d-4745-956c-502b09af5e2f", + "user_input_preview": "[form answers — discovery] - ¿Cómo quieres la estructura de archivos?: Un solo archivo HTML con scroll (todo en index.html) [value: single_page] - Contenido del menú / carta: Usa contenido 
representat", + "multi_turn": true, + "captured_at": "2026-05-27T21:52:00.399Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "242d7238-b206-4cdb-a02e-5edf3c73864c", + "sha256": "ea89f0e85a26b07ad7d4c341f5370d3a5d46f789b91d365064b19054c76e4fb0", + "bytes": 15476, + "agent": "opencode", + "model": "ollama-proxy/kimi-k2.6", + "outcome": "succeeded", + "tool_count": 5, + "duration_ms": 59388, + "skills": [ + "agent-browser" + ], + "session_id": "81526dba-0a4c-4805-a731-ed8f9b4f0639", + "user_input_preview": "this is landing page for ecommerce SaaS. we need to redesign this landing page, make it look modern for SaaS software. based on the existing design, re-design the app to look more moden, support lig", + "multi_turn": true, + "captured_at": "2026-05-28T16:24:13.103Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "24565b22-5230-40cd-8905-1c7936b54aef", + "sha256": "2f9098b0666c5fe2018b351e116013fd2a4c5ca692bd9762052e4e3a67dd0c68", + "bytes": 500, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 3267, + "skills": [ + "default" + ], + "session_id": "75ab5caa-6b7d-4d01-ac03-b2ece54e9398", + "user_input_preview": "اه", + "multi_turn": true, + "captured_at": "2026-05-28T16:35:02.029Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "2667e851-1a26-4231-8b2b-1f78d829170b", + "sha256": "a56b9091efc98b5884e4faf2fe9becde33f256ad35fdf6fb9c8f206aa90e9c7f", + "bytes": 31701, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 13, + "duration_ms": 182000, + "skills": [ + "agent-browser" + ], + "session_id": "23db0c86-1bac-46cd-9b85-16a154fcaf71", + "user_input_preview": "can we make the app more minimal and allow for dark mode?", + "multi_turn": true, + "captured_at": "2026-05-28T16:19:10.438Z", + "cli_version": null, + 
"protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "26b69aa4-1569-4ca8-bc65-db728a88e8cb", + "sha256": "0ed816c8a03dc9f9e6d29b37087181e48ecde9c622968084ba2b0454d9905a02", + "bytes": 14316, + "agent": "claude", + "model": "default", + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 68000, + "skills": [ + "algorithmic-art" + ], + "session_id": "7f2343d7-07f6-4d71-89bf-6a86ade4bb8d", + "user_input_preview": "Le projet est de réaliser un nouveau maillot pour le club de volley d'Antony voici le brief du client et ce qu'il a généré via gémini", + "multi_turn": true, + "captured_at": "2026-05-28T15:40:29.221Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "274baae0-41ef-432d-97b5-0e9e6cb85cec", + "sha256": "3c1fcad433cdea7eea0fbff4ac2993ad0642d9222e140934be5ffd1befaa819f", + "bytes": 16025, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 10, + "duration_ms": 58879, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "Include a new variant for the creation of this ad please", + "multi_turn": true, + "captured_at": "2026-05-28T15:43:23.080Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "282e25db-1e89-4c11-b3f1-29ff43c2f193", + "sha256": "a268c2082a2c49dce3d5ae1fe9f46ecbc6ba4eeb8f7659ca1288d2c367476320", + "bytes": 587, + "agent": "gemini", + "model": "gemini-3-flash", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 11723, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "still to small, make it like 250px", + "multi_turn": true, + "captured_at": "2026-05-26T13:29:14.339Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "28ca87cc-3b38-4813-ae91-39d2d818bfa1", + "sha256": 
"7e6598b7d9193ab855f004d5903deed9574dfda85ca6c627e737df020fabfd30", + "bytes": 54884, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 21, + "duration_ms": 359113, + "skills": [ + "default" + ], + "session_id": "58734cb3-6bfb-4ab6-91d4-576f76e6fd9c", + "user_input_preview": "tôi cần file expression-sheet là thu nhỏ lại từ raw, hiện tại để cắt cho đủ size dẫn đến hình bị mất đầu, thông tin khác,.... Review và thực hiện lại", + "multi_turn": false, + "captured_at": "2026-05-28T16:25:44.070Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "29989d5f-5380-4920-b578-484e4180367e", + "sha256": "e2da3644d51ae02df800b421101dc8fd382f84375da14ebbb8077258d1b9caab", + "bytes": 895, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 15705, + "skills": [ + "algorithmic-art" + ], + "session_id": "73dd5a51-118c-45e6-aa7a-680ab6795510", + "user_input_preview": "[form answers — discovery] - What is the specific topic of the discussion panel?: Successful Entrepeneurs - Brand context: I have a brand spec — I'll share it [value: brand_spec]", + "multi_turn": true, + "captured_at": "2026-05-28T15:25:22.020Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "2c40378a-e966-490d-b899-4d6cd81ce12b", + "sha256": "b8e4c461ed32f6717d349a8cfc6e58e71e76b00c354767b2d7fb0c2dfa6a2780", + "bytes": 17439, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 5, + "duration_ms": 45673, + "skills": [ + "agent-browser" + ], + "session_id": "899d9e9f-0cb6-4429-b975-450f61400d7e", + "user_input_preview": "改成订单", + "multi_turn": true, + "captured_at": "2026-05-28T16:30:25.150Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "2d88d27f-71f7-4f0c-8e76-608ccccbd8db", + "sha256": 
"1adeb5ae87327d2e52c6d212175c19cc6e8b1e270dc0fe096268bbbd5919a492", + "bytes": 57471, + "agent": "claude", + "model": "sonnet", + "outcome": "succeeded", + "tool_count": 26, + "duration_ms": 364912, + "skills": [ + "ad-creative" + ], + "session_id": "67b092e5-6aac-4977-93d7-339ba0321f18", + "user_input_preview": "To be honest I don't like this design, okay. https://v0.app/templates/auralink-saas-landing-page-zoQPxUaTqvE?b=v0-preview-b_7ZXtHjzn40x&f=1&path=%2F can you use the Browser to see this website fully.", + "multi_turn": false, + "captured_at": "2026-05-28T10:06:20.138Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "2e526bda-1899-4e70-a541-af8102ea9f9d", + "sha256": "826cfc7b024a6c4c97c8475d28466776c4e1b9fb66cdc1b25af6d5d8416a9bc7", + "bytes": 515, + "agent": "claude", + "model": "sonnet", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 5924, + "skills": [ + "agent-browser" + ], + "session_id": "3a3aeb61-2638-4971-8181-75a40b800eb9", + "user_input_preview": "继续", + "multi_turn": true, + "captured_at": "2026-05-28T16:32:50.119Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "2e69fc24-4d76-475f-aa5d-f9eb07a1b195", + "sha256": "eeca071e71709e2e065caef775662499e437bece6581a503cabbf0615276e0f4", + "bytes": 4030, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 146115, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": "Thank you. Better. You still have not added a visible outer halo. We need a much much thicker, more visible halo around the border. Please add a tweaks menu that lets me adjust the level of halo. 
Als", + "multi_turn": true, + "captured_at": "2026-05-28T15:47:30.663Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "2e821ee7-80c3-43ff-81d7-153515917ee6", + "sha256": "88c0c549cd86cc59f6f136171dcd1cb9c06b594fcc8a272744756ed56097a0f3", + "bytes": 11173, + "agent": "claude", + "model": "sonnet", + "outcome": "succeeded", + "tool_count": 10, + "duration_ms": 60382, + "skills": [ + "default" + ], + "session_id": "bc17a7ca-299f-4c4f-b8c4-f6ff8663a50c", + "user_input_preview": "i want to see where that message is. i never said that, this is the only chat we've had.", + "multi_turn": true, + "captured_at": "2026-05-28T15:48:02.249Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "314d6833-0377-4ac4-ba11-2b8d7eca5511", + "sha256": "3038c98c72deb641dd5eb4a32f5947dd7e4d555bdfd0e5c1f60b2e00d33dcfef", + "bytes": 10839, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 8, + "duration_ms": 40885, + "skills": [ + "default" + ], + "session_id": "2b9deee3-a148-420b-84f4-5e893a68cd87", + "user_input_preview": "然后顶部的那些彩色竖条还是没对齐现在是长度差不多合适了,但是统一往下偏移了几个像素,你看看这是怎么回事?", + "multi_turn": false, + "captured_at": "2026-05-28T15:50:03.845Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "3470e198-34df-4a75-8bfb-57bdc35e9b17", + "sha256": "cefccd0b6cf9629be82e34d758a26664e8c75327ffa2a8f4f7609f1c10209afc", + "bytes": 7116, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 348350, + "skills": [ + "default" + ], + "session_id": "ef1df4e9-1bb0-4e81-a16d-72846d90348a", + "user_input_preview": "Create a complete iOS app prototype for TASKORA with all 15 screens as standalone HTML files. Each screen must be self-contained with no external file dependencies. Start with: 1. Splash Screen 2. 
H", + "multi_turn": true, + "captured_at": "2026-05-28T18:46:18.487Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "35671ee2-9b53-43af-ac50-4dfbdb1994b6", + "sha256": "078db76f020902989eb26028ec9d1934096f48c372e0e6ad59d63ce95480b432", + "bytes": 2281, + "agent": "claude", + "model": "claude-opus-4-5", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 18897, + "skills": [ + "default" + ], + "session_id": "fb1bcfb7-36df-4997-ba78-80223a73067e", + "user_input_preview": "Make a website for an organization responsible for monitoring the government. It will monitor all available informnation including budget, audits, investigations, etc, and will issue papers and notifi", + "multi_turn": false, + "captured_at": "2026-05-28T15:44:51.155Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "37af2802-d34e-4d27-abdf-f7a93ea6e8a4", + "sha256": "cc5ac32e208426ac2cdccb32cc36aa1fc83845021d627911beb56518df68fd3f", + "bytes": 19910, + "agent": "opencode", + "model": "deepseek/deepseek-v4-pro", + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 219138, + "skills": [ + "default" + ], + "session_id": "cbfb3883-529a-44b9-ac30-e4ef54be29af", + "user_input_preview": null, + "multi_turn": false, + "captured_at": "2026-05-28T16:22:17.755Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "3881647e-18c3-4e55-85e3-5e74ae8c15b2", + "sha256": "a9cfc6c27363d05fcea589e3c30ce95f822ad0608626f8fb57f4aad137a1ac9c", + "bytes": 27790, + "agent": "opencode", + "model": "github-copilot/gpt-5.5", + "outcome": "failed", + "tool_count": 13, + "duration_ms": 145002, + "skills": [ + "penpot-flutter-design-source" + ], + "session_id": "60ac3f82-be6c-476e-bbc5-0034c626a03d", + "user_input_preview": "@penpot-mcp @design-review @brainstorming Current State: See some description text overflow issues at first glance. 
Please thoroughly audit, analyze, review, verify and fix & polish all of the remaini", + "multi_turn": false, + "captured_at": "2026-05-28T16:25:42.365Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "39f6d514-a5a0-48a4-8298-6c7fed7ea875", + "sha256": "29b054d1b456e71ed14d2fae91a06891f6fb56a81e70844d6aad70d55ba5caf3", + "bytes": 1270, + "agent": "gemini", + "model": "default", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 89651, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "still to small, make it like 250px", + "multi_turn": true, + "captured_at": "2026-05-26T13:29:42.117Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "3a885250-f8a8-43f7-946e-fbb2f1617c42", + "sha256": "b99ca7e3de5705a8c7d3d11667304a7fc535d28e9b917422c481f3fc7fb913c9", + "bytes": 1529, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 35577, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "do page sized", + "multi_turn": true, + "captured_at": "2026-05-28T15:31:49.927Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "3d6f3f38-5f74-428a-8f3b-7ab09da01aee", + "sha256": "53ddee15049870d727b8e6c007f23c3ddc71905c829eb3417580b66df76eaf4a", + "bytes": 128031, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 36, + "duration_ms": 591338, + "skills": [ + "ad-creative" + ], + "session_id": "5cb1aba3-6090-496d-8357-9cb33db9c4dc", + "user_input_preview": "https://www.institutomarketing.es/master-marketing-digital-inteligencia-artificial/ quiero hacer banners para google ads para esta web que me ayuden a verder el master de Máster Marketing Digital + IA", + "multi_turn": false, + "captured_at": 
"2026-05-27T06:33:13.252Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "3eae4b3b-3fc7-41d2-b162-7f15f1c731d7", + "sha256": "c7a7188d93ac9d705a841ed17dd73f0cb57fc90f0b2eab5373a7c14709d049ed", + "bytes": 2532, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 39056, + "skills": [ + "default" + ], + "session_id": "281fc730-1731-43cd-b265-0ed6a9ea0ced", + "user_input_preview": "你好", + "multi_turn": false, + "captured_at": "2026-05-28T15:46:04.838Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "4033d68a-bf58-42b3-b4a8-0b4c0f70ba09", + "sha256": "6a74117cdc1d3690121579854627ccd1d5a24f5d7ccf40c498cd6029ee9e3515", + "bytes": 1625, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 788923, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "for the content make it center-center", + "multi_turn": true, + "captured_at": "2026-05-28T14:55:49.807Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "411b6259-976f-4e6e-952e-51114269ad2c", + "sha256": "c9aa3406610eb95071c58a77181cb091cb2c366f9dba3033cca402d0ae01189e", + "bytes": 27452, + "agent": "opencode", + "model": "poe/anthropic/claude-opus-4.7", + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 97894, + "skills": [ + "default" + ], + "session_id": "e50d4f9d-546c-4d09-9ce8-f3a379fffd03", + "user_input_preview": "want to build momentum posting more on our instagram don't know what else to post", + "multi_turn": false, + "captured_at": "2026-05-28T16:27:26.020Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "429e54cc-827c-411d-97d5-4e3828dddb66", + "sha256": 
"91dca5d536f7ce9ae39d11e4c7c3842c3e849af6efebdfa574255db328659c66", + "bytes": 38869, + "agent": "opencode", + "model": "opencode/big-pickle", + "outcome": "succeeded", + "tool_count": 21, + "duration_ms": 94840, + "skills": [ + "ad-creative" + ], + "session_id": "b282d8d9-7052-4955-998f-ae5c0c945491", + "user_input_preview": "adelante", + "multi_turn": true, + "captured_at": "2026-05-28T01:47:47.579Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "4650fd02-73b3-4536-a1c8-9304cdb14c40", + "sha256": "73429910d8984d297ae53df1a92bf7efae6798e21559c9a9b9bd04aab7ad0496", + "bytes": 30765, + "agent": "codex", + "model": null, + "outcome": "failed", + "tool_count": 10, + "duration_ms": 165235, + "skills": [ + "default" + ], + "session_id": "089b1ec0-c042-437d-b523-c9d8a4d1752f", + "user_input_preview": "y porque solo 5 reviews? a mi me gustaria que se vean todos", + "multi_turn": false, + "captured_at": "2026-05-28T16:31:40.778Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "4668cb89-9222-497b-addd-e1fa800ed33b", + "sha256": "57e2d39b16b2df8a872f5d7127af4df11a263f6c8badcf3869a62c04119d5b4b", + "bytes": 2221, + "agent": "copilot", + "model": "gpt-5.2-codex", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 22745, + "skills": [ + "ad-creative" + ], + "session_id": "d3eb9812-b37f-4b49-93a5-f336a21b4b33", + "user_input_preview": "continue", + "multi_turn": true, + "captured_at": "2026-05-28T08:30:08.153Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "483e01ea-546d-4e8b-ac57-1a9f4a204266", + "sha256": "86400267dc9de97aa64e83e24e514f997904dfde7694292f264b2176212947a4", + "bytes": 8219, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 4, + "duration_ms": 22785, + "skills": [ + "default" + ], + "session_id": "364e2ac1-815c-4dcb-9412-24d260631481", + 
"user_input_preview": "每个界面的英文标题由绿色修改为蓝色", + "multi_turn": true, + "captured_at": "2026-05-28T15:45:41.651Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "488cf76d-f6f3-4aa1-b2d6-c63b8558d334", + "sha256": "95507ca7d5e145a53c9ff41bff145301391f071332e342525117a8f73e4ef6e1", + "bytes": 2366, + "agent": "claude", + "model": "sonnet", + "outcome": "succeeded", + "tool_count": 4, + "duration_ms": 21461, + "skills": [ + "default" + ], + "session_id": "bc17a7ca-299f-4c4f-b8c4-f6ff8663a50c", + "user_input_preview": "delete one", + "multi_turn": true, + "captured_at": "2026-05-28T15:46:08.356Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "48ea64c1-7056-41e2-8618-e88e4a01130a", + "sha256": "1ef19ead5af4850bf96795814ecdd99916f92abe387920d61d8c8ac1745734c9", + "bytes": 14833, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 10, + "duration_ms": 61075, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "[form answers — variant-c-copy] - How should the new copy live with the image?: Overlay on whitespace edges — accept some proximity to baked copy - What copy? 
(headline / sub / CTA — or leave blank fo", + "multi_turn": true, + "captured_at": "2026-05-28T15:46:32.759Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "4b7ae329-2145-4c5d-beb9-dd8b0fff9e78", + "sha256": "2c851e5b6101ed327c8d870d55454024812b954b319f51ac1d2774cf6e3374b9", + "bytes": 23775, + "agent": "codex", + "model": null, + "outcome": "failed", + "tool_count": 7, + "duration_ms": 85209, + "skills": [ + "default" + ], + "session_id": "913e4b29-34a2-46fd-892e-842b027b112c", + "user_input_preview": "[form answers — task-type] - What should I build?: Prototype - Who is this for?: (skipped) - Brand context: Pick a direction for me [value: pick_direction] - Roughly how much?: (skipped) - Any importa", + "multi_turn": false, + "captured_at": "2026-05-28T16:23:15.099Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "4e9340c6-f090-4900-9e94-1b86c800a2cc", + "sha256": "fd9a7243b6b59053979e01c4c391f5db6b83dec3a13a937b18b66fcf64b0fa94", + "bytes": 2199, + "agent": "claude", + "model": "sonnet", + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 17359, + "skills": [ + "default" + ], + "session_id": "85ac9b77-bd5c-4767-ae1f-7085c96b6baf", + "user_input_preview": "Delete Design System", + "multi_turn": true, + "captured_at": "2026-05-28T15:44:51.383Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "4ea84290-d2d4-4332-91f0-887c6fb00a57", + "sha256": "9322820a568ec835a32ef5af7cccaa8d24dfa0c56282da30fb12cc90cbbca90e", + "bytes": 477, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 706, + "skills": [ + "default" + ], + "session_id": "97d627b9-c997-48c4-b976-1be8c982c2f4", + "user_input_preview": "你能设计App页面么", + "multi_turn": true, + "captured_at": "2026-05-28T15:48:00.811Z", + "cli_version": null, + "protocol_version": null, + 
"anonymization_version": null + }, + { + "trace_id": "51c95383-64b9-4d62-9f0d-ec7b2e2b3dfb", + "sha256": "e023cbf76a5322d02d68945b3d3226c8d93a00681fb22fb74632f6184160f633", + "bytes": 24941, + "agent": "opencode", + "model": "azure/gpt-5", + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 98145, + "skills": [ + "agent-browser" + ], + "session_id": "000cb696-1bd1-454d-abd7-ced41f710ed4", + "user_input_preview": "Continue the remaining unfinished tasks from the previous run. Do not redo completed work. Focus only on these unfinished todos: 1. [in_progress] Critique on 5D radar and tighten spacing/hierarchy; e", + "multi_turn": true, + "captured_at": "2026-05-28T16:20:11.832Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "5322db99-d1eb-453b-9ef9-af960b3b9bcc", + "sha256": "f3e6ecf43ecee88bcebce97fc213595865b6a47318d0545b5e870cc8c1798267", + "bytes": 34269, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "succeeded", + "tool_count": 9, + "duration_ms": 158497, + "skills": [ + "agent-browser" + ], + "session_id": "23db0c86-1bac-46cd-9b85-16a154fcaf71", + "user_input_preview": "its still really busy...", + "multi_turn": true, + "captured_at": "2026-05-28T16:30:35.525Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "54ec02ee-d6d1-4400-8267-fb09f904105c", + "sha256": "d3c301ce7fbd79261bf4787f6dc3a003d28bf9bd08839d84d81e0f58470ed388", + "bytes": 117611, + "agent": "opencode", + "model": "opencode-go/qwen3.7-max", + "outcome": "succeeded", + "tool_count": 15, + "duration_ms": 401600, + "skills": [ + "default" + ], + "session_id": "b4e20f57-f2f9-4677-9d82-249aaffaa3f3", + "user_input_preview": "[form answers — task-type] - What should I build?: Prototype - Who is this for?: mvp for starting alpha test - Brand context: Pick a direction for me [value: pick_direction] - Roughly how much?: about", + "multi_turn": false, + "captured_at": 
"2026-05-28T16:14:26.367Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "5573d3a0-a7ef-4b0b-a395-f3733905b036", + "sha256": "aa07abcdf7bebe928a6c597aadc664e60c6ea418a06111339b55d4f126f36964", + "bytes": 30582, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 20, + "duration_ms": 231607, + "skills": [ + "ad-creative" + ], + "session_id": "4baaa2cb-68cb-4897-941f-39db5f3f835b", + "user_input_preview": "[form answers — discovery] - ¿Dónde se publicará?: Instagram post 1080×1350 - ¿A quién queremos invitar?: Padres de familia - ¿Qué categoría debe empujar?: HLG general - Llamado a la acción: Únete al", + "multi_turn": true, + "captured_at": "2026-05-26T04:21:37.911Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "576e7665-3d46-479e-b591-afdc37ff5a84", + "sha256": "97272c985d9e29e948e553fb1cd0c670f55b0ccc6f21d4d46b0c2a340c67ca6e", + "bytes": 4471, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 26760, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "yeah I wanted the same thing, but with differnet copy on it, can we do that?", + "multi_turn": true, + "captured_at": "2026-05-28T15:45:41.624Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "58d86d0a-e669-4ab3-a808-8791b4fc3dbf", + "sha256": "45e0e19c721fbb8fbc2e63e8d645ac96c871fe6a4ad3da766d5ccd82a6989c70", + "bytes": 30020, + "agent": "codex", + "model": "gpt-5.3-codex", + "outcome": "succeeded", + "tool_count": 9, + "duration_ms": 74273, + "skills": [ + "default" + ], + "session_id": "e0f9af93-631a-4a5d-9480-f6f4fb14ff23", + "user_input_preview": "VAMOS MEXER NA ABA DE EMPRESAS PARCEIRAS, ESTÁ FALTANDO A TAG: CONFIANÇA E COLOQUE A LOGO DA HAPIVIDA E SBPNL NO LUGAR DA MITUTOYO E MAG LOG, AMBAS 
DA MESMA COR PRA NÃO FUGIR DA NOESSA PALETA, PODE SE", + "multi_turn": false, + "captured_at": "2026-05-28T16:30:10.154Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "5b35725c-cbfe-404f-a421-e3d43f6a8f18", + "sha256": "75d5f93a95b41764df416020676905022d76edc30540f7ebfca70e696aa0157f", + "bytes": 26419, + "agent": "opencode", + "model": "opencode-go/deepseek-v4-pro", + "outcome": "succeeded", + "tool_count": 7, + "duration_ms": 82503, + "skills": [ + "default" + ], + "session_id": "1357e16b-a021-4843-873f-5c78639eb35a", + "user_input_preview": "en el header esta 2 veces el menu y eso rompe la visibilidad arreglalo quita eso", + "multi_turn": false, + "captured_at": "2026-05-28T16:24:23.225Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "5b74d70b-27b7-428f-99eb-467b2fd86b4d", + "sha256": "5f9b538c4cad5211f8029eb36dd7cd4eea6ded654e2c7953e394db6a2ae1f845", + "bytes": 3062, + "agent": "claude", + "model": "claude-sonnet-4-5", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 1996, + "skills": [ + "default" + ], + "session_id": "e68b32d2-709e-4ffd-a571-7169f059b32b", + "user_input_preview": "# 📱 TASKORA — COMPLETE OPEN DESIGN UI MASTER DOCUMENT # 🎯 MAIN DESIGN GOAL Design style must feel like: ✅ Smart productivity workspace ✅ Interactive timeline dashboard ✅ Floating modern utility s", + "multi_turn": false, + "captured_at": "2026-05-28T18:31:55.503Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "5de25819-6be2-475a-bb51-b309424a3793", + "sha256": "3da365fa81be33e09b87cc7d0bd9b9c1ae85b8cc60b4b0ce08cf8a6b33e0831b", + "bytes": 7593, + "agent": "opencode", + "model": "opencode/big-pickle", + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 10969, + "skills": [ + "ad-creative" + ], + "session_id": "b282d8d9-7052-4955-998f-ae5c0c945491", + "user_input_preview": "/search", + 
"multi_turn": true, + "captured_at": "2026-05-28T01:40:30.156Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "5fa034f0-ff41-42c0-8e7a-8b18219ca067", + "sha256": "5715b72e5824435c9f5550bce2c39d34f7c47ecdebae6a6231c9b963c9c98f32", + "bytes": 7839, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 8, + "duration_ms": 30048, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "use the ad-creative skill please to redesigns these ads", + "multi_turn": true, + "captured_at": "2026-05-28T15:12:02.288Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "652e573b-512d-40ed-9e1b-fc81c576bb58", + "sha256": "45cacffa08a8dab6e38ac114b3cc0eaee0ecb587ddad2e93cd80d6d2e741adb5", + "bytes": 104309, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 66, + "duration_ms": 308056, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "[form answers — psych-redesign] - Apply the full psych framework above, or pick one lever to run across all 8?: Full framework — different lever per A/B pair (recommended for real learning) - Anything", + "multi_turn": true, + "captured_at": "2026-05-28T15:20:44.901Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "66c4e9aa-1afd-4a6b-abf7-6146a818d040", + "sha256": "454641f49ca3dd0fb01ccdf81cf8cdf091ebf7caa6ce8f1eaad3e27b572e8e42", + "bytes": 37409, + "agent": "opencode", + "model": null, + "outcome": "succeeded", + "tool_count": 7, + "duration_ms": 119811, + "skills": [ + "default" + ], + "session_id": "df080b61-929d-43ee-9cbe-47b51215c953", + "user_input_preview": "Design the mobile landing hero + sticky nav for Photoganic. 
- Sticky top nav: \"PHOTOGANIC.\" wordmark left (heading font), anchor links right (profile, vision, products, portfolio, locations, contact);", + "multi_turn": true, + "captured_at": "2026-05-28T16:18:49.366Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "66e93859-5ed4-47d8-8fa1-e2b96167ac54", + "sha256": "56ef88efe36bf0abf2ab3c2db2131282aac48ea22df752c44213223271075a91", + "bytes": 14081, + "agent": "opencode", + "model": "opencode/deepseek-v4-flash-free", + "outcome": "succeeded", + "tool_count": 5, + "duration_ms": 49963, + "skills": [ + "algorithmic-art" + ], + "session_id": "e8b3a0ef-705c-44f1-859a-a7284c4ea16a", + "user_input_preview": "Me entregaram esse design system mas preciso pensar em 3 opções de conceitos e logos para o cliente. Me ajuda? É um estúdio de mmotion/produtora mas que usam AI para criar seus vídeos. A temática apli", + "multi_turn": false, + "captured_at": "2026-05-28T14:57:50.079Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "69afd671-5632-4a85-8108-819163e5294a", + "sha256": "541f1a46a65262da2acddd4dc13f751592541e73822b418ffc568efa9ce31e45", + "bytes": 2231, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 14019, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "I think my mac is not allowing you to download? How to enable? 
because clicking the share button and export it as a pdf does not include the images", + "multi_turn": true, + "captured_at": "2026-05-28T15:35:13.989Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "6ad53dbb-44ee-4f5a-a7d1-ea9e6d8f7816", + "sha256": "c8c25c8f997badc3f1c90e49d7ff21d6f2e41e06dc00dc220d0d46c9d93b4030", + "bytes": 2394, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 18671, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "now, how could we make a nice motion graphics as well that could fit this campaign?", + "multi_turn": true, + "captured_at": "2026-05-28T15:51:35.143Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "6affd98c-eb60-45d9-95a6-9ddad3a1f5d8", + "sha256": "f62d51c5a3e68868e0ea9768b527ca49f745c7dff693961550e20c9409ff485f", + "bytes": 928, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 43823, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "use this logo files mpm0xns9-Logo-peoplez-digital-1.png", + "multi_turn": true, + "captured_at": "2026-05-26T13:21:15.509Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "6b5f8096-c9b0-42e7-8656-c92b161bad62", + "sha256": "89c02d613a3abdf083062489f783edc66b8393befc0a6af167f44ce96f510098", + "bytes": 603, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 14838, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "still to small, make it like 250px", + "multi_turn": true, + "captured_at": "2026-05-26T13:26:57.935Z", + "cli_version": null, + 
"protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "6c5ac01b-950c-484f-847c-e997049d42b7", + "sha256": "ec768c185ab786db5d89c8c16d9e34f2009734ec77a4e0ed7ac68350e6f406f3", + "bytes": 3988, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 191857, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": null, + "multi_turn": true, + "captured_at": "2026-05-28T15:16:44.847Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "6ef2fd62-cdb9-48f9-9cee-c4a12d5d7c99", + "sha256": "bb1f4528fb968209eaa91e8c0d8b73ee5a68581fc78b26392ad0bcacaa503fc9", + "bytes": 6129, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 226577, + "skills": [ + "algorithmic-art" + ], + "session_id": "73bdc051-ad43-4bbd-8db2-9fa10f41953d", + "user_input_preview": "the corner should be expandly logo like this. 
the previous one corner is really good", + "multi_turn": true, + "captured_at": "2026-05-28T14:37:08.090Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "71210cec-8df6-43ad-a769-2bac5abd29a7", + "sha256": "7cbc87d152dd761bfc1c5742a9b49b46dea5b39527101b133bb3420dd21c31fe", + "bytes": 12227, + "agent": "opencode", + "model": "opencode/big-pickle", + "outcome": "succeeded", + "tool_count": 5, + "duration_ms": 22137, + "skills": [ + "video-hyperframes" + ], + "session_id": "c9a3e11b-ea5a-45ae-bcbd-5aab9ae204a3", + "user_input_preview": "sim quewro um video 16:9", + "multi_turn": true, + "captured_at": "2026-05-28T16:30:27.869Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "71ae54cb-ac92-4199-8524-de95f4eddb74", + "sha256": "67422e98350d24c1fde9484092bee42419e5979211c86720fc7d42339884c9aa", + "bytes": 29529, + "agent": "claude", + "model": "default", + "outcome": "failed", + "tool_count": 20, + "duration_ms": 752672, + "skills": [ + "algorithmic-art" + ], + "session_id": "7f2343d7-07f6-4d71-89bf-6a86ade4bb8d", + "user_input_preview": "[form answers — discovery] - Que veux-tu que je produise ?: Une variante créative différente (nouvelle direction) - Quelle(s) vue(s) générer ?: Kit complet (maillot + short) - Corrections vs. 
les visu", + "multi_turn": true, + "captured_at": "2026-05-28T15:42:38.685Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "728e40bd-2ead-4171-8e78-f127ea065b7b", + "sha256": "48d0857e9a5b3d93c45609416793f07a1ed9bf00a1ff04c9d5abb8c2bd709dfd", + "bytes": 11150, + "agent": "copilot", + "model": "gpt-5.2-codex", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 627740, + "skills": [ + "ad-creative" + ], + "session_id": "d3eb9812-b37f-4b49-93a5-f336a21b4b33", + "user_input_preview": "[form answers — discovery] - What kind of prototype is this?: Landing / marketing page - Target platform: Responsive web, Desktop web - Roughly how much?: you decide - Key content or sections to inclu", + "multi_turn": true, + "captured_at": "2026-05-28T08:31:04.996Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "74ed5b5a-4514-4d69-ab59-1060b842ac42", + "sha256": "00b518710cb390c16cd8223797d9d2ce2be39e8d38d3fdd266994fc3d774fb37", + "bytes": 580, + "agent": "deepseek", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 430, + "skills": [ + "agent-browser" + ], + "session_id": "7a1dc886-2445-4dee-aa8b-88f91b8430e3", + "user_input_preview": "revisa este cuadro de precios para una estrategia de ventas en eventos sociales (bodas)", + "multi_turn": true, + "captured_at": "2026-05-28T16:31:41.576Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "7718bfa3-c6e6-4333-928c-f1364c18ba00", + "sha256": "8c5eb1e51d9c4fed46141760676cd73ed312c88a8d00f74915904173b4f21d73", + "bytes": 32141, + "agent": "opencode", + "model": "opencode-go/qwen3.7-max", + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 190905, + "skills": [ + "algorithmic-art" + ], + "session_id": "6f4bc605-2293-47f4-babe-3248a051fab1", + "user_input_preview": "[form answers — task-type] - What should I build?: Live 
artifact - Image model: gpt-image-2 - Aspect ratio: 1:1 - Visual style: Exploded / deconstructed — cubes floating apart - Any constraints?: (ski", + "multi_turn": true, + "captured_at": "2026-05-28T15:59:26.769Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "7811c5b8-d75e-48dc-91e3-88ed42856a18", + "sha256": "365e0350c61ba26b5cf9556ba62a5d3b036bc153fbe46312f7c84e239f820158", + "bytes": 30190, + "agent": "codex", + "model": "default", + "outcome": "succeeded", + "tool_count": 10, + "duration_ms": 160041, + "skills": [ + "agent-browser" + ], + "session_id": "5db5aadf-1c22-42c6-b7da-6cc97a153997", + "user_input_preview": "あと、本文記事が右下の端っこなので小さく見づらいわかりずらいです、URLを貼るところと記事一覧などをマトマタ処においたり、少し狭めて、本文を大きくして欲しい", + "multi_turn": false, + "captured_at": "2026-05-28T16:20:05.931Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "7a644531-42e4-4dfd-9071-e8b3787fda42", + "sha256": "907ae23a18f9a0c8ba548bffcad3dc04594a5818ec80a185cbf267be7674d0b4", + "bytes": 846, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 35251, + "skills": [ + "algorithmic-art" + ], + "session_id": "73dd5a51-118c-45e6-aa7a-680ab6795510", + "user_input_preview": "They are done", + "multi_turn": true, + "captured_at": "2026-05-28T15:28:26.831Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "7d2fe0ea-2dfc-452e-b6d8-e60fc4daec83", + "sha256": "4721a269add1b2620a61ed552fb4baacc6cfe07b1a2f1e1b2f16cc63caa9dc1d", + "bytes": 63149, + "agent": "opencode", + "model": "opencode/mimo-v2.5-free", + "outcome": "succeeded", + "tool_count": 12, + "duration_ms": 210246, + "skills": [ + "default" + ], + "session_id": "ff030e8b-138a-43fb-880a-d7cd3fab0cc8", + "user_input_preview": "the notes related icons should flow in the background. 
and the color blobs are sticky, they should be floating, even when no scroll happens. And 1 big pencil whch does different action when scrolled,", + "multi_turn": false, + "captured_at": "2026-05-28T16:29:25.113Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "80f9124b-be2b-4a6b-bb7d-647d523c10ac", + "sha256": "602f299f7a5c81c4571846ab76617f79f2f09d59fa68862cc493eed2ec47fb52", + "bytes": 100352, + "agent": "codex", + "model": "gpt-5.3-codex", + "outcome": "succeeded", + "tool_count": 21, + "duration_ms": 395507, + "skills": [ + "agent-browser" + ], + "session_id": "bf4a6929-1fc1-4c41-bf43-2750e158d58e", + "user_input_preview": "[form answers — discovery] - Any sharper audience slice?: fitness coaches - Which screens should I deliver first?: all - Priority user flows: all - Copy/content source: Use the PRD as primary copy sou", + "multi_turn": false, + "captured_at": "2026-05-28T16:10:34.189Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "8222687a-3886-421c-811f-ef8cf127fb37", + "sha256": "656b66b383947715d5dbfa6a78861dcf3814a54aa36b165bfd8136e545272df0", + "bytes": 2716, + "agent": "claude", + "model": "sonnet", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 37924, + "skills": [ + "default" + ], + "session_id": "bc17a7ca-299f-4c4f-b8c4-f6ff8663a50c", + "user_input_preview": "\"My memory already flags this — I have a saved memory note that says: \"User wants to see where that message is.\" That note exists because you apparently raised this concern before — which means this a", + "multi_turn": true, + "captured_at": "2026-05-28T15:49:13.895Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "88be3ec6-164a-4fd1-bfdc-41db48a93f5b", + "sha256": "54e187263c0f3780947f96d8d6bc93253ddd2eb02981a4bb0eb6f4e486fe6241", + "bytes": 5792, + "agent": "codex", + "model": null, + "outcome": 
"failed", + "tool_count": 2, + "duration_ms": 771177, + "skills": [ + "algorithmic-art" + ], + "session_id": "73bdc051-ad43-4bbd-8db2-9fa10f41953d", + "user_input_preview": "from the expandly-poster-faq-link, add orange tone to it. this one is much more cleaner than the recent one. also the clarification at the bottom should be 1 liner", + "multi_turn": true, + "captured_at": "2026-05-28T14:51:41.458Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "8a03cb14-c398-4e68-8ee5-a580b2836167", + "sha256": "ecdc578780042c947d9302ac7d509ac51a05205375b6b6373a139b1c9d8b4b1e", + "bytes": 43982, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 10, + "duration_ms": 827107, + "skills": [ + "default" + ], + "session_id": "a4a0e73e-5ce6-4e5a-aaa2-6470d99b38fd", + "user_input_preview": "[form answers — task-type] - What should I build?: Live artifact - Which surface(s)?: S4 — /dashboard (ActionQueue + domain grid) - Fidelity target: High-fidelity (production-ready HTML, full design s", + "multi_turn": false, + "captured_at": "2026-05-28T16:07:24.119Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "8be934bd-1ec3-44e5-9a67-f65c18524409", + "sha256": "9a69b2975e1afa207bc849462252d6bd193b4d1f1b79ef9bced13f2134dc4116", + "bytes": 519, + "agent": "gemini", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 147640, + "skills": [ + "algorithmic-art" + ], + "session_id": "9adb48c8-0744-47ee-a408-1f037c56d5aa", + "user_input_preview": "Please try again", + "multi_turn": true, + "captured_at": "2026-05-28T14:53:27.403Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "8c393b12-cf8d-4e2f-881c-bcd26ed1848e", + "sha256": "43afca261f8b9fb9559bd9849747da9b21ac5abd936abd10fb8197d0a51e3342", + "bytes": 18126, + "agent": "opencode", + "model": "opencode/big-pickle", + 
"outcome": "succeeded", + "tool_count": 5, + "duration_ms": 58614, + "skills": [ + "algorithmic-art" + ], + "session_id": "125ac378-0e9e-463e-bc90-91ee5f6a4e1e", + "user_input_preview": "Вставь в карточки фотографии из интернета", + "multi_turn": true, + "captured_at": "2026-05-28T14:51:23.492Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "8d9d8101-0cbb-4416-b204-918fc96af999", + "sha256": "adfb5c55f58e51063d9cb8dbea9a3035f0148dd156e253b74ddd1b9d6e431ef1", + "bytes": 2033, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 445704, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "Take a look, it's so breaking", + "multi_turn": true, + "captured_at": "2026-05-28T14:46:43.498Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "8e3f6786-9041-42f9-9de8-b9fe5734493d", + "sha256": "0c7a8a3f711dbf1b7ab35bad1891d782f47326066395812af6b07f2e574f97fc", + "bytes": 5391, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 46103, + "skills": [ + "default" + ], + "session_id": "e07b2547-0be5-41b8-8654-0476d5fea61b", + "user_input_preview": "下面这版按「可以拉产品、设计、研发、算法、运营一起过需求评审」的粒度来写。它不是完整技术 PRD,但已经包含目标、范围、流程、规则、页面、数据、风控和排期,足够进入评审讨论。 **需求名称** **脉脉愚人节活动:大厂生存局** **一句话** 用户扮演一名空降大厂的普通员工,通过 9 轮高压职场选择,在组织危机中求生,最终尝试晋升为部门负责人。高失败率制造胜负欲,失败和通关结果都可分享至社区", + "multi_turn": false, + "captured_at": "2026-05-28T16:31:03.752Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "8eea78fd-1ec3-4b57-a432-f3f2839ff8c9", + "sha256": "4430039ae9fdfd874f5620e1639b5560afcc45b3843ab4add6ca578fb38e9c55", + "bytes": 5066, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 25891, + "skills": [ + "agent-browser" + 
], + "session_id": "d89144a7-acfe-4d59-a732-b7737e73f165", + "user_input_preview": "Apply the --accent swap to the footer surface? Yes — swap to --accent at 4%", + "multi_turn": true, + "captured_at": "2026-05-28T15:49:57.514Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "8f93cb40-495e-43b7-9940-b99f080aca8f", + "sha256": "e64c2a32d00d4e0734454b3f52d3b23714a22923afb6750d399c83991639a334", + "bytes": 6843, + "agent": "opencode", + "model": "opencode/big-pickle", + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 16404, + "skills": [ + "algorithmic-art" + ], + "session_id": "125ac378-0e9e-463e-bc90-91ee5f6a4e1e", + "user_input_preview": "ВИП карточки, фон самих карт сделай чуть темнее", + "multi_turn": true, + "captured_at": "2026-05-28T14:54:49.872Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "98d2b062-ac29-4df6-8763-38df3eed138e", + "sha256": "a40cf3fd59b4e33bf1a2255b99e2dbbce02351b4b29d653db00f177f29afc651", + "bytes": 71465, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 33, + "duration_ms": 333203, + "skills": [ + "agent-browser" + ], + "session_id": "2cafde7f-3be0-41c4-8497-b1484c34eb9d", + "user_input_preview": "Crie o protótipo dessa tela", + "multi_turn": false, + "captured_at": "2026-05-28T16:13:44.089Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "9a9522ec-575f-432f-aeed-efc491e900aa", + "sha256": "5cc1b9b198a5ced430438bcbc23c3c861254862a88bab8ee2cedeca700ccf93e", + "bytes": 23671, + "agent": "opencode", + "model": "opencode/deepseek-v4-flash-free", + "outcome": "succeeded", + "tool_count": 7, + "duration_ms": 73681, + "skills": [ + "default" + ], + "session_id": "efd35882-da45-49d0-b730-db507f7bdca6", + "user_input_preview": "Necesito que lo dejes mas minimalista y con animaciones para que quede mas profesional.", + 
"multi_turn": true, + "captured_at": "2026-05-28T16:29:26.125Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "9b6d85bb-5d36-4dfd-85ae-9169882fe0ea", + "sha256": "5c09596ee4edd6215f080d5a15b39f91067ba586e2a717d4bf8cb8f76e36a596", + "bytes": 2813, + "agent": "qwen", + "model": "qwen3-coder-plus", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 51439, + "skills": [ + "ad-creative" + ], + "session_id": "0a51c2ff-bc6c-4d0e-bdb7-ae8bfaf4fe00", + "user_input_preview": "Crea iconos vectoriales , al Estilo Apple y Microsoft en Formato SVG descargables que sean minimalistas y editables en cualquier programa vectorial los iconos serian , Home , Configuracion, Libro, Web", + "multi_turn": true, + "captured_at": "2026-05-27T23:33:39.382Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "9c38efe9-e27e-479b-a9a9-a7e7d2e4fec5", + "sha256": "8373b0f3fbccfb69e623297d1dc947bb65b8d5aa0e6de6c0928db65ace443855", + "bytes": 1119, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 46117, + "skills": [ + "algorithmic-art" + ], + "session_id": "73dd5a51-118c-45e6-aa7a-680ab6795510", + "user_input_preview": "Choose the most suitable one, I am new to this", + "multi_turn": true, + "captured_at": "2026-05-28T15:30:57.503Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "9d148493-79fb-4d3b-a9ca-a8224effdfca", + "sha256": "106108e4ba89a3c8081b5cb3e7519b4a3822b0b8cd025712c202f800be5b484b", + "bytes": 1686, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 244277, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "remove this SMM-S2 Studio Cyan-to-teal gradient with centered typography and standardized vertical spacing. 
Hook / Big Stat 3 JAM/HARI Use a slash (/) to create a white sub-element automatically. Mai", + "multi_turn": true, + "captured_at": "2026-05-28T14:29:35.777Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "9d726d33-cb93-487c-92e5-dab5c026d796", + "sha256": "861af361c4224bc0abc8377b27d93a3aa97d4ea617162abb73db2c1dae4da7d2", + "bytes": 30701, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 14, + "duration_ms": 130284, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "I need an a4? and the images are also not there???", + "multi_turn": true, + "captured_at": "2026-05-28T15:36:15.248Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "9e3d1b49-5e9d-4133-ae2e-9f42806c0164", + "sha256": "58eecf2c6c8d800a4b2ef31078436d085942dcd3d73239479566ff89dc1de979", + "bytes": 1245, + "agent": "claude", + "model": "sonnet", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 20942, + "skills": [ + "default" + ], + "session_id": "bc17a7ca-299f-4c4f-b8c4-f6ff8663a50c", + "user_input_preview": "the question \"why did you create two\" is still unanswered. 
why did you do it?", + "multi_turn": true, + "captured_at": "2026-05-28T15:45:14.277Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "9f08d810-562a-4b06-ace0-7c94b8cfa20e", + "sha256": "83ae59c19aa1a0dc200bc971e4167af5b4c609b43af206ce3c485d0daecec087", + "bytes": 64235, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 24, + "duration_ms": 427792, + "skills": [ + "default" + ], + "session_id": "c64c0d19-9e6c-465c-ad34-94e17827c04f", + "user_input_preview": "看下所有页面的按钮的icon 是否显示正常 尤其是 hover 的时候", + "multi_turn": true, + "captured_at": "2026-05-28T16:29:19.493Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a1175805-8b79-4991-a669-3444d991883f", + "sha256": "1e8f9e1226e20e5b08f7073990f73de892ce941e8f251be44f1a08bd9289abb1", + "bytes": 79323, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 25, + "duration_ms": 190371, + "skills": [ + "default" + ], + "session_id": "976333ac-966e-49bc-a5e7-d6c903bd1a01", + "user_input_preview": "继续", + "multi_turn": true, + "captured_at": "2026-05-28T16:33:07.209Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a1218c0a-d660-427c-b77b-7cc175eb959e", + "sha256": "bdabd26b8868ca10a30cb4adf0cf146d19cfff4c1b3454793727f7965aab26e9", + "bytes": 3295, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 144059, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": "We need clear visible borders around the US - otherwise nobody will realize that this is actually a map of the US.", + "multi_turn": true, + "captured_at": "2026-05-28T15:41:26.917Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": 
"a132d324-32d0-4c85-baff-474c33c4bc2d", + "sha256": "66810815983c60db0d4d38fd73e6d04de302c5c0adaa80d535556bb400183f64", + "bytes": 43846, + "agent": "claude", + "model": "default", + "outcome": "failed", + "tool_count": 20, + "duration_ms": 881748, + "skills": [ + "ad-creative" + ], + "session_id": "8fbb6ad4-4c3d-4745-956c-502b09af5e2f", + "user_input_preview": "hay problemas en el sentido de. primero quiero que haya flujo en la pagina web, y en esto lo digo porque si agrego algo al carrito este se tiene que ser a traves del boton del producto no en la imagen", + "multi_turn": true, + "captured_at": "2026-05-28T02:04:21.831Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a2134bb6-8c2d-4484-87d6-7bd8826f690c", + "sha256": "211a553bc526271a8c669436b0f058922f5d03c7b1f3b45d45eb791d7d2cc6ca", + "bytes": 112807, + "agent": "opencode", + "model": "opencode/deepseek-v4-flash-free", + "outcome": "succeeded", + "tool_count": 32, + "duration_ms": 322604, + "skills": [ + "data-report" + ], + "session_id": "354c836a-be1b-4bdf-9588-cb4d83beb2eb", + "user_input_preview": "后端需要加入超级管理员与用户独立的管理页面", + "multi_turn": false, + "captured_at": "2026-05-28T16:25:49.899Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a5475efd-5f5d-4706-a7b6-5f34bd1329c8", + "sha256": "549155aa85e2cebe15bdbb2fca8507af8ac098b042a6e9ed0d2feaf417c3500f", + "bytes": 731, + "agent": "cursor-agent", + "model": "composer-2.5-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 161796, + "skills": [ + "ad-creative" + ], + "session_id": "routine-conv-d34f70ac-9cd8-41fb-9183-2b6263862d7c", + "user_input_preview": "https://nfsauto.by/design-system нужно переделать эту дизайн систему на всех вкладках", + "multi_turn": false, + "captured_at": "2026-05-26T09:07:05.083Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": 
"a70bf270-bb40-4e06-8734-9f05aef72d3b", + "sha256": "6d8d58cf5f252e24d46d6e08a1653b04ed070f4813fe7759d1a059beb577d32d", + "bytes": 593, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 3141, + "skills": [ + "default" + ], + "session_id": "75ab5caa-6b7d-4d01-ac03-b2ece54e9398", + "user_input_preview": "صمملي ديزاين موقع موسيقي وبي اياه بيسك مع زخارف عربيه", + "multi_turn": true, + "captured_at": "2026-05-28T16:34:34.517Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a88405ce-7adf-4847-9f5b-936818a2314c", + "sha256": "576495aa106d0afc5db538ea056bae7e341acbba60ffc45334858b6bf9cd9493", + "bytes": 5469, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 85968, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "did you exported it?", + "multi_turn": true, + "captured_at": "2026-05-28T15:32:46.574Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a89f2c3b-a94a-45b3-9a51-3e8efa0936b2", + "sha256": "30b3d89ab8b5a71f7608eb7f0d485222056d3972ada45f2948370468b62ff973", + "bytes": 30430, + "agent": "codex", + "model": null, + "outcome": "failed", + "tool_count": 12, + "duration_ms": 328153, + "skills": [ + "default" + ], + "session_id": "2b58dcbf-fca8-4bd8-8fd6-70a74b5819e5", + "user_input_preview": "[form answers — task-type] - What should I build?: Image - Who is this for?: 喜欢diy,喜欢送礼 - Brand context: Pick a direction for me [value: pick_direction] - Roughly how much?: 3000*3000 - Any important", + "multi_turn": false, + "captured_at": "2026-05-28T16:26:50.200Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a8e51f72-982d-4b39-b8c9-86c9457fe61b", + "sha256": "b7c89d4a0f127d65fe12755ec27014ff6a0bff7528bfe5c5888f3a2718dc2160", + "bytes": 
2234, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 23273, + "skills": [ + "default" + ], + "session_id": "5db542cb-eaae-46cc-a255-c5bbb0c75f37", + "user_input_preview": null, + "multi_turn": false, + "captured_at": "2026-05-28T15:45:14.061Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a964326c-49dd-4aae-bbb8-fab3e9b16599", + "sha256": "67a76a9421ef62367a3c8b3e4ce42823221172de7d1a9a527707de91aef79ff5", + "bytes": 64131, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "succeeded", + "tool_count": 31, + "duration_ms": 278476, + "skills": [ + "default" + ], + "session_id": "f67e8485-f269-41e2-87f6-0172a9bba9f9", + "user_input_preview": "continue", + "multi_turn": false, + "captured_at": "2026-05-28T16:28:54.878Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "a97cc987-dfb6-4e3d-956e-f7c3d82d4951", + "sha256": "254e197af6c06d9c28d6f1e45e7f45d6b5928715c8cd17bfd468463c283f9823", + "bytes": 1156, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 176836, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "the logo is to small the logo is to small", + "multi_turn": true, + "captured_at": "2026-05-26T13:23:24.552Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "ad17d1ee-cc5d-4cee-9738-ea93fafa27ee", + "sha256": "04485bb328812272a166ec01368324226f89b2d66cdca02913f72d1fe4292312", + "bytes": 74147, + "agent": "opencode", + "model": "opencode/big-pickle", + "outcome": "succeeded", + "tool_count": 30, + "duration_ms": 256366, + "skills": [ + "video-hyperframes" + ], + "session_id": "c9a3e11b-ea5a-45ae-bcbd-5aab9ae204a3", + "user_input_preview": "Continue the remaining unfinished tasks from the previous 
run. Do not redo completed work. Focus only on these unfinished todos: 1. [in_progress] Init HyperFrames scaffold via npx hyperframes init 2.", + "multi_turn": true, + "captured_at": "2026-05-28T16:31:17.472Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "ae5f17a4-dee1-4e30-9a5b-e9457e76743a", + "sha256": "a8afa505a924cbf2b0b24830c2f7b65cf30c84205f774af36350ded9aa598c6c", + "bytes": 90685, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 31, + "duration_ms": 382694, + "skills": [ + "agent-browser" + ], + "session_id": "899d9e9f-0cb6-4429-b975-450f61400d7e", + "user_input_preview": "首页风格你在优化下,读取图片", + "multi_turn": true, + "captured_at": "2026-05-28T16:22:56.001Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "b11a039d-5d8c-4035-a6c2-f49e94ed27d2", + "sha256": "39993e13cbd648b7424636956e5a53f624903d9af28afc114d2667611c71dbbb", + "bytes": 525, + "agent": "deepseek", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 182, + "skills": [ + "agent-browser" + ], + "session_id": "7a1dc886-2445-4dee-aa8b-88f91b8430e3", + "user_input_preview": "rediseña este cuadro de precios", + "multi_turn": true, + "captured_at": "2026-05-28T16:32:56.479Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "b2ae079f-ed93-4dbf-8489-b13d50ac4278", + "sha256": "4345a20d92b6c28fd3047623725c0dedebf3c238c7a1faca83561d1fe73f2738", + "bytes": 59155, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "succeeded", + "tool_count": 18, + "duration_ms": 325515, + "skills": [ + "default" + ], + "session_id": "72e59a1e-6354-4acb-88f8-e4eabae3dadb", + "user_input_preview": "工单里还是要显示已分配和未分配工单", + "multi_turn": true, + "captured_at": "2026-05-28T16:22:04.372Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": 
"b2ff83be-3d63-4b9b-b845-98e8163eda43", + "sha256": "3a50f6100734d1695b3540e370c4ee935c9ab482787e260ee9193bfad645f7ae", + "bytes": 2191, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 110613, + "skills": [ + "algorithmic-art" + ], + "session_id": "73dd5a51-118c-45e6-aa7a-680ab6795510", + "user_input_preview": "Please create an A5 Flyer to be digitally distributed, the Topic: Invite to professionals and companies to showcase their knowledge in an exhibition panel guided by a host. Audience is: Entrepreneurs", + "multi_turn": true, + "captured_at": "2026-05-28T15:17:07.162Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "b5f436ad-6928-4170-a960-90cab38e0b5e", + "sha256": "d5caa89af8bee6b1e438713ba96263b343608ff37839087de65f8ac15b14ea52", + "bytes": 29407, + "agent": "opencode", + "model": "ollama-proxy/kimi-k2.6", + "outcome": "succeeded", + "tool_count": 11, + "duration_ms": 452709, + "skills": [ + "agent-browser" + ], + "session_id": "81526dba-0a4c-4805-a731-ed8f9b4f0639", + "user_input_preview": "continue. 
adding logo black-sm - for light bg white-sm - for dark bg", + "multi_turn": true, + "captured_at": "2026-05-28T16:27:47.660Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "b70edeb2-47eb-4dc2-bbc9-628062672ba5", + "sha256": "081a135ebad953a7dd7bf198b43a974327e49349f5aba949218a9a9fbafb2b30", + "bytes": 148171, + "agent": "codex", + "model": "default", + "outcome": "succeeded", + "tool_count": 42, + "duration_ms": 605291, + "skills": [ + "default" + ], + "session_id": "1a316879-148c-49cf-87fd-c2893f0c9bbd", + "user_input_preview": "或者唔洗分開顯示語言同翻譯語言。直接前後5個語言,然後所有版面統一語言就可以。 新增一餐嗰到未有AI生成功能。 食譜個AI生成食譜內容唔需要顯示屋企情況。 同埋我要一鍵生成一週餐單。", + "multi_turn": false, + "captured_at": "2026-05-28T16:25:44.964Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "b84bfafb-f17b-4d32-ae4e-dfaa11a1422b", + "sha256": "4dec4453ddc8597dc1e197e3d04f4605f820b4905dddca7172823b9e64a753f0", + "bytes": 64394, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "succeeded", + "tool_count": 26, + "duration_ms": 279992, + "skills": [ + "default" + ], + "session_id": "106b1ee4-0f94-4e32-831d-9b14b720ceb3", + "user_input_preview": "i wanted mobile scrren designs you designed webaite plz check", + "multi_turn": true, + "captured_at": "2026-05-28T16:27:19.574Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "b9506214-9b49-4a52-be09-b61f7e694236", + "sha256": "648c6ad831cf53ca01850ce1e517b35b00f762984a15053318e59b9daa50f112", + "bytes": 16637, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 187269, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "Create meta ads creative: ### SMM-S2 — \"3 Jam Sehari Bukan Tugasmu\" **Angle:** Pain — Time Drain | **Format:** Static 1080×1350px and 1080x1920px 
#### Visual Direction - **Layout:** Bold typograph", + "multi_turn": true, + "captured_at": "2026-05-26T13:08:26.553Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "ba224f2c-8d1f-4fbb-8ac7-56780fdcce39", + "sha256": "dba8efcc1f02b149c228b4e2d88ca066d3d76ef433aa3844283be6a31c611f16", + "bytes": 12421, + "agent": "opencode", + "model": null, + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 44421, + "skills": [ + "default" + ], + "session_id": "df080b61-929d-43ee-9cbe-47b51215c953", + "user_input_preview": "untuk foto logo mohon gunakan ini", + "multi_turn": true, + "captured_at": "2026-05-28T16:22:49.159Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "bb8a7a42-6e09-4684-bb14-71e3d2c52eed", + "sha256": "710aa0a916fc1f56866305c28958cfdbf33d4745f7cf367ce2c76b5adafaca2a", + "bytes": 17729, + "agent": "gemini", + "model": "default", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 176651, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "for the whatsapp icon, please make it better", + "multi_turn": true, + "captured_at": "2026-05-26T13:51:54.149Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "bc6fcb3b-b700-45f6-aafb-bce68b9f0176", + "sha256": "5b3b8ca8a8eac1ad71726a5a574859bafd36ede160c28fc3d826d040116cec25", + "bytes": 1756, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 27243, + "skills": [ + "agent-browser" + ], + "session_id": "d89144a7-acfe-4d59-a732-b7737e73f165", + "user_input_preview": "i didnt notice a change. 
should it have updated the preview?", + "multi_turn": true, + "captured_at": "2026-05-28T15:49:16.635Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "bccb5763-363b-4749-be1a-b51b17fe9b0c", + "sha256": "aed99293df4929f5405df56b0b46afcb224c1cd260f6234b9d7cc9aa92629f43", + "bytes": 477, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 795, + "skills": [ + "default" + ], + "session_id": "97d627b9-c997-48c4-b976-1be8c982c2f4", + "user_input_preview": "你能设计App页面么", + "multi_turn": true, + "captured_at": "2026-05-28T15:45:48.628Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "bd2db863-956d-4f8d-87bb-36a864b1d61c", + "sha256": "7b0ae36e7d939149f9f39e67238d3dbe18d8e84b145e6e6e24155831f731ca3e", + "bytes": 1185, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 108076, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "use logo files in design files folder", + "multi_turn": true, + "captured_at": "2026-05-26T13:12:44.475Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "bddc3c35-d642-484b-aee8-3c131f11dea9", + "sha256": "36d6da3064958c8b99752a5c203d2b1f67918329c5e64336092392b2a5fee471", + "bytes": 60202, + "agent": "opencode", + "model": "azure/gpt-5", + "outcome": "succeeded", + "tool_count": 8, + "duration_ms": 385510, + "skills": [ + "agent-browser" + ], + "session_id": "000cb696-1bd1-454d-abd7-ced41f710ed4", + "user_input_preview": "当前设计过于简单。请严格按照如下需求进行设计,不要遗漏细节:1. 地图制作 地图编辑页面是对二维地图进行编辑与管理。页面功能主要包括:地图管理、图层管理、地图编辑、量算工具、地图发布、地图预览、系统设置。 1.1. 
UI整体布局 地图窗口全屏显示,功能菜单显示到顶栏区域。点击菜单在地图区域左侧切换显示对应的管理面板。菜单同时为管理面板的显隐开关,面板隐藏时点击菜单则显示,面板显示时点击菜单则隐藏。", + "multi_turn": true, + "captured_at": "2026-05-28T16:26:10.402Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "bf4678a2-5510-48fd-b20d-999687c54b12", + "sha256": "638af861c828008950d05bee3b33419a4e20f289b90dba9946f227e64fc9044c", + "bytes": 546, + "agent": "antigravity", + "model": "gemini-3.5-flash", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 952, + "skills": [ + "default" + ], + "session_id": "4131bd71-386d-4c2d-b8a4-45c5014c5a58", + "user_input_preview": "你能做什么?教教我。", + "multi_turn": false, + "captured_at": "2026-05-28T16:35:02.214Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "c1153633-3f74-4524-bfd8-15d8a58f2fac", + "sha256": "ca65d06c5cc54acf22eb2acc1c4a7e742b8f4897a25f9e925bec9408e705ea23", + "bytes": 12146, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 5, + "duration_ms": 42705, + "skills": [ + "ad-creative" + ], + "session_id": "4baaa2cb-68cb-4897-941f-39db5f3f835b", + "user_input_preview": null, + "multi_turn": true, + "captured_at": "2026-05-26T04:28:24.708Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "c3bf9cb1-03fa-4fb7-ba25-090cd4c2f1a2", + "sha256": "a7ea696841b631ed3d40a6c040287f5f2d284fb08b715187c726edfdfbff4046", + "bytes": 22084, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 16, + "duration_ms": 155788, + "skills": [ + "default" + ], + "session_id": "1a669d6e-5cfd-4a77-af4c-7aae550d48d5", + "user_input_preview": "顶上的,日期切换,改成课程预约。老师板块去掉。", + "multi_turn": false, + "captured_at": "2026-05-28T15:47:50.307Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "c4e6c7d0-a162-4c97-a083-2be853e9e028", + "sha256": 
"b9fbb5d9eaedd6f9692a7d660cf93cf9fc3b450c3e16a9fc8cb9747426b60352", + "bytes": 43586, + "agent": "opencode", + "model": "opencode/deepseek-v4-flash-free", + "outcome": "succeeded", + "tool_count": 7, + "duration_ms": 367707, + "skills": [ + "ad-creative" + ], + "session_id": "5a122929-f7a9-4a4e-bad5-5eb9f8a52d51", + "user_input_preview": "re design the whole page , as i am not able to see any cards , keep all in a same page without it being reveled by clicking some button", + "multi_turn": false, + "captured_at": "2026-05-26T06:05:28.121Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "c531816e-95cc-4fb2-ba03-174e8f1dbe3b", + "sha256": "3a2e8cddce4f1015da01201cea29c94ea405f126f7715b5cb915a4145abde7bc", + "bytes": 18515, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 14, + "duration_ms": 58371, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "Still fucking empty", + "multi_turn": true, + "captured_at": "2026-05-28T15:39:08.782Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "c7266eeb-fa45-46af-a700-6392dde65702", + "sha256": "da6d796f35bcedc61e6479247e6285766df731f2974608e9e214853affa5dbd7", + "bytes": 6831, + "agent": "opencode", + "model": "opencode/big-pickle", + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 13391, + "skills": [ + "algorithmic-art" + ], + "session_id": "125ac378-0e9e-463e-bc90-91ee5f6a4e1e", + "user_input_preview": "обводку вип карточек менее яркую", + "multi_turn": true, + "captured_at": "2026-05-28T14:55:31.294Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "c7a33c84-ef9f-49c4-96e3-84ac3232d3db", + "sha256": "de9590fc30590b54283179bb967d45a123f18484aa0c5f5b4528fd27a583e2f7", + "bytes": 1329, + "agent": "claude", + "model": "sonnet", + "outcome": 
"failed", + "tool_count": 0, + "duration_ms": 5746, + "skills": [ + "agent-browser" + ], + "session_id": "3a3aeb61-2638-4971-8181-75a40b800eb9", + "user_input_preview": "我的界面,你把太多东西都给舍弃掉了,这不行。我们这个辩论胜场得票文章获赞,还有关注和AI,这几个肯定是要有的。那个AI里面隐藏了一个很重要的功能,就是AI助手,相当于一个Deepseek放在了里面。这个AI使用量,这个做的好。还有上面这些东西都可以,头像、姓名什么的都可以。然后按照我说的调整一下。然后你现在可以看一下,我们做了一个设置页面。设置页面里面的东西,就是你这个页面很多都已经放在设置页面了", + "multi_turn": true, + "captured_at": "2026-05-28T16:31:57.479Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "c901d77b-c98a-45d4-bb81-82502eac5920", + "sha256": "555257c703e3fd1ebd16a2c1b5bac9207648a5beb6a1f8ad73f9e8facb44ffe6", + "bytes": 4893, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 2, + "duration_ms": 130713, + "skills": [ + "algorithmic-art" + ], + "session_id": "73bdc051-ad43-4bbd-8db2-9fa10f41953d", + "user_input_preview": "continue", + "multi_turn": true, + "captured_at": "2026-05-28T15:15:30.035Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "c9ccdb7f-30d0-4b84-aa75-711d860a8aa2", + "sha256": "547a118d952f725cf17fd6c5c66999b95c04ae9f5087fcc28ba6b57713c49a6f", + "bytes": 586, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 3911, + "skills": [ + "ad-creative" + ], + "session_id": "8fbb6ad4-4c3d-4745-956c-502b09af5e2f", + "user_input_preview": "no puedes seguir?", + "multi_turn": true, + "captured_at": "2026-05-27T22:04:58.281Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "cdd021c6-9068-42df-a750-9cb7c435342b", + "sha256": "9f7afef1b4c82d8057f8f34e86670c39a6e11ce32b446c8dd7dc555def3b260e", + "bytes": 16796, + "agent": "opencode", + "model": "opencode/big-pickle", + "outcome": "succeeded", + "tool_count": 3, + "duration_ms": 33758, + "skills": [ + "ad-creative" + ], + "session_id": "b282d8d9-7052-4955-998f-ae5c0c945491", + 
"user_input_preview": "revisa la identidad", + "multi_turn": true, + "captured_at": "2026-05-28T01:46:30.963Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "d15c25b0-5ac6-420e-9a63-adaf0677560e", + "sha256": "acec3fa91991d369a579a6caaaa9f18fc650977c1f85bd60d5721587ba562669", + "bytes": 2440, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 18476, + "skills": [ + "ad-creative" + ], + "session_id": "4baaa2cb-68cb-4897-941f-39db5f3f835b", + "user_input_preview": "CRE un arte para hlg cycling academy donde diga una frase llamativa para ivnitar a las perosnas a sincribirse", + "multi_turn": true, + "captured_at": "2026-05-26T04:20:12.539Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "d15c3158-3d4a-40ce-8c89-ebc7d1ff877d", + "sha256": "938d44eb4052e2032386c8d0a4732e19606f82d63b436d3776bf1c27bcad1c9f", + "bytes": 539, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 42653, + "skills": [ + "algorithmic-art" + ], + "session_id": "73dd5a51-118c-45e6-aa7a-680ab6795510", + "user_input_preview": "Yes, do I need to install anything?", + "multi_turn": true, + "captured_at": "2026-05-28T15:32:08.675Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "d273af29-4477-4549-b7ac-f6ec7682efdd", + "sha256": "fb85bbf786abdb2d7cff9236008d9cc730bb5ff8aba2e5d7def1bb7097e1a7a7", + "bytes": 8922, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 47368, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "use the market-psychology skill to redisgn please", + "multi_turn": true, + "captured_at": "2026-05-28T15:19:33.644Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + 
}, + { + "trace_id": "d2c66997-5962-4f3f-8f24-4c1434f30467", + "sha256": "6393174f7b219627981b97680b5b985d6b0acdd3b4490f11bb5e0fc9bdc4524a", + "bytes": 41664, + "agent": "opencode", + "model": null, + "outcome": "succeeded", + "tool_count": 21, + "duration_ms": 158269, + "skills": [ + "default" + ], + "session_id": "2e16f4fb-024d-45c2-917a-b61c61c67850", + "user_input_preview": "\"Agente, vamos a crear nuestra primera pieza de conexión humana. Hazlo en dos pasos exactos: PASO 1: Usa la API de Leonardo AI (o tu motor de generación de imágenes activo) para generar una fotografí", + "multi_turn": false, + "captured_at": "2026-05-28T16:31:45.453Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "d82d885c-a067-450c-a17b-f3442ad6005b", + "sha256": "699783b5d4bf8f1dc2b20c33d9924110da9e61c677977c31f5b36085b74f19db", + "bytes": 544, + "agent": "claude", + "model": "sonnet", + "outcome": "failed", + "tool_count": 0, + "duration_ms": 4607, + "skills": [ + "default" + ], + "session_id": "fd3e2909-2b97-4014-a58b-4a6a331a5c13", + "user_input_preview": "Est-ce que c'est bon ?", + "multi_turn": false, + "captured_at": "2026-05-28T16:34:07.220Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "d996dfdd-3ba6-4167-8d07-b4a7052266eb", + "sha256": "610a53c112d04d72fe817490af52b896b6212878461db29f0bbd6cac38fefefb", + "bytes": 490, + "agent": "gemini", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 101307, + "skills": [ + "algorithmic-art" + ], + "session_id": "73dd5a51-118c-45e6-aa7a-680ab6795510", + "user_input_preview": null, + "multi_turn": true, + "captured_at": "2026-05-28T15:25:44.311Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "db0f3065-d9fd-407b-9a3d-32301a08520a", + "sha256": "90103897eb4a1458f036c765cd14f29db0f1ff8098794a0b78091479fd5efba3", + "bytes": 1153, + 
"agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 2733, + "skills": [ + "algorithmic-art" + ], + "session_id": "9adb48c8-0744-47ee-a408-1f037c56d5aa", + "user_input_preview": "Please create an A5 Flyer to be digitally distributed, the Topic: Invite to professionals and companies to showcase their knowledge in an exhibition panel guided by a host. Audience is: Entrepreneurs", + "multi_turn": true, + "captured_at": "2026-05-28T14:52:28.756Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "dcdff3b3-cd39-4dcd-be83-372830a29639", + "sha256": "7a1faa6874d5569672e64ab60f09458892d0d3a36d3e363b58a02efa204f5f6d", + "bytes": 37600, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "succeeded", + "tool_count": 14, + "duration_ms": 648955, + "skills": [ + "default" + ], + "session_id": "106b1ee4-0f94-4e32-831d-9b14b720ceb3", + "user_input_preview": "[form answers — task-type] - What should I build?: Prototype - Who is this for?: patient booking clinic queues - Brand context: Pick a direction for me [value: pick_direction] - Roughly how much?: (sk", + "multi_turn": true, + "captured_at": "2026-05-28T16:11:25.840Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "df6ad21c-ffa7-4475-8b9b-2440bd4c77bd", + "sha256": "50e6277afa4af22cb67c37630b98b82f5607600030d70e890e8a01a2a278ab9a", + "bytes": 38385, + "agent": "opencode", + "model": null, + "outcome": "succeeded", + "tool_count": 17, + "duration_ms": 95505, + "skills": [ + "default" + ], + "session_id": "5bbdbb28-b89c-4eac-b31d-12b367e640bc", + "user_input_preview": "cuando estamos en la red social x (twitter), como su color es negro, no se ve bien en los botone, la barra de progreso y todo lo que debe acentuar esta red social, debemos manejar grices claros para e", + "multi_turn": false, + "captured_at": "2026-05-28T16:23:39.608Z", + "cli_version": null, + 
"protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e1db6716-f86f-4600-aec0-71c043430f1a", + "sha256": "2ec9a9409e965eb4bd098de1b391b93b21d46c35b70ebb7fd00799fd091469f5", + "bytes": 60314, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "succeeded", + "tool_count": 18, + "duration_ms": 330915, + "skills": [ + "default" + ], + "session_id": "72e59a1e-6354-4acb-88f8-e4eabae3dadb", + "user_input_preview": "工单列表的分大类 不是贴标签", + "multi_turn": true, + "captured_at": "2026-05-28T16:31:25.923Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e25c5699-8123-40d6-8580-495ecc72aed9", + "sha256": "7c2fad74b73c94499652c8fbe1d1cc6a5a9221a9b391e55f95266b6ef9d8bbff", + "bytes": 4182, + "agent": "claude", + "model": "sonnet", + "outcome": "failed", + "tool_count": 2, + "duration_ms": 6257, + "skills": [ + "default" + ], + "session_id": "85ac9b77-bd5c-4767-ae1f-7085c96b6baf", + "user_input_preview": "Fix the design-system package audit findings below. Treat every error and warning as blocking. 
Do not suppress the audit, delete evidence, or satisfy findings by only rewriting prose; update the real", + "multi_turn": true, + "captured_at": "2026-05-28T15:45:11.003Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e2992461-51df-477a-ab97-7e0ac35f4984", + "sha256": "5530848408e4a11b0c43600574efc8f8586b6ce791f2a503f18b70e4df8b9c3c", + "bytes": 2605, + "agent": "gemini", + "model": "gemini-3.1-pro-preview", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 456314, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "I want you to UI-Frontend Audit for meta-ads-smm-s2.html; Because i think The text, SVG icon, it's not cosistent", + "multi_turn": true, + "captured_at": "2026-05-28T14:35:58.096Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e452d373-535e-4e33-970e-1a99799e73af", + "sha256": "5ad965db2b680199a751c781b0d102580b8c8829dfe9b40d3a9096bb71c6df1a", + "bytes": 29899, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 151253, + "skills": [ + "agent-browser" + ], + "session_id": "65585191-6be2-40f4-ade7-bedc12657be6", + "user_input_preview": "fix the jumping between the vertical scroll state on 100% and its state when zoomed in", + "multi_turn": false, + "captured_at": "2026-05-28T16:27:17.194Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e54404b5-3260-464a-a062-5edc6ac79762", + "sha256": "3aba00a591599f22d3eb579112c48ad7cb22d8c57fb2ffb7aa798e606b077702", + "bytes": 10213, + "agent": "codex", + "model": null, + "outcome": "failed", + "tool_count": 5, + "duration_ms": 31233, + "skills": [ + "default" + ], + "session_id": "976333ac-966e-49bc-a5e7-d6c903bd1a01", + "user_input_preview": "[form answers — task-type] - What should I build?: Prototype - Who is 
this for?: (skipped) - Brand context: Pick a direction for me [value: pick_direction] - Roughly how much?: (skipped) - Target plat", + "multi_turn": true, + "captured_at": "2026-05-28T16:25:10.275Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e5678ca3-d40a-4915-a7e4-b9c000e396cc", + "sha256": "be4ab42eb4cff134288e5527537547f16e9e06936647cc3c2833285b056a94c1", + "bytes": 11103, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 6, + "duration_ms": 29373, + "skills": [ + "default" + ], + "session_id": "364e2ac1-815c-4dcb-9412-24d260631481", + "user_input_preview": "去掉最后一页的核心平台、执行平台 实现企业知识资产化、专家经验数字化、研发能力平台化修改为:知识数字化、经验技能化、能力资产化", + "multi_turn": true, + "captured_at": "2026-05-28T15:48:25.111Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e74397e3-03f5-4d36-9ae1-e21c80f53b11", + "sha256": "f4fb438a9e56bec53be252943bde229e7595cca54a654f7aadf5ed89e900eabc", + "bytes": 588, + "agent": "gemini", + "model": "default", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 631658, + "skills": [ + "ad-creative" + ], + "session_id": "3ea05316-0c9d-47d2-9ab0-f4b2cb2b15da", + "user_input_preview": "There is text behind the button, fix it", + "multi_turn": true, + "captured_at": "2026-05-26T13:57:04.076Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e7b16771-af47-41d8-9521-209d1e915c41", + "sha256": "f17a6e9a4c87b68f01d741ab2fb50381f5e02cf62015a3c2a51386a000eb88a5", + "bytes": 31223, + "agent": "opencode", + "model": "opencode/deepseek-v4-flash-free", + "outcome": "succeeded", + "tool_count": 8, + "duration_ms": 96506, + "skills": [ + "default" + ], + "session_id": "efd35882-da45-49d0-b730-db507f7bdca6", + "user_input_preview": "Arregla que funcione bien el deck.html ya que si le doy a la siguiente slid no funciona y se queda en la primera", + 
"multi_turn": true, + "captured_at": "2026-05-28T16:32:24.169Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "e9f67500-718e-4e45-a56f-217239bb45a7", + "sha256": "b705dc3d81dabecd9e30c865ffe19f0a3bbf2ab6ed8ff886aa8be4109be012fd", + "bytes": 41706, + "agent": "opencode", + "model": null, + "outcome": "succeeded", + "tool_count": 17, + "duration_ms": 194367, + "skills": [ + "default" + ], + "session_id": "5a68949a-e449-47ce-8e39-49ce1e8f7342", + "user_input_preview": "quiero una imagen de fondo de un sujeto pensativo, fotorealistica real.. Usa el modelo Leonardo para generar esa imagen. En lugar de \"Frase Espejo\" coloca \"Reflexiona\"", + "multi_turn": true, + "captured_at": "2026-05-28T16:26:54.772Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "eacd9639-086e-44e5-b3ad-f62b45d55460", + "sha256": "70e99a0e810b2bf54b1f38b3f13c97dd8210422fb76143eeec3f37bb5e7308e2", + "bytes": 51367, + "agent": "opencode", + "model": "deepseek/deepseek-v4-pro", + "outcome": "succeeded", + "tool_count": 16, + "duration_ms": 130617, + "skills": [ + "agent-browser" + ], + "session_id": "50d03b79-796f-41b4-a729-df42d8de18b5", + "user_input_preview": "CORRIGIR NO MOBILE E TABLET AS INFORMAÇÕES ABAIXO: > Sessão \"Isso faz sentido\" - os retângulos onde tem \"rotina corrida\" e etc, estão com alguns cantos quadrados e outros arredondados. 
Gostaria que f", + "multi_turn": false, + "captured_at": "2026-05-28T16:23:12.460Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "ec4b14f2-a96a-4646-be05-fd955120db8f", + "sha256": "220bafd4f669972779a94a7100c36593a17f64e802f7fe781f36803fce64e2de", + "bytes": 34902, + "agent": "codex", + "model": null, + "outcome": "succeeded", + "tool_count": 9, + "duration_ms": 62763, + "skills": [ + "agent-browser" + ], + "session_id": "899d9e9f-0cb6-4429-b975-450f61400d7e", + "user_input_preview": "这里字体太大了", + "multi_turn": true, + "captured_at": "2026-05-28T16:31:27.016Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "eda6f6d4-4c21-4e6d-9caf-84b0c37dc027", + "sha256": "0bf4098aeb9111f723250f394ca4d688b6dc3628d0d35ea35bbe6b5ef6aee8d9", + "bytes": 3008, + "agent": "claude", + "model": null, + "outcome": "failed", + "tool_count": 0, + "duration_ms": 2080, + "skills": [ + "default" + ], + "session_id": "6157971e-bc71-44e3-be76-8287fdcc3c3d", + "user_input_preview": "# 📱 TASKORA — COMPLETE OPEN DESIGN UI MASTER DOCUMENT # 🎯 MAIN DESIGN GOAL Design style must feel like: ✅ Smart productivity workspace ✅ Interactive timeline dashboard ✅ Floating modern utility s", + "multi_turn": false, + "captured_at": "2026-05-28T18:26:15.786Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "eff7d1c7-278a-4e9c-b360-e12a33872bc8", + "sha256": "e092342c0ef29dfe1c254afab8f2c663a383ea9b30d956d912ba5274f3236c38", + "bytes": 14447, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 12, + "duration_ms": 142314, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "Export index.html as a pdf", + "multi_turn": true, + "captured_at": "2026-05-28T15:27:41.692Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": 
null + }, + { + "trace_id": "f154b511-9dcf-4d7c-8e21-1da25e50ea23", + "sha256": "1a1916e6e99cabe3ee78acd01c45349b1c7ea7556ad1f44e68516969c65e76bf", + "bytes": 3360, + "agent": "cursor-agent", + "model": "composer-2-fast", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 137789, + "skills": [ + "algorithmic-art" + ], + "session_id": "8c252113-3802-4020-bb00-171c1576ebd8", + "user_input_preview": "Can you also make it possible for us to move the 3D map around, so we can manually navigate to different places, zoom, etc?", + "multi_turn": true, + "captured_at": "2026-05-28T16:01:14.642Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "f315889c-9a6b-4cd2-b986-ec41ad038466", + "sha256": "4d48cd4ba7bfaea6e309704e12feecebac8fe5d71c359eac33f64b3bdc3659ff", + "bytes": 98246, + "agent": "codex", + "model": "gpt-5.5", + "outcome": "succeeded", + "tool_count": 45, + "duration_ms": 783079, + "skills": [ + "default" + ], + "session_id": "167f41ae-5d12-4107-a6a7-0a09f394927b", + "user_input_preview": "[form answers — discovery] - Worksite visuals: Use structured placeholders only - Speaker notes depth: Concise executive notes - Slide density: Executive concise", + "multi_turn": false, + "captured_at": "2026-05-28T16:12:00.281Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "f545ebf8-9ceb-4ca1-990d-c1c88b59d2b7", + "sha256": "eeca917cfb45c2ef159467a5b84d6356990fd0f1a89e123e043a7504e4167503", + "bytes": 15748, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 4, + "duration_ms": 53938, + "skills": [ + "ad-creative" + ], + "session_id": "b54e56d6-b1e0-4e24-b713-c063de5e1a14", + "user_input_preview": "go for one 9s looping motion variant of Stories 9:16-A", + "multi_turn": true, + "captured_at": "2026-05-28T15:53:48.507Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + 
"trace_id": "f8d32699-a348-4ffe-af73-11f59cb42b2a", + "sha256": "994c45eaf19d854ade9dd273ae041951e642eebcf6fe476afd26d0cfcbe7fd06", + "bytes": 58789, + "agent": "opencode", + "model": null, + "outcome": "succeeded", + "tool_count": 24, + "duration_ms": 239460, + "skills": [ + "default" + ], + "session_id": "5a68949a-e449-47ce-8e39-49ce1e8f7342", + "user_input_preview": "contenido para columna 3. **5. Post estático Frase espejo con imagen de sujeto de fondo pensativo** Texto sobre fondo: \"Ya viste a otros vender cursos. Tú llevas meses pensándolo.\" Pie: La diferencia", + "multi_turn": true, + "captured_at": "2026-05-28T16:21:19.096Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "f957c27c-0d66-468c-8236-bb6a2c96af12", + "sha256": "6482a9854eed845da61649ee5a948624f76d70385b6e9452d862bec04d34f8b7", + "bytes": 2583, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 56161, + "skills": [ + "ad-creative" + ], + "session_id": "8fbb6ad4-4c3d-4745-956c-502b09af5e2f", + "user_input_preview": "Quiero que revises la página web de referencia https://www.seikosushi.cl/pedir y diseñes una nueva página web completa para una marca de sushi, usando los archivos e imágenes que te voy a entregar en", + "multi_turn": true, + "captured_at": "2026-05-27T21:50:25.629Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "fa8d8b16-802e-4006-b19f-59b028996356", + "sha256": "37b0ba643791918b5f7c38c0c12303bcb3d01f59c7055d85c2e7d069da7b8507", + "bytes": 4410, + "agent": "claude", + "model": null, + "outcome": "succeeded", + "tool_count": 4, + "duration_ms": 14566, + "skills": [ + "default" + ], + "session_id": "a302a440-632b-4abb-8140-258ceff58ac0", + "user_input_preview": "Красота. 
Коммить", + "multi_turn": false, + "captured_at": "2026-05-28T15:45:21.149Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + }, + { + "trace_id": "ff0f0c68-763c-4dfb-9b3d-eae3235f4acd", + "sha256": "ead40a44eceaa9ab28346c7a3f2152aff23ec1178d2c0561da1a88da039c212f", + "bytes": 2438, + "agent": "opencode", + "model": "opencode-go/qwen3.7-max", + "outcome": "succeeded", + "tool_count": 0, + "duration_ms": 15272, + "skills": [ + "algorithmic-art" + ], + "session_id": "6f4bc605-2293-47f4-babe-3248a051fab1", + "user_input_preview": "Create a rubics cube @algorithmic-art", + "multi_turn": true, + "captured_at": "2026-05-28T15:57:43.230Z", + "cli_version": null, + "protocol_version": null, + "anonymization_version": null + } + ], + "total": 179, + "total_bytes": 4200634, + "histograms": { + "by_agent": { + "codex": 38, + "claude": 57, + "gemini": 25, + "opencode": 41, + "cursor-agent": 11, + "qwen": 2, + "copilot": 2, + "deepseek": 2, + "antigravity": 1 + }, + "by_outcome": { + "succeeded": 144, + "failed": 35 + }, + "by_skill": { + "default": 71, + "agent-browser": 22, + "ad-creative": 50, + "algorithmic-art": 30, + "magazine-web-ppt": 1, + "brainstorming": 1, + "penpot-flutter-design-source": 1, + "video-hyperframes": 2, + "data-report": 1 + } + }, + "sessions_with_multi_turn": 35 +} \ No newline at end of file diff --git a/mocks/mock-agent.mjs b/mocks/mock-agent.mjs new file mode 100755 index 000000000..cd6a3800d --- /dev/null +++ b/mocks/mock-agent.mjs @@ -0,0 +1,149 @@ +#!/usr/bin/env node +/** + * mock-agent.mjs — pretends to be one of OD's supported agent CLIs + * (claude / opencode / codex / deepseek / qwen / grok) by streaming a + * pre-recorded session in that CLI's native stdout protocol. Zero LLM + * tokens. + * + * Usage (driven by the wrappers in bin/, not directly): + * ./mock-agent.mjs --as opencode [--no-delay] [--report-file ] + * + * Recording selection — see lib/recording-picker.mjs. 
import { pickRecording, readRecording } from './lib/recording-picker.mjs';
import { renderAsOpencode } from './lib/format-opencode.mjs';
import { renderAsCodex } from './lib/format-codex.mjs';
import { renderAsClaude } from './lib/format-claude.mjs';
import { renderAsGemini } from './lib/format-gemini.mjs';
import { renderAsCursorAgent } from './lib/format-cursor-agent.mjs';
import { renderAsPlain } from './lib/format-plain.mjs';
import { runAcpServer } from './lib/format-acp.mjs';
import { runVelaAcpServer } from './lib/format-vela.mjs';
import { runVelaLogin, runVelaModels } from './lib/vela-subcommands.mjs';

/** Upper bound on how long we wait for a piped prompt when the parent never closes stdin. */
const STDIN_TIMEOUT_MS = 1500;

/** ACP agents that share the generic JSON-RPC-over-stdio server. */
const GENERIC_ACP_AGENTS = ['devin', 'hermes', 'kilo', 'kimi', 'kiro', 'vibe'];

/**
 * Agents that read JSON-RPC messages off stdin one line at a time. For these
 * the bulk-prompt buffering in readStdinIfPiped() must NOT run, because it
 * would swallow the protocol stream.
 */
const ACP_AGENTS = new Set([...GENERIC_ACP_AGENTS, 'vela']);

/** Agent name → function that replays a recording in that CLI's stdout protocol. */
const RENDERERS = new Map([
  ['opencode', renderAsOpencode],
  ['codex', renderAsCodex],
  ['claude', renderAsClaude],
  ['gemini', renderAsGemini],
  ['cursor-agent', renderAsCursorAgent],
  ['deepseek', renderAsPlain],
  ['qwen', renderAsPlain],
  ['grok', renderAsPlain],
  ['plain', renderAsPlain],
  // ACP family — JSON-RPC server over stdio.
  ...GENERIC_ACP_AGENTS.map(name => [name, runAcpServer]),
  // AMR (vela CLI) — ACP with vela-specific protocol extensions
  // (agentCapabilities + models block + strict set_model gate).
  ['vela', runVelaAcpServer],
]);

/**
 * Parse the wrapper-supplied argv.
 *
 * Recognised: `--as|--agent <name>`, `--no-delay`, `--report-file <path>`
 * (each value flag also accepts the `--flag=value` spelling). Every other
 * dash-prefixed token is skipped so real-CLI flags (model, permission, …)
 * passed by OD don't break the mock. Remaining tokens are collected as
 * positionals, which the vela subcommand dispatch reads.
 *
 * Environment: `OD_MOCKS_NO_DELAY=1` forces `noDelay`; `REPORT_FILE` is used
 * when no `--report-file` was given.
 *
 * @param {string[]} argv arguments after the script path
 * @returns {{as: string|null, noDelay: boolean, reportFile: string|null, positionals: string[]}}
 */
function parseArgs(argv) {
  const opts = { as: null, noDelay: false, reportFile: null, positionals: [] };
  for (let i = 0; i < argv.length; i++) {
    const arg = argv[i];
    if (arg === '--as' || arg === '--agent') { opts.as = argv[++i] ?? null; continue; }
    if (arg.startsWith('--as=')) { opts.as = arg.slice('--as='.length) || null; continue; }
    if (arg.startsWith('--agent=')) { opts.as = arg.slice('--agent='.length) || null; continue; }
    if (arg === '--no-delay') { opts.noDelay = true; continue; }
    if (arg === '--report-file') { opts.reportFile = argv[++i] ?? null; continue; }
    if (arg.startsWith('--report-file=')) {
      opts.reportFile = arg.slice('--report-file='.length) || null;
      continue;
    }
    if (arg.startsWith('-')) continue; // Unknown flag — silently skip (model/permission flags etc.)
    // Anything left is a positional — used by vela subcommand dispatch.
    opts.positionals.push(arg);
  }
  if (process.env.OD_MOCKS_NO_DELAY === '1') opts.noDelay = true;
  // Fall through to REPORT_FILE env when --report-file wasn't supplied.
  // Some harnesses (e.g. the agent-pr-explore orchestrator) set
  // REPORT_FILE as env but expect the agent to write there
  // autonomously — real opencode/claude do via their Write tool, but
  // the mock needs to project the recording's final assistant text to
  // that path so the harness sees a report.
  if (!opts.reportFile && process.env.REPORT_FILE) {
    opts.reportFile = process.env.REPORT_FILE;
  }
  return opts;
}

/**
 * Read the whole prompt from stdin when it is piped; return '' on a TTY.
 *
 * Resolves on `end`, on a stream error, or after STDIN_TIMEOUT_MS —
 * whichever comes first — with whatever was buffered so far. Once settled the
 * timer is cleared and stdin is paused, so neither a pending timer nor a
 * never-closed pipe keeps the process alive after the replay has finished.
 *
 * @returns {Promise<string>}
 */
async function readStdinIfPiped() {
  if (process.stdin.isTTY) return '';
  return new Promise(resolve => {
    const stdin = process.stdin;
    let buffered = '';
    let settled = false;
    let timer = null;
    const onData = chunk => { buffered += chunk; };
    const finish = () => {
      if (settled) return;
      settled = true;
      clearTimeout(timer);
      stdin.off('data', onData);
      stdin.off('end', finish);
      // The 'error' listener is deliberately left attached: a late stream
      // error must not surface as an unhandled 'error' event.
      stdin.pause();
      resolve(buffered);
    };
    stdin.setEncoding('utf-8');
    stdin.on('data', onData);
    stdin.on('end', finish);
    stdin.on('error', finish);
    // Safety timeout in case the parent never closes stdin (PTY).
    timer = setTimeout(finish, STDIN_TIMEOUT_MS);
  });
}

/**
 * Entry point: resolve the agent flavor, pick a recording, replay it.
 *
 * Exit codes: 2 = missing/unknown `--as`, 3 = no recordings on disk.
 */
async function main() {
  const opts = parseArgs(process.argv.slice(2));
  if (!opts.as) {
    process.stderr.write(
      'mock-agent: --as required\n' +
      '  supported: opencode | claude | codex | gemini | cursor-agent |\n' +
      '             deepseek | qwen | grok | plain |\n' +
      '             devin | hermes | kilo | kimi | kiro | vibe (ACP)\n' +
      '             vela (AMR — vela CLI)\n',
    );
    process.exit(2);
  }

  // Reject unknown agents up front, before blocking on stdin or touching
  // the recording corpus.
  const render = RENDERERS.get(opts.as);
  if (!render) {
    process.stderr.write(`mock-agent: unknown agent "${opts.as}"\n`);
    process.exit(2);
  }

  // `vela` dispatches by the first positional arg passed by OD (login /
  // models / agent). Subcommands run BEFORE recording selection because
  // they don't use trace data at all.
  if (opts.as === 'vela') {
    const cmd = (opts.positionals[0] || '').trim();
    if (cmd === 'login') return runVelaLogin();
    if (cmd === 'models') return runVelaModels();
    // Default: `agent run --runtime opencode` — fall through to the ACP
    // server below with the vela-flavored protocol.
  }

  // ACP agents read JSON-RPC messages off stdin one line at a time, so the
  // bulk-prompt buffering doesn't apply — pickRecording sees no prompt for
  // hash-mode (use OD_MOCKS_TRACE or _POOL instead).
  const prompt = ACP_AGENTS.has(opts.as) ? '' : await readStdinIfPiped();
  const picked = await pickRecording({ prompt });
  if (!picked) {
    process.stderr.write(
      'mock-agent: no recordings on disk yet.\n' +
      'The recording corpus is hosted on Cloudflare R2 (see mocks/manifest.json)\n' +
      'and is fetched on demand. Run:\n' +
      '\n' +
      '  bash mocks/scripts/fetch-recordings.sh                  # all 179 (~30s, 4.5MB)\n' +
      '  bash mocks/scripts/fetch-recordings.sh --agent claude   # subset\n' +
      '\n' +
      'Or set OD_MOCKS_RECORDINGS_DIR if you stashed them elsewhere.\n',
    );
    process.exit(3);
  }

  process.stderr.write(
    `[mock-${opts.as}] picked ${picked.traceId.slice(0, 8)}… via ${picked.method}` +
    (picked.pool ? ` (pool="${picked.pool}")` : '') +
    '\n',
  );

  const events = await readRecording(picked.path);
  await render(events, { noDelay: opts.noDelay, reportFile: opts.reportFile });
}

main().catch(err => {
  // Rejections are not guaranteed to be Error instances.
  const reason = err instanceof Error ? err.message : String(err);
  process.stderr.write(`mock-agent: ${reason}\n`);
  process.exit(1);
});
#!/usr/bin/env bash
# Real-CLI contract check — spot-check that the actual agent CLI's
# stream protocol still matches what the mock formatters emit. Catches
# drift like:
#   - real CLI adds a new event `type`
#   - real CLI changes a field name (e.g. `sessionID` → `sessionId`)
#   - real CLI's usage object gains/loses a sub-key
#
# When mocks drift toward "satisfy the current OD parser" instead of
# "mimic the actual CLI protocol", the OD-level smoke tests stay green
# but real-world behavior diverges. Periodic runs of this script (manual
# or scheduled in CI on a real-CLI-available runner) surface that drift
# before it becomes a real-PR debugging session.
#
#   bash mocks/scripts/contract-check.sh <agent>
#   bash mocks/scripts/contract-check.sh claude
#   bash mocks/scripts/contract-check.sh opencode
#
# Prereqs: the real agent CLI must be installed AND authenticated, and
# `jq` must be on PATH. This script DOES make a live LLM call (couple of
# cents of tokens) so it's not free.
#
# Output: side-by-side top-level event `type` distribution from real
# vs mock for the same prompt. A maintainer eyeballs the diff. Future
# work (see docs/MOCKS-CONTRACT-CHECK.md) is to lift this into a
# stricter structural compare.

set -euo pipefail

AGENT="${1:-}"
if [ -z "$AGENT" ]; then
  echo "usage: $0 <agent>" >&2
  echo "  supported: claude | opencode | codex" >&2
  exit 2
fi
case "$AGENT" in
  claude|codex|opencode) ;;
  *) echo "✗ unsupported agent for contract check: $AGENT" >&2; exit 2 ;;
esac

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
MOCKS_DIR="$(cd "$HERE/.." && pwd -P)"

# Fixed deterministic prompt — small, no creative variability, costs
# pennies. Picked to be a realistic OD-shaped task: 1-2 tool calls.
PROMPT='List the entries of the current working directory and tell me how many JSON files are present. Reply with just the count, like "N JSON files".'

# Use the files mktemp actually creates. Appending a suffix to mktemp's
# output would leave the created file behind and write to a name that was
# never reserved.
real_out="$(mktemp "${TMPDIR:-/tmp}/contract-real.XXXXXX")"
mock_out="$(mktemp "${TMPDIR:-/tmp}/contract-mock.XXXXXX")"
trap 'rm -f "$real_out" "$mock_out"' EXIT

# Print $PATH with the mocks/bin overlay removed. Entries are compared as
# exact strings (ignoring one trailing slash), not as a regex, so
# unrelated directories that merely contain the path are kept.
path_without_mocks() {
  local -a parts=()
  local entry out=""
  IFS=':' read -r -a parts <<<"$PATH"
  for entry in "${parts[@]}"; do
    if [ "${entry%/}" = "$MOCKS_DIR/bin" ]; then continue; fi
    out="${out:+$out:}$entry"
  done
  printf '%s' "$out"
}

# Feed $PROMPT on stdin to the given binary using the agent's
# non-interactive entry verb. Callers redirect stdout/stderr.
run_agent() {
  local bin="$1"
  case "$AGENT" in
    claude)   printf '%s' "$PROMPT" | "$bin" -p --output-format=stream-json --verbose ;;
    codex)    printf '%s' "$PROMPT" | "$bin" exec ;;
    opencode) printf '%s' "$PROMPT" | "$bin" run ;;
  esac
}

# Resolve the real CLI binary. We deliberately ignore mocks/bin/ PATH
# overlays.
real_bin="$(PATH="$(path_without_mocks)" command -v "$AGENT" || true)"
if [ -z "$real_bin" ]; then
  echo "✗ real '$AGENT' CLI not on PATH. Install + login, then re-run." >&2
  exit 1
fi
echo "real CLI: $real_bin"
echo "prompt:   $PROMPT"
echo

# 1. Real CLI
echo "→ invoking real $AGENT…"
run_agent "$real_bin" >"$real_out" 2>&1 || true

# 2. Mock CLI — same prompt, PATH-overlayed to the mock bin.
#    `bash -c` here would lose $PROMPT (parent shell var, not exported)
#    and silently send an empty string to the mock — defeating the
#    "same input on both sides" property the rest of the script relies on.
#    A subshell scopes the PATH override locally, no var-passing dance.
echo "→ invoking mock $AGENT…"
(
  export PATH="$MOCKS_DIR/bin:$PATH"
  export OD_MOCKS_NO_DELAY=1
  run_agent "$AGENT" >"$mock_out" 2>&1
) || true

# 3. Compare top-level event `type` distributions (skip content).
#    The captures mix stderr into stdout (2>&1), so parse line by line and
#    ignore anything that is not a JSON object instead of stopping at the
#    first non-JSON line.
summarize() {
  jq -rR 'fromjson? | objects | .type // empty' "$1" 2>/dev/null | sort | uniq -c | sort -rn || true
}
real_summary=$(summarize "$real_out")
mock_summary=$(summarize "$mock_out")

echo
echo "real:"
printf '%s\n' "${real_summary:-  (no parseable JSON events)}" | sed 's/^/  /'
echo
echo "mock:"
printf '%s\n' "${mock_summary:-  (no parseable JSON events)}" | sed 's/^/  /'
echo
echo "raw outputs kept at:"
echo "  real: $real_out"
echo "  mock: $mock_out"
# Backticks are escaped so the hint is PRINTED; unescaped they would be a
# command substitution that runs the diff and splices its output in.
echo "(diff manually — \`diff <(jq -r .type $real_out|sort -u) <(jq -r .type $mock_out|sort -u)\`)"
trap - EXIT  # leave the tmpfiles for the maintainer to inspect
#!/usr/bin/env bash
# Fetch the recording corpus referenced by mocks/manifest.json from
# Cloudflare R2 into mocks/recordings/. Skips files already on disk
# whose sha256 matches the manifest. Verifies every download.
#
# Usage:
#   bash mocks/scripts/fetch-recordings.sh                       # fetch all
#   bash mocks/scripts/fetch-recordings.sh --agent claude        # fetch claude only
#   bash mocks/scripts/fetch-recordings.sh --outcome failed      # fetch failed only
#   bash mocks/scripts/fetch-recordings.sh --skill agent-browser
#   bash mocks/scripts/fetch-recordings.sh --concurrency 16
#   bash mocks/scripts/fetch-recordings.sh --force               # re-download all
#   bash mocks/scripts/fetch-recordings.sh --cache-dir <dir>     # override cache location
#
# Default cache: mocks/recordings/. Override with OD_MOCKS_CACHE_DIR env
# or --cache-dir flag — useful for sharing across multiple OD checkouts.

set -euo pipefail

HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
MOCKS_DIR="$(cd "$HERE/.." && pwd -P)"
MANIFEST="$MOCKS_DIR/manifest.json"

FILTER_AGENT=""
FILTER_OUTCOME=""
FILTER_SKILL=""
CONCURRENCY=8
FORCE=0
CACHE_DIR="${OD_MOCKS_CACHE_DIR:-$MOCKS_DIR/recordings}"

# `${2:?…}` turns a missing flag value into a readable error instead of
# an "unbound variable" abort from `set -u`.
while [[ $# -gt 0 ]]; do
  case "$1" in
    --agent)       FILTER_AGENT="${2:?--agent requires a value}"; shift 2 ;;
    --outcome)     FILTER_OUTCOME="${2:?--outcome requires a value}"; shift 2 ;;
    --skill)       FILTER_SKILL="${2:?--skill requires a value}"; shift 2 ;;
    --concurrency) CONCURRENCY="${2:?--concurrency requires a value}"; shift 2 ;;
    --cache-dir)   CACHE_DIR="${2:?--cache-dir requires a value}"; shift 2 ;;
    --force)       FORCE=1; shift ;;
    -h|--help)
      # Prints the header comment above (file lines 2-17) without the "# ".
      sed -n '2,17p' "$0" | sed 's/^# //; s/^#//'; exit 0 ;;
    *) echo "unknown flag: $1" >&2; exit 2 ;;
  esac
done

case "$CONCURRENCY" in
  ''|*[!0-9]*|0)
    echo "✗ --concurrency must be a positive integer (got: $CONCURRENCY)" >&2
    exit 2 ;;
esac

if [ ! -f "$MANIFEST" ]; then
  echo "✗ manifest not found at $MANIFEST" >&2
  exit 1
fi

mkdir -p "$CACHE_DIR"

# Use node to walk the manifest — sturdier than shell JSON parsing.
PUBLIC_URL=$(node -e '
const m = JSON.parse(require("fs").readFileSync(process.argv[1],"utf-8"));
process.stdout.write(m.storage.public_url_base + "/" + m.storage.object_prefix);
' "$MANIFEST")

# Select entries matching filters, write one TSV row per entry:
#   <trace_id>\t<sha256>\t<bytes>
ENTRIES_TSV=$(node -e '
const m = JSON.parse(require("fs").readFileSync(process.argv[1],"utf-8"));
const fa = process.argv[2], fo = process.argv[3], fs = process.argv[4];
for (const e of m.entries) {
  if (fa && e.agent !== fa) continue;
  if (fo && e.outcome !== fo) continue;
  if (fs && !(e.skills || []).includes(fs)) continue;
  process.stdout.write(`${e.trace_id}\t${e.sha256}\t${e.bytes}\n`);
}
' "$MANIFEST" "$FILTER_AGENT" "$FILTER_OUTCOME" "$FILTER_SKILL")

# Empty-string check has to come BEFORE any line-counting — `printf '%s\n' ""`
# emits a single empty line, which `grep -c ""` / `wc -l` would count as 1
# and let a typo'd `--agent xyz` quietly succeed with zero downloads.
if [ -z "$ENTRIES_TSV" ]; then
  echo "no entries matched filter" >&2
  exit 0
fi
TOTAL=$(printf '%s\n' "$ENTRIES_TSV" | wc -l | tr -d ' ')

echo "Fetching up to $TOTAL recordings → $CACHE_DIR"
echo "  manifest:  $MANIFEST"
echo "  R2 prefix: $PUBLIC_URL"
[ -n "$FILTER_AGENT" ]   && echo "  filter:    agent=$FILTER_AGENT"
[ -n "$FILTER_OUTCOME" ] && echo "  filter:    outcome=$FILTER_OUTCOME"
[ -n "$FILTER_SKILL" ]   && echo "  filter:    skill=$FILTER_SKILL"
[ "$FORCE" -eq 1 ]       && echo "  --force:   re-downloading all matched"
echo

# Print the sha256 hex digest of a file. Uses `shasum` when present and
# falls back to coreutils `sha256sum` on hosts that ship only that.
sha256_of() {
  if command -v shasum >/dev/null 2>&1; then
    shasum -a 256 "$1" 2>/dev/null | awk '{print $1}'
  else
    sha256sum "$1" 2>/dev/null | awk '{print $1}'
  fi
}

# Function called by xargs — must be exported. Writes one of:
#   ✓ <id>            (newly fetched)
#   • <id>            (skipped — sha256 already matches)
#   ✗ <id> (reason)   (failed — sha256 mismatch or download error)
fetch_one() {
  local id="$1" sha="$2" bytes="$3"
  local dest="$CACHE_DIR/$id.jsonl"
  if [ "$FORCE" -ne 1 ] && [ -f "$dest" ]; then
    local existing
    existing=$(sha256_of "$dest")
    if [ "$existing" = "$sha" ]; then
      echo "• $id"
      return 0
    fi
  fi
  local url="${PUBLIC_URL}${id}.jsonl"
  # Download to a sibling temp name and rename only after the digest
  # matches, so a partial or corrupt file never looks like a valid one.
  if ! curl -sf -o "$dest.tmp" "$url"; then
    echo "✗ $id (download failed)"
    rm -f "$dest.tmp"
    return 1
  fi
  local got
  got=$(sha256_of "$dest.tmp")
  if [ "$got" != "$sha" ]; then
    echo "✗ $id (sha256 mismatch: got $got expected $sha)"
    rm -f "$dest.tmp"
    return 1
  fi
  mv "$dest.tmp" "$dest"
  echo "✓ $id"
}

export PUBLIC_URL CACHE_DIR FORCE
export -f fetch_one sha256_of

# Per-run log file: a fixed /tmp name collides between concurrent runs
# and is a predictable path in a world-writable directory.
PROGRESS_LOG="$(mktemp "${TMPDIR:-/tmp}/od-mocks-fetch-progress.XXXXXX")"

# xargs exits non-zero when any fetch_one fails. With `set -e` and
# `pipefail` that would abort right here, before the summary and the
# failed-entry list below are printed — so tolerate it and derive the
# outcome from the log instead.
printf '%s\n' "$ENTRIES_TSV" \
  | xargs -P "$CONCURRENCY" -L 1 bash -c 'fetch_one "$1" "$2" "$3"' _ \
  > "$PROGRESS_LOG" 2>&1 || true

new=$(grep -c "^✓" "$PROGRESS_LOG" || true)
skip=$(grep -c "^•" "$PROGRESS_LOG" || true)
fail=$(grep -c "^✗" "$PROGRESS_LOG" || true)

echo "  ✓ fetched: $new"
echo "  • cached:  $skip"
if [ "$fail" -gt 0 ]; then
  echo "  ✗ failed:  $fail"
  echo
  grep "^✗" "$PROGRESS_LOG" | head -5
  echo "  …(full log $PROGRESS_LOG)"
  exit 1
fi
rm -f "$PROGRESS_LOG"

# Symlink (or copy) into mocks/recordings/ when cache lives elsewhere so
# the mock-agent recording-picker keeps working without env overrides.
if [ "$CACHE_DIR" != "$MOCKS_DIR/recordings" ]; then
  mkdir -p "$MOCKS_DIR/recordings"
  for f in "$CACHE_DIR"/*.jsonl; do
    [ -e "$f" ] || continue
    bn=$(basename "$f")
    if [ ! -e "$MOCKS_DIR/recordings/$bn" ]; then
      ln -sf "$f" "$MOCKS_DIR/recordings/$bn"
    fi
  done
  # Also link the manifest so picker/index-aware tooling sees it.
  ln -sf "$MANIFEST" "$MOCKS_DIR/recordings/index.json" 2>/dev/null || true
fi

echo
echo "✅ ready: $MOCKS_DIR/recordings/"
// Manifest math — shared between the local `add-recording.sh` preview
// step and the GitHub Action that actually does R2 uploads.
//
// Concentrated here so a maintainer who eyeballs add-recording's
// preview output sees the SAME entry shape the workflow will commit
// post-merge. No drift.

import { readFileSync as readSync, writeFileSync } from 'node:fs';
import { createHash } from 'node:crypto';
import { basename } from 'node:path';

/** Maximum length, in UTF-16 code units, of `user_input_preview`. */
const PREVIEW_MAX_CHARS = 200;

/**
 * Build the short single-line preview stored in the manifest: cut to
 * PREVIEW_MAX_CHARS, collapse whitespace runs to one space, trim.
 * A cut that lands in the middle of a surrogate pair would leave a lone
 * high surrogate at the end; that half-character is dropped.
 */
function buildPreview(userInput) {
  return String(userInput)
    .slice(0, PREVIEW_MAX_CHARS)
    .replace(/[\uD800-\uDBFF]$/, '')
    .replace(/\s+/g, ' ')
    .trim();
}

/** Increment `counter[key]`; keys are stringified like object property keys. */
function bump(counter, key) {
  const k = String(key);
  counter.set(k, (counter.get(k) ?? 0) + 1);
}

/**
 * Parse a recording's first JSONL line (the `meta` event) and return
 * the shape we store in the manifest.
 *
 * @param {string} jsonlPath path to `<trace-id>.jsonl`; the basename minus
 *   the extension becomes `trace_id`
 * @returns {object} manifest entry (`multi_turn` is a placeholder that
 *   upsertEntry() recomputes)
 * @throws {Error} when the first line is not valid JSON, is not a JSON
 *   object, or its `type` is not "meta"; also propagates read errors
 */
export function inspectRecording(jsonlPath) {
  const buf = readSync(jsonlPath);
  const sha256 = createHash('sha256').update(buf).digest('hex');
  const firstLine = buf.toString('utf-8').split('\n', 1)[0];
  let meta;
  try {
    meta = JSON.parse(firstLine);
  } catch (err) {
    const reason = err instanceof Error ? err.message : String(err);
    throw new Error(`first line of ${jsonlPath} is not valid JSON: ${reason}`);
  }
  // `null`, numbers, strings and arrays are valid JSON but not a meta event;
  // reject them here rather than failing on a property access below.
  if (meta === null || typeof meta !== 'object' || Array.isArray(meta)) {
    throw new Error(`first line of ${jsonlPath} is not a JSON object — expected the "meta" event`);
  }
  if (meta.type !== 'meta') {
    throw new Error(`first line of ${jsonlPath} has type="${meta.type}" — expected "meta"`);
  }
  const traceId = basename(jsonlPath, '.jsonl');
  const tags = Array.isArray(meta.tags) ? meta.tags : [];
  const skills = tags
    .filter(t => typeof t === 'string' && t.startsWith('skill:'))
    .map(t => t.slice('skill:'.length));
  return {
    trace_id: traceId,
    sha256,
    bytes: buf.byteLength,
    agent: meta.agent ?? 'unknown',
    model: meta.model ?? null,
    outcome: meta.outcome ?? 'unknown',
    tool_count: meta.tool_call_count ?? 0,
    duration_ms: meta.duration_ms ?? 0,
    skills: skills.length > 0 ? skills : ['default'],
    session_id: meta.session_id ?? null,
    user_input_preview: meta.user_input ? buildPreview(meta.user_input) : null,
    multi_turn: false, // filled in by upsertEntry once sessions are grouped
    // ──── Provenance (fixture-trust signals; mrcfps review #3241) ────
    // Capture-time fields the harvester writes into the meta event.
    // Older recordings may not have them all — null is the explicit "we
    // don't know" so consumers can decide whether the fixture is still
    // trustworthy as the real CLIs evolve.
    captured_at: meta.timestamp ?? null, // ISO 8601 of the original session
    cli_version: meta.cli_version ?? null, // e.g. "claude-code 1.0.65" / "codex 0.40"
    protocol_version: meta.protocol_version ?? null,
    anonymization_version: meta.anonymization_version ?? null,
  };
}

/**
 * Insert / replace a manifest entry and rebuild all derived fields
 * (histograms, multi_turn flags, total_bytes, total). Returns the
 * mutated manifest object — caller writes it back if it wants to.
 *
 * Counting goes through Maps rather than plain objects, so agent, skill
 * or session ids that collide with Object.prototype members
 * ("__proto__", "constructor", …) are counted like any other key instead
 * of being dropped or throwing. Entries without a `skills` array or a
 * numeric `bytes` contribute nothing to the respective totals.
 *
 * @param {object} manifest manifest with an `entries` array; mutated in place
 * @param {object} newEntry entry as produced by inspectRecording()
 * @returns {object} the same `manifest` object
 */
export function upsertEntry(manifest, newEntry) {
  const idx = manifest.entries.findIndex(e => e.trace_id === newEntry.trace_id);
  if (idx >= 0) {
    manifest.entries[idx] = newEntry;
  } else {
    manifest.entries.push(newEntry);
  }

  // Rebuild derived fields from scratch.
  const byAgent = new Map();
  const byOutcome = new Map();
  const bySkill = new Map();
  const sessions = new Map(); // session id → trace ids recorded in it
  let totalBytes = 0;
  for (const e of manifest.entries) {
    bump(byAgent, e.agent);
    bump(byOutcome, e.outcome);
    for (const s of Array.isArray(e.skills) ? e.skills : []) bump(bySkill, s);
    if (e.session_id) {
      const key = String(e.session_id);
      const traces = sessions.get(key) ?? [];
      traces.push(e.trace_id);
      sessions.set(key, traces);
    }
    totalBytes += Number.isFinite(e.bytes) ? e.bytes : 0;
  }
  // A trace is multi-turn when its session holds at least two traces.
  for (const e of manifest.entries) {
    const turns = e.session_id ? (sessions.get(String(e.session_id))?.length ?? 0) : 0;
    e.multi_turn = turns >= 2;
  }

  manifest.entries.sort((a, b) => a.trace_id.localeCompare(b.trace_id));
  manifest.generated_at = new Date().toISOString();
  manifest.total = manifest.entries.length;
  manifest.total_bytes = totalBytes;
  manifest.histograms = {
    by_agent: Object.fromEntries(byAgent),
    by_outcome: Object.fromEntries(byOutcome),
    by_skill: Object.fromEntries(bySkill),
  };
  manifest.sessions_with_multi_turn = [...sessions.values()].filter(t => t.length >= 2).length;
  return manifest;
}

/** Read and parse the manifest JSON at `path`. */
export function readManifest(path) {
  return JSON.parse(readSync(path, 'utf-8'));
}

/** Serialize `manifest` as 2-space-indented JSON to `path` (no trailing newline). */
export function writeManifest(path, manifest) {
  writeFileSync(path, JSON.stringify(manifest, null, 2));
}
+ bash "$HERE/fetch-recordings.sh" >/dev/null + echo +fi + +export PATH="$MOCKS/bin:$PATH" +export OD_MOCKS_TRACE="$TRACE_ID" +export OD_MOCKS_NO_DELAY=1 + +failed=0 +pass() { printf ' \033[32m✓\033[0m %s\n' "$1"; } +fail() { printf ' \033[31m✗\033[0m %s\n' "$1"; failed=$((failed + 1)); } + +check_json_first_event() { + local agent="$1" expected_type="$2" + local first + first=$(echo "smoke" | "$agent" run 2>/dev/null | head -1 || true) + if [ -z "$first" ]; then fail "$agent: empty stdout"; return; fi + local got + got=$(printf '%s' "$first" | node -e 'let buf="";process.stdin.on("data",d=>buf+=d);process.stdin.on("end",()=>{try{console.log(JSON.parse(buf).type||"")}catch{console.log("INVALID")}})') + if [ "$got" = "$expected_type" ]; then + pass "$agent first event = $expected_type" + else + fail "$agent first event = $got (wanted $expected_type)" + fi +} + +echo "Smoke testing mock CLIs against trace $TRACE_ID" +echo + +# opencode / opencode-cli (primary OD-facing bin) → step_start +check_json_first_event opencode step_start +check_json_first_event opencode-cli step_start + +# codex → thread.started +check_json_first_event codex thread.started + +# claude → system / init +# (codex/claude have a different entry verb; using a uniform "first line type" check) +first=$(echo smoke | claude -p 2>/dev/null | head -1 || true) +if printf '%s' "$first" | grep -q '"type":"system"'; then + pass "claude first event = system" +else + fail "claude first event missing system shape: ${first:0:80}" +fi + +# gemini → init +check_json_first_event gemini init + +# cursor-agent → system + subtype:init +first=$(echo smoke | cursor-agent 2>/dev/null | head -1 || true) +if printf '%s' "$first" | grep -q '"type":"system"' && printf '%s' "$first" | grep -q '"subtype":"init"'; then + pass "cursor-agent first event = system+init" +else + fail "cursor-agent first event missing system/init shape: ${first:0:80}" +fi + +# Plain agents — first non-empty line should be from the report content. 
+for agent in deepseek qwen grok; do + out=$(echo smoke | "$agent" 2>/dev/null | head -1 || true) + if [ -n "$out" ]; then + pass "$agent emitted plain text (${#out} chars on first line)" + else + fail "$agent emitted nothing" + fi +done + +# vela subcommands — `vela models` (prints catalog) and `vela login` (writes +# ~/.amr/config.json). These exit immediately, no recording involved. +vela_models_out=$(vela models 2>/dev/null | wc -l | tr -d ' ') +if [ "$vela_models_out" -ge 10 ]; then + pass "vela models printed $vela_models_out catalog lines" +else + fail "vela models printed only $vela_models_out lines (expected ≥10)" +fi + +# Sandbox vela login into a temp HOME so we never touch the caller's real +# ~/.amr config (which holds the production vela login state for anyone +# using the real CLI). vela's login subcommand resolves ~/.amr from $HOME, +# so override just for this one invocation. +amr_sandbox="$(mktemp -d -t od-mocks-amr.XXXXXX)" +trap 'rm -rf "$amr_sandbox"' EXIT +if HOME="$amr_sandbox" FAKE_VELA_LOGIN_USER_EMAIL=smoke@od.local vela login >/dev/null 2>&1 \ + && [ -f "$amr_sandbox/.amr/config.json" ]; then + email=$(node -e "console.log(JSON.parse(require('fs').readFileSync('$amr_sandbox/.amr/config.json','utf-8')).profiles.prod.user.email)" 2>/dev/null || echo "") + if [ "$email" = "smoke@od.local" ]; then + pass "vela login wrote ~/.amr/config.json with profile.prod.user.email" + else + fail "vela login config.json missing expected email (got: $email)" + fi +else + fail "vela login did not produce ~/.amr/config.json" +fi + +# vela ACP roundtrip (strict set_model gate enforced). 
+# NOTE(review): the heredoc opener and the command it is piped into were
+# lost when this patch was extracted (only `cat </dev/null` survived).
+# `vela agent run` is reconstructed from the pass/fail messages below —
+# confirm the exact argv against the original script.
+# The heredoc delimiter is quoted so the JSON-RPC payload is sent verbatim.
+vela_acp_out=$(cat <<'EOF' | vela agent run 2>/dev/null
+{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}
+{"jsonrpc":"2.0","id":2,"method":"session/new","params":{"cwd":"/tmp"}}
+{"jsonrpc":"2.0","id":3,"method":"session/set_model","params":{"sessionId":"fake-vela-session-1","modelId":"deepseek-v3.2"}}
+{"jsonrpc":"2.0","id":4,"method":"session/prompt","params":{"sessionId":"fake-vela-session-1","prompt":[{"type":"text","text":"hi"}]}}
+EOF
+)
+# Every marker below must appear somewhere in the captured stdout. -F makes
+# grep treat the JSON fragments as literal text rather than as regexes.
+if printf '%s' "$vela_acp_out" | grep -qF '"agentCapabilities"' \
+  && printf '%s' "$vela_acp_out" | grep -qF '"availableModels"' \
+  && printf '%s' "$vela_acp_out" | grep -qF '"id":3,"result":{}' \
+  && printf '%s' "$vela_acp_out" | grep -qF '"sessionUpdate":"agent_message_chunk"' \
+  && printf '%s' "$vela_acp_out" | grep -qF '"id":4,"result":{"stopReason":'; then
+  pass "vela agent run ACP roundtrip (initialize+models, set_model accepted, prompt streamed)"
+else
+  fail "vela agent run ACP roundtrip incomplete"
+fi
+
+# vela strict set_model gate — skipping set_model must reject prompt.
+# Same reconstructed pipeline as above, minus the session/set_model call.
+vela_gate_out=$(cat <<'EOF' | vela agent run 2>/dev/null
+{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}
+{"jsonrpc":"2.0","id":2,"method":"session/new","params":{"cwd":"/tmp"}}
+{"jsonrpc":"2.0","id":3,"method":"session/prompt","params":{"sessionId":"fake-vela-session-1","prompt":[{"type":"text","text":"hi"}]}}
+EOF
+)
+if printf '%s' "$vela_gate_out" | grep -qF 'session/set_model must be called before session/prompt'; then
+  pass "vela strict set_model gate rejects session/prompt without prior set_model"
+else
+  fail "vela strict set_model gate did not reject (negative-path regression)"
+fi
+
+# ACP agents — JSON-RPC server. Send initialize+session/new+prompt and
+# verify the protocol responses come back in order.
+# kiro-cli and vibe-acp are the primary OD-facing bin names; test them
+# alongside the fallback names (kiro, vibe).
+for agent in hermes kimi kilo kiro kiro-cli vibe vibe-acp devin; do
+  # NOTE(review): the heredoc opener and the command it feeds were lost when
+  # this patch was extracted (only `cat </dev/null` survived). Piping into
+  # the bare "$agent" is a reconstruction — confirm whether any of these
+  # bins needs a subcommand or flag.
+  out=$(cat <<'EOF' | "$agent" 2>/dev/null
+{"jsonrpc":"2.0","id":1,"method":"initialize","params":{}}
+{"jsonrpc":"2.0","id":2,"method":"session/new","params":{"cwd":"/tmp"}}
+{"jsonrpc":"2.0","id":3,"method":"session/prompt","params":{"sessionId":"X","prompt":[{"type":"text","text":"hi"}]}}
+EOF
+)
+  # Expect: id=1 initialize result, id=2 session/new result, ≥1 session/update, id=3 prompt result
+  if printf '%s' "$out" | grep -q '"id":1,"result":{"protocolVersion":1' \
+    && printf '%s' "$out" | grep -q '"id":2,"result":{"sessionId":' \
+    && printf '%s' "$out" | grep -q '"sessionUpdate":"agent_message_chunk"' \
+    && printf '%s' "$out" | grep -q '"id":3,"result":{"stopReason":'; then
+    pass "$agent ACP roundtrip complete (init → session/new → update → prompt result)"
+  else
+    fail "$agent ACP roundtrip incomplete"
+  fi
+done
+
+echo
+if [ "$failed" -eq 0 ]; then
+  echo "All mock CLIs working. ✅"
+else
+  echo "$failed check(s) failed. ❌"
+  exit 1
+fi
diff --git a/mocks/scripts/upload-recording.sh b/mocks/scripts/upload-recording.sh
new file mode 100755
index 000000000..f6c834301
--- /dev/null
+++ b/mocks/scripts/upload-recording.sh
@@ -0,0 +1,120 @@
+#!/usr/bin/env bash
+# Upload a new anonymized recording to Cloudflare R2 and update the
+# local manifest. Local-maintainer flow — no GitHub Action involved,
+# nothing about the recording ever lands in git.
+#
+#   bash mocks/scripts/upload-recording.sh <path/to/trace-uuid.jsonl>
+#
+# Prereqs
+# -------
+#   - `wrangler login` once (OAuth, no token to manage). The logged-in
+#     account must have access to the powerformer R2 namespace where the
+#     `open-design-mocks` bucket lives.
+#   - That's it. Bucket is public-read, manifest is in repo; consumers
+#     pull via `fetch-recordings.sh`.
+#
+# What it does
+# ------------
+#   1. Validate the .jsonl (first line = meta event, UUID filename)
+#   2. Compute sha256, parse meta, build manifest entry
+#   3. `wrangler r2 object put` the recording → recordings/v1/<trace-uuid>.jsonl
+#   4. Update mocks/manifest.json with the new entry (rebuilt histograms)
+#   5. `wrangler r2 object put` the manifest too → recordings/v1/manifest.json
+#   6. Tell you to commit + push the manifest change
+#
+# The .jsonl is never copied into the repo. Only mocks/manifest.json
+# (≈200B added per entry) gets git-tracked.
+#
+# Exit status: 0 on success or --help, 2 when the recording path is
+# missing, non-zero on any validation or upload failure.
+
+set -euo pipefail
+
+HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
+MOCKS_DIR="$(cd "$HERE/.." && pwd -P)"
+MANIFEST="$MOCKS_DIR/manifest.json"
+LIB="$HERE/lib/manifest-utils.mjs"
+BUCKET='open-design-mocks'
+KEY_PREFIX='recordings/v1/'
+# powerformer hosts the bucket; pin so wrangler doesn't ask which
+# account in non-interactive mode when the OAuth login spans several.
+export CLOUDFLARE_ACCOUNT_ID='64ad4569ffd912432d6b86d5656484c4'
+
+# Print the header comment above as usage text: everything from line 2 up
+# to the first non-comment line, with the leading "# " stripped. Stopping
+# at the first non-comment line keeps `set -euo pipefail` out of the help
+# output without a hard-coded line range, and awk avoids the GNU-only
+# `\?` that BSD/macOS sed does not understand.
+usage() {
+  awk 'NR > 1 && !/^#/ { exit } NR > 1 { sub(/^# ?/, ""); print }' "$0"
+}
+
+case "${1:-}" in
+  -h|--help) usage; exit 0 ;;
+  '')        usage >&2; exit 2 ;;
+esac
+
+INPUT="$1"
+if [ ! -f "$INPUT" ]; then echo "✗ no such file: $INPUT" >&2; exit 1; fi
+
+INPUT_ABS="$(cd "$(dirname "$INPUT")" && pwd -P)/$(basename "$INPUT")"
+TRACE_ID="$(basename "$INPUT" .jsonl)"
+
+if ! printf '%s' "$TRACE_ID" | grep -qE '^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$'; then
+  echo "✗ trace id '$TRACE_ID' is not a UUID. Rename the file." >&2
+  exit 1
+fi
+
+if ! command -v wrangler >/dev/null 2>&1; then
+  echo "✗ wrangler not installed. \`npm i -g wrangler\` (or pnpm/yarn) and \`wrangler login\` first." >&2
+  exit 1
+fi
+
+# Step 1+2: validate + build entry preview via shared lib (Node — same code
+# the consumers use, so no shape drift). Paths travel through the
+# environment rather than being spliced into the JS source, so a quote or
+# backslash in the checkout path cannot break (or inject into) the script.
+ENTRY_JSON=$(OD_MOCKS_LIB="$LIB" OD_MOCKS_INPUT="$INPUT_ABS" node --input-type=module -e '
+import { pathToFileURL } from "node:url";
+const { inspectRecording } = await import(pathToFileURL(process.env.OD_MOCKS_LIB).href);
+process.stdout.write(JSON.stringify(inspectRecording(process.env.OD_MOCKS_INPUT), null, 2));
+' 2>&1) || {
+  echo "✗ validation failed:" >&2
+  echo "$ENTRY_JSON" | sed 's/^/ /' >&2
+  exit 1
+}
+
+echo "manifest entry to add:"
+echo "$ENTRY_JSON" | sed 's/^/ /'
+echo
+
+# Step 3: upload the recording itself
+echo "→ uploading recording to R2…"
+wrangler r2 object put "${BUCKET}/${KEY_PREFIX}${TRACE_ID}.jsonl" \
+  --file "$INPUT_ABS" --remote >/dev/null
+echo " ✓ $TRACE_ID.jsonl"
+
+# Step 4: update local manifest.json
+echo "→ updating local mocks/manifest.json…"
+OD_MOCKS_LIB="$LIB" OD_MOCKS_INPUT="$INPUT_ABS" OD_MOCKS_MANIFEST="$MANIFEST" \
+  node --input-type=module -e '
+import { pathToFileURL } from "node:url";
+const lib = await import(pathToFileURL(process.env.OD_MOCKS_LIB).href);
+const m = lib.readManifest(process.env.OD_MOCKS_MANIFEST);
+lib.upsertEntry(m, lib.inspectRecording(process.env.OD_MOCKS_INPUT));
+lib.writeManifest(process.env.OD_MOCKS_MANIFEST, m);
+console.log(" ✓ now " + m.total + " entries (" + (m.total_bytes/1024).toFixed(0) + " KB total)");
+'
+
+# Step 5: upload the updated manifest to R2 so consumers see the new entry
+# without waiting for the next git push.
+echo "→ uploading manifest to R2…"
+wrangler r2 object put "${BUCKET}/${KEY_PREFIX}manifest.json" \
+  --file "$MANIFEST" --remote >/dev/null
+echo " ✓ manifest.json"
+
+echo
+echo "✅ done. Next:"
+echo " git add mocks/manifest.json"
+echo " git commit -m 'mocks: add recording $TRACE_ID'"
+echo " git push"
diff --git a/scripts/guard.ts b/scripts/guard.ts
index f200b8311..6d7d1af88 100644
--- a/scripts/guard.ts
+++ b/scripts/guard.ts
@@ -117,6 +117,17 @@ const residualAllowedPathPrefixes = [
   "design-templates/last30days/scripts/lib/vendor/",
   // Vendored upstream html-ppt runtime assets (lewislulu/html-ppt-skill, design template).
   "design-templates/html-ppt/assets/",
+  // Replay-based mock CLIs that impersonate the agent CLIs OD spawns
+  // (opencode/claude/codex/gemini/cursor-agent + ACP family). They must
+  // be directly executable via Node so `child_process.spawn` from test
+  // harnesses and PATH-overlay shells work without any transform step.
+  // `mocks/scripts/` holds the maintainer-facing helpers (manifest math,
+  // fetch from R2) which are also pure-node single-file modules — same
+  // precedent as `apps/daemon/tests/fixtures/fake-vela.mjs` (an ACP
+  // stdio stub, allowlisted individually above). See `mocks/README.md`.
+  "mocks/lib/",
+  "mocks/mock-agent.mjs",
+  "mocks/scripts/",
   "test-results/",
   "vendor/",
 ];