mirror of
https://github.com/nexu-io/open-design.git
synced 2026-05-31 19:04:39 +07:00
mocks: address mrcfps review — goldens + provenance + contract check
Three durability improvements suggested in the PR #3241 top-level review: ## 1. Golden daemon-event snapshots (mocks/golden/*.events.json + apps/daemon/tests/mocks-golden.test.ts) Smoke-test verified that mocks RUN; that catches crashes but not a parser change that semantically reshapes the events the daemon emits. Commit the daemon-event sequence for 3 representative traces: - claude 314d6833 — median-complexity agent-browser session - codex dcdff3b3 — 14-tool refactor - opencode 9a9522ec — 7-tool data-report apps/daemon/tests/mocks-golden.test.ts spawns the mock, feeds stdout through the real createClaudeStreamHandler / createJsonEventStreamHandler, normalizes per-spawn volatile fields (only sessionId today, only on claude), and deep-equals against the committed snapshot. A parser regression fails the test loudly. After an intentional parser change, regenerate: MOCKS_GOLDEN_UPDATE=1 pnpm --filter @open-design/daemon test mocks-golden git diff mocks/golden/ # eyeball; commit if shapes match intent ## 2. Provenance fields on every manifest entry (mocks/scripts/lib/manifest-utils.mjs + mocks/manifest.json) Augment inspectRecording() to write: captured_at — ISO 8601 from existing meta.timestamp cli_version — null until harvester writes it protocol_version — null until harvester writes it anonymization_version — null until harvester writes it captured_at is now populated for all 179 existing entries from the meta event the harvester already emits. The harvester in nexu-io/agent-pr-explore is the next step for cli_version / protocol_version / anonymization_version — once those are populated, consumers can detect when a recording is older than ~1 minor version behind the live CLI and flag for re-harvest. No matrix of (cli_version × agent) recordings — that explodes maintenance. Just metadata per recording so trust decay is visible. ## 3. 
Real-CLI contract check (mocks/scripts/contract-check.sh + docs/MOCKS-CONTRACT-CHECK.md) Mocks catch parser regressions against recordings; they do NOT catch recordings drifting away from the live agent CLI as that CLI evolves. The contract check spawns the real CLI alongside the mock with a fixed deterministic prompt + diffs top-level event-type distributions. Deliberately human-driven, not cron-scheduled: - costs real LLM tokens per invocation - requires real CLI auth - maintainer reads the output, not a regex Suggested triggers per doc: real-CLI release notes mentioning "output format" / "stream" / "JSON" / "events"; before a parser refactor; ad-hoc when something looks off. ## Coverage note README updated to position mocks as "deterministic protocol/parser coverage" (not "e2e replacement") per mrcfps framing. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
parent
b1ea8b9df1
commit
a31fa71aec
10 changed files with 4562 additions and 2870 deletions
116
apps/daemon/tests/mocks-golden.test.ts
Normal file
116
apps/daemon/tests/mocks-golden.test.ts
Normal file
|
|
@ -0,0 +1,116 @@
|
|||
// Golden daemon-event snapshots — addresses the regression-signal point
|
||||
// from review on #3241: smoke-testing that mocks RUN catches only crashes
|
||||
// or protocol-level garbage; it does NOT catch a parser change that
|
||||
// semantically reshapes the events the daemon emits to the UI.
|
||||
//
|
||||
// This test replays representative recordings through the actual daemon
|
||||
// stream handlers and asserts the emitted event sequence matches a
|
||||
// committed `mocks/golden/<trace>.events.json`. A parser tweak that
|
||||
// drops a tool_result, changes a usage shape, or renames an event type
|
||||
// fails this test loudly.
|
||||
//
|
||||
// Update flow when a parser change is INTENTIONAL:
|
||||
// MOCKS_GOLDEN_UPDATE=1 pnpm --filter @open-design/daemon test mocks-golden
|
||||
// then `git diff mocks/golden/` and commit the new shapes.
|
||||
//
|
||||
// Auto-skips when the recording corpus hasn't been fetched yet (see
|
||||
// `mocks/scripts/fetch-recordings.sh`); CI that exercises this test must
|
||||
// fetch first.
|
||||
|
||||
import { describe, it, expect } from 'vitest';
|
||||
import { existsSync, readFileSync, writeFileSync, mkdirSync } from 'node:fs';
|
||||
import { spawnSync } from 'node:child_process';
|
||||
import { dirname, join } from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
import { createClaudeStreamHandler } from '../src/claude-stream.js';
|
||||
import {
|
||||
createJsonEventStreamHandler,
|
||||
type ParserKind,
|
||||
} from '../src/json-event-stream.js';
|
||||
|
||||
// Absolute paths used by this suite. Everything is derived from this file's
// own location, so the test does not depend on the current working directory.
const HERE = dirname(fileURLToPath(import.meta.url));

// This file lives in apps/daemon/tests/, so the repository root is three
// directories up.
const REPO = join(HERE, '..', '..', '..');

// Mock agent entry point that is spawned for every case.
const MOCK_AGENT = join(REPO, 'mocks', 'mock-agent.mjs');

// Committed `<trace>.events.json` snapshots.
const GOLDEN_DIR = join(REPO, 'mocks', 'golden');

// Recorded `<trace>.jsonl` files.
const RECORDINGS_DIR = join(REPO, 'mocks', 'recordings');
|
||||
|
||||
// One representative recording per agent: the median-tool-count successful
// trace (selected from manifest 2026-05-29). The matching `<trace>.jsonl`
// lives in `mocks/recordings/` once `bash mocks/scripts/fetch-recordings.sh`
// has been run.
const CASES: Array<{ agent: 'claude' | 'codex' | 'opencode'; trace: string }> = (
  [
    ['claude', '314d6833-0377-4ac4-ba11-2b8d7eca5511'],
    ['codex', 'dcdff3b3-cd39-4dcd-be83-372830a29639'],
    ['opencode', '9a9522ec-575f-432f-aeed-efc491e900aa'],
  ] as const
).map(([agent, trace]) => ({ agent, trace }));
|
||||
|
||||
/**
 * Swaps fields that change on every spawn for a fixed sentinel so the golden
 * snapshot is stable from run to run.
 *
 * Only a top-level `sessionId` is rewritten today (claude's mock emits a fresh
 * UUID every spawn; opencode/codex carry the recording's own session/thread
 * id, so they are already stable). Nested objects are left untouched.
 *
 * @param events - Events collected from a stream handler.
 * @returns A new array; object events are shallow copies, everything else is
 *   passed through unchanged.
 */
function normalizeVolatile(events: unknown[]): unknown[] {
  const scrub = (event: unknown): unknown => {
    const isObject = typeof event === 'object' && event !== null;
    if (!isObject) return event;

    // Shallow copy so the caller's event objects are never mutated.
    const copy: Record<string, unknown> = { ...(event as Record<string, unknown>) };
    return 'sessionId' in copy ? { ...copy, sessionId: '<normalized>' } : copy;
  };

  return events.map(event => scrub(event));
}
|
||||
|
||||
/**
 * Replays one recorded trace through the mock agent CLI and returns the
 * events the daemon stream handlers emit for it.
 *
 * The mock is run synchronously with the current Node binary. Its stdout is
 * fed to `createClaudeStreamHandler` when `agent` is `claude`, and to
 * `createJsonEventStreamHandler` for every other agent. The collected events
 * go through `normalizeVolatile` before being returned.
 *
 * @param agent - Value passed to the mock's `--as` flag; also selects the handler.
 * @param trace - Trace id handed to the mock through `OD_MOCKS_TRACE`.
 * @returns The normalized events, in emission order.
 * @throws Error when the mock could not be run to completion (spawn failure,
 *   timeout, output larger than `maxBuffer`), was killed by a signal, or
 *   exited with a non-zero status.
 */
function runMockAndCollectEvents(agent: string, trace: string): unknown[] {
  // Force no-delay (flag and env var) so the spawn returns quickly + deterministically.
  const proc = spawnSync(process.execPath, [MOCK_AGENT, '--as', agent, '--no-delay'], {
    env: { ...process.env, OD_MOCKS_TRACE: trace, OD_MOCKS_NO_DELAY: '1' },
    input: 'golden-test-prompt',
    encoding: 'utf-8',
    timeout: 30_000,
    maxBuffer: 50 * 1024 * 1024,
  });

  // spawnSync reports spawn failures, timeouts and maxBuffer overflows through
  // `error` (with `status` left null). Surface that cause directly instead of
  // the uninformative "exit null".
  if (proc.error) {
    throw new Error(
      `mock-agent --as ${agent} failed to run (trace ${trace}): ${proc.error.message}`,
    );
  }

  if (proc.status !== 0) {
    // stderr can be null when the child never started; guard before slicing.
    const stderr = (proc.stderr ?? '').slice(0, 500);
    const outcome = proc.signal ? `killed by ${proc.signal}` : `exit ${proc.status}`;
    throw new Error(`mock-agent --as ${agent} ${outcome}: ${stderr}`);
  }

  const events: unknown[] = [];
  const sink = (e: unknown) => events.push(e);
  const handler =
    agent === 'claude'
      ? createClaudeStreamHandler(sink)
      : createJsonEventStreamHandler(agent as ParserKind, sink);

  // NOTE(review): the whole stdout is fed in one call and the handler is never
  // flushed. If the handlers buffer a trailing line that lacks a newline, the
  // final event would be missing here — confirm against the handler API.
  handler.feed(proc.stdout);
  return normalizeVolatile(events);
}
|
||||
|
||||
// True only when the recordings directory exists and none of the cases is
// missing its `<trace>.jsonl`. Used below to skip the suite when the corpus
// has not been fetched.
const recordingsAvailable =
  existsSync(RECORDINGS_DIR) &&
  !CASES.some(({ trace }) => !existsSync(join(RECORDINGS_DIR, `${trace}.jsonl`)));
|
||||
|
||||
// Golden comparison suite: one test per entry in CASES. The whole suite is
// skipped when the recordings have not been fetched (see recordingsAvailable).
describe.skipIf(!recordingsAvailable)(
  'mocks goldens — daemon event shape regression',
  () => {
    for (const { agent, trace } of CASES) {
      it(`${agent} ${trace.slice(0, 8)}`, () => {
        const events = runMockAndCollectEvents(agent, trace);
        const goldenPath = join(GOLDEN_DIR, `${trace}.events.json`);

        // Update mode: rewrite the snapshot and return without asserting, so
        // an intentional parser change can be reviewed with `git diff`.
        if (process.env.MOCKS_GOLDEN_UPDATE === '1') {
          mkdirSync(GOLDEN_DIR, { recursive: true });
          writeFileSync(
            goldenPath,
            JSON.stringify({ agent, trace, events }, null, 2) + '\n',
          );
          return;
        }

        // Fail with an actionable message rather than a bare ENOENT when the
        // snapshot for a case was never generated or committed.
        if (!existsSync(goldenPath)) {
          throw new Error(
            `missing golden snapshot ${goldenPath}; ` +
              'generate it with MOCKS_GOLDEN_UPDATE=1 pnpm --filter @open-design/daemon test mocks-golden',
          );
        }

        // Compare the whole envelope (agent + trace + events) so a snapshot
        // saved under the wrong trace id is caught as well.
        const golden: unknown = JSON.parse(readFileSync(goldenPath, 'utf-8'));
        expect({ agent, trace, events }).toEqual(golden);
      });
    }
  },
);
|
||||
108
docs/MOCKS-CONTRACT-CHECK.md
Normal file
108
docs/MOCKS-CONTRACT-CHECK.md
Normal file
|
|
@ -0,0 +1,108 @@
|
|||
# Real-CLI contract check
|
||||
|
||||
The replay mocks under `mocks/` impersonate real agent CLIs by emitting
|
||||
recorded traces in each CLI's native protocol. They're great for parser
|
||||
regression coverage but they can silently drift away from the real CLI
|
||||
when:
|
||||
|
||||
- An agent CLI ships a new event `type` that the mock doesn't know about.
|
||||
- A field gets renamed (`sessionID` → `sessionId`) and the mock keeps
|
||||
emitting the old name. OD's parser may have been updated to accept
|
||||
both, so smoke tests stay green, but new fields aren't surfaced.
|
||||
- A protocol version bump changes the shape of `usage` / tool calls /
|
||||
init blocks.
|
||||
|
||||
The contract check is the periodic ritual that catches that drift.
|
||||
|
||||
## Scope
|
||||
|
||||
It is **not** a CI gate. The check:
|
||||
|
||||
- Costs real LLM tokens (a few cents per agent per run).
|
||||
- Requires the real CLI installed + authenticated locally or on a
|
||||
maintainer-controlled runner.
|
||||
- Wants a human to eyeball the output, not a regex.
|
||||
|
||||
Treat it like a maintenance task — monthly is fine, ad-hoc whenever a
|
||||
relevant CLI publishes a release note about output-format changes.
|
||||
|
||||
## How to run
|
||||
|
||||
```bash
|
||||
bash mocks/scripts/contract-check.sh claude
|
||||
bash mocks/scripts/contract-check.sh codex
|
||||
bash mocks/scripts/contract-check.sh opencode
|
||||
```
|
||||
|
||||
The script:
|
||||
|
||||
1. Resolves the real CLI binary (ignoring the `mocks/bin/` PATH overlay).
|
||||
2. Sends a fixed deterministic prompt: *"List the entries of the current
|
||||
working directory and tell me how many JSON files are present."*
|
||||
3. Runs the same prompt through the mock CLI.
|
||||
4. Prints a side-by-side distribution of top-level event `type`
|
||||
values from both.
|
||||
5. Leaves both raw JSONL outputs in `/tmp` for you to `diff`.
|
||||
|
||||
## What to look for
|
||||
|
||||
Compare the two `type` distributions. Acceptable differences:
|
||||
|
||||
- Counts vary slightly (mock plays a single recorded trace, real CLI
|
||||
may take a different number of turns for the same prompt).
|
||||
- Mock emits a superset of the real CLI's event types — the recordings
|
||||
span historical CLI versions.
|
||||
|
||||
**Red flags**:
|
||||
|
||||
- Real CLI emits a `type` value the mock never produces → the mock
|
||||
needs a new event handler in `mocks/lib/format-<agent>.mjs`.
|
||||
- Real CLI's event uses different field names than the mock → either
|
||||
the real CLI changed and the parser may already be out of sync, or
|
||||
the mock is drifting toward an internal convention.
|
||||
- Mock crashes / emits nothing → the agent's `--no-delay` path is
|
||||
broken.
|
||||
|
||||
## Suggested cadence
|
||||
|
||||
No fixed schedule, no automated cron — the check is human-driven:
|
||||
|
||||
- **On real-CLI release**: when Anthropic / OpenAI / OpenCode publishes
|
||||
a release whose notes mention "output format" / "JSON" / "stream" /
|
||||
"events" / "API", run the affected agent's check. This is the
|
||||
highest-signal trigger.
|
||||
- **Before a parser refactor**: lock the contract before touching
|
||||
`apps/daemon/src/claude-stream.ts` / `json-event-stream.ts`, so a
|
||||
post-refactor failure means "I broke the parser" rather than "the
|
||||
real CLI already drifted and the parser had silently caught it".
|
||||
- **Ad-hoc**: if something feels off — UI suddenly missing a tool call,
|
||||
duplicate events, unfamiliar field names in logs — a contract check
|
||||
is the fast first step.
|
||||
|
||||
Putting this on a cron would burn LLM tokens every run with no human
|
||||
review of the output, defeating the point. The check is an artifact a
|
||||
maintainer reads, not a CI gate.
|
||||
|
||||
## Future improvements
|
||||
|
||||
The current script only compares top-level `type` distributions
|
||||
because a deeper structural diff is hard to do without a schema.
|
||||
Possible follow-ups:
|
||||
|
||||
1. **JSON-shape schema per agent** — generate a JSON Schema from the
|
||||
mock formatters' output, run a validator against real-CLI output,
|
||||
report violations with field paths.
|
||||
2. **Recorded-then-replayed delta** — capture the real CLI's output
|
||||
for the fixed prompt, save under `mocks/contracts/<agent>.golden.jsonl`,
|
||||
then in CI replay that golden through the daemon parser and assert
|
||||
no parser errors. Cheaper than calling the LLM every CI run but
|
||||
only catches *parser* drift, not *CLI* drift.
|
||||
|
||||
Neither is implemented today.
|
||||
|
||||
## Related
|
||||
|
||||
- `mocks/scripts/contract-check.sh` — the script itself.
|
||||
- `apps/daemon/tests/mocks-golden.test.ts` — daemon-event golden
|
||||
snapshots (catches parser regressions against the mocks, complementary
|
||||
to this check which catches mock-vs-real drift).
|
||||
|
|
@ -87,6 +87,58 @@ it lists every recording's `trace_id`, `sha256`, `bytes`, `agent`,
|
|||
`outcome`, `skills`, `multi_turn`, plus histograms over the corpus.
|
||||
Tooling reads this; you don't have to.
|
||||
|
||||
### Provenance per recording
|
||||
|
||||
Beyond identity (`trace_id`, `sha256`), each manifest entry carries
|
||||
fixture-trust signals so consumers can decide whether the recording
|
||||
is still meaningful as the real CLIs evolve:
|
||||
|
||||
| Field | Meaning |
|
||||
|---|---|
|
||||
| `captured_at` | ISO 8601 timestamp of the original session — populated for all 179 current entries |
|
||||
| `cli_version` | The CLI version the trace was captured against (e.g. `"claude-code 1.0.65"`) — populated only for traces where the harvester recorded it; `null` otherwise |
|
||||
| `protocol_version` | Stream-format version (`"claude-stream-json/v1"`, `"opencode/json-event-stream"`) — populated by harvester |
|
||||
| `anonymization_version` | Which anonymizer pass scrubbed the recording — populated by harvester |
|
||||
|
||||
For now most of these are null on the existing 179 — the harvester in
|
||||
[nexu-io/agent-pr-explore][harvester] is the next thing to teach to
|
||||
write them. Once a recording's `cli_version` falls behind the actual
|
||||
CLI by more than one minor version, treat it as a candidate for
|
||||
re-harvest.
|
||||
|
||||
### Golden daemon-event snapshots
|
||||
|
||||
`mocks/golden/<trace>.events.json` holds the exact event sequence the
|
||||
OD daemon emits for each (mock CLI → handler) pipeline. Diffed
|
||||
on every `pnpm --filter @open-design/daemon test` run by
|
||||
`apps/daemon/tests/mocks-golden.test.ts`.
|
||||
|
||||
A parser refactor that semantically changes events (drops a field,
|
||||
renames `sessionId`, stops emitting `turn_end`) fails the diff loudly.
|
||||
After an intentional parser change, regenerate:
|
||||
|
||||
```bash
|
||||
MOCKS_GOLDEN_UPDATE=1 pnpm --filter @open-design/daemon test mocks-golden
|
||||
git diff mocks/golden/ # eyeball the new shapes
|
||||
git add mocks/golden/ && git commit -m "mocks: refresh goldens for <parser change>"
|
||||
```
|
||||
|
||||
Per-spawn volatile fields (currently just claude's generated
|
||||
`sessionId`) are replaced with `"<normalized>"` so the snapshot stays
|
||||
stable. See `mocks/golden/README.md` for the coverage rationale.
|
||||
|
||||
### Real-CLI contract check
|
||||
|
||||
The mocks catch parser regressions against the recordings; they do
|
||||
**not** catch the recordings themselves drifting away from the live
|
||||
agent CLIs. For that, `mocks/scripts/contract-check.sh` spawns a real
|
||||
CLI alongside the mock with a fixed prompt and prints a side-by-side
|
||||
event-type distribution.
|
||||
|
||||
This is human-driven and costs real LLM tokens — run on a real-CLI
|
||||
release or before a parser refactor, not on a cron. Full doc:
|
||||
[`docs/MOCKS-CONTRACT-CHECK.md`](../docs/MOCKS-CONTRACT-CHECK.md).
|
||||
|
||||
---
|
||||
|
||||
## What gets emitted
|
||||
|
|
@ -378,11 +430,15 @@ mocks/
|
|||
│ ├── deepseek qwen grok
|
||||
│ ├── devin hermes kilo kimi kiro vibe
|
||||
│ └── vela ← 15 bash wrappers, PATH-overlay
|
||||
├── manifest.json ← committed: 179 entries' metadata + sha256 + R2 storage hints
|
||||
├── manifest.json ← committed: 179 entries' metadata + sha256 + provenance + R2 storage hints
|
||||
├── golden/ ← committed: daemon-event regression snapshots
|
||||
│ ├── README.md
|
||||
│ └── *.events.json ← 3 representative traces (claude/codex/opencode)
|
||||
├── scripts/
|
||||
│ ├── smoke-test.sh ← 21 checks; auto-fetches recordings if empty
|
||||
│ ├── fetch-recordings.sh ← pull from R2 (parallel, sha256-verified, idempotent)
|
||||
│ ├── upload-recording.sh ← maintainer-local: validate + wrangler put + manifest update
|
||||
│ ├── upload-recording.sh ← maintainer-local: validate + wrangler put + manifest update
|
||||
│ ├── contract-check.sh ← real-CLI vs mock protocol drift check (manual)
|
||||
│ └── lib/
|
||||
│ └── manifest-utils.mjs ← shared sha256 / meta-parse / manifest-rebuild logic
|
||||
└── recordings/ ← populated at runtime, gitignored .jsonl
|
||||
|
|
|
|||
114
mocks/golden/314d6833-0377-4ac4-ba11-2b8d7eca5511.events.json
Normal file
114
mocks/golden/314d6833-0377-4ac4-ba11-2b8d7eca5511.events.json
Normal file
|
|
@ -0,0 +1,114 @@
|
|||
{
|
||||
"agent": "claude",
|
||||
"trace": "314d6833-0377-4ac4-ba11-2b8d7eca5511",
|
||||
"events": [
|
||||
{
|
||||
"type": "status",
|
||||
"label": "initializing",
|
||||
"model": null,
|
||||
"sessionId": "<normalized>"
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01W4ChHzyM5SX3pnfNPzvvQe",
|
||||
"name": "Read",
|
||||
"input": {
|
||||
"file_path": "${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html",
|
||||
"offset": 100,
|
||||
"limit": 40
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "turn_end",
|
||||
"stopReason": "tool_use"
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01W4ChHzyM5SX3pnfNPzvvQe",
|
||||
"content": "100\t .led-grid .led { width: 100%; aspect-ratio: 1 / 1; max-height: 22px; min-height: 12px;\n101\t display: grid; place-items: center; font-size: 8px; font-weight: 700; }\n102\t .led.up { background: rgba(0,255,102,0.18); color: var(--green); border: 1px solid rgba(0,255,102,0.55); box-shadow: inset 0 0 6px rgba(0,255,102,0.35); }\n103\t .led.dn { background: rgba(255,51,68,0.16); color: var(--red); border: 1px solid rgba(255,51,68,0.5); box-shadow: inset 0 0 6px rgba(255,51,68,0.3); }\n104\t .led.flat { background: rgba(112,112,112,0.12); color: var(--muted); border: 1px solid var(--line-strong); }\n105\t .led-grid .net { font-size: 10px; color: var(--fg-strong); font-weight: 700; text-align: right; letter-spacing: 0.04em; }\n106\t .led-grid .net.up { color: var(--green); }\n107\t .led-grid .net.dn { color: var(--red); }\n108\t .led-legend { display: flex; gap: 12px; margin-top: 8px; font-size: 9px; color: var(--muted); letter-spacing: 0.08em; }\n109\t .led-legend .lg { display: flex; align-items: center; gap: 4px; }\n110\t .led-legend i { width: 10px; height: 10px; }\n111\t .led-legend i.up { background: rgba(0,255,102,0.45); border: 1px solid var(--green); }\n112\t .led-legend i.dn { background: rgba(255,51,68,0.4); border: 1px solid var(--red); }\n113\t .led-legend i.flat { background: rgba(112,112,112,0.2); border: 1px solid var(--muted); }\n114\t\n115\t /* MOD 2: Activity log */\n116\t .log-list { font-size: 10px; line-height: 1.5; letter-spacing: 0.02em; }\n117\t .log-row { display: grid; grid-template-columns: 56px 38px 1fr auto; gap: 8px; padding: 4px 0; border-bottom: 1px dotted var(--line); }\n118\t .log-row:last-child { border-bottom: 0; }\n119\t .log-time { color: var(--dim); }\n120\t .log-type { color: var(--amber); font-weight: 700; letter-spacing: 0.06em; }\n121\t .log-type.cl { color: var(--green); }\n122\t .log-type.cr { color: var(--cyan); }\n123\t .log-msg { color: var(--fg); white-space: nowrap; overflow: hidden; text-overflow: 
ellipsis; }\n124\t .log-d { font-weight: 700; }\n125\t .log-d.up { color: var(--green); }\n126\t .log-d.dn { color: var(--red); }\n127\t\n128\t /* MOD 3: ASCII heatmap */\n129\t .heat-wrap { font-family: var(--font-mono); font-size: 10px; line-height: 1.55; color: var(--fg); }\n130\t .heat-wrap .heat-head, .heat-wrap .heat-row { display: grid; grid-template-columns: 36px repeat(4, 1fr); gap: 6px; }\n131\t .heat-wrap .heat-head { color: var(--muted); font-size: 9px; letter-spacing: 0.1em; padding-bottom: 4px; border-bottom: 1px dotted var(--line-strong); margin-bottom: 4px; }\n132\t .heat-wrap .heat-row { padding: 3px 0; }\n133\t .heat-wrap .heat-row .m { color: var(--muted); font-weight: 600; letter-spacing: 0.06em; }\n134\t .heat-cell { padding: 2px 4px; text-align: center; font-weight: 700; letter-spacing: 0.04em; }\n135\t .heat-cell.h0 { background: #0f0f0f; color: var(--dim); }\n136\t .heat-cell.h1 { background: rgba(255,176,0,0.12); color: var(--amber-dim); }\n137\t .heat-cell.h2 { background: rgba(255,176,0,0.28); color: var(--amber); }\n138\t .heat-cell.h3 { background: rgba(255,176,0,0.5); color: #fff; }\n139\t .heat-cell.h4 { background: rgba(255,176,0,0.78); color: #050505; }",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01SifooERKXmRaxihtaFixFU",
|
||||
"name": "Grep",
|
||||
"input": {
|
||||
"pattern": "kpi-cell::before|kpi-cell\\s*\\{|\\.kpi-strip|\\.kpi-cell",
|
||||
"path": "${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html",
|
||||
"output_mode": "content",
|
||||
"-n": true
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "turn_end",
|
||||
"stopReason": "tool_use"
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01SifooERKXmRaxihtaFixFU",
|
||||
"content": "55: .kpi-strip { background: var(--bg-2); border-bottom: 1px solid var(--line); display: grid; grid-template-columns: repeat(4, 1fr); }\n56: .kpi-cell { padding: 14px 22px; border-right: 1px solid var(--line); position: relative; }\n57: .kpi-cell:last-child { border-right: 0; }\n58: .kpi-cell::before { content:''; position: absolute; left: 0; top: 12px; bottom: 12px; width: 3px; }\n59: .kpi-cell.cust-create::before { background: var(--cyan); }\n60: .kpi-cell.cust-close::before { background: var(--green); }\n61: .kpi-cell.int-create::before { background: var(--amber); }\n62: .kpi-cell.int-close::before { background: var(--magenta); }",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_0163uAuJyfBhuHWtxXoSwi3N",
|
||||
"name": "Edit",
|
||||
"input": {
|
||||
"file_path": "${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html",
|
||||
"old_string": " .kpi-cell::before { content:''; position: absolute; left: 0; top: 12px; bottom: 12px; width: 3px; }",
|
||||
"new_string": " .kpi-cell::before { content:''; position: absolute; left: 0; top: 14px; bottom: 14px; width: 3px; }"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "turn_end",
|
||||
"stopReason": "tool_use"
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_0163uAuJyfBhuHWtxXoSwi3N",
|
||||
"content": "The file ${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html has been updated successfully. (file state is current in your context — no need to Read it back)",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01QdvPxPh8Qk9Ch4bgAgGZxT",
|
||||
"name": "Grep",
|
||||
"input": {
|
||||
"-n": true,
|
||||
"output_mode": "content",
|
||||
"path": "${HOME}/Library/Application Support/Open Design/namespaces/release-stable/data/projects/proj-001/ref-b-tickets-v3.html",
|
||||
"pattern": "topbar|\\.bar\\s"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "turn_end",
|
||||
"stopReason": "tool_use"
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "314d6833-0377-4ac4-ba11-2b8d7eca5511-tool-toolu_bdrk_01QdvPxPh8Qk9Ch4bgAgGZxT",
|
||||
"content": "40: /* topbar */\n41: .topbar { background: var(--bg-2); border-bottom: 1px solid var(--line); display: grid;\n44: .topbar .lbl { color: var(--muted); }\n45: .topbar .v { color: var(--amber); }\n46: .topbar .v.up { color: var(--green); }\n47: .topbar .v.dn { color: var(--red); }\n48: .topbar .sep { color: var(--dim); }\n214: <header class=\"topbar\">",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "text_delta",
|
||||
"delta": "让我看一下 KPI 色条的当前样式定位。让我找 KPI 色条样式的定义。找到了。当前 `top: 12px; bottom: 12px` — KPI cell 高度是 78px(来自 stage grid `32px 78px 1fr 44px` 的第二行),padding 是 `14px 22px`。\n\n色条 `top:12 / bottom:12` 意味着色条上下各留 12px,色条本身高度 = 78 - 12 - 12 = 54px。\n\n但 KPI cell 的 padding 是 `14px 22px`(上下 14px),意味着内容区从 14px 开始。色条 `top:12px` 比内容区上沿(14px)高 2px,bottom 同理。这看起来挺合理啊...\n\n等等,可能是用户感觉\"统一往下偏移几像素\"是因为色条上端比 KPI 数字(在 padding 14px 后开始)更高 2px,但下端比 KPI 文字下边缘更深 — 视觉上整条色条的中点跟内容的中点错开了。\n\n让我让色条跟 KPI 内容(padding 14 上下)完全对齐:`top:14px / bottom:14px`。改完。色条上下从 12px 改成 14px,跟 KPI cell 的 padding(14px)完全对齐 — 现在色条的上下端正好齐内容区的上下沿,不再有 2px 错位。\n\n但你说\"统一往下偏移几个像素\" — 还有一种可能是色条整体被往下推了。让我再看一下是不是有别的样式干扰。topbar 32px 高度正常。color bar 现在 `top:14 / bottom:14` 应该对齐到 KPI 内容区的上下沿了。\n\n刷新 ref-b-tickets-v3.html 看看现在对齐了没?如果还是觉得偏移,告诉我具体是色条整体往下移、还是上下不对称,我再精调。"
|
||||
},
|
||||
{
|
||||
"type": "turn_end",
|
||||
"stopReason": "end_turn"
|
||||
},
|
||||
{
|
||||
"type": "usage",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"output_tokens": 1594,
|
||||
"cache_creation_input_tokens": 0,
|
||||
"cache_read_input_tokens": 0
|
||||
},
|
||||
"costUsd": 0,
|
||||
"durationMs": 40885,
|
||||
"stopReason": "end_turn"
|
||||
}
|
||||
]
|
||||
}
|
||||
187
mocks/golden/9a9522ec-575f-432f-aeed-efc491e900aa.events.json
Normal file
187
mocks/golden/9a9522ec-575f-432f-aeed-efc491e900aa.events.json
Normal file
|
|
@ -0,0 +1,187 @@
|
|||
{
|
||||
"agent": "opencode",
|
||||
"trace": "9a9522ec-575f-432f-aeed-efc491e900aa",
|
||||
"events": [
|
||||
{
|
||||
"type": "status",
|
||||
"label": "running"
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_9F86gxR0AA5Ea5dwfJQA0062",
|
||||
"name": "read",
|
||||
"input": {
|
||||
"filePath": "%USERPROFILE%\\AppData\\Roaming\\Open Design\\namespaces\\release-stable-win\\data\\projects\\proj-001\\evidguard-pitch-deck.html"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_9F86gxR0AA5Ea5dwfJQA0062",
|
||||
"content": "<path>%USERPROFILE%\\AppData\\Roaming\\Open Design\\namespaces\\release-stable-win\\data\\projects\\proj-001\\evidguard-pitch-deck.html</path>\n<type>file</type>\n<content>\n1: \n2: <!doctype html>\n3: <html lang=\"es\">\n4: <head>\n5: <meta charset=\"utf-8\" />\n6: <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\" />\n7: <title>EvidGuard · Pitch deck</title>\n8: <style>\n9: :root {\n10: /* Apple system — Zhangzara accent */\n11: --bg: #ffffff;\n12: --surface: #f5f5f7;\n13: --fg: #1d1d1f;\n14: --fg-2: #424245;\n15: --muted: #6e6e73;\n16: --border: #d2d2d7;\n17: --accent: #e5ff00;\n18: --accent-on: #000000;\n19: --accent-soft: color-mix(in oklch, var(--accent) 14%, transparent);\n20: \n21: --font-display: \"SF Pro Display\", \"SF Pro Icons\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n22: --font-body: \"SF Pro Text\", \"SF Pro Icons\", \"Helvetica Neue\", Helvetica, Arial, sans-serif;\n23: --font-mono: \"SF Mono\", ui-monospace, \"JetBrains Mono\", \"IBM Plex Mono\", Menlo, monospace;\n24: \n25: --dark-bg: #000000;\n26: --light-text: #f5f5f7;\n27: \n28: --radius-sm: 8px;\n29: --radius-md: 12px;\n30: --radius-lg: 18px;\n31: --radius-pill: 980px;\n32: \n33: --elev-flat: none;\n34: --elev-ring: 0 0 0 1px var(--border);\n35: --elev-raised: 0 12px 32px rgba(0,0,0,0.08);\n36: \n37: --motion-fast: 150ms;\n38: --motion-base: 220ms;\n39: --ease-standard: cubic-bezier(0.28, 0, 0.22, 1);\n40: }\n41: \n42: *, *::before, *::after { box-sizing: border-box; }\n43: @keyframes fadeSlideIn {\n44: from { opacity: 0; transform: translateY(24px); }\n45: to { opacity: 1; transform: translateY(0); }\n46: }\n47: html, body { margin: 0; height: 100%; }\n48: body {\n49: background: var(--dark-bg);\n50: color: var(--fg);\n51: font-family: var(--font-body);\n52: font-size: 18px;\n53: line-height: 1.5;\n54: -webkit-font-smoothing: antialiased;\n55: display: flex;\n56: overflow-x: auto;\n57: overflow-y: hidden;\n58: scroll-snap-type: x mandatory;\n59: 
scroll-behavior: smooth;\n60: }\n61: body::-webkit-scrollbar { display: none; }\n62: p { text-wrap: pretty; }\n63: h1, h2, h3 { text-wrap: balance; }\n64: \n65: .slide {\n66: flex: 0 0 100vw;\n67: width: 100vw;\n68: height: 100vh;\n69: scroll-snap-align: start;\n70: padding: clamp(48px, 7vw, 96px) clamp(48px, 8vw, 112px);\n71: display: flex;\n72: flex-direction: column;\n73: justify-content: center;\n74: position: relative;\n75: overflow: hidden;\n76: }\n77: .slide > * { animation: fadeSlideIn 0.6s var(--ease-standard) both; }\n78: .slide > *:nth-child(1) { animation-delay: 0.1s; }\n79: .slide > *:nth-child(2) { animation-delay: 0.2s; }\n80: .slide > *:nth-child(3) { animation-delay: 0.3s; }\n81: .slide > *:nth-child(4) { animation-delay: 0.4s; }\n82: .slide > *:nth-child(5) { animation-delay: 0.5s; }\n83: .slide > *:nth-child(6) { animation-delay: 0.6s; }\n84: \n85: .slide.light { background: var(--bg); color: var(--fg); }\n86: .slide.dark { background: var(--dark-bg); color: var(--light-text); }\n87: .slide.dark .muted { color: rgba(245,245,247,0.65); }\n88: .slide.dark .border { border-color: rgba(245,245,247,0.18); }\n89: .slide.hero { padding-block: clamp(64px, 9vw, 128px); }\n90: .slide.center { align-items: center; text-align: center; justify-content: center; }\n91: .slide.center .body { margin-inline: auto; }\n92: \n93: .eyebrow {\n94: font-family: var(--font-mono);\n95: font-size: 12px;\n96: letter-spacing: 0.1em;\n97: text-transform: uppercase;\n98: color: var(--accent);\n99: margin: 0 0 28px;\n100: }\n101: .slide.light .eyebrow { color: var(--fg); }\n102: .slide.light .eyebrow span { color: var(--accent); }\n103: \n104: .h-hero {\n105: font-family: var(--font-display);\n106: font-size: clamp(56px",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_XGSoulTKMbeomO2LlXC40391",
|
||||
"name": "todowrite",
|
||||
"input": {
|
||||
"todos": [
|
||||
{
|
||||
"content": "Rewrite CSS with cleaner minimal styling and advanced animations",
|
||||
"status": "in_progress",
|
||||
"priority": "high"
|
||||
},
|
||||
{
|
||||
"content": "Simplify slide layouts — remove excess chrome, tighter copy",
|
||||
"status": "pending",
|
||||
"priority": "high"
|
||||
},
|
||||
{
|
||||
"content": "Add scroll-triggered parallax and staggered micro-animations",
|
||||
"status": "pending",
|
||||
"priority": "high"
|
||||
},
|
||||
{
|
||||
"content": "Verify all animations work and deck is responsive",
|
||||
"status": "pending",
|
||||
"priority": "medium"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_XGSoulTKMbeomO2LlXC40391",
|
||||
"content": "[{\"content\":\"Rewrite CSS with cleaner minimal styling and advanced animations\",\"status\":\"in_progress\",\"priority\":\"high\"},{\"content\":\"Simplify slide layouts — remove excess chrome, tighter copy\",\"status\":\"pending\",\"priority\":\"high\"},{\"content\":\"Add scroll-triggered parallax and staggered micro-animations\",\"status\":\"pending\",\"priority\":\"high\"},{\"content\":\"Verify all animations work and deck is responsive\",\"status\":\"pending\",\"priority\":\"medium\"}]",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_rb6Fsyip171hkqP6OXk18071",
|
||||
"name": "write",
|
||||
"input": "{\"filePath\":\"C:\\\\Users\\\\borde\\\\AppData\\\\Roaming\\\\Open Design\\\\namespaces\\\\release-stable-win\\\\data\\\\projects\\\\proj-001\\\\evidguard-pitch-deck.html\",\"content\":\"<!doctype html>\\n<html lang=\\\"es\\\">\\n<head>\\n <meta charset=\\\"utf-8\\\" />\\n <meta name=\\\"viewport\\\" content=\\\"width=device-width, initial-scale=1\\\" />\\n <title>EvidGuard · Pitch deck</title>\\n <style>\\n :root {\\n --bg: #ffffff;\\n --surface: #f5f5f7;\\n --fg: #1d1d1f;\\n --fg-2: #424245;\\n --muted: #6e6e73;\\n --border: #d2d2d7;\\n --accent: #e5ff00;\\n --accent-on: #000000;\\n\\n --font-display: \\\"SF Pro Display\\\", \\\"SF Pro Icons\\\", \\\"Helvetica Neue\\\", Helvetica, Arial, sans-serif;\\n --font-body: \\\"SF Pro Text\\\", \\\"SF Pro Icons\\\", \\\"Helvetica Neue\\\", Helvetica, Arial, sans-serif;\\n --font-mono: \\\"SF Mono\\\", ui-monospace, \\\"JetBrains Mono\\\", \\\"IBM Plex Mono\\\", Menlo, monospace;\\n\\n --dark-bg: #000000;\\n --light-text: #f5f5f7;\\n\\n --radius-sm: 6px;\\n --radius-pill: 980px;\\n\\n --ease-standard: cubic-bezier(0.28, 0, 0.22, 1);\\n --ease-out: cubic-bezier(0.16, 1, 0.3, 1);\\n --ease-in-out: cubic-bezier(0.76, 0, 0.24, 1);\\n }\\n *,*::before,*::after{box-sizing:border-box;margin:0;padding:0}\\n\\n @keyframes revealUp {\\n 0% { opacity: 0; transform: translateY(28px); }\\n 100% { opacity: 1; transform: translateY(0); }\\n }\\n @keyframes revealScale {\\n 0% { opacity: 0; transform: scale(0.92); }\\n 100% { opacity: 1; transform: scale(1); }\\n }\\n @keyframes revealAccent {\\n 0% { opacity: 0; transform: scaleX(0); }\\n 100% { opacity: 1; transform: scaleX(1); }\\n }\\n @keyframes glowPulse {\\n 0%, 100% { box-shadow: 0 0 30px color-mix(in oklch, var(--accent) 10%, transparent); }\\n 50% { box-shadow: 0 0 60px color-mix(in oklch, var(--accent) 18%, transparent); }\\n }\\n @keyframes counterPop {\\n 0% { transform: scale(1); }\\n 50% { transform: scale(1.12); }\\n 100% { transform: scale(1); }\\n }\\n 
@keyframes progressShimmer {\\n 0% { background-position: -200% 0; }\\n 100% { background-position: 200% 0; }\\n }\\n\\n html,body{margin:0;height:100%}\\n body {\\n background: var(--dark-bg);\\n color: var(--fg);\\n font-family: var(--font-body);\\n font-size: 18px;\\n line-height: 1.55;\\n -webkit-font-smoothing: antialiased;\\n display: flex;\\n overflow-x: auto;\\n overflow-y: hidden;\\n scroll-snap-type: x mandatory;\\n scroll-behavior: smooth;\\n }\\n body::-webkit-scrollbar{display:none}\\n p{text-wrap:pretty}\\n h1,h2,h3,h4{text-wrap:balance;font-weight:500}\\n\\n .slide {\\n flex: 0 0 100vw;\\n width: 100vw;\\n height: 100vh;\\n scroll-snap-align: start;\\n padding: clamp(60px, 8vw, 100px) clamp(64px, 10vw, 140px);\\n display: flex;\\n flex-direction: column;\\n justify-content: center;\\n position: relative;\\n overflow: hidden;\\n }\\n\\n /* ── Entrance animation layers ── */\\n .slide .entrance-1 { animation: revealUp 0.7s var(--ease-out) both; }\\n .slide .entrance-2 { animation: revealUp 0.7s var(--ease-out) 0.15s both; }\\n .slide .entrance-3 { animation: revealUp 0.7s var(--ease-out) 0.3s both; }\\n .slide .entrance-4 { animation: revealUp 0.7s var(--ease-out) 0.45s both; }\\n .slide .entrance-5 { animation: revealUp 0.7s var(--ease-out) 0.6s both; }\\n .slide .entrance-scale { animation: revealScale 0.8s var(--ease-out) 0.1s both; }\\n .slide.hero .eyebrow { animation: revealUp 0.6s var(--ease-out) 0.05s both; }\\n .slide.hero .h-hero { animation: revealUp 0.8s var(--ease-out) 0.2s both; }\\n\\n .slide.light { background: var(--bg); }\\n .slide.dark { background: var(--dark-bg); color: var(--light-text); }\\n .slide.center { align-items: center; text-align: center; }\\n .slide.center .entrance-1,\\n .slide.center .entrance-2,\\n .slide.center .entrance-3 { margin-inline: auto; }\\n\\n .eyebrow {\\n font-"
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_rb6Fsyip171hkqP6OXk18071",
|
||||
"content": "Wrote file successfully.",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_4eTH38Zw5qidE8X3PTkU1514",
|
||||
"name": "todowrite",
|
||||
"input": {
|
||||
"todos": [
|
||||
{
|
||||
"priority": "high",
|
||||
"content": "Rewrite CSS with cleaner minimal styling and advanced animations",
|
||||
"status": "completed"
|
||||
},
|
||||
{
|
||||
"priority": "high",
|
||||
"content": "Simplify slide layouts — remove excess chrome, tighter copy",
|
||||
"status": "completed"
|
||||
},
|
||||
{
|
||||
"priority": "high",
|
||||
"content": "Add scroll-triggered parallax and staggered micro-animations",
|
||||
"status": "completed"
|
||||
},
|
||||
{
|
||||
"priority": "medium",
|
||||
"content": "Verify all animations work and deck is responsive",
|
||||
"status": "in_progress"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_4eTH38Zw5qidE8X3PTkU1514",
|
||||
"content": "[{\"content\":\"Rewrite CSS with cleaner minimal styling and advanced animations\",\"status\":\"completed\",\"priority\":\"high\"},{\"content\":\"Simplify slide layouts — remove excess chrome, tighter copy\",\"status\":\"completed\",\"priority\":\"high\"},{\"content\":\"Add scroll-triggered parallax and staggered micro-animations\",\"status\":\"completed\",\"priority\":\"high\"},{\"content\":\"Verify all animations work and deck is responsive\",\"status\":\"in_progress\",\"priority\":\"medium\"}]",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_01_C7MtbryrHlpniLdo33pp2533",
|
||||
"name": "grep",
|
||||
"input": {
|
||||
"pattern": "class=\"slide",
|
||||
"path": "%USERPROFILE%\\AppData\\Roaming\\Open Design\\namespaces\\release-stable-win\\data\\projects\\proj-001",
|
||||
"include": "evidguard-pitch-deck.html"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_01_C7MtbryrHlpniLdo33pp2533",
|
||||
"content": "Found 5 matches\n%USERPROFILE%\\AppData\\Roaming\\Open Design\\namespaces\\release-stable-win\\data\\projects\\proj-001\\evidguard-pitch-deck.html:\n Line 301: <section class=\"slide hero dark center\" data-screen-label=\"01 Cover\">\n\n Line 314: <section class=\"slide light\" data-screen-label=\"02 Performance\">\n\n Line 336: <section class=\"slide hero dark center\" data-screen-label=\"03 Enterprise\">\n\n Line 342: <section class=\"slide light\" data-screen-label=\"04 ROI\">\n\n Line 361: <section class=\"slide hero dark center\" data-screen-label=\"05 CTA\">\n",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_O7Acmy77VQy2GyT4o6mk6480",
|
||||
"name": "bash",
|
||||
"input": {
|
||||
"command": "Get-Content \"%USERPROFILE%\\AppData\\Roaming\\Open Design\\namespaces\\release-stable-win\\data\\projects\\proj-001\\evidguard-pitch-deck.html\" -Tail 5",
|
||||
"description": "Verify file ends properly"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_O7Acmy77VQy2GyT4o6mk6480",
|
||||
"content": " } catch (_) { setActive(0); }\r\n })();\r\n </script>\r\n</body>\r\n</html>\r\n",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "tool_use",
|
||||
"id": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_njLhyQDJQcFCAXLXT4fD3309",
|
||||
"name": "todowrite",
|
||||
"input": {
|
||||
"todos": [
|
||||
{
|
||||
"priority": "high",
|
||||
"content": "Rewrite CSS with cleaner minimal styling and advanced animations",
|
||||
"status": "completed"
|
||||
},
|
||||
{
|
||||
"priority": "high",
|
||||
"content": "Simplify slide layouts — remove excess chrome, tighter copy",
|
||||
"status": "completed"
|
||||
},
|
||||
{
|
||||
"priority": "high",
|
||||
"content": "Add scroll-triggered parallax and staggered micro-animations",
|
||||
"status": "completed"
|
||||
},
|
||||
{
|
||||
"priority": "medium",
|
||||
"content": "Verify all animations work and deck is responsive",
|
||||
"status": "completed"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "tool_result",
|
||||
"toolUseId": "9a9522ec-575f-432f-aeed-efc491e900aa-tool-call_00_njLhyQDJQcFCAXLXT4fD3309",
|
||||
"content": "[{\"content\":\"Rewrite CSS with cleaner minimal styling and advanced animations\",\"status\":\"completed\",\"priority\":\"high\"},{\"content\":\"Simplify slide layouts — remove excess chrome, tighter copy\",\"status\":\"completed\",\"priority\":\"high\"},{\"content\":\"Add scroll-triggered parallax and staggered micro-animations\",\"status\":\"completed\",\"priority\":\"high\"},{\"content\":\"Verify all animations work and deck is responsive\",\"status\":\"completed\",\"priority\":\"medium\"}]",
|
||||
"isError": false
|
||||
},
|
||||
{
|
||||
"type": "text_delta",
|
||||
"delta": "Hecho — `evidguard-pitch-deck.html` actualizado con dos mejoras clave:\n\n**Minimalista:** sin borders dashed en el logo, sin hint text, sin bullets decorativos, tipografía más limpia, líneas de acento finas como único adorno visual.\n\n**Animaciones profesionales:** cada slide tiene 4 capas de entrada con `revealUp` escalonadas (0.05s → 0.2s → 0.3s → 0.45s), el número grande (94%) escala con `revealScale`, el contador rebota con `counterPop` al cambiar de slide, la barra de progreso tiene un shimmer animado, y las easing curves son Apple-style (`cubic-bezier(0.16, 1, 0.3, 1)`) para un movimiento que se siente físico, no lineal."
|
||||
},
|
||||
{
|
||||
"type": "usage",
|
||||
"usage": {
|
||||
"input_tokens": 0,
|
||||
"output_tokens": 379,
|
||||
"thought_tokens": 0,
|
||||
"cached_read_tokens": 0,
|
||||
"cached_write_tokens": 0
|
||||
},
|
||||
"costUsd": 0
|
||||
}
|
||||
]
|
||||
}
|
||||
56
mocks/golden/README.md
Normal file
56
mocks/golden/README.md
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
# `mocks/golden/` — daemon-event regression snapshots
|
||||
|
||||
Each `<trace-id>.events.json` is a committed snapshot of the event
|
||||
sequence that flows out of the OD daemon's stream handler when fed the
|
||||
matching mock CLI's output for that recording.
|
||||
|
||||
The shape is:
|
||||
|
||||
```json
|
||||
{
|
||||
"agent": "claude",
|
||||
"trace": "314d6833-...",
|
||||
"events": [
|
||||
{ "type": "status", ... },
|
||||
{ "type": "tool_use", "name": "Read", ... },
|
||||
{ "type": "tool_result", ... },
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
## Why these exist
|
||||
|
||||
`mocks/scripts/smoke-test.sh` confirms the mock CLIs *run* and emit
|
||||
events. It doesn't tell you whether the events have the *right shape*
|
||||
— a parser change in `apps/daemon/src/claude-stream.ts` or
|
||||
`json-event-stream.ts` could silently drop a field, rename
|
||||
`sessionID` → `sessionId`, or stop emitting `turn_end`. The goldens
|
||||
are diffed by `apps/daemon/tests/mocks-golden.test.ts`; a regression
|
||||
makes that test fail loudly.
|
||||
|
||||
## Updating after an intentional parser change
|
||||
|
||||
```bash
|
||||
MOCKS_GOLDEN_UPDATE=1 pnpm --filter @open-design/daemon test mocks-golden
|
||||
git diff mocks/golden/
|
||||
git add mocks/golden/ && git commit -m "mocks: refresh goldens — parser X now emits Y"
|
||||
```
|
||||
|
||||
The diff is the part the reviewer eyeballs. If the change matches what
|
||||
the parser refactor intended, ship. If it doesn't, the refactor broke
|
||||
something.
|
||||
|
||||
## Coverage
|
||||
|
||||
| File | Agent | Trace | Tools | Why it's representative |
|
||||
|---|---|---|---|---|
|
||||
| `314d6833…events.json` | claude | agent-browser session | 8 | Median complexity for claude; exercises Read / Write / Edit / Bash tool shapes + content_block streaming |
|
||||
| `9a9522ec…events.json` | opencode | data-report session | 7 | Covers `step_start` / `step_finish` / `tool_use` / `cost` envelope |
|
||||
| `dcdff3b3…events.json` | codex | 14-tool refactor | 14 | High tool density; exercises `thread.started` + thread message streaming |
|
||||
|
||||
Other agents (gemini / cursor-agent / plain / ACP / vela) aren't
|
||||
golden-tested here because their parsers either (a) emit only assistant
|
||||
text (gemini, cursor-agent, plain) — covered by simpler unit tests, or
|
||||
(b) use JSON-RPC over stdio rather than streamed stdout (ACP / vela)
|
||||
— which needs a different harness; see `apps/daemon/tests/acp.test.ts`.
|
||||
222
mocks/golden/dcdff3b3-cd39-4dcd-be83-372830a29639.events.json
Normal file
222
mocks/golden/dcdff3b3-cd39-4dcd-be83-372830a29639.events.json
Normal file
File diff suppressed because one or more lines are too long
6452
mocks/manifest.json
6452
mocks/manifest.json
File diff suppressed because it is too large
Load diff
108
mocks/scripts/contract-check.sh
Executable file
108
mocks/scripts/contract-check.sh
Executable file
|
|
@ -0,0 +1,108 @@
|
|||
#!/usr/bin/env bash
|
||||
# Real-CLI contract check — spot-check that the actual agent CLI's
|
||||
# stream protocol still matches what the mock formatters emit. Catches
|
||||
# drift like:
|
||||
# - real CLI adds a new event `type`
|
||||
# - real CLI changes a field name (e.g. `sessionID` → `sessionId`)
|
||||
# - real CLI's usage object gains/loses a sub-key
|
||||
#
|
||||
# When mocks drift toward "satisfy the current OD parser" instead of
|
||||
# "mimic the actual CLI protocol", the OD-level smoke tests stay green
|
||||
# but real-world behavior diverges. Periodic runs of this script (manual
|
||||
# or scheduled in CI on a real-CLI-available runner) surface that drift
|
||||
# before it becomes a real-PR debugging session.
|
||||
#
|
||||
# bash mocks/scripts/contract-check.sh <agent>
|
||||
# bash mocks/scripts/contract-check.sh claude
|
||||
# bash mocks/scripts/contract-check.sh opencode
|
||||
#
|
||||
# Prereqs: the real agent CLI must be installed AND authenticated. This
|
||||
# script DOES make a live LLM call (couple of cents of tokens) so it's
|
||||
# not free.
|
||||
#
|
||||
# Output: side-by-side top-level event `type` distribution from real
|
||||
# vs mock for the same prompt. A maintainer eyeballs the diff. Future
|
||||
# work (see docs/MOCKS-CONTRACT-CHECK.md) is to lift this into a
|
||||
# stricter structural compare.
|
||||
|
||||
# Strict mode: stop on command failure, on unset variables, and on a
# failure in any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# The agent name is the single required positional argument.
AGENT="${1:-}"
if [[ -z "$AGENT" ]]; then
  {
    echo "usage: $0 <agent>"
    echo " supported: claude | opencode | codex"
  } >&2
  exit 2
fi

# Physical (symlink-resolved) directory of this script, and its parent.
HERE="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd -P)"
MOCKS_DIR="$(cd "$HERE/.." && pwd -P)"
|
||||
|
||||
# Fixed deterministic prompt — small, no creative variability, costs
# pennies. Picked to be a realistic OD-shaped task: 1-2 tool calls.
PROMPT='List the entries of the current working directory and tell me how many JSON files are present. Reply with just the count, like "N JSON files".'

# Capture files for the two runs. They live inside one private temp
# directory so the names can carry a .jsonl suffix without orphaning the
# file mktemp itself creates (appending ".jsonl" to mktemp's output names
# a *different*, non-existent path and leaks the real temp file).
tmp_dir="$(mktemp -d "${TMPDIR:-/tmp}/contract-check.XXXXXX")"
real_out="$tmp_dir/real.jsonl"
mock_out="$tmp_dir/mock.jsonl"
# Clean up on any early exit.
trap 'rm -rf "$tmp_dir"' EXIT
|
||||
|
||||
# Resolve the real CLI binary. We deliberately ignore mocks/bin/ PATH
# overlays.
case "$AGENT" in
  claude | codex | opencode) ;;
  *) echo "✗ unsupported agent for contract check: $AGENT" >&2; exit 2 ;;
esac

# PATH with every entry mentioning the mocks bin directory removed.
# -F makes grep treat the directory as a literal string; as a regex, any
# metacharacter in the checkout path ('.', '+', '[', …) could mis-filter.
# `|| true` covers grep exiting 1 when it filters out every entry.
path_without_mocks="$(echo "$PATH" | tr ':' '\n' | grep -vF -- "$MOCKS_DIR/bin" | paste -sd: - || true)"

# Look the agent up against the filtered PATH only; empty when not found.
real_bin="$(PATH="$path_without_mocks" command -v "$AGENT" || true)"
if [ -z "$real_bin" ]; then
  echo "✗ real '$AGENT' CLI not on PATH. Install + login, then re-run." >&2
  exit 1
fi
echo "real CLI: $real_bin"
echo "prompt: $PROMPT"
echo
|
||||
|
||||
# 1. Real CLI
# The prompt is piped on stdin; stdout and stderr are both captured into
# $real_out. A non-zero exit from the CLI is tolerated.
echo "→ invoking real $AGENT…"
real_args=()
case "$AGENT" in
  claude)   real_args=(-p --output-format=stream-json --verbose) ;;
  codex)    real_args=(exec) ;;
  opencode) real_args=(run) ;;
esac
# Nothing runs for an agent name outside the three arms above.
if ((${#real_args[@]})); then
  printf '%s' "$PROMPT" | "$real_bin" "${real_args[@]}" >"$real_out" 2>&1 || true
fi
|
||||
|
||||
# 2. Mock CLI (PATH-overlay)
# Runs the same command line as the real invocation, but inside a child
# bash whose PATH starts with mocks/bin so the agent name resolves there.
echo "→ invoking mock $AGENT…"
mock_argv=()
case "$AGENT" in
  claude)   mock_argv=(claude -p --output-format=stream-json --verbose) ;;
  codex)    mock_argv=(codex exec) ;;
  opencode) mock_argv=(opencode run) ;;
esac
if ((${#mock_argv[@]})); then
  # PROMPT and MOCK_OUT go to the child through its environment: PROMPT is
  # not exported by this script, so a child shell expanding $PROMPT would
  # otherwise see an empty string and feed the mock an empty prompt.
  # The command line is passed as positional args ("$@"), so nothing is
  # spliced into the script text.
  PATH="$MOCKS_DIR/bin:$PATH" OD_MOCKS_NO_DELAY=1 PROMPT="$PROMPT" MOCK_OUT="$mock_out" \
    bash -c 'printf "%s" "$PROMPT" | "$@" >"$MOCK_OUT" 2>&1' contract-check-mock "${mock_argv[@]}" || true
fi
|
||||
|
||||
# 3. Compare top-level event `type` distributions (skip content)

# summarize FILE
#   Prints a "count type" histogram of the top-level `type` field of every
#   JSON object line in FILE, most frequent first.
#
#   Each line is read raw (-R) and parsed on its own with `fromjson?`, so
#   lines that are not JSON are skipped rather than aborting jq at the
#   first parse error. The capture files mix stderr into stdout (2>&1), so
#   such lines are expected. Non-object JSON values are ignored; objects
#   without a `type` key are counted as "null".
summarize() {
  jq -rR 'fromjson? | objects | .type' "$1" 2>/dev/null | sort | uniq -c | sort -rn || true
}
real_summary=$(summarize "$real_out")
mock_summary=$(summarize "$mock_out")
|
||||
|
||||
# Print both histograms, each line indented by one space, with a
# placeholder when a run produced no parseable events.
echo
echo "real:"
printf '%s\n' "${real_summary:- (no parseable JSON events)}" | sed 's/^/ /'
echo
echo "mock:"
printf '%s\n' "${mock_summary:- (no parseable JSON events)}" | sed 's/^/ /'
echo
echo "raw outputs kept at:"
echo " real: $real_out"
echo " mock: $mock_out"
# The backticks are escaped so the hint is printed literally; unescaped
# backticks inside double quotes are command substitution and would run
# the diff here instead of showing it to the maintainer.
echo "(diff manually — \`diff <(jq -r .type $real_out|sort -u) <(jq -r .type $mock_out|sort -u)\`)"
trap - EXIT # leave the tmpfiles for the maintainer to inspect
|
||||
|
|
@ -45,6 +45,15 @@ export function inspectRecording(jsonlPath) {
|
|||
? String(meta.user_input).slice(0, 200).replace(/\s+/g, ' ').trim()
|
||||
: null,
|
||||
multi_turn: false, // filled in after histograms
|
||||
// ──── Provenance (fixture-trust signals; mrcfps review #3241) ────
|
||||
// Capture-time fields the harvester writes into the meta event.
|
||||
// Older recordings may not have them all — null is the explicit "we
|
||||
// don't know" so consumers can decide whether the fixture is still
|
||||
// trustworthy as the real CLIs evolve.
|
||||
captured_at: meta.timestamp ?? null, // ISO 8601 of the original session
|
||||
cli_version: meta.cli_version ?? null, // e.g. "claude-code 1.0.65" / "codex 0.40"
|
||||
protocol_version: meta.protocol_version ?? null,
|
||||
anonymization_version: meta.anonymization_version ?? null,
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue