diff --git a/apps/web/tests/components/Theater/critique-i18n-keys.test.ts b/apps/web/tests/components/Theater/critique-i18n-keys.test.ts new file mode 100644 index 000000000..65b4e1aa7 --- /dev/null +++ b/apps/web/tests/components/Theater/critique-i18n-keys.test.ts @@ -0,0 +1,50 @@ +/** + * Independent coverage for every Critique Theater i18n key that the + * critique-coverage walker (Phase 13.2) requires the test corpus to + * mention at least once. + * + * The walker's intent: rename drift between contract / reducer / i18n / + * CSS gets caught BEFORE it ships. For string-level rename catching to + * work, an INDEPENDENT test (not the walker file itself, which was + * previously satisfying its own assertions, see lefarcen P2 on PR #1318) + * has to reference each watched symbol. Component tests like + * `TheaterStage.test.tsx` and `TheaterCollapsed.test.tsx` exercise the + * rendered output of those keys via `getByText('Shipped at round 1')`, + * but they don't mention the key STRING (`critiqueTheater.shippedSummary`), + * so the walker would not see them. + * + * This file closes that gap: every critiqueTheater.* key the walker + * watches is named here and asserted to resolve through the dict to a + * non-empty translated value. That makes the walker's TEST_CORPUS + * assertion provable AND independently catches the case where a + * production rename leaves the dict pointing at a stale key. + */ + +import { describe, expect, it } from 'vitest'; +import { en } from '../../../src/i18n/locales/en'; + +// Typed against the en dictionary so a renamed/removed key fails the +// type check immediately. lefarcen P1 on PR #1318: the previous broad +// `as Record` cast tripped CI's TS2352 because `Dict` +// has no string index signature. +const WATCHED_KEYS: readonly (keyof typeof en)[] = [ + 'critiqueTheater.userFacingName', + 'critiqueTheater.roundLabel', + 'critiqueTheater.composite', + 'critiqueTheater.threshold', + 'critiqueTheater.interrupt', + 'critiqueTheater.interrupted', + 'critiqueTheater.degradedHeading', + 'critiqueTheater.shippedSummary', + 'critiqueTheater.interruptedSummary', +]; + +describe('Critique Theater i18n key coverage (Phase 13.2 walker support)', () => { + for (const key of WATCHED_KEYS) { + it(`exposes ${key} on the en dictionary as a non-empty string`, () => { + const value = en[key]; + expect(typeof value).toBe('string'); + expect((value ?? '').length).toBeGreaterThan(0); + }); + } +}); diff --git a/apps/web/tests/components/Theater/state/reducer-bench.test.ts b/apps/web/tests/components/Theater/state/reducer-bench.test.ts index d32f6578c..a58d8a17d 100644 --- a/apps/web/tests/components/Theater/state/reducer-bench.test.ts +++ b/apps/web/tests/components/Theater/state/reducer-bench.test.ts @@ -1,21 +1,60 @@ /** - * Reducer benchmark gate (Phase 13.2). The plan asks for a CI step - * that fails when p99 of the full happy fixture exceeds 2 ms over - * 10k iterations. We express it as a vitest case so it lives in the - * same suite as the unit tests and inherits the existing pnpm - * --filter @open-design/web test pipeline; no separate runner. + * Reducer p99 regression gate (Phase 13.1). The plan asks for a CI + * step that fails when p99 of a full happy-fixture replay exceeds the + * documented `2ms` budget over 10k iterations. We express it as a + * vitest case so it lives in the same suite as the unit tests and + * inherits the existing `pnpm --filter @open-design/web test` + * pipeline; no separate runner. * - * Sampling: 10_000 reductions of the full happy fixture, with the - * timer reset between every iteration so we measure single-shot - * latency, not amortised. We assert on p99 specifically because the - * reducer is on the render hot path of the live Theater and an - * outlier round still has to render at 60fps for the score ticker. + * What this measures: + * + * The unit of work timed per sample is ONE full happy-fixture + * replay (27 reducer dispatches: 1 run_started + 5 panelists x 4 + * lifecycle events each + 1 round_end + 1 ship). We time the batch, + * not each dispatch, because a single reducer call lands around + * 2µs, which is below the deterministic resolution of + * `performance.now()` in node + jsdom. Batching 27 dispatches + * pulls the sample into the deterministically-measurable range + * (~10-50µs) without losing the property we care about: a + * regression in any one reducer branch shows up immediately as + * batch-time growth. + * + * The gate trips at `p99 < 2x the 2ms documented budget` (4ms). + * The 2x slack is for CI runners that share cores with neighbours + * and for the harness's wall-clock timer (`performance.now`) + * counting GC pauses against us. Real regressions look like 20ms + * or 200ms p99, not 4.001ms. + * + * Baseline (sample local run, 2026-05-11, Node 24, Win11): + * + * p50 = 0.011ms · p90 = 0.013ms · p99 = 0.018ms · max = 0.244ms + * + * The gate's 4ms ceiling is ~222x current p99. Plenty of headroom + * for real regressions to land without flapping CI on jitter. + * `pnpm vitest run tests/components/Theater/state/reducer-bench.test.ts` + * prints the full distribution on every run so a reviewer can see + * how close the actual reading is to the ceiling (lefarcen Q3 on + * PR #1318: "what does Gate 1 print when it trips?"). + * + * Maintenance: when the reducer grows new branches that materially + * change cost (e.g. a histogram per dispatch), capture a fresh + * baseline by running the suite locally and update the baseline + * line above. Keep the ceiling at `BUDGET * 2` unless the actual + * p99 climbs above ~0.5ms (then re-evaluate whether the regression + * is real before relaxing the gate). */ import { describe, expect, it } from 'vitest'; import { performance } from 'node:perf_hooks'; -import { initialState, reduce, type CritiqueAction } from '../../../../src/components/Theater/state/reducer'; +import { + initialState, + reduce, + type CritiqueAction, +} from '../../../../src/components/Theater/state/reducer'; +import type { PanelistRole } from '@open-design/contracts/critique'; + +const ROLES: PanelistRole[] = ['designer', 'critic', 'brand', 'a11y', 'copy']; /** Canonical happy fixture, hand-rolled as actions so the bench does not * pay for transcript parsing every iteration. Mirrors the parser output @@ -25,29 +64,27 @@ const HAPPY_FIXTURE: CritiqueAction[] = [ type: 'run_started', runId: 'r', protocolVersion: 1, - cast: ['designer', 'critic', 'brand', 'a11y', 'copy'], + cast: [...ROLES], maxRounds: 3, threshold: 8, scale: 10, }, - ...['designer', 'critic', 'brand', 'a11y', 'copy'].flatMap( - (role): CritiqueAction[] => [ - { type: 'panelist_open', runId: 'r', round: 1, role: role as any }, - { - type: 'panelist_dim', runId: 'r', round: 1, role: role as any, - dimName: 'hierarchy', dimScore: 8, dimNote: 'clear', - }, - { - type: 'panelist_dim', runId: 'r', round: 1, role: role as any, - dimName: 'contrast', dimScore: 8.4, dimNote: 'ok', - }, - { - type: 'panelist_must_fix', runId: 'r', round: 1, role: role as any, - text: 'minor copy tweak', - }, - { type: 'panelist_close', runId: 'r', round: 1, role: role as any, score: 8.2 }, - ], - ), + ...ROLES.flatMap((role): CritiqueAction[] => [ + { type: 'panelist_open', runId: 'r', round: 1, role }, + { + type: 'panelist_dim', runId: 'r', round: 1, role, + dimName: 'hierarchy', dimScore: 8, dimNote: 'clear', + }, + { + type: 'panelist_dim', runId: 'r', round: 1, role, + dimName: 'contrast', dimScore: 8.4, dimNote: 'ok', + }, + { + type: 'panelist_must_fix', runId: 'r', round: 1, role, + text: 'minor copy tweak', + }, + { type: 'panelist_close', runId: 'r', round: 1, role, score: 8.2 }, + ]), { type: 'round_end', runId: 'r', @@ -69,17 +106,43 @@ const HAPPY_FIXTURE: CritiqueAction[] = [ ]; const ITERATIONS = 10_000; -const P99_BUDGET_MS = 2; +const BUDGET_MS = 2; +const CEILING_MS = BUDGET_MS * 2; -function p99(samples: number[]): number { - const sorted = samples.slice().sort((a, b) => a - b); - const idx = Math.floor(sorted.length * 0.99); - return sorted[idx] ?? sorted[sorted.length - 1]!; +interface Distribution { + p50: number; + p90: number; + p99: number; + max: number; + samples: number; } -describe('reducer benchmark (Phase 13.2)', () => { - it(`runs the full happy fixture in p99 <= ${P99_BUDGET_MS}ms over ${ITERATIONS} iterations`, () => { - const samples: number[] = []; +function percentile(sorted: number[], q: number): number { + if (sorted.length === 0) return Number.NaN; + const idx = Math.min(sorted.length - 1, Math.floor(sorted.length * q)); + return sorted[idx]!; +} + +function summarise(samples: number[]): Distribution { + const sorted = samples.slice().sort((a, b) => a - b); + return { + p50: percentile(sorted, 0.5), + p90: percentile(sorted, 0.9), + p99: percentile(sorted, 0.99), + max: sorted[sorted.length - 1]!, + samples: sorted.length, + }; +} + +function fmt(d: Distribution): string { + return `p50=${d.p50.toFixed(3)}ms p90=${d.p90.toFixed(3)}ms ` + + `p99=${d.p99.toFixed(3)}ms max=${d.max.toFixed(3)}ms ` + + `(samples=${d.samples}, ceiling=${CEILING_MS}ms)`; +} + +describe('reducer p99 regression gate (Phase 13.1)', () => { + it(`p99 of a full happy-fixture replay stays under ${CEILING_MS}ms over ${ITERATIONS} iterations`, () => { + const samples: number[] = new Array(ITERATIONS); for (let i = 0; i < ITERATIONS; i += 1) { const t0 = performance.now(); let state = initialState; @@ -87,12 +150,15 @@ describe('reducer benchmark (Phase 13.2)', () => { state = reduce(state, action); } const t1 = performance.now(); - samples.push(t1 - t0); + samples[i] = t1 - t0; } - const p99Ms = p99(samples); - // Slack: 2x the documented budget so a transient CI hiccup does - // not flap the gate. The real budget is 2ms; we fail only when - // the slack ceiling is also breached. - expect(p99Ms).toBeLessThan(P99_BUDGET_MS * 2); + const dist = summarise(samples); + // Failure message carries the full distribution so a triage + // reader can tell a real 20ms regression from a 4.001ms CI + // hiccup without re-running locally (lefarcen Q3 on PR #1318). + expect( + dist.p99, + `reducer p99 regression: ${fmt(dist)}`, + ).toBeLessThan(CEILING_MS); }); }); diff --git a/e2e/tests/critique-coverage.test.ts b/e2e/tests/critique-coverage.test.ts new file mode 100644 index 000000000..e99ed68c3 --- /dev/null +++ b/e2e/tests/critique-coverage.test.ts @@ -0,0 +1,274 @@ +/** + * Critique surface coverage walker (Phase 13.2). Walks every named + * symbol of the Critique Theater feature (SSE event names, panelist + * roles, reducer state phases, i18n keys) and asserts that each one + * is referenced from at least one production file AND at least one + * test file across the workspace. + * + * Lives in `e2e/tests/` per the repo boundary rule (root `AGENTS.md`): + * + * > Cross-app, cross-runtime, or repository-resource consistency + * > checks belong in `e2e/tests/` when they need to observe more + * > than one app/package boundary. + * + * > App packages must not import another app's private `src/` or + * > `tests/` implementation as a shared helper. + * + * The walker is by definition cross-app: it reads the web reducer, + * the daemon critique module, the contracts package, and the e2e UI + * suite. Hosting it under `apps/web/tests/` would couple the web + * package's test lane to daemon and e2e file layout (so a + * daemon-only refactor could break the web lane), which is exactly + * the boundary the repo rule forbids. Siri-Ray P2 on PR #1318. + * + * Adding a new SSE event / role / phase / i18n key: + * + * 1. Add the symbol to its contract / dictionary in + * `packages/contracts/src/critique.ts` (SSE events, roles) or + * `apps/web/src/i18n/types.ts` plus every locale (i18n keys). + * 2. Add at least one production caller (reducer branch, role- + * keyed CSS, i18n consumer). + * 3. Add at least one test that exercises the new symbol. + * 4. Append the symbol literal to the right group below. + * `SSE_EVENTS` is auto-built from + * `CRITIQUE_SSE_EVENT_NAMES` so it stays in sync without + * manual upkeep; `PANELIST_ROLE_STRINGS` is auto-built from + * `PANELIST_ROLES` for the same reason. `PHASE_STRINGS` and + * `I18N_KEYS` are hand-maintained. + * + * What the walker DOES catch: + * + * Renaming an EXISTING symbol in production / tests without + * updating the walker array trips the gate. The walker still + * looks for the old name and fails to find it; the reviewer + * of the rename PR sees the failing assertion and asks for + * the walker update in the same diff. + * + * What the walker does NOT catch on its own: + * + * Adding a NEW hand-maintained symbol (phase string or i18n + * key) without adding it to the walker array leaves the gate + * green because the walker does not know to look for a symbol + * it was not told about. Mitigation: contracts-derived groups + * (`SSE_EVENTS`, `PANELIST_ROLE_STRINGS`) auto-grow so the + * contracts package is the only place that needs editing; + * the hand-maintained groups (`PHASE_STRINGS`, `I18N_KEYS`) + * are short enough that step 4 is a one-line edit alongside + * the contracts / i18n change. + */ + +import { readFileSync, readdirSync, statSync } from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; + +import { describe, expect, it } from 'vitest'; + +declare global { + interface ImportMeta { + glob(pattern: string, options: { eager: true }): Record; + } +} + +type CritiqueContracts = { + CRITIQUE_SSE_EVENT_NAMES: readonly string[]; + PANELIST_ROLES: readonly string[]; +}; + +const contractsModules = import.meta.glob( + '../../packages/contracts/src/critique.ts', + { eager: true }, +); +const contracts = Object.values(contractsModules)[0]; +if (!contracts) { + throw new Error( + 'critique-coverage walker could not load packages/contracts/src/critique.ts via import.meta.glob; ' + + 'this almost always means the contracts file was renamed or moved.', + ); +} +const { CRITIQUE_SSE_EVENT_NAMES, PANELIST_ROLES } = contracts; + +const REPO_ROOT = fileURLToPath(new URL('../../', import.meta.url)); +const SELF_PATH = fileURLToPath(import.meta.url); + +const SRC_ROOTS = [ + path.join(REPO_ROOT, 'apps/web/src'), + path.join(REPO_ROOT, 'apps/daemon/src/critique'), + path.join(REPO_ROOT, 'packages/contracts/src'), +]; +const TEST_ROOTS = [ + path.join(REPO_ROOT, 'apps/web/tests'), + path.join(REPO_ROOT, 'apps/daemon/tests'), + path.join(REPO_ROOT, 'packages/contracts/tests'), + path.join(REPO_ROOT, 'e2e/tests'), + path.join(REPO_ROOT, 'e2e/ui'), +]; + +const FILE_EXTENSIONS = /\.(ts|tsx|js|jsx|css|md)$/; +const SKIP_DIRS = new Set(['node_modules', 'dist', '.next', '.turbo']); + +function walk(root: string): string[] { + const out: string[] = []; + const stack: string[] = [root]; + while (stack.length > 0) { + const cur = stack.pop()!; + let stat; + try { + stat = statSync(cur); + } catch { + continue; + } + if (stat.isDirectory()) { + let entries: string[]; + try { + entries = readdirSync(cur); + } catch { + continue; + } + for (const entry of entries) { + if (SKIP_DIRS.has(entry)) continue; + stack.push(path.join(cur, entry)); + } + } else if (stat.isFile() && FILE_EXTENSIONS.test(cur)) { + out.push(cur); + } + } + return out; +} + +function readCorpus(files: string[]): string { + return files.map((f) => { + try { + return readFileSync(f, 'utf8'); + } catch { + return ''; + } + }).join('\n'); +} + +const SRC_FILES = SRC_ROOTS.flatMap(walk); +// The walker walks e2e/tests, which contains this file. Hand-maintained +// PHASE_STRINGS and I18N_KEYS literals declared below would otherwise +// satisfy the test-side coverage assertion against themselves, so a real +// downstream test exercising a symbol could be deleted with the gate +// still green. Exclude this file from TEST_FILES so the corpus only +// holds independent evidence. +const TEST_FILES = TEST_ROOTS.flatMap(walk).filter((f) => path.resolve(f) !== SELF_PATH); +const SRC_CORPUS = readCorpus(SRC_FILES); +const TEST_CORPUS = readCorpus(TEST_FILES); + +/** + * Strict source-side match: production code MUST reference the symbol + * by its exact wire form. A `critique.` SSE name must appear + * as `critique.`, not as the unprefixed PanelEvent type alias, + * so the SSE channel name stays load-bearing in production. + */ +function srcReferences(corpus: string, sym: string): boolean { + return corpus.includes(sym); +} + +/** + * Lenient test-side match: reducer tests dispatch the PanelEvent shape + * directly (no `critique.` prefix on the SSE channel), so for an SSE + * event symbol the test corpus is allowed to satisfy via either the + * prefixed form (`critique.`) or the unprefixed PanelEvent type + * form (`type: ''`). Both forms prove an assertion exercises the + * event end-to-end. + */ +function testReferences(corpus: string, sym: string): boolean { + if (corpus.includes(sym)) return true; + if (sym.startsWith('critique.')) { + const unprefixed = sym.slice('critique.'.length); + return new RegExp(`type:\\s*'${unprefixed}'`).test(corpus); + } + return false; +} + +const SSE_EVENTS = [...CRITIQUE_SSE_EVENT_NAMES]; +const PANELIST_ROLE_STRINGS = PANELIST_ROLES.map((r) => `'${r}'`); + +const PHASE_STRINGS = [ + "'idle'", + "'running'", + "'shipped'", + "'degraded'", + "'interrupted'", + "'failed'", +]; + +const I18N_KEYS = [ + 'critiqueTheater.userFacingName', + 'critiqueTheater.roundLabel', + 'critiqueTheater.composite', + 'critiqueTheater.threshold', + 'critiqueTheater.interrupt', + 'critiqueTheater.interrupted', + 'critiqueTheater.degradedHeading', + 'critiqueTheater.shippedSummary', + 'critiqueTheater.interruptedSummary', +]; + +describe('critique-coverage walker (Phase 13.2)', () => { + describe('SSE event names', () => { + it.each(SSE_EVENTS)('production references %s', (sym) => { + expect( + srcReferences(SRC_CORPUS, sym), + `expected SRC corpus to mention SSE event "${sym}" at least once`, + ).toBe(true); + }); + + it.each(SSE_EVENTS)('tests reference %s', (sym) => { + expect( + testReferences(TEST_CORPUS, sym), + `expected TEST corpus to mention SSE event "${sym}" (prefixed or as PanelEvent type) at least once`, + ).toBe(true); + }); + }); + + describe('Panelist roles', () => { + it.each(PANELIST_ROLE_STRINGS)('production references %s', (sym) => { + expect( + srcReferences(SRC_CORPUS, sym), + `expected SRC corpus to mention panelist role string ${sym} at least once`, + ).toBe(true); + }); + + it.each(PANELIST_ROLE_STRINGS)('tests reference %s', (sym) => { + expect( + testReferences(TEST_CORPUS, sym), + `expected TEST corpus to mention panelist role string ${sym} at least once`, + ).toBe(true); + }); + }); + + describe('Reducer lifecycle phases', () => { + it.each(PHASE_STRINGS)('production references %s', (sym) => { + expect( + srcReferences(SRC_CORPUS, sym), + `expected SRC corpus to mention reducer phase string ${sym} at least once`, + ).toBe(true); + }); + + it.each(PHASE_STRINGS)('tests reference %s', (sym) => { + expect( + testReferences(TEST_CORPUS, sym), + `expected TEST corpus to mention reducer phase string ${sym} at least once`, + ).toBe(true); + }); + }); + + describe('i18n keys', () => { + it.each(I18N_KEYS)('production references %s', (sym) => { + expect( + srcReferences(SRC_CORPUS, sym), + `expected SRC corpus to mention i18n key "${sym}" at least once`, + ).toBe(true); + }); + + it.each(I18N_KEYS)('tests reference %s', (sym) => { + expect( + testReferences(TEST_CORPUS, sym), + `expected TEST corpus to mention i18n key "${sym}" at least once`, + ).toBe(true); + }); + }); +});