From a8277c678b431f2938e4927c6c1ad5201f06589d Mon Sep 17 00:00:00 2001 From: Amy <1184569493@qq.com> Date: Fri, 29 May 2026 17:37:01 +0800 Subject: [PATCH 1/3] Add main launch review E2E coverage --- .../launch-review-e2e-regressions.zh-CN.md | 45 ++++++++++++++ e2e/ui/amr-onboarding.test.ts | 40 +++++++++++- e2e/ui/app-manual-edit.test.ts | 24 +++++++ e2e/ui/project-management-flows.test.ts | 62 +++++++++++++++++++ e2e/ui/settings-api-protocol.test.ts | 62 +++++++++++++++++++ 5 files changed, 232 insertions(+), 1 deletion(-) diff --git a/docs/testing/launch-review-e2e-regressions.zh-CN.md b/docs/testing/launch-review-e2e-regressions.zh-CN.md index c44758e9b..eaa938fbf 100644 --- a/docs/testing/launch-review-e2e-regressions.zh-CN.md +++ b/docs/testing/launch-review-e2e-regressions.zh-CN.md @@ -151,6 +151,32 @@ 2. `filters user design systems by draft and published status in the manager` 3. `deleting the active design system falls back to another user system` +### 10. main 最新功能回归补测 + +文件: +- [e2e/ui/project-management-flows.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/e2e/ui/project-management-flows.test.ts) +- [e2e/ui/amr-onboarding.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/e2e/ui/amr-onboarding.test.ts) +- [e2e/ui/app-manual-edit.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/e2e/ui/app-manual-edit.test.ts) +- [e2e/ui/settings-api-protocol.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/e2e/ui/settings-api-protocol.test.ts) + +新增用例: + +1. `projects empty state create action opens the new project flow` + - 覆盖空项目页 CTA 能正确拉起创建流程 + +2. `project header keeps the settings, handoff, and avatar controls pinned on compact desktop widths` + - 覆盖紧凑桌面宽度下项目头部右侧控制区不被挤掉 + +3. `onboarding AMR card lets the user pick a live runtime model before continuing` + - 覆盖 onboarding 场景下 AMR 运行时卡片、模型选择与持久化 + +4. 
`simple deck keeps the active slide stable across preview mode switches` + - 覆盖 Simple Deck 在 Preview / Code 切换后的导航状态稳定性 + +5. `BYOK fetched models are searchable inside the Settings model dropdown` + - 覆盖大模型目录下 BYOK 模型下拉内搜索 + - 直接锁住 #3262 的高风险交互面 + ## 当前覆盖对应的产品结论 这批用例重点拦住的是下面这些历史高频回归: @@ -214,6 +240,25 @@ pnpm exec playwright test -c playwright.config.ts ui/diagnostics-export.test.ts pnpm exec playwright test -c playwright.config.ts ui/automations-page.test.ts --grep "places a newly created automation at the top of the list and highlights it|keeps saved automations ordered by newest createdAt first|renders the routine target and last-run status in the row summary" ``` + +### main 最新功能回归补测 + +```bash +pnpm exec playwright test -c playwright.config.ts ui/project-management-flows.test.ts --grep "projects empty state create action opens the new project flow|project header keeps the settings, handoff, and avatar controls pinned on compact desktop widths" +``` + +```bash +pnpm exec playwright test -c playwright.config.ts ui/amr-onboarding.test.ts --grep "onboarding AMR card lets the user pick a live runtime model before continuing" +``` + +```bash +pnpm exec playwright test -c playwright.config.ts ui/app-manual-edit.test.ts --grep "simple deck keeps the active slide stable across preview mode switches" +``` + +```bash +pnpm exec playwright test -c playwright.config.ts ui/settings-api-protocol.test.ts --grep "BYOK fetched models are searchable inside the Settings model dropdown" +``` + ### Connectors:happy path + recovery ```bash diff --git a/e2e/ui/amr-onboarding.test.ts b/e2e/ui/amr-onboarding.test.ts index 140d36a6f..174f01060 100644 --- a/e2e/ui/amr-onboarding.test.ts +++ b/e2e/ui/amr-onboarding.test.ts @@ -77,12 +77,50 @@ test('onboarding recovers from a transient AMR status failure and still continue await expect(page.getByRole('button', { name: /Continue/i })).toBeVisible({ timeout: 12_000 }); }); + +test('onboarding AMR card lets the user pick a live runtime 
model before continuing', async ({ page }) => { + const config = await wireOnboardingMocks(page, { + amrAvailable: true, + initialLoggedIn: true, + amrModels: [ + { id: 'claude-opus-4.8', label: 'Claude Opus 4.8' }, + { id: 'deepseek-v4-flash', label: 'DeepSeek V4 Flash' }, + { id: 'glm-5.1', label: 'GLM 5.1' }, + ], + }); + + await page.addInitScript( + ({ key, value }) => window.localStorage.setItem(key, JSON.stringify(value)), + { key: STORAGE_KEY, value: config }, + ); + + await gotoOnboarding(page); + + await expect(page.getByText('AMR v0.1.0')).toBeVisible(); + const modelSelect = page.locator('.onboarding-view__model-picker select'); + await expect(modelSelect).toHaveValue('claude-opus-4.8'); + await modelSelect.selectOption('deepseek-v4-flash'); + await page.getByRole('button', { name: /Continue/i }).click(); + + await expect + .poll(() => page.evaluate((key) => JSON.parse(window.localStorage.getItem(key) || '{}'), STORAGE_KEY)) + .toMatchObject({ + agentId: 'amr', + agentModels: { + amr: { + model: 'deepseek-v4-flash', + }, + }, + }); +}); + async function wireOnboardingMocks( page: Page, options: { amrAvailable: boolean; initialLoggedIn: boolean; failFirstStatusPollAfterLogin?: boolean; + amrModels?: Array<{ id: string; label: string }>; }, ): Promise<OnboardingConfig> { const config: OnboardingConfig = { @@ -139,7 +177,7 @@ async function wireOnboardingMocks( bin: 'vela', available: true, version: '1.0.0', - models: [{ id: 'default', label: 'Default' }], + models: options.amrModels ??
[{ id: 'default', label: 'Default' }], }] : []), { diff --git a/e2e/ui/app-manual-edit.test.ts b/e2e/ui/app-manual-edit.test.ts index 535cb04d0..9f752f029 100644 --- a/e2e/ui/app-manual-edit.test.ts +++ b/e2e/ui/app-manual-edit.test.ts @@ -203,6 +203,30 @@ test('manual edit mode keeps deck navigation available for deck-shaped HTML', as await expect(frame.getByText('Slide Two')).toBeVisible(); }); + +test('simple deck keeps the active slide stable across preview mode switches', async ({ page }) => { + await routeMockAgents(page); + const projectId = await createEmptyProject(page, 'Simple deck navigation state'); + await seedDeckArtifact(page, projectId, 'simple-deck.html', 'Simple Deck', ['Slide One', 'Slide Two', 'Slide Three']); + await page.goto(`/projects/${projectId}/files/simple-deck.html`); + await openDesignFile(page, 'simple-deck.html'); + + const frame = page.frameLocator('[data-testid="artifact-preview-frame"]'); + const viewModeTabs = page.getByRole('tablist', { name: 'View mode' }); + + await expect(frame.getByText('Slide One')).toBeVisible(); + + await viewModeTabs.getByRole('tab', { name: 'Code' }).click(); + await expect(page.locator('.viewer-source')).toContainText('Slide Three'); + await viewModeTabs.getByRole('tab', { name: 'Preview' }).click(); + + await expect(frame.getByText('Slide One')).toBeVisible(); + await page.getByLabel('Next slide').click(); + await expect(frame.getByText('Slide Two')).toBeVisible(); + await page.getByLabel('Next slide').click(); + await expect(frame.getByText('Slide Three')).toBeVisible(); +}); + test('HTML preview stays rendered after switching from Preview to Code and back', async ({ page }) => { await routeMockAgents(page); const projectId = await createEmptyProject(page, 'HTML preview toggle regression'); diff --git a/e2e/ui/project-management-flows.test.ts b/e2e/ui/project-management-flows.test.ts index 28e024bce..1a6ba9e14 100644 --- a/e2e/ui/project-management-flows.test.ts +++ 
b/e2e/ui/project-management-flows.test.ts @@ -140,6 +140,34 @@ test('new project tabs switch visible form sections and preserve drafts', async await expect(page.getByText('Aspect', { exact: true })).toBeVisible(); }); +test('projects empty state create action opens the new project flow', async ({ page }) => { + await page.route('**/api/skills', async (route) => { + await route.fulfill({ json: { skills: TAB_SKILLS } }); + }); + await page.route('**/api/connectors', async (route) => { + await route.fulfill({ json: { connectors: [] } }); + }); + await page.route('**/api/connectors/status', async (route) => { + await route.fulfill({ json: { statuses: {} } }); + }); + await page.route('**/api/projects', async (route) => { + if (route.request().method() === 'GET') { + await route.fulfill({ json: { projects: [] } }); + return; + } + await route.continue(); + }); + + await page.goto('/projects'); + await expect(page.locator('.designs-empty-state')).toBeVisible(); + await page.locator('.designs-empty-cta').click(); + + await expect(page.getByTestId('new-project-modal')).toBeVisible(); + await expect(page.getByTestId('new-project-panel')).toBeVisible(); + await expect(page.getByTestId('new-project-tab-prototype')).toHaveAttribute('aria-selected', 'true'); + await expect(page.locator('.newproj-title')).toContainText('New prototype'); +}); + test('design system multi-select stores primary and inspiration metadata', async ({ page }) => { await page.route('**/api/design-systems', async (route) => { await route.fulfill({ json: { designSystems: DESIGN_SYSTEMS } }); @@ -234,6 +262,40 @@ test('project title rename persists after reload and ignores blank titles', asyn expect(project.name).toBe('Renamed persistent title'); }); + +test('project header keeps the settings, handoff, and avatar controls pinned on compact desktop widths', async ({ page }) => { + await page.setViewportSize({ width: 1100, height: 900 }); + await page.goto('/'); + await createProject(page, 'Header controls 
stay pinned'); + await expectWorkspaceReady(page); + + const handoffTrigger = page.getByTestId('handoff-trigger'); + const avatarTrigger = page.locator('.avatar-agent-trigger'); + await expect(page.getByTestId('project-title')).toBeVisible(); + await expect(handoffTrigger).toBeVisible(); + await expect(avatarTrigger).toBeVisible(); + + const layout = await page.evaluate(() => { + const root = document.documentElement; + const handoff = document.querySelector('[data-testid="handoff-trigger"]') as HTMLElement | null; + const avatar = document.querySelector('.avatar-agent-trigger') as HTMLElement | null; + const title = document.querySelector('[data-testid="project-title"]') as HTMLElement | null; + const overflow = Math.max(0, root.scrollWidth - root.clientWidth); + return { + overflow, + handoffRight: handoff?.getBoundingClientRect().right ?? 0, + avatarRight: avatar?.getBoundingClientRect().right ?? 0, + titleRight: title?.getBoundingClientRect().right ?? 0, + viewportWidth: window.innerWidth, + }; + }); + + expect(layout.overflow).toBeLessThanOrEqual(2); + expect(layout.handoffRight).toBeGreaterThan(layout.titleRight); + expect(layout.avatarRight).toBeGreaterThan(layout.handoffRight); + expect(layout.avatarRight).toBeLessThanOrEqual(layout.viewportWidth - 8); +}); + test('canceling design file deletion keeps the file and open tab', async ({ page }) => { await page.goto('/'); await createProject(page, 'Design file delete cancel flow'); diff --git a/e2e/ui/settings-api-protocol.test.ts b/e2e/ui/settings-api-protocol.test.ts index cffb2c496..6765fb3db 100644 --- a/e2e/ui/settings-api-protocol.test.ts +++ b/e2e/ui/settings-api-protocol.test.ts @@ -333,6 +333,68 @@ test('BYOK auto-loads provider models and reuses cached results for the same con await expect.poll(() => providerModelRequests.length).toBe(1); }); + +test('BYOK fetched models are searchable inside the Settings model dropdown', async ({ page }) => { + const providerModelRequests: Array> = []; + await 
page.route('**/api/provider/models', async (route) => { + const payload = route.request().postDataJSON() as Record; + providerModelRequests.push(payload); + await route.fulfill({ + status: 200, + contentType: 'application/json', + body: JSON.stringify({ + ok: true, + kind: 'success', + latencyMs: 15, + models: [ + { id: 'aa-nightly-model', label: 'AA Nightly Model' }, + { id: 'bb-nightly-model', label: 'BB Nightly Model' }, + { id: 'cc-nightly-model', label: 'CC Nightly Model' }, + { id: 'dd-nightly-model', label: 'DD Nightly Model' }, + { id: 'ee-nightly-model', label: 'EE Nightly Model' }, + { id: 'ff-nightly-model', label: 'FF Nightly Model' }, + { id: 'gg-nightly-model', label: 'GG Nightly Model' }, + { id: 'hh-nightly-model', label: 'HH Nightly Model' }, + { id: 'mm-nightly-model', label: 'MM Nightly Model' }, + { id: 'zz-nightly-model', label: 'ZZ Nightly Model' }, + ], + }), + }); + }); + + await openExecutionSettings(page, { + mode: 'api', + apiKey: '', + apiProtocol: 'openai', + apiVersion: '', + baseUrl: 'https://api.openai.com/v1', + model: 'gpt-4o', + apiProviderBaseUrl: 'https://api.openai.com/v1', + agentId: null, + skillId: null, + designSystemId: null, + onboardingCompleted: true, + mediaProviders: {}, + agentModels: {}, + agentCliEnv: {}, + }); + + const dialog = page.getByRole('dialog'); + await dialog.getByLabel('API key').fill('sk-openai-test'); + await dialog.getByLabel('API key').blur(); + await expect(dialog.getByText('Loaded 10 models from your account.')).toBeVisible(); + await expect.poll(() => providerModelRequests.length).toBe(1); + + await dialog.getByRole('combobox', { name: 'Model', exact: true }).click(); + const popover = page.getByTestId('settings-byok-model-popover'); + const search = page.getByTestId('settings-byok-model-search'); + await expect(popover).toBeVisible(); + await expect(search).toBeVisible(); + await search.fill('mm-nightly'); + await expect(popover.getByRole('option', { name: 'MM Nightly Model (mm-nightly-model)' 
})).toBeVisible(); + await expect(popover.getByRole('option', { name: 'AA Nightly Model (aa-nightly-model)' })).toHaveCount(0); +}); + test('saving Local CLI updates the entry status pill with the selected agent', async ({ page }) => { await openExecutionSettingsWithAgents( page, From f7beb429509b02ecb48ef23f03e2dc886be4a110 Mon Sep 17 00:00:00 2001 From: Amy <1184569493@qq.com> Date: Fri, 29 May 2026 18:25:54 +0800 Subject: [PATCH 2/3] Add daemon launch review regression coverage --- apps/daemon/tests/chat-route.test.ts | 83 ++++++++++++++ .../tests/integrations/vela.routes.test.ts | 55 +++++++++ apps/daemon/tests/runs.test.ts | 21 ++++ apps/daemon/tests/runtimes/agent-args.test.ts | 25 ++++- .../tests/runtimes/env-and-detection.test.ts | 50 +++++++++ .../launch-review-e2e-regressions.zh-CN.md | 106 ++++++++++++++++-- 6 files changed, 330 insertions(+), 10 deletions(-) diff --git a/apps/daemon/tests/chat-route.test.ts b/apps/daemon/tests/chat-route.test.ts index 5d627d8f3..2df4b60e0 100644 --- a/apps/daemon/tests/chat-route.test.ts +++ b/apps/daemon/tests/chat-route.test.ts @@ -1,4 +1,5 @@ import type http from 'node:http'; +import Database from 'better-sqlite3'; import { randomUUID } from 'node:crypto'; import { chmodSync, @@ -27,6 +28,7 @@ import { import { skillCwdAliasSegment } from '../src/cwd-aliases.js'; import { getAgentDef } from '../src/agents.js'; import { readMemoryConfig, writeMemoryConfig } from '../src/memory.js'; +import { upsertMessage } from '../src/db.js'; import { renderCodexImagegenOverride } from '../src/prompts/system.js'; const FAKE_VELA_FIXTURE = resolve(process.cwd(), 'tests', 'fixtures', 'fake-vela.mjs'); @@ -216,6 +218,87 @@ process.exit(0); ); }); + + it('reuses an existing assistant message row instead of creating a duplicate when assistantMessageId is supplied', async () => { + if (!process.env.OD_DATA_DIR) { + throw new Error('OD_DATA_DIR is required for assistant message reuse tests'); + } + const projectId = 
`proj-${randomUUID()}`; + const assistantMessageId = `assistant-${randomUUID()}`; + + const createProjectResponse = await fetch(`${baseUrl}/api/projects`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ id: projectId, name: 'Assistant row reuse fixture' }), + }); + expect(createProjectResponse.ok).toBe(true); + + const conversationsResponse = await fetch(`${baseUrl}/api/projects/${projectId}/conversations`); + expect(conversationsResponse.ok).toBe(true); + const conversationsBody = await conversationsResponse.json() as { + conversations: Array<{ id: string }>; + }; + const conversationId = conversationsBody.conversations[0]?.id; + expect(conversationId).toBeTruthy(); + + const dbFile = resolve(process.env.OD_DATA_DIR, 'app.sqlite'); + const sqlite = new Database(dbFile); + try { + upsertMessage(sqlite as never, conversationId!, { + id: assistantMessageId, + role: 'assistant', + content: '', + runStatus: 'failed', + startedAt: Date.now() - 1_000, + endedAt: Date.now() - 500, + }); + } finally { + sqlite.close(); + } + + await withFakeAgent( + 'opencode', + ` +process.stdin.resume(); +process.stdin.on('end', () => { + console.log(JSON.stringify({ type: 'step_start' })); + console.log(JSON.stringify({ type: 'text', part: { text: 'reused-assistant-row-ok' } })); + console.log(JSON.stringify({ type: 'step_finish', part: { tokens: { input: 1, output: 1 } } })); + process.exit(0); +}); +`, + async () => { + const response = await fetch(`${baseUrl}/api/chat`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + agentId: 'opencode', + projectId, + conversationId, + assistantMessageId, + message: 'retry this turn', + }), + }); + const body = await response.text(); + expect(response.ok).toBe(true); + expect(body).toContain('reused-assistant-row-ok'); + }, + ); + + const verifyDb = new Database(dbFile, { readonly: true }); + try { + const rows = verifyDb + .prepare(`SELECT id, content, 
run_id FROM messages WHERE conversation_id = ? AND role = 'assistant'`) + .all(conversationId) as Array<{ id: string; content: string; run_id: string | null }>; + expect(rows.filter((row) => row.id === assistantMessageId)).toHaveLength(1); + expect(rows.some((row) => row.id !== assistantMessageId && row.content.includes('reused-assistant-row-ok'))).toBe(false); + const reused = rows.find((row) => row.id === assistantMessageId); + expect(reused?.content).toContain('reused-assistant-row-ok'); + } finally { + verifyDb.close(); + } + }); + it('rewrites the OpenCode scanner overflow into a generic retry message', async () => { const conversationId = `conv-${randomUUID()}`; diff --git a/apps/daemon/tests/integrations/vela.routes.test.ts b/apps/daemon/tests/integrations/vela.routes.test.ts index ded918bda..a239a343c 100644 --- a/apps/daemon/tests/integrations/vela.routes.test.ts +++ b/apps/daemon/tests/integrations/vela.routes.test.ts @@ -371,6 +371,61 @@ describe('POST /api/integrations/vela/login', () => { } }); + + it('uses the same Settings-configured AMR env for login and subsequent status reads', async () => { + const dataDir = process.env.OD_DATA_DIR as string; + const previous = await readAppConfig(dataDir); + process.env.OPEN_DESIGN_AMR_PROFILE = 'prod'; + process.env.VELA_PROFILE = 'prod'; + process.env.FAKE_VELA_LOGIN_USER_EMAIL = 'settings-roundtrip@example.com'; + await writeAppConfig(dataDir, { + ...previous, + agentCliEnv: { + ...(previous.agentCliEnv ?? {}), + amr: { + ...((previous.agentCliEnv?.amr as Record) ?? 
{}), + VELA_BIN: FAKE_VELA, + OPEN_DESIGN_AMR_PROFILE: 'local', + }, + }, + }); + try { + const before = await getJson<{ + loggedIn: boolean; + profile: string; + user: { email?: string } | null; + }>(`${baseUrl}/api/integrations/vela/status`); + expect(before.status).toBe(200); + expect(before.body.loggedIn).toBe(false); + expect(before.body.profile).toBe('local'); + + const login = await postJson<{ + pid: number; + profile: string; + }>(`${baseUrl}/api/integrations/vela/login`); + expect(login.status).toBe(202); + expect(login.body.profile).toBe('local'); + + for (let i = 0; i < 50; i += 1) { + const current = await getJson<{ + loggedIn: boolean; + profile: string; + user: { email?: string } | null; + }>(`${baseUrl}/api/integrations/vela/status`); + if (current.body.loggedIn) { + expect(current.body.profile).toBe('local'); + expect(current.body.user?.email).toBe('settings-roundtrip@example.com'); + return; + } + await new Promise((resolve) => setTimeout(resolve, 100)); + } + throw new Error('expected configured-profile AMR login to become visible via /status'); + } finally { + await writeAppConfig(dataDir, previous as unknown as Record<string, unknown>); + delete process.env.FAKE_VELA_LOGIN_USER_EMAIL; + } + }); + it('returns 409 when a login subprocess is already in flight', async () => { // Use the stub's delay knob so the first login is still running when // the second request arrives; without this the first exits before the diff --git a/apps/daemon/tests/runs.test.ts b/apps/daemon/tests/runs.test.ts index 9855a7de7..91651d1a3 100644 --- a/apps/daemon/tests/runs.test.ts +++ b/apps/daemon/tests/runs.test.ts @@ -62,6 +62,27 @@ describe('chat run service shutdown', () => { runs.list({ projectId: 'project-1', conversationId: 'conv-b', status: 'active' }), ).toEqual([runB]); }); + it('cancels a queued run immediately without waiting for child process shutdown', async () => { + const runs = createRuns(); + const run = runs.create({ projectId: 'project-1', conversationId:
'conv-queued' }); + + const wait = runs.wait(run); + runs.cancel(run); + + expect(run.status).toBe('canceled'); + expect(run.cancelRequested).toBe(true); + expect(run.signal).toBe('SIGTERM'); + expect(run.events.at(-1)).toMatchObject({ + event: 'end', + data: { status: 'canceled', signal: 'SIGTERM' }, + }); + await expect(wait).resolves.toMatchObject({ + status: 'canceled', + signal: 'SIGTERM', + }); + }); + + it('stores effective media execution policy on run status bodies', () => { const runs = createRuns(); diff --git a/apps/daemon/tests/runtimes/agent-args.test.ts b/apps/daemon/tests/runtimes/agent-args.test.ts index 3021632e7..c7d12d2d8 100644 --- a/apps/daemon/tests/runtimes/agent-args.test.ts +++ b/apps/daemon/tests/runtimes/agent-args.test.ts @@ -1,7 +1,7 @@ import { existsSync, readFileSync } from 'node:fs'; import { test } from 'vitest'; import { - AGENT_DEFS, aider, antigravity, assert, claude, codex, copilot, cursorAgent, deepseek, devin, detectAgents, gemini, join, kilo, kiro, mkdtempSync, opencode, pi, qoder, qwen, rmSync, spawnEnvForAgent, tmpdir, vibe, writeFileSync, chmodSync, + AGENT_DEFS, aider, antigravity, assert, claude, codex, copilot, cursorAgent, deepseek, devin, detectAgents, gemini, grokBuild, join, kilo, kiro, mkdtempSync, opencode, pi, qoder, qwen, rmSync, spawnEnvForAgent, tmpdir, vibe, writeFileSync, chmodSync, } from './helpers/test-helpers.js'; import { writeAntigravityModelSelection } from '../../src/runtimes/defs/antigravity.js'; import type { TestAgentDef } from './helpers/test-helpers.js'; @@ -756,6 +756,29 @@ test('codex buildArgs omits model_reasoning_effort when reasoning is "default"', ); }); +test('grok-build inlines the prompt as -p and never falls back to stdin sentinels', () => { + const prompt = 'summarize the current page layout'; + const args = grokBuild.buildArgs( + prompt, + [], + [], + { model: 'grok-4.3', reasoning: 'high' }, + { cwd: '/tmp/od-project' }, + ); + + assert.equal(grokBuild.promptViaStdin, false); + 
assert.deepEqual(args, [ + '-p', + prompt, + '--model', + 'grok-4.3', + '--effort', + 'high', + ]); + assert.equal(args.includes('-'), false); + assert.equal(args.filter((entry) => entry === '-p').length, 1); +}); + test('claude flags promptViaStdin and never embeds the prompt in argv', () => { // Long composed prompts (system prompt + design system + skill body + // user message) routinely exceed Linux MAX_ARG_STRLEN (~128 KB) and the diff --git a/apps/daemon/tests/runtimes/env-and-detection.test.ts b/apps/daemon/tests/runtimes/env-and-detection.test.ts index 82cbf78fd..0cece5a16 100644 --- a/apps/daemon/tests/runtimes/env-and-detection.test.ts +++ b/apps/daemon/tests/runtimes/env-and-detection.test.ts @@ -478,6 +478,56 @@ fsTest('detectAgents marks AMR available from packaged built-in Vela with the bu } }); + +fsTest('detectAgents prefers configured AMR live models over stale fallback defaults', async () => { + const root = mkdtempSync(join(tmpdir(), 'od-detect-amr-live-models-')); + try { + return await withEnvSnapshot(['PATH', 'OD_AGENT_HOME', 'OD_RESOURCE_ROOT', 'VELA_OPENCODE_BIN'], async () => { + const fakeVela = join(root, 'vela'); + const fakeOpenCode = join(root, 'opencode'); + writeFileSync( + fakeVela, + `#!/bin/sh +if [ "$1" = "--version" ]; then echo "vela custom-live"; exit 0; fi +if [ "$1" = "models" ]; then printf "%s\n" "public_model_deepseek_v4_flash vela" "public_model_glm_5 vela"; exit 0; fi +exit 0 +`, + ); + writeFileSync(fakeOpenCode, `#!/bin/sh +exit 0 +`); + chmodSync(fakeVela, 0o755); + chmodSync(fakeOpenCode, 0o755); + process.env.PATH = ''; + process.env.OD_AGENT_HOME = join(root, 'empty-home'); + delete process.env.OD_RESOURCE_ROOT; + delete process.env.VELA_OPENCODE_BIN; + + const agents = await detectAgents({ + amr: { + VELA_BIN: fakeVela, + VELA_OPENCODE_BIN: fakeOpenCode, + }, + }); + const amrAgent = agents.find((agent) => agent.id === 'amr'); + + assert.ok(amrAgent); + assert.equal(amrAgent.available, true); + 
assert.equal(amrAgent.path, fakeVela); + assert.equal(amrAgent.version, 'vela custom-live'); + assert.equal(amrAgent.modelsSource, 'live'); + assert.deepEqual(amrAgent.models, [ + { id: 'deepseek-v4-flash', label: 'deepseek-v4-flash' }, + { id: 'glm-5', label: 'glm-5' }, + ]); + assert.equal(amrAgent.models.some((model) => model.id === 'default'), false); + assert.equal(amrAgent.models.some((model) => model.id === 'gpt-5.4-mini'), false); + }); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + function codexNativeTargetTriple(): string { if (process.platform === 'darwin' && process.arch === 'arm64') return 'aarch64-apple-darwin'; if (process.platform === 'darwin' && process.arch === 'x64') return 'x86_64-apple-darwin'; diff --git a/docs/testing/launch-review-e2e-regressions.zh-CN.md b/docs/testing/launch-review-e2e-regressions.zh-CN.md index eaa938fbf..279c28531 100644 --- a/docs/testing/launch-review-e2e-regressions.zh-CN.md +++ b/docs/testing/launch-review-e2e-regressions.zh-CN.md @@ -320,7 +320,7 @@ pnpm exec playwright test -c playwright.config.ts ui/design-systems-manager.test ## 新增 daemon 契约回归 -这批 launch review 补测不只停留在 Playwright。对于前端 E2E 无法替代的契约层问题,当前已补 5 条 daemon 定向回归。 +这批 launch review 补测不只停留在 Playwright。对于前端 E2E 无法替代的契约层问题,当前已补 10 条 daemon 定向回归。 ### 1. Diagnostics 导出路径与缺失日志清单 @@ -392,6 +392,82 @@ pnpm exec playwright test -c playwright.config.ts ui/design-systems-manager.test - `generated-plugin/SKILL.md` - daemon 会把本轮转成 `failed`,而不是错误地保留 `succeeded` + +### 6. Grok Build prompt inline argv 契约 + +文件: +- [apps/daemon/tests/runtimes/agent-args.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/runtimes/agent-args.test.ts) +- [apps/daemon/src/runtimes/defs/grok-build.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/src/runtimes/defs/grok-build.ts) + +新增覆盖: + +1. 
`grok-build inlines the prompt as -p and never falls back to stdin sentinels` + - 覆盖近期 `grok-build` 适配器从 stdin 路径切到 `-p <prompt>` 的真实契约 + - 明确要求: + - `promptViaStdin = false` + - argv 中必须带 `-p` 与 prompt 正文 + - 不能出现旧的 `-` stdin 哨兵 + - 防止后续 refactor 把 Grok Build 又错误退回到 stdin / 占位符路径 + +### 7. AMR login/status 同源配置契约 + +文件: +- [apps/daemon/tests/integrations/vela.routes.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/integrations/vela.routes.test.ts) + +新增覆盖: + +1. `uses the same Settings-configured AMR env for login and subsequent status reads` + - 覆盖 `/api/integrations/vela/login` 与 `/api/integrations/vela/status` 都必须使用同一份 `agentCliEnv.amr` + - 避免终端里的 `vela` 已登录,但 Open Design 因配置源不一致读成未登录 + +### 8. queued run 取消的终态语义 + +文件: +- [apps/daemon/tests/runs.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/runs.test.ts) + +新增覆盖: + +1. `cancels a queued run immediately without waiting for child process shutdown` + - 覆盖 queued run 在尚未启动子进程时被取消的路径 + - 要求 run 立即进入 `canceled` 终态、发出 `end` 事件并解析 waiter,而不是悬挂在 active 状态 + +### 9. /api/agents 的 AMR live model discovery 不得回退假默认 + +文件: +- [apps/daemon/tests/runtimes/env-and-detection.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/runtimes/env-and-detection.test.ts) + +新增覆盖: + +1. `detectAgents prefers configured AMR live models over stale fallback defaults` + - 覆盖 `detectAgents({ amr: { VELA_BIN, VELA_OPENCODE_BIN } })` 走真实 live catalog + - 要求 `modelsSource = live` 且返回归一化后的可运行模型 id + - 明确禁止回退到 `default` / 旧的 `gpt-5.4-mini` 伪默认模型 + +### 10. retry 复用已有 assistant message 行而不是复制新行 + +文件: +- [apps/daemon/tests/chat-route.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/chat-route.test.ts) + +新增覆盖: + +1.
`reuses an existing assistant message row instead of creating a duplicate when assistantMessageId is supplied` + - 覆盖 retry / 续跑场景下显式传入 `assistantMessageId` 时,daemon 会复用原有 assistant message 行 + - 要求同一 `conversation_id` 下该 assistant message 仍然只有一条记录 + - 新输出会回填到原行,而不是额外插入一条新的 assistant message + +### 已覆盖的 artifact quiet-period / watchdog 收尾 + +文件: +- [apps/daemon/tests/chat-run-artifact-quiet-period.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/chat-run-artifact-quiet-period.test.ts) + +现有覆盖已经锁住: + +- `returns succeeded when the watchdog-initiated quiet-period SIGTERM fires` +- `returns succeeded when the watchdog quiet-period escalates to SIGKILL` +- `returns failed when SIGTERM/SIGKILL arrive but no quiet-period shutdown was requested` + +这部分本轮没有重复补新测试,避免和现有专用回归重叠。 + ### daemon 定向运行命令 仓库根目录: @@ -401,7 +477,19 @@ cd /Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon ``` ```bash -pnpm exec vitest run tests/chat-route.test.ts tests/diagnostics-export.test.ts tests/projects-routes.test.ts tests/runs.test.ts tests/amr-acp-integration.test.ts tests/runtimes/env-and-detection.test.ts tests/runtimes/resolve-model.test.ts +pnpm exec vitest run tests/chat-route.test.ts tests/diagnostics-export.test.ts tests/projects-routes.test.ts tests/runs.test.ts tests/amr-acp-integration.test.ts tests/runtimes/env-and-detection.test.ts tests/runtimes/resolve-model.test.ts tests/runtimes/agent-args.test.ts --testNamePattern "grok-build inlines the prompt as -p and never falls back to stdin sentinels" +``` + +```bash +pnpm exec vitest run tests/integrations/vela.routes.test.ts tests/runs.test.ts --testNamePattern "uses the same Settings-configured AMR env for login and subsequent status reads|cancels a queued run immediately without waiting for child process shutdown" +``` + +```bash +pnpm exec vitest run tests/chat-run-artifact-quiet-period.test.ts tests/runtimes/env-and-detection.test.ts --testNamePattern "detectAgents prefers configured AMR 
live models over stale fallback defaults|returns succeeded when the watchdog-initiated quiet-period SIGTERM fires|returns failed when SIGTERM/SIGKILL arrive but no quiet-period shutdown was requested" +``` + +```bash +pnpm exec vitest run tests/chat-route.test.ts --testNamePattern "reuses an existing assistant message row instead of creating a duplicate when assistantMessageId is supplied" ``` ## 这批 daemon 补测当前没有覆盖的点 @@ -409,13 +497,13 @@ pnpm exec vitest run tests/chat-route.test.ts tests/diagnostics-export.test.ts t 下面这些仍然值得继续补,但这轮没有为了追求数量硬塞进去: 1. AMR / agent 运行结束态收敛 - - 例如“工作完成但没有 terminal event,最后被 watchdog 打成 failed” - - 例如“有有效产物但收尾阶段卡住”的 terminal-state 修正 + - quiet-period / watchdog 的 close-status 分流已经有专用测试文件锁住 + - 更深一层仍可继续补“真实 HTTP live-artifact create 后收尾成功”的整链回归 2. AMR auth / model discovery 的更完整契约 - - 例如 auth probe 与真实 launch path / env 必须同源 - - 例如 live models 成功时不能回退到假默认模型 + - 已补 login/status 同源配置,以及 `/api/agents` 的 live catalog 不回退假默认 + - 更深一层仍可继续补 live model discovery 与运行时 launch path 的完全同源 -3. queued / retry 的持久化语义 - - 前端行为已覆盖 - - daemon 侧仍可继续锁住 message 关联和队列启动顺序 +3. 
queued / retry 的更深层持久化语义 + - 前端行为已覆盖,daemon 侧已补 queued run 取消终态 + - 仍可继续锁住 retry 的 message 关联和队列启动顺序 From 91691f3e665044db0140439c77b00f499a9e71ff Mon Sep 17 00:00:00 2001 From: Amy <1184569493@qq.com> Date: Fri, 29 May 2026 18:43:08 +0800 Subject: [PATCH 3/3] Tighten plugin authoring completion regressions --- apps/daemon/tests/chat-route.test.ts | 70 +++++++++++++++++++ .../launch-review-e2e-regressions.zh-CN.md | 4 ++ 2 files changed, 74 insertions(+) diff --git a/apps/daemon/tests/chat-route.test.ts b/apps/daemon/tests/chat-route.test.ts index 2df4b60e0..614f5044f 100644 --- a/apps/daemon/tests/chat-route.test.ts +++ b/apps/daemon/tests/chat-route.test.ts @@ -394,6 +394,76 @@ child.on('exit', (code, signal) => { } }); + /* Success-path regression for plugin authoring: a fake opencode agent writes generated-plugin/open-design.json and generated-plugin/SKILL.md, prints only planning text, and exits 0; the run is expected to end as succeeded and both files must be listed for the project. */ it('allows plugin authoring to succeed when the requested generated-plugin artifacts exist before close', async () => { + const projectId = `proj-plugin-authoring-success-${randomUUID()}`; + + const createProjectResponse = await fetch(`${baseUrl}/api/projects`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + id: projectId, + name: 'Plugin authoring artifact success fixture', + skillId: null, + designSystemId: null, + }), + }); + expect(createProjectResponse.status).toBe(200); + /* Fetch the conversations of the freshly created project and use the first id for the run request below. */ const conversationsResponse = await fetch(`${baseUrl}/api/projects/${projectId}/conversations`); + expect(conversationsResponse.status).toBe(200); + const conversationsBody = await conversationsResponse.json() as { + conversations: Array<{ id: string }>; + }; + const conversationId = conversationsBody.conversations[0]?.id; + expect(conversationId).toBeTruthy(); + + await withFakeAgent( + 'opencode', + ` +const fs = require('node:fs'); +const path = require('node:path'); +process.stdin.resume(); +process.stdin.on('end', () => { + const pluginDir = path.join(process.cwd(), 'generated-plugin'); + fs.mkdirSync(pluginDir, { recursive: true }); + fs.writeFileSync(path.join(pluginDir, 'open-design.json'), JSON.stringify({ name: 
'generated-plugin' }, null, 2)); + fs.writeFileSync(path.join(pluginDir, 'SKILL.md'), '# Generated plugin\\n'); + console.log(JSON.stringify({ type: 'step_start' })); + console.log(JSON.stringify({ type: 'text', part: { text: '我来帮你创建一个通用的 Open Design 插件脚手架。先读取文档规范,再生成插件文件。' } })); + console.log(JSON.stringify({ type: 'step_finish', part: { tokens: { input: 1, output: 1 } } })); + process.exit(0); +}); +`, + async () => { + const createResponse = await fetch(`${baseUrl}/api/runs`, { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + agentId: 'opencode', + projectId, + conversationId, + pluginId: 'od-plugin-authoring', + message: '请创建一个可刷新、可审计、由 API 驱动的 Open Design 插件脚手架。', + }), + }); + expect(createResponse.status).toBe(202); + const { runId } = await createResponse.json() as { runId: string }; + + /* NOTE(review): readSseUntil and waitForRunStatus are defined outside this hunk; presumably they read the SSE body up to the final event marker and poll the run status. Confirm against the helpers. */ const eventsResponse = await fetch(`${baseUrl}/api/runs/${runId}/events`); + const eventsBody = await readSseUntil(eventsResponse, 'event: final'); + const statusBody = await waitForRunStatus(baseUrl, runId); + + expect(eventsBody).toContain('先读取文档规范,再生成插件文件'); + expect(statusBody.status).toBe('succeeded'); + + const filesResponse = await fetch(`${baseUrl}/api/projects/${projectId}/files`); + expect(filesResponse.status).toBe(200); + const filesBody = await filesResponse.json() as { files: Array<{ name: string }> }; + expect(filesBody.files.some((file) => file.name === 'generated-plugin/open-design.json')).toBe(true); + expect(filesBody.files.some((file) => file.name === 'generated-plugin/SKILL.md')).toBe(true); + }, + ); + }); + it('does not report plugin authoring as succeeded when the agent only emits planning text without artifacts', async () => { const projectId = `proj-plugin-authoring-${randomUUID()}`; diff --git a/docs/testing/launch-review-e2e-regressions.zh-CN.md b/docs/testing/launch-review-e2e-regressions.zh-CN.md index 279c28531..f7cab5e8d 100644 --- a/docs/testing/launch-review-e2e-regressions.zh-CN.md +++ 
b/docs/testing/launch-review-e2e-regressions.zh-CN.md @@ -392,6 +392,10 @@ pnpm exec playwright test -c playwright.config.ts ui/design-systems-manager.test - `generated-plugin/SKILL.md` - daemon 会把本轮转成 `failed`,而不是错误地保留 `succeeded` +2. `allows plugin authoring to succeed when the requested generated-plugin artifacts exist before close` + - 覆盖同一条 guard 的对称路径:只要关键插件产物已经落地,就不会被误伤成失败 + - 锁住 daemon 的判断是“缺少目标产物才失败”,而不是“只要文本看起来像计划句就失败” + ### 6. Grok Build prompt inline argv 契约