Add daemon launch review regression coverage

2026-05-31 19:04:39 +07:00 · 2026-05-29 18:25:54 +08:00 · 2026-05-29 18:25:54 +08:00 · f7beb42950
commit f7beb42950
parent a8277c678b
6 changed files with 330 additions and 10 deletions
--- a/apps/daemon/tests/chat-route.test.ts
+++ b/apps/daemon/tests/chat-route.test.ts
@ -1,4 +1,5 @@
 import type http from 'node:http';
+import Database from 'better-sqlite3';
 import { randomUUID } from 'node:crypto';
 import {
  chmodSync,
@ -27,6 +28,7 @@ import {
 import { skillCwdAliasSegment } from '../src/cwd-aliases.js';
 import { getAgentDef } from '../src/agents.js';
 import { readMemoryConfig, writeMemoryConfig } from '../src/memory.js';
+import { upsertMessage } from '../src/db.js';
 import { renderCodexImagegenOverride } from '../src/prompts/system.js';

 const FAKE_VELA_FIXTURE = resolve(process.cwd(), 'tests', 'fixtures', 'fake-vela.mjs');
@ -216,6 +218,87 @@ process.exit(0);
    );
  });

+  // Retry contract: when assistantMessageId is supplied, that row is updated in place and no sibling row is inserted.
+  it('reuses an existing assistant message row instead of creating a duplicate when assistantMessageId is supplied', async () => {
+    if (!process.env.OD_DATA_DIR) {
+      throw new Error('OD_DATA_DIR is required for assistant message reuse tests');
+    }
+    const projectId = `proj-${randomUUID()}`;
+    const assistantMessageId = `assistant-${randomUUID()}`;
+
+    const createProjectResponse = await fetch(`${baseUrl}/api/projects`, {
+      method: 'POST',
+      headers: { 'Content-Type': 'application/json' },
+      body: JSON.stringify({ id: projectId, name: 'Assistant row reuse fixture' }),
+    });
+    expect(createProjectResponse.ok).toBe(true);
+
+    const conversationsResponse = await fetch(`${baseUrl}/api/projects/${projectId}/conversations`);
+    expect(conversationsResponse.ok).toBe(true);
+    const conversationsBody = await conversationsResponse.json() as {
+      conversations: Array<{ id: string }>;
+    };
+    const conversationId = conversationsBody.conversations[0]?.id;
+    if (!conversationId) throw new Error('expected the new project to expose a conversation'); // real guard: narrows the type, no `!` needed below
+
+    const dbFile = resolve(process.env.OD_DATA_DIR, 'app.sqlite');
+    const sqlite = new Database(dbFile); // separate connection, used only to seed the pre-existing assistant row
+    try {
+      upsertMessage(sqlite as never, conversationId, { // NOTE(review): `as never` bypasses upsertMessage's db parameter type — confirm the expected handle type
+        id: assistantMessageId,
+        role: 'assistant',
+        content: '',
+        runStatus: 'failed',
+        startedAt: Date.now() - 1_000,
+        endedAt: Date.now() - 500,
+      });
+    } finally {
+      sqlite.close();
+    }
+
+    await withFakeAgent(
+      'opencode',
+      `
+process.stdin.resume();
+process.stdin.on('end', () => {
+  console.log(JSON.stringify({ type: 'step_start' }));
+  console.log(JSON.stringify({ type: 'text', part: { text: 'reused-assistant-row-ok' } }));
+  console.log(JSON.stringify({ type: 'step_finish', part: { tokens: { input: 1, output: 1 } } }));
+  process.exit(0);
+});
+`,
+      async () => {
+        const response = await fetch(`${baseUrl}/api/chat`, {
+          method: 'POST',
+          headers: { 'Content-Type': 'application/json' },
+          body: JSON.stringify({
+            agentId: 'opencode',
+            projectId,
+            conversationId,
+            assistantMessageId,
+            message: 'retry this turn',
+          }),
+        });
+        const body = await response.text();
+        expect(response.ok).toBe(true);
+        expect(body).toContain('reused-assistant-row-ok');
+      },
+    );
+
+    const verifyDb = new Database(dbFile, { readonly: true }); // read-only: this handle only inspects the resulting rows
+    try {
+      const rows = verifyDb
+        .prepare(`SELECT id, content, run_id FROM messages WHERE conversation_id = ? AND role = 'assistant'`)
+        .all(conversationId) as Array<{ id: string; content: string; run_id: string | null }>;
+      expect(rows.filter((row) => row.id === assistantMessageId)).toHaveLength(1); // the seeded row is still the only one with this id
+      expect(rows.some((row) => row.id !== assistantMessageId && row.content.includes('reused-assistant-row-ok'))).toBe(false); // no other assistant row received the new output
+      const reused = rows.find((row) => row.id === assistantMessageId);
+      expect(reused?.content).toContain('reused-assistant-row-ok'); // the new output landed in the seeded row
+    } finally {
+      verifyDb.close();
+    }
+  });
+
  it('rewrites the OpenCode scanner overflow into a generic retry message', async () => {
    const conversationId = `conv-${randomUUID()}`;

--- a/apps/daemon/tests/integrations/vela.routes.test.ts
+++ b/apps/daemon/tests/integrations/vela.routes.test.ts
@ -371,6 +371,61 @@ describe('POST /api/integrations/vela/login', () => {
    }
  });

+  // The process env deliberately says 'prod' while the Settings-configured amr env says 'local'; both routes must report 'local'.
+  it('uses the same Settings-configured AMR env for login and subsequent status reads', async () => {
+    type AmrStatus = { loggedIn: boolean; profile: string; user: { email?: string } | null };
+    const dataDir = process.env.OD_DATA_DIR as string;
+    const previous = await readAppConfig(dataDir);
+    // Snapshot every env var this test overrides so none of them leaks into later tests.
+    const envKeys = ['OPEN_DESIGN_AMR_PROFILE', 'VELA_PROFILE', 'FAKE_VELA_LOGIN_USER_EMAIL'] as const;
+    const envBefore = envKeys.map((key) => [key, process.env[key]] as const);
+    const statusUrl = `${baseUrl}/api/integrations/vela/status`;
+    try {
+      process.env.OPEN_DESIGN_AMR_PROFILE = 'prod';
+      process.env.VELA_PROFILE = 'prod';
+      process.env.FAKE_VELA_LOGIN_USER_EMAIL = 'settings-roundtrip@example.com';
+      await writeAppConfig(dataDir, {
+        ...previous,
+        agentCliEnv: {
+          ...(previous.agentCliEnv ?? {}),
+          amr: {
+            ...((previous.agentCliEnv?.amr as Record<string, string>) ?? {}),
+            VELA_BIN: FAKE_VELA,
+            OPEN_DESIGN_AMR_PROFILE: 'local',
+          },
+        },
+      });
+
+      const before = await getJson<AmrStatus>(statusUrl);
+      expect(before.status).toBe(200);
+      expect(before.body.loggedIn).toBe(false);
+      expect(before.body.profile).toBe('local');
+
+      const login = await postJson<{ pid: number; profile: string }>(`${baseUrl}/api/integrations/vela/login`);
+      expect(login.status).toBe(202);
+      expect(login.body.profile).toBe('local');
+
+      // The login route answers 202 before the result is visible, so poll /status (50 tries, 100 ms apart).
+      for (let i = 0; i < 50; i += 1) {
+        const current = await getJson<AmrStatus>(statusUrl);
+        if (current.body.loggedIn) {
+          expect(current.body.profile).toBe('local');
+          expect(current.body.user?.email).toBe('settings-roundtrip@example.com');
+          return;
+        }
+        await new Promise((done) => setTimeout(done, 100));
+      }
+      throw new Error('expected configured-profile AMR login to become visible via /status');
+    } finally {
+      // Restore the env first (cannot fail), then the saved config, so a failing config write cannot leave the env overridden.
+      for (const [key, value] of envBefore) {
+        if (value === undefined) delete process.env[key];
+        else process.env[key] = value;
+      }
+      await writeAppConfig(dataDir, previous as unknown as Record<string, unknown>);
+    }
+  });
+
  it('returns 409 when a login subprocess is already in flight', async () => {
    // Use the stub's delay knob so the first login is still running when
    // the second request arrives; without this the first exits before the
--- a/apps/daemon/tests/runs.test.ts
+++ b/apps/daemon/tests/runs.test.ts
@ -62,6 +62,27 @@ describe('chat run service shutdown', () => {
      runs.list({ projectId: 'project-1', conversationId: 'conv-b', status: 'active' }),
    ).toEqual([runB]);
  });
+
+  it('cancels a queued run immediately without waiting for child process shutdown', async () => {
+    // Everything up to the final `await` is asserted synchronously right after cancel(): status, signal and the 'end' event must already be set.
+    const runs = createRuns();
+    const run = runs.create({ projectId: 'project-1', conversationId: 'conv-queued' });
+
+    const wait = runs.wait(run);
+    runs.cancel(run);
+
+    expect(run.status).toBe('canceled');
+    expect(run.cancelRequested).toBe(true);
+    expect(run.signal).toBe('SIGTERM');
+    expect(run.events.at(-1)).toMatchObject({
+      event: 'end',
+      data: { status: 'canceled', signal: 'SIGTERM' },
+    });
+    await expect(wait).resolves.toMatchObject({
+      status: 'canceled',
+      signal: 'SIGTERM',
+    });
+  });

  it('stores effective media execution policy on run status bodies', () => {
    const runs = createRuns();
--- a/apps/daemon/tests/runtimes/agent-args.test.ts
+++ b/apps/daemon/tests/runtimes/agent-args.test.ts
@ -1,7 +1,7 @@
 import { existsSync, readFileSync } from 'node:fs';
 import { test } from 'vitest';
 import {
-  AGENT_DEFS, aider, antigravity, assert, claude, codex, copilot, cursorAgent, deepseek, devin, detectAgents, gemini, join, kilo, kiro, mkdtempSync, opencode, pi, qoder, qwen, rmSync, spawnEnvForAgent, tmpdir, vibe, writeFileSync, chmodSync,
+  AGENT_DEFS, aider, antigravity, assert, claude, codex, copilot, cursorAgent, deepseek, devin, detectAgents, gemini, grokBuild, join, kilo, kiro, mkdtempSync, opencode, pi, qoder, qwen, rmSync, spawnEnvForAgent, tmpdir, vibe, writeFileSync, chmodSync,
 } from './helpers/test-helpers.js';
 import { writeAntigravityModelSelection } from '../../src/runtimes/defs/antigravity.js';
 import type { TestAgentDef } from './helpers/test-helpers.js';
@ -756,6 +756,29 @@ test('codex buildArgs omits model_reasoning_effort when reasoning is "default"',
  );
 });

+test('grok-build inlines the prompt as -p <value> and never falls back to stdin sentinels', () => {
+  const prompt = 'summarize the current page layout';
+  const args = grokBuild.buildArgs(
+    prompt,
+    [],
+    [],
+    { model: 'grok-4.3', reasoning: 'high' },
+    { cwd: '/tmp/od-project' },
+  );
+
+  assert.equal(grokBuild.promptViaStdin, false);
+  assert.deepEqual(args, [ // pins the exact argv: prompt inline after -p, then model and effort flags
+    '-p',
+    prompt,
+    '--model',
+    'grok-4.3',
+    '--effort',
+    'high',
+  ]);
+  assert.equal(args.includes('-'), false); // a bare '-' would be the stdin sentinel the test title rules out
+  assert.equal(args.filter((entry) => entry === '-p').length, 1); // the -p flag must appear exactly once
+});
+
 test('claude flags promptViaStdin and never embeds the prompt in argv', () => {
  // Long composed prompts (system prompt + design system + skill body +
  // user message) routinely exceed Linux MAX_ARG_STRLEN (~128 KB) and the
--- a/apps/daemon/tests/runtimes/env-and-detection.test.ts
+++ b/apps/daemon/tests/runtimes/env-and-detection.test.ts
@ -478,6 +478,56 @@ fsTest('detectAgents marks AMR available from packaged built-in Vela with the bu
  }
 });

+// A stub vela prints two models; with PATH emptied and the amr config pointing at the stubs, detectAgents must report them as the live catalog.
+fsTest('detectAgents prefers configured AMR live models over stale fallback defaults', async () => {
+  const root = mkdtempSync(join(tmpdir(), 'od-detect-amr-live-models-'));
+  try {
+    return await withEnvSnapshot(['PATH', 'OD_AGENT_HOME', 'OD_RESOURCE_ROOT', 'VELA_OPENCODE_BIN'], async () => {
+      const fakeVela = join(root, 'vela');
+      const fakeOpenCode = join(root, 'opencode');
+      writeFileSync(
+        fakeVela,
+        `#!/bin/sh
+if [ "$1" = "--version" ]; then echo "vela custom-live"; exit 0; fi
+if [ "$1" = "models" ]; then printf "%s\n" "public_model_deepseek_v4_flash    vela" "public_model_glm_5    vela"; exit 0; fi
+exit 0
+`,
+      );
+      writeFileSync(fakeOpenCode, `#!/bin/sh
+exit 0
+`);
+      chmodSync(fakeVela, 0o755);
+      chmodSync(fakeOpenCode, 0o755);
+      process.env.PATH = ''; // nothing can be found through a PATH lookup
+      process.env.OD_AGENT_HOME = join(root, 'empty-home'); // this directory is never created in the test
+      delete process.env.OD_RESOURCE_ROOT;
+      delete process.env.VELA_OPENCODE_BIN;
+
+      const agents = await detectAgents({
+        amr: {
+          VELA_BIN: fakeVela,
+          VELA_OPENCODE_BIN: fakeOpenCode,
+        },
+      });
+      const amrAgent = agents.find((agent) => agent.id === 'amr');
+
+      assert.ok(amrAgent);
+      assert.equal(amrAgent.available, true);
+      assert.equal(amrAgent.path, fakeVela);
+      assert.equal(amrAgent.version, 'vela custom-live');
+      assert.equal(amrAgent.modelsSource, 'live');
+      assert.deepEqual(amrAgent.models, [ // the stub's public_model_* names are expected back as dash-separated ids
+        { id: 'deepseek-v4-flash', label: 'deepseek-v4-flash' },
+        { id: 'glm-5', label: 'glm-5' },
+      ]);
+      assert.equal(amrAgent.models.some((model) => model.id === 'default'), false);
+      assert.equal(amrAgent.models.some((model) => model.id === 'gpt-5.4-mini'), false);
+    });
+  } finally {
+    rmSync(root, { recursive: true, force: true });
+  }
+});
+
 function codexNativeTargetTriple(): string {
  if (process.platform === 'darwin' && process.arch === 'arm64') return 'aarch64-apple-darwin';
  if (process.platform === 'darwin' && process.arch === 'x64') return 'x86_64-apple-darwin';
--- a/docs/testing/launch-review-e2e-regressions.zh-CN.md
+++ b/docs/testing/launch-review-e2e-regressions.zh-CN.md
@ -320,7 +320,7 @@ pnpm exec playwright test -c playwright.config.ts ui/design-systems-manager.test

 ## 新增 daemon 契约回归

-这批 launch review 补测不只停留在 Playwright。对于前端 E2E 无法替代的契约层问题，当前已补 5 条 daemon 定向回归。
+这批 launch review 补测不只停留在 Playwright。对于前端 E2E 无法替代的契约层问题，当前已补 10 条 daemon 定向回归。

 ### 1. Diagnostics 导出路径与缺失日志清单

@ -392,6 +392,82 @@ pnpm exec playwright test -c playwright.config.ts ui/design-systems-manager.test
     - `generated-plugin/SKILL.md`
   - daemon 会把本轮转成 `failed`，而不是错误地保留 `succeeded`

+
+### 6. Grok Build prompt inline argv 契约
+
+文件：
+- [apps/daemon/tests/runtimes/agent-args.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/runtimes/agent-args.test.ts)
+- [apps/daemon/src/runtimes/defs/grok-build.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/src/runtimes/defs/grok-build.ts)
+
+新增覆盖：
+
+1. `grok-build inlines the prompt as -p <value> and never falls back to stdin sentinels`
+   - 覆盖近期 `grok-build` 适配器从 stdin 路径切到 `-p <PROMPT>` 的真实契约
+   - 明确要求：
+     - `promptViaStdin = false`
+     - argv 中必须带 `-p` 与 prompt 正文
+     - 不能出现旧的 `-` stdin 哨兵
+   - 防止后续 refactor 把 Grok Build 又错误退回到 stdin / 占位符路径
+
+### 7. AMR login/status 同源配置契约
+
+文件：
+- [apps/daemon/tests/integrations/vela.routes.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/integrations/vela.routes.test.ts)
+
+新增覆盖：
+
+1. `uses the same Settings-configured AMR env for login and subsequent status reads`
+   - 覆盖 `/api/integrations/vela/login` 与 `/api/integrations/vela/status` 都必须使用同一份 `agentCliEnv.amr`
+   - 避免终端里的 `vela` 已登录，但 Open Design 因配置源不一致读成未登录
+
+### 8. queued run 取消的终态语义
+
+文件：
+- [apps/daemon/tests/runs.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/runs.test.ts)
+
+新增覆盖：
+
+1. `cancels a queued run immediately without waiting for child process shutdown`
+   - 覆盖 queued run 在尚未启动子进程时被取消的路径
+   - 要求 run 立即进入 `canceled` 终态、发出 `end` 事件并解析 waiter，而不是悬挂在 active 状态
+
+### 9. /api/agents 的 AMR live model discovery 不得回退假默认
+
+文件：
+- [apps/daemon/tests/runtimes/env-and-detection.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/runtimes/env-and-detection.test.ts)
+
+新增覆盖：
+
+1. `detectAgents prefers configured AMR live models over stale fallback defaults`
+   - 覆盖 `detectAgents({ amr: { VELA_BIN, VELA_OPENCODE_BIN } })` 走真实 live catalog
+   - 要求 `modelsSource = live` 且返回归一化后的可运行模型 id
+   - 明确禁止回退到 `default` / 旧的 `gpt-5.4-mini` 伪默认模型
+
+### 10. retry 复用已有 assistant message 行而不是复制新行
+
+文件：
+- [apps/daemon/tests/chat-route.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/chat-route.test.ts)
+
+新增覆盖：
+
+1. `reuses an existing assistant message row instead of creating a duplicate when assistantMessageId is supplied`
+   - 覆盖 retry / 续跑场景下显式传入 `assistantMessageId` 时，daemon 会复用原有 assistant message 行
+   - 要求同一 `conversation_id` 下该 assistant message 仍然只有一条记录
+   - 新输出会回填到原行，而不是额外插入一条新的 assistant message
+
+### 已覆盖的 artifact quiet-period / watchdog 收尾
+
+文件：
+- [apps/daemon/tests/chat-run-artifact-quiet-period.test.ts](/Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon/tests/chat-run-artifact-quiet-period.test.ts)
+
+现有覆盖已经锁住：
+
+- `returns succeeded when the watchdog-initiated quiet-period SIGTERM fires`
+- `returns succeeded when the watchdog quiet-period escalates to SIGKILL`
+- `returns failed when SIGTERM/SIGKILL arrive but no quiet-period shutdown was requested`
+
+这部分本轮没有重复补新测试，避免和现有专用回归重叠。
+
 ### daemon 定向运行命令

 仓库根目录：
@ -401,7 +477,19 @@ cd /Users/mac/open-design/open-design-amr-runtime-acp/apps/daemon
 ```

 ```bash
-pnpm exec vitest run tests/chat-route.test.ts tests/diagnostics-export.test.ts tests/projects-routes.test.ts tests/runs.test.ts tests/amr-acp-integration.test.ts tests/runtimes/env-and-detection.test.ts tests/runtimes/resolve-model.test.ts
+pnpm exec vitest run tests/chat-route.test.ts tests/diagnostics-export.test.ts tests/projects-routes.test.ts tests/runs.test.ts tests/amr-acp-integration.test.ts tests/runtimes/env-and-detection.test.ts tests/runtimes/resolve-model.test.ts tests/runtimes/agent-args.test.ts
+```
+
+```bash
+pnpm exec vitest run tests/integrations/vela.routes.test.ts tests/runs.test.ts --testNamePattern "uses the same Settings-configured AMR env for login and subsequent status reads|cancels a queued run immediately without waiting for child process shutdown"
+```
+
+```bash
+pnpm exec vitest run tests/chat-run-artifact-quiet-period.test.ts tests/runtimes/env-and-detection.test.ts --testNamePattern "detectAgents prefers configured AMR live models over stale fallback defaults|returns succeeded when the watchdog-initiated quiet-period SIGTERM fires|returns succeeded when the watchdog quiet-period escalates to SIGKILL|returns failed when SIGTERM/SIGKILL arrive but no quiet-period shutdown was requested"
+```
+
+```bash
+pnpm exec vitest run tests/chat-route.test.ts --testNamePattern "reuses an existing assistant message row instead of creating a duplicate when assistantMessageId is supplied"
 ```

 ## 这批 daemon 补测当前没有覆盖的点
@ -409,13 +497,13 @@ pnpm exec vitest run tests/chat-route.test.ts tests/diagnostics-export.test.ts t
 下面这些仍然值得继续补，但这轮没有为了追求数量硬塞进去：

 1. AMR / agent 运行结束态收敛
-   - 例如“工作完成但没有 terminal event，最后被 watchdog 打成 failed”
-   - 例如“有有效产物但收尾阶段卡住”的 terminal-state 修正
+   - quiet-period / watchdog 的 close-status 分流已经有专用测试文件锁住
+   - 更深一层仍可继续补“真实 HTTP live-artifact create 后收尾成功”的整链回归

 2. AMR auth / model discovery 的更完整契约
-   - 例如 auth probe 与真实 launch path / env 必须同源
-   - 例如 live models 成功时不能回退到假默认模型
+   - 已补 login/status 同源配置，以及 `/api/agents` 的 live catalog 不回退假默认
+   - 更深一层仍可继续补 live model discovery 与运行时 launch path 的完全同源

-3. queued / retry 的持久化语义
-   - 前端行为已覆盖
-   - daemon 侧仍可继续锁住 message 关联和队列启动顺序
+3. queued / retry 的更深层持久化语义
+   - 前端行为已覆盖，daemon 侧已补 queued run 取消终态
+   - 仍可继续锁住 retry 的 message 关联和队列启动顺序