refactor(daemon): split agent runtime definitions (#1063)

This commit is contained in:
nettee 2026-05-11 15:01:55 +08:00 committed by GitHub
parent b1d440d2bd
commit ab922327f4
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
41 changed files with 4266 additions and 3777 deletions

View file

@ -14,7 +14,7 @@ Follow the root `AGENTS.md` first. This file only records module-level boundarie
- `apps/daemon/src/` contains only daemon app source.
- `apps/daemon/tests/` contains daemon tests.
- `apps/daemon/sidecar/` contains the daemon sidecar entry.
- CLI/agent argument changes or stdout parser changes belong in `apps/daemon/src/agents.ts` and the matching parser tests.
- CLI/agent argument definition changes belong in `apps/daemon/src/runtimes/defs/`; stdout parser changes belong with the matching runtime helpers and parser tests.
### Router layout

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,3 @@
import type { RuntimeCapabilityMap } from './types.js';
/**
 * Process-wide cache of probed CLI capabilities, keyed by agent id.
 *
 * The detection probe stores one capability map per agent after scanning the
 * CLI's help output; adapter `buildArgs` functions read it back to decide
 * which optional flags are safe to pass.
 */
export const agentCapabilities: Map<string, RuntimeCapabilityMap> = new Map();

View file

@ -0,0 +1,70 @@
import { agentCapabilities } from '../capabilities.js';
import { DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for Claude Code (`claude`).
 *
 * The prompt is piped over stdin and the CLI is asked for its `stream-json`
 * output; optional flags are gated on what the capability probe recorded in
 * `agentCapabilities`.
 */
export const claudeAgentDef = {
  id: 'claude',
  name: 'Claude Code',
  bin: 'claude',
  // Argv-compatible drop-in forks of `claude`, tried in order when `claude`
  // itself is not on PATH. Lets single-binary installs (e.g. only
  // OpenClaude — https://github.com/Gitlawb/openclaude — issue #235) be
  // auto-detected without wrapper scripts.
  fallbackBins: ['openclaude'],
  versionArgs: ['--version'],
  // `--add-dir` and `--include-partial-messages` are documented under the
  // `claude -p` subcommand, not in the global help, so the probe runs
  // `claude -p --help` (fixes issue #430, where `--add-dir` was never
  // detected).
  helpArgs: ['-p', '--help'],
  // Flag string -> capability key. The probe records, per key, whether the
  // help output contains the flag string, and stores the resulting map in
  // `agentCapabilities` under this agent's id.
  capabilityFlags: {
    '--include-partial-messages': 'partialMessages',
    '--add-dir': 'addDir',
  },
  // `claude` has no list-models subcommand. The CLI accepts both the short
  // aliases (sonnet/opus/haiku) and full ids, so both are shipped as hints;
  // anything else can be pasted via the Settings dialog's custom-model input.
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'sonnet', label: 'Sonnet (alias)' },
    { id: 'opus', label: 'Opus (alias)' },
    { id: 'haiku', label: 'Haiku (alias)' },
    { id: 'claude-opus-4-5', label: 'claude-opus-4-5' },
    { id: 'claude-sonnet-4-5', label: 'claude-sonnet-4-5' },
    { id: 'claude-haiku-4-5', label: 'claude-haiku-4-5' },
  ],
  // The prompt never appears in argv: it goes over stdin to avoid both Linux
  // `spawn E2BIG` (MAX_ARG_STRLEN caps a single argv entry at ~128 KB) and
  // Windows `spawn ENAMETOOLONG` (CreateProcess caps the full command line
  // at ~32 KB direct, ~8 KB via .cmd shim). `claude -p` with no positional
  // prompt reads it from stdin under `--input-format text` (the default),
  // which has no length cap.
  buildArgs: (_prompt, _imagePaths, extraAllowedDirs = [], options = {}) => {
    const probed = agentCapabilities.get('claude') || {};
    const allowedDirs = (extraAllowedDirs || []).filter(
      (dir) => typeof dir === 'string' && dir.length > 0,
    );
    return [
      '-p',
      '--output-format',
      'stream-json',
      '--verbose',
      // Richer streaming events, but only newer Claude Code builds know the
      // flag; older installs reject it with "unknown option" and exit 1,
      // killing the chat. Only pass it when the probe saw it.
      ...(probed.partialMessages ? ['--include-partial-messages'] : []),
      ...(options.model && options.model !== 'default'
        ? ['--model', options.model]
        : []),
      // `--add-dir` is older, so it is only withheld when the probe
      // explicitly recorded it as missing (old/forked builds may lack it).
      ...(allowedDirs.length > 0 && probed.addDir !== false
        ? ['--add-dir', ...allowedDirs]
        : []),
      '--permission-mode',
      'bypassPermissions',
    ];
  },
  promptViaStdin: true,
  streamFormat: 'claude-stream-json',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,82 @@
import { DEFAULT_MODEL_OPTION, clampCodexReasoning } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for the Codex CLI (`codex exec --json`).
 *
 * Runs Codex in its structured JSON stream mode with a workspace-write
 * sandbox, delivering the prompt over stdin.
 */
export const codexAgentDef = {
  id: 'codex',
  name: 'Codex CLI',
  bin: 'codex',
  versionArgs: ['--version'],
  // Codex has no `models` subcommand, so the most common ids are shipped as
  // hints. Other ids can be supplied through the custom-model input.
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'gpt-5.5', label: 'gpt-5.5' },
    { id: 'gpt-5.4', label: 'gpt-5.4' },
    { id: 'gpt-5.4-mini', label: 'gpt-5.4-mini' },
    { id: 'gpt-5.3-codex', label: 'gpt-5.3-codex' },
    { id: 'gpt-5.1', label: 'gpt-5.1' },
    { id: 'gpt-5.1-codex-mini', label: 'gpt-5.1-codex-mini' },
    { id: 'gpt-5-codex', label: 'gpt-5-codex' },
    { id: 'gpt-5', label: 'gpt-5' },
    { id: 'o3', label: 'o3' },
    { id: 'o4-mini', label: 'o4-mini' },
  ],
  reasoningOptions: [
    { id: 'default', label: 'Default' },
    { id: 'none', label: 'None' },
    { id: 'minimal', label: 'Minimal' },
    { id: 'low', label: 'Low' },
    { id: 'medium', label: 'Medium' },
    { id: 'high', label: 'High' },
    { id: 'xhigh', label: 'XHigh' },
  ],
  // The prompt travels over the stdin pipe (see `promptViaStdin` below) to
  // avoid Windows `spawn ENAMETOOLONG` while keeping Codex on its structured
  // JSON stream. No `-` sentinel is appended: recent Codex CLI versions
  // reject it with `error: unexpected argument '-' found` and exit with
  // code 2 before reading any prompt (issue #237). The pipe alone suffices.
  buildArgs: (
    _prompt,
    _imagePaths,
    extraAllowedDirs = [],
    options = {},
    runtimeContext = {},
  ) => {
    const addDirArgs = (extraAllowedDirs || [])
      .filter((dir) => typeof dir === 'string' && dir.length > 0)
      .flatMap((dir) => ['--add-dir', dir]);

    const argv = [
      'exec',
      '--json',
      '--skip-git-repo-check',
      '--sandbox',
      'workspace-write',
      '-c',
      'sandbox_workspace_write.network_access=true',
    ];
    // Opt-out switch read from the daemon's own environment.
    if (process.env.OD_CODEX_DISABLE_PLUGINS === '1') {
      argv.push('--disable', 'plugins');
    }
    if (runtimeContext.cwd) {
      argv.push('-C', runtimeContext.cwd);
    }
    argv.push(...addDirArgs);
    if (options.model && options.model !== 'default') {
      argv.push('--model', options.model);
    }
    if (options.reasoning && options.reasoning !== 'default') {
      // Codex takes `-c key=value` config overrides; reasoning effort is
      // exposed as `model_reasoning_effort`. The requested level is first
      // clamped to one the selected model accepts.
      const clamped = clampCodexReasoning(options.model, options.reasoning);
      argv.push('-c', `model_reasoning_effort="${clamped}"`);
    }
    return argv;
  },
  promptViaStdin: true,
  streamFormat: 'json-event-stream',
  eventParser: 'codex',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,70 @@
import { DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for the GitHub Copilot CLI (`copilot`).
 *
 * Transport: the prompt is piped to stdin and `-p` is omitted entirely.
 * `copilot -p <body>` would ship the composed prompt as one argv entry, and
 * Windows CreateProcess caps `lpCommandLine` at ~32 KB direct or ~8 KB via a
 * `.cmd` shim (issue #705) — even a "Hi" grows to several thousand chars
 * once skills and design-system context are composed in. Per upstream
 * copilot-cli issue #1046 (closed as already supported), `echo "..." |
 * copilot --model <id>` and `cat prompt.txt | copilot --model <id>` both
 * work. The earlier `-p -` attempt (PR #351) and the argv-bound revert
 * (PR #466) pre-dated that confirmation: `-p -` makes Copilot treat `-` as
 * a literal one-character prompt, whereas omitting `-p` is a separate code
 * path that reads stdin under a non-TTY pipe — which is how the daemon
 * spawns the child (`stdio: ['pipe', 'pipe', 'pipe']`).
 */
export const copilotAgentDef = {
  id: 'copilot',
  name: 'GitHub Copilot CLI',
  bin: 'copilot',
  versionArgs: ['--version'],
  // There is no `models` subcommand; the CLI accepts whatever the user's
  // Copilot subscription exposes. This is a small evidence-based hint list:
  // the default observed in the JSON stream and the example from
  // `copilot --help`. Any other id can be pasted via Settings.
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'claude-sonnet-4.6', label: 'Claude Sonnet 4.6' },
    { id: 'gpt-5.2', label: 'GPT-5.2' },
  ],
  buildArgs: (_prompt, _imagePaths, extraAllowedDirs = [], options = {}) => {
    const allowedDirs = (extraAllowedDirs || []).filter(
      (dir) => typeof dir === 'string' && dir.length > 0,
    );
    return [
      // Required for non-interactive runs: without it the CLI blocks on a
      // human approval for every tool call. Unlike Codex (whose `exec`
      // subcommand is headless with auto-approve baked in) or Claude Code
      // (which inherits its permission policy from the user's
      // settings.json), Copilot always prompts unless this flag is passed.
      '--allow-all-tools',
      // JSONL output that copilot-stream.js parses into the same typed
      // events as claude-stream.js.
      '--output-format',
      'json',
      ...(options.model && options.model !== 'default'
        ? ['--model', options.model]
        : []),
      // `--add-dir` (repeatable, same flag as Claude Code's) widens
      // Copilot's path-level sandbox to skill seeds and design-system specs
      // outside the project cwd.
      ...allowedDirs.flatMap((dir) => ['--add-dir', dir]),
    ];
  },
  promptViaStdin: true,
  streamFormat: 'copilot-stream-json',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,59 @@
import { DEFAULT_MODEL_OPTION, parseLineSeparatedModels } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for Cursor Agent (`cursor-agent`).
 *
 * Uses print mode with the `stream-json` output format and feeds the prompt
 * over stdin.
 */
export const cursorAgentDef = {
  id: 'cursor-agent',
  name: 'Cursor Agent',
  bin: 'cursor-agent',
  versionArgs: ['--version'],
  // `cursor-agent models` prints one account-bound model id per line. For an
  // unauthenticated user it prints "No models available for this account."
  // instead — that is not a model list, so it is reported as `null` and the
  // static hints are used.
  listModels: {
    args: ['models'],
    timeoutMs: 5000,
    parse: (stdout) => {
      const text = String(stdout || '').trim();
      const unusable = !text || /no models available/i.test(text);
      return unusable ? null : parseLineSeparatedModels(text);
    },
  },
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'auto', label: 'auto' },
    { id: 'sonnet-4', label: 'sonnet-4' },
    { id: 'sonnet-4-thinking', label: 'sonnet-4-thinking' },
    { id: 'gpt-5', label: 'gpt-5' },
  ],
  // Cursor Agent has no `-` "read prompt from stdin" sentinel: passing one
  // makes the CLI treat the dash as the literal user prompt, surfacing as
  // "your message only contains '-'". Stdin stays piped for prompt delivery
  // and no placeholder prompt argument is appended.
  buildArgs: (
    _prompt,
    _imagePaths,
    _extra,
    options = {},
    runtimeContext = {},
  ) => {
    const argv: string[] = [
      '--print',
      '--output-format',
      'stream-json',
      '--stream-partial-output',
      '--force',
      '--trust',
    ];
    if (runtimeContext.cwd) {
      argv.push('--workspace', runtimeContext.cwd);
    }
    if (options.model && options.model !== 'default') {
      argv.push('--model', options.model);
    }
    return argv;
  },
  promptViaStdin: true,
  streamFormat: 'json-event-stream',
  eventParser: 'cursor-agent',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,55 @@
import { DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for DeepSeek TUI (`deepseek exec --auto <prompt>`).
 *
 * This adapter passes the prompt as a positional argv entry rather than over
 * stdin, so it carries an explicit prompt byte budget (`maxPromptArgBytes`).
 */
export const deepseekAgentDef = {
  id: 'deepseek',
  name: 'DeepSeek TUI',
  // Only the `deepseek` dispatcher is probed. It owns the `exec` / `--auto`
  // subcommands and delegates to a sibling `deepseek-tui` runtime binary at
  // exec time. Upstream documents both binaries as required (the npm and
  // cargo paths install them together), so a host with only `deepseek-tui`
  // on PATH is not a supported install — and `deepseek-tui` does not accept
  // the argv shape `buildArgs` produces (`exec --auto <prompt>`). Listing
  // `deepseek-tui` as a fallback would advertise the agent as runnable while
  // `/api/chat` exits immediately on the first prompt.
  bin: 'deepseek',
  versionArgs: ['--version'],
  // No `models` subcommand prints a clean id-per-line list. The canonical
  // DeepSeek V4 ids are documented in the README, and `--model` accepts
  // arbitrary provider/model strings, so anything else can be pasted through
  // the custom-model input.
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'deepseek-v4-pro', label: 'deepseek-v4-pro' },
    { id: 'deepseek-v4-flash', label: 'deepseek-v4-flash' },
  ],
  // Exec mode requires the prompt as a positional argument (no `-` stdin
  // sentinel; `prompt: String` is a required clap field). `--auto` enables
  // agentic mode with auto-approval — the daemon runs every CLI without a
  // TTY, so an interactive approval prompt would hang the run. Output is
  // plain text on stdout (tool calls go to stderr); `--json` is skipped so
  // deltas stream live instead of arriving as one trailing summary object
  // at end-of-turn.
  buildArgs: (prompt, _imagePaths, _extra, options = {}) => {
    const modelArgs =
      options.model && options.model !== 'default'
        ? ['--model', options.model]
        : [];
    return ['exec', '--auto', ...modelArgs, prompt];
  },
  // Byte budget for the argv-delivered prompt, guarding against Windows'
  // ~32 KB CreateProcess limit (or Linux MAX_ARG_STRLEN at the extremes)
  // before spawn. Other argv-sensitive adapters set `promptViaStdin: true`
  // instead; DeepSeek's CLI has no stdin sentinel yet. The /api/chat spawn
  // path checks the composed prompt against this budget and emits an
  // actionable SSE error ("reduce skills/design-system context, or use an
  // adapter with stdin support") rather than a generic ENAMETOOLONG/E2BIG
  // spawn failure. 30_000 bytes leaves ~2.7 KB of headroom under the Windows
  // limit for `exec --auto --model <id>` and any internal quoting.
  maxPromptArgBytes: 30_000,
  streamFormat: 'plain',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,45 @@
import { detectAcpModels, DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
// Argv used for every Devin launch: both model discovery and chat sessions
// start the CLI in ACP mode with the same flags.
const DEVIN_ACP_ARGS: readonly string[] = [
  '--permission-mode',
  'dangerous',
  '--respect-workspace-trust',
  'false',
  'acp',
];

/**
 * Runtime definition for Devin for Terminal (`devin ... acp`).
 *
 * Communicates over ACP JSON-RPC; the model list is discovered through the
 * same ACP invocation and falls back to documented aliases.
 */
export const devinAgentDef = {
  id: 'devin',
  name: 'Devin for Terminal',
  bin: 'devin',
  versionArgs: ['--version'],
  fetchModels: async (resolvedBin, env) => {
    return detectAcpModels({
      bin: resolvedBin,
      args: [...DEVIN_ACP_ARGS],
      env,
      timeoutMs: 15_000,
      defaultModelOption: DEFAULT_MODEL_OPTION,
    });
  },
  // Fallback aliases from the Devin for Terminal docs
  // (https://cli.devin.ai/docs/models): `adaptive` appears in the config
  // example; `opus`, `sonnet`, `swe`, `codex`, `gemini`, and `gpt` are
  // documented as short model-family names / recommended picks.
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'adaptive', label: 'adaptive' },
    { id: 'swe', label: 'swe' },
    { id: 'opus', label: 'opus' },
    { id: 'sonnet', label: 'sonnet' },
    { id: 'codex', label: 'codex' },
    { id: 'gpt', label: 'gpt' },
    { id: 'gemini', label: 'gemini' },
  ],
  // A fresh array per call so callers may extend it freely.
  buildArgs: () => [...DEVIN_ACP_ARGS],
  streamFormat: 'acp-json-rpc',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,38 @@
import { DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for the Gemini CLI (`gemini`).
 *
 * Gemini reads the prompt from stdin when `-p` is omitted and stdin is a
 * pipe. Passing the full composed prompt as a CLI argument causes
 * ENAMETOOLONG on Windows (CreateProcess limit ~32 KB) for any non-trivial
 * prompt, so the prompt is never placed in argv.
 */
export const geminiAgentDef = {
  id: 'gemini',
  name: 'Gemini CLI',
  bin: 'gemini',
  versionArgs: ['--version'],
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    // Gemini 3 (May 2026): top-tier reasoning + fast frontier-class.
    // Both currently ship as previews via the Gemini CLI. Issue #981.
    { id: 'gemini-3-pro-preview', label: 'gemini-3-pro-preview' },
    { id: 'gemini-3-flash-preview', label: 'gemini-3-flash-preview' },
    { id: 'gemini-2.5-pro', label: 'gemini-2.5-pro' },
    { id: 'gemini-2.5-flash', label: 'gemini-2.5-flash' },
    // Cheapest 2.5 multimodal variant; useful for high-volume / low-latency work.
    { id: 'gemini-2.5-flash-lite', label: 'gemini-2.5-flash-lite' },
  ],
  // Workspace trust is granted through this environment variable rather than
  // `--skip-trust`: several Gemini CLI builds hide or reject the flag even
  // though they accept the documented variable.
  env: { GEMINI_CLI_TRUST_WORKSPACE: 'true' },
  // `--yolo` skips interactive approval prompts, which cannot be answered in
  // the no-TTY web UI.
  buildArgs: (_prompt, _imagePaths, _extra, options = {}) => {
    const modelArgs =
      options.model && options.model !== 'default'
        ? ['--model', options.model]
        : [];
    return ['--output-format', 'stream-json', '--yolo', ...modelArgs];
  },
  promptViaStdin: true,
  streamFormat: 'json-event-stream',
  eventParser: 'gemini',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,29 @@
import { detectAcpModels, DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
// Argv shared by model discovery and chat sessions: both start Hermes in ACP
// mode with the same flags.
const HERMES_ACP_ARGS: readonly string[] = ['acp', '--accept-hooks'];

/**
 * Runtime definition for Hermes (`hermes acp --accept-hooks`).
 *
 * Communicates over ACP JSON-RPC; models are discovered through the same ACP
 * invocation, with a static hint list as fallback.
 */
export const hermesAgentDef = {
  id: 'hermes',
  name: 'Hermes',
  bin: 'hermes',
  versionArgs: ['--version'],
  fetchModels: async (resolvedBin, env) => {
    return detectAcpModels({
      bin: resolvedBin,
      args: [...HERMES_ACP_ARGS],
      env,
      timeoutMs: 15_000,
      defaultModelOption: DEFAULT_MODEL_OPTION,
    });
  },
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'openai-codex:gpt-5.5', label: 'gpt-5.5 (openai-codex:gpt-5.5)' },
    { id: 'openai-codex:gpt-5.4', label: 'gpt-5.4 (openai-codex:gpt-5.4)' },
    {
      id: 'openai-codex:gpt-5.4-mini',
      label: 'gpt-5.4-mini (openai-codex:gpt-5.4-mini)',
    },
  ],
  // A fresh array per call so callers may extend it freely.
  buildArgs: () => [...HERMES_ACP_ARGS],
  streamFormat: 'acp-json-rpc',
  mcpDiscovery: 'mature-acp',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,20 @@
import { detectAcpModels, DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
// Argv shared by model discovery and chat sessions.
const KILO_ACP_ARGS: readonly string[] = ['acp'];

/**
 * Runtime definition for Kilo (`kilo acp`).
 *
 * Communicates over ACP JSON-RPC. Models come from ACP discovery; the only
 * static fallback is the default option.
 */
export const kiloAgentDef = {
  id: 'kilo',
  name: 'Kilo',
  bin: 'kilo',
  versionArgs: ['--version'],
  fetchModels: async (resolvedBin, env) => {
    return detectAcpModels({
      bin: resolvedBin,
      args: [...KILO_ACP_ARGS],
      env,
      timeoutMs: 15_000,
      defaultModelOption: DEFAULT_MODEL_OPTION,
    });
  },
  fallbackModels: [DEFAULT_MODEL_OPTION],
  // A fresh array per call so callers may extend it freely.
  buildArgs: () => [...KILO_ACP_ARGS],
  streamFormat: 'acp-json-rpc',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,26 @@
import { detectAcpModels, DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
// Argv shared by model discovery and chat sessions.
const KIMI_ACP_ARGS: readonly string[] = ['acp'];

/**
 * Runtime definition for the Kimi CLI (`kimi acp`).
 *
 * Communicates over ACP JSON-RPC; models are discovered through ACP with a
 * short static hint list as fallback.
 */
export const kimiAgentDef = {
  id: 'kimi',
  name: 'Kimi CLI',
  bin: 'kimi',
  versionArgs: ['--version'],
  fetchModels: async (resolvedBin, env) => {
    return detectAcpModels({
      bin: resolvedBin,
      args: [...KIMI_ACP_ARGS],
      env,
      timeoutMs: 15_000,
      defaultModelOption: DEFAULT_MODEL_OPTION,
    });
  },
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'kimi-k2-turbo-preview', label: 'kimi-k2-turbo-preview' },
    { id: 'moonshot-v1-8k', label: 'moonshot-v1-8k' },
    { id: 'moonshot-v1-32k', label: 'moonshot-v1-32k' },
  ],
  // A fresh array per call so callers may extend it freely.
  buildArgs: () => [...KIMI_ACP_ARGS],
  streamFormat: 'acp-json-rpc',
  mcpDiscovery: 'mature-acp',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,20 @@
import { detectAcpModels, DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
// Argv shared by model discovery and chat sessions.
const KIRO_ACP_ARGS: readonly string[] = ['acp'];

/**
 * Runtime definition for the Kiro CLI (`kiro-cli acp`).
 *
 * Communicates over ACP JSON-RPC. Models come from ACP discovery; the only
 * static fallback is the default option.
 */
export const kiroAgentDef = {
  id: 'kiro',
  name: 'Kiro CLI',
  bin: 'kiro-cli',
  versionArgs: ['--version'],
  fetchModels: async (resolvedBin, env) => {
    return detectAcpModels({
      bin: resolvedBin,
      args: [...KIRO_ACP_ARGS],
      env,
      timeoutMs: 15_000,
      defaultModelOption: DEFAULT_MODEL_OPTION,
    });
  },
  fallbackModels: [DEFAULT_MODEL_OPTION],
  // A fresh array per call so callers may extend it freely.
  buildArgs: () => [...KIRO_ACP_ARGS],
  streamFormat: 'acp-json-rpc',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,43 @@
import { DEFAULT_MODEL_OPTION, parseLineSeparatedModels } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for OpenCode (`opencode-cli`, falling back to
 * `opencode`).
 *
 * Runs `run --format json` and feeds the prompt over stdin using the `-`
 * sentinel.
 */
export const opencodeAgentDef = {
  id: 'opencode',
  name: 'OpenCode',
  bin: 'opencode-cli',
  fallbackBins: ['opencode'],
  versionArgs: ['--version'],
  // `opencode models` prints one `provider/model` per line.
  listModels: {
    args: ['models'],
    parse: parseLineSeparatedModels,
    timeoutMs: 8000,
  },
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    {
      id: 'anthropic/claude-sonnet-4-5',
      label: 'anthropic/claude-sonnet-4-5',
    },
    { id: 'openai/gpt-5', label: 'openai/gpt-5' },
    { id: 'google/gemini-2.5-pro', label: 'google/gemini-2.5-pro' },
  ],
  // The prompt is delivered via stdin (`opencode run -`) to avoid Windows
  // `spawn ENAMETOOLONG` while preserving OpenCode's structured stream.
  buildArgs: (_prompt, _imagePaths, _extra, options = {}) => {
    const modelArgs =
      options.model && options.model !== 'default'
        ? ['--model', options.model]
        : [];
    return [
      'run',
      '--format',
      'json',
      '--dangerously-skip-permissions',
      ...modelArgs,
      // Trailing `-` tells `opencode run` to read the prompt from stdin.
      '-',
    ];
  },
  promptViaStdin: true,
  streamFormat: 'json-event-stream',
  eventParser: 'opencode',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,95 @@
import path from 'node:path';
import { DEFAULT_MODEL_OPTION, execAgentFile, parsePiModels } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for Pi (`pi --mode rpc`).
 *
 * Pi's RPC mode drives the whole conversation over stdio JSON-RPC: the
 * daemon sends a `prompt` command and pi streams back typed events. No
 * prompt is placed in argv, which avoids ENAMETOOLONG and keeps the protocol
 * clean.
 */
export const piAgentDef = {
  id: 'pi',
  name: 'Pi',
  bin: 'pi',
  versionArgs: ['--version'],
  // `pi --list-models` prints its TSV table to stderr (not stdout), so a
  // custom fetcher reads stderr. Any failure or empty parse yields `null`.
  fetchModels: async (resolvedBin, env) => {
    try {
      const result = await execAgentFile(resolvedBin, ['--list-models'], {
        env,
        timeout: 20_000,
        maxBuffer: 8 * 1024 * 1024,
      });
      const models = parsePiModels(result.stderr);
      return models && models.length > 0 ? models : null;
    } catch {
      return null;
    }
  },
  // Hints for the most commonly used providers/models, shown when
  // `pi --list-models` fails or times out.
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    {
      id: 'anthropic/claude-sonnet-4-5',
      label: 'Claude Sonnet 4.5 (anthropic)',
    },
    { id: 'anthropic/claude-opus-4-5', label: 'Claude Opus 4.5 (anthropic)' },
    { id: 'openai/gpt-5', label: 'GPT-5 (openai)' },
    { id: 'openai/o4-mini', label: 'o4-mini (openai)' },
    { id: 'google/gemini-2.5-pro', label: 'Gemini 2.5 Pro (google)' },
    { id: 'google/gemini-2.5-flash', label: 'Gemini 2.5 Flash (google)' },
  ],
  // Thinking-level presets, passed straight to pi's `--thinking` flag.
  reasoningOptions: [
    { id: 'default', label: 'Default' },
    { id: 'off', label: 'Off' },
    { id: 'minimal', label: 'Minimal' },
    { id: 'low', label: 'Low' },
    { id: 'medium', label: 'Medium' },
    { id: 'high', label: 'High' },
    { id: 'xhigh', label: 'XHigh' },
  ],
  buildArgs: (
    _prompt,
    _imagePaths,
    extraAllowedDirs = [],
    options = {},
    runtimeContext = {},
  ) => {
    // pi's `--model` accepts patterns ("sonnet",
    // "anthropic/claude-sonnet-4-5", "openai/gpt-5:high"), so the value is
    // forwarded as-is.
    const modelArgs =
      options.model && options.model !== 'default'
        ? ['--model', options.model]
        : [];
    const thinkingArgs =
      options.reasoning && options.reasoning !== 'default'
        ? ['--thinking', options.reasoning]
        : [];
    // The cwd hint is carried by the composed prompt (same pattern as the
    // other agents) rather than by a system-prompt flag.
    //
    // `extraAllowedDirs` holds skill-seed and design-system directories that
    // live outside the project cwd. pi has no `--add-dir` sandbox flag (it
    // uses the OS cwd), so each absolute directory is passed through
    // `--append-system-prompt` as a hint that it exists; the agent can then
    // reach files inside it with its Read tool. Without this pi would have
    // no way to discover assets outside the project cwd.
    const dirHintArgs = (extraAllowedDirs || [])
      .filter((dir) => typeof dir === 'string' && path.isAbsolute(dir))
      .flatMap((dir) => ['--append-system-prompt', dir]);
    return ['--mode', 'rpc', ...modelArgs, ...thinkingArgs, ...dirHintArgs];
  },
  // The prompt is sent as an RPC `prompt` command on stdin, not as a CLI arg.
  promptViaStdin: true,
  streamFormat: 'pi-rpc',
  // pi's RPC `prompt` command has an `images` field for multimodal input
  // (base64-encoded). The daemon attaches image paths to the session so
  // attachPiRpcSession can read and forward them.
  supportsImagePaths: true,
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,58 @@
import path from 'node:path';
import { DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for the Qoder CLI (`qodercli -p`).
 *
 * Qoder print mode exits after the turn. The composed prompt goes over stdin
 * to avoid argv length limits, and `stream-json` output lets the daemon
 * surface text and usage incrementally.
 *
 * Authentication stays Qoder CLI-owned: users rely on persisted
 * `qodercli login` state, or launch the daemon with
 * QODER_PERSONAL_ACCESS_TOKEN for automation. Do not add that token to a
 * static adapter env; unlike Gemini's workspace trust flag it is a user
 * secret and already flows through the inherited process environment.
 */
export const qoderAgentDef = {
  id: 'qoder',
  name: 'Qoder CLI',
  bin: 'qodercli',
  versionArgs: ['--version'],
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'lite', label: 'Lite' },
    { id: 'efficient', label: 'Efficient' },
    { id: 'auto', label: 'Auto' },
    { id: 'performance', label: 'Performance' },
    { id: 'ultimate', label: 'Ultimate' },
  ],
  buildArgs: (
    _prompt,
    imagePaths,
    extraAllowedDirs = [],
    options = {},
    runtimeContext = {},
  ) => {
    // Only absolute paths are forwarded, for directories and attachments
    // alike.
    const addDirArgs = (extraAllowedDirs || [])
      .filter((dir) => typeof dir === 'string' && path.isAbsolute(dir))
      .flatMap((dir) => ['--add-dir', dir]);
    const attachmentArgs = (imagePaths || [])
      .filter((file) => typeof file === 'string' && path.isAbsolute(file))
      .flatMap((file) => ['--attachment', file]);

    // `--yolo` is Qoder's documented non-interactive approval flag.
    const argv = ['-p', '--output-format', 'stream-json', '--yolo'];
    if (runtimeContext.cwd) {
      // `-w` selects the workspace.
      argv.push('-w', runtimeContext.cwd);
    }
    if (options.model && options.model !== 'default') {
      argv.push('--model', options.model);
    }
    argv.push(...addDirArgs, ...attachmentArgs);
    return argv;
  },
  promptViaStdin: true,
  streamFormat: 'qoder-stream-json',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,27 @@
import { DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
/**
 * Runtime definition for Qwen Code (`qwen`).
 *
 * The prompt is delivered via stdin (`qwen -`) to avoid Windows
 * `spawn ENAMETOOLONG` for large composed prompts. Output is treated as
 * plain text.
 */
export const qwenAgentDef = {
  id: 'qwen',
  name: 'Qwen Code',
  bin: 'qwen',
  versionArgs: ['--version'],
  fallbackModels: [
    DEFAULT_MODEL_OPTION,
    { id: 'qwen3-coder-plus', label: 'qwen3-coder-plus' },
    { id: 'qwen3-coder-flash', label: 'qwen3-coder-flash' },
  ],
  // Qwen Code is a Gemini-CLI fork and supports the same `--yolo`
  // non-interactive mode.
  buildArgs: (_prompt, _imagePaths, _extra, options = {}) => {
    const modelArgs =
      options.model && options.model !== 'default'
        ? ['--model', options.model]
        : [];
    // The trailing `-` is the stdin sentinel.
    return ['--yolo', ...modelArgs, '-'];
  },
  promptViaStdin: true,
  streamFormat: 'plain',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,47 @@
import { detectAcpModels } from '../../acp.js';
import { parsePiModels } from '../../pi-rpc.js';
import { execAgentFile } from '../invocation.js';
import { DEFAULT_MODEL_OPTION } from '../models.js';
import type { RuntimeModelOption } from '../types.js';
export { detectAcpModels, parsePiModels, execAgentFile, DEFAULT_MODEL_OPTION };
/**
 * Clamp a requested Codex reasoning effort to a level the given model accepts.
 *
 * Rules, in order:
 * - A falsy `effort` is returned unchanged.
 * - For a missing/empty/`default` model or the gpt-5.2–gpt-5.5 families,
 *   `minimal` becomes `low`.
 * - For `gpt-5.1`, `xhigh` becomes `high`.
 * - For `gpt-5.1-codex-mini`, `high`/`xhigh` become `high` and everything
 *   else becomes `medium`.
 * - Otherwise the effort passes through untouched.
 *
 * @param modelId Model id, optionally `provider/`-prefixed; only the segment
 *   after the last `/` is considered.
 * @param effort Requested reasoning effort.
 * @returns The effort to pass to Codex.
 */
export function clampCodexReasoning(
  modelId: string | null | undefined,
  effort: string | null | undefined,
) {
  if (!effort) return effort;

  // Strip surrounding whitespace and any `provider/` prefix.
  const trimmedModel = String(modelId ?? '').trim();
  const bareId = trimmedModel.slice(trimmedModel.lastIndexOf('/') + 1);

  const lateGpt5Prefixes = ['gpt-5.2', 'gpt-5.3', 'gpt-5.4', 'gpt-5.5'];
  const isLateGpt5 =
    bareId === '' ||
    bareId === 'default' ||
    lateGpt5Prefixes.some((prefix) => bareId.startsWith(prefix));
  if (isLateGpt5 && effort === 'minimal') {
    return 'low';
  }

  switch (bareId) {
    case 'gpt-5.1':
      return effort === 'xhigh' ? 'high' : effort;
    case 'gpt-5.1-codex-mini':
      return effort === 'high' || effort === 'xhigh' ? 'high' : 'medium';
    default:
      return effort;
  }
}
/**
 * Parse one-id-per-line stdout from `<cli> models` into model options.
 *
 * Lines are trimmed; blank lines and lines starting with `#` are ignored.
 * Duplicate ids are dropped while keeping first-seen order (some CLIs print
 * near-duplicates). The synthetic default option is always the first entry,
 * so the result is never empty. Used by opencode / cursor-agent.
 *
 * @param stdout Raw stdout of the CLI's model-listing command.
 * @returns The default option followed by one `{ id, label }` per unique id.
 */
export function parseLineSeparatedModels(stdout: string): RuntimeModelOption[] {
  const uniqueIds = new Set<string>();
  for (const rawLine of String(stdout || '').split('\n')) {
    const candidate = rawLine.trim();
    if (candidate.length === 0 || candidate.startsWith('#')) continue;
    // A Set keeps insertion order, so the first occurrence wins.
    uniqueIds.add(candidate);
  }
  return [
    DEFAULT_MODEL_OPTION,
    ...Array.from(uniqueIds, (id) => ({ id, label: id })),
  ];
}

View file

@ -0,0 +1,20 @@
import { detectAcpModels, DEFAULT_MODEL_OPTION } from './shared.js';
import type { RuntimeAgentDef } from '../types.js';
// The `vibe-acp` binary is launched without extra arguments, both for model
// discovery and for chat sessions.
const VIBE_ACP_ARGS: readonly string[] = [];

/**
 * Runtime definition for the Mistral Vibe CLI (`vibe-acp`).
 *
 * Communicates over ACP JSON-RPC. Models come from ACP discovery; the only
 * static fallback is the default option.
 */
export const vibeAgentDef = {
  id: 'vibe',
  name: 'Mistral Vibe CLI',
  bin: 'vibe-acp',
  versionArgs: ['--version'],
  fetchModels: async (resolvedBin, env) => {
    return detectAcpModels({
      bin: resolvedBin,
      args: [...VIBE_ACP_ARGS],
      env,
      timeoutMs: 15_000,
      defaultModelOption: DEFAULT_MODEL_OPTION,
    });
  },
  fallbackModels: [DEFAULT_MODEL_OPTION],
  // A fresh array per call so callers may extend it freely.
  buildArgs: () => [...VIBE_ACP_ARGS],
  streamFormat: 'acp-json-rpc',
} satisfies RuntimeAgentDef;

View file

@ -0,0 +1,148 @@
import { execAgentFile } from './invocation.js';
import { AGENT_DEFS } from './registry.js';
import { DEFAULT_MODEL_OPTION, rememberLiveModels } from './models.js';
import { resolveAgentExecutable } from './executables.js';
import { spawnEnvForAgent } from './env.js';
import { agentCapabilities } from './capabilities.js';
import { installMetaForAgent } from './metadata.js';
import type {
DetectedAgent,
RuntimeAgentDef,
RuntimeCapabilityMap,
RuntimeModelOption,
} from './types.js';
/**
 * Resolve the model list to show for an installed agent.
 *
 * Order of preference:
 * 1. `def.fetchModels` (custom async fetcher), when defined.
 * 2. `def.listModels` (run the CLI with `args`, parse its stdout).
 * 3. `def.fallbackModels` (static hints).
 *
 * A live result is only used when it contains at least one model other than
 * the synthetic default option. Parsers that always prepend the default
 * option return a one-element list for empty CLI output; treating that as
 * usable would leave the picker stuck on Default-only instead of showing the
 * static hints. Any thrown error also falls back to the static hints.
 *
 * @param def Agent definition being probed.
 * @param resolvedBin Path of the resolved executable.
 * @param env Environment passed to the spawned CLI.
 * @returns The live model list, or `def.fallbackModels` when no usable live
 *   list is available.
 */
async function fetchModels(
  def: RuntimeAgentDef,
  resolvedBin: string,
  env: NodeJS.ProcessEnv,
): Promise<RuntimeModelOption[]> {
  // True when the list offers something beyond the synthetic default entry.
  // An empty list fails this check as well.
  const hasRealModel = (models: readonly { id: string }[]): boolean =>
    models.some((model) => model.id !== DEFAULT_MODEL_OPTION.id);

  if (typeof def.fetchModels === 'function') {
    try {
      const parsed = await def.fetchModels(resolvedBin, env);
      if (!parsed || !hasRealModel(parsed)) return def.fallbackModels;
      return parsed;
    } catch {
      return def.fallbackModels;
    }
  }
  if (!def.listModels) return def.fallbackModels;
  try {
    const { stdout } = await execAgentFile(resolvedBin, def.listModels.args, {
      env,
      timeout: def.listModels.timeoutMs ?? 5000,
      // Model lists from popular CLIs (e.g. opencode) easily exceed the
      // default 1MB buffer once every openrouter model is included. Raise
      // the cap so the listing is not truncated.
      maxBuffer: 8 * 1024 * 1024,
    });
    const parsed = def.listModels.parse(String(stdout));
    // A null, empty, or default-only parse result means the CLI did not
    // return a usable list (e.g. cursor-agent's "No models available", or
    // empty output run through a parser that prepends the default option);
    // fall back to the static hints.
    if (!parsed || !hasRealModel(parsed)) return def.fallbackModels;
    return parsed;
  } catch {
    return def.fallbackModels;
  }
}
/**
 * Probe a single agent definition: resolve its executable, read its version,
 * record optional-flag capabilities, and collect its model list.
 *
 * @param def Agent definition to probe.
 * @param configuredEnv User-configured environment overrides for this agent.
 * @returns The agent's public metadata plus `available`, `models`, install
 *   metadata and — when the executable was found — `path` and `version`.
 *   `version` is the first line of the version command's stdout, or `null`
 *   when the command failed or printed nothing.
 */
async function probe(
  def: RuntimeAgentDef,
  configuredEnv: Record<string, string> = {},
): Promise<DetectedAgent> {
  const resolved = resolveAgentExecutable(def, configuredEnv);
  if (!resolved) {
    return {
      ...stripFns(def),
      models: def.fallbackModels ?? [DEFAULT_MODEL_OPTION],
      available: false,
      ...installMetaForAgent(def.id),
    };
  }
  const probeEnv = spawnEnvForAgent(
    def.id,
    {
      ...process.env,
      ...(def.env || {}),
    },
    configuredEnv,
  );
  let version: string | null = null;
  try {
    const { stdout } = await execAgentFile(resolved, def.versionArgs, {
      env: probeEnv,
      timeout: 3000,
    });
    // Take the first line only. Split on CRLF as well as LF so Windows
    // output does not leave a trailing `\r`, and report empty output as
    // "unknown" (`null`) rather than an empty string.
    const firstLine = String(stdout).trim().split(/\r?\n/)[0]?.trim();
    version = firstLine ? firstLine : null;
  } catch {
    // binary exists but --version failed; still mark available
  }
  // Probe the help output once per agent and record which flags the
  // installed CLI advertises. Cached on `agentCapabilities` for buildArgs
  // to consult.
  if (def.helpArgs && def.capabilityFlags) {
    const caps: RuntimeCapabilityMap = {};
    try {
      const { stdout } = await execAgentFile(resolved, def.helpArgs, {
        env: probeEnv,
        timeout: 5000,
        maxBuffer: 4 * 1024 * 1024,
      });
      const helpText = String(stdout);
      for (const [flag, key] of Object.entries(def.capabilityFlags)) {
        caps[key] = helpText.includes(flag);
      }
    } catch {
      // If the help probe fails, caps stays empty, so buildArgs sees every
      // capability as unknown (`undefined`) rather than confirmed.
    }
    agentCapabilities.set(def.id, caps);
  }
  const models = await fetchModels(def, resolved, probeEnv);
  return {
    ...stripFns(def),
    models,
    available: true,
    path: resolved,
    version,
    ...installMetaForAgent(def.id),
  };
}
/**
 * Project an agent definition down to the declarative metadata that is safe
 * to return from the API.
 *
 * Removed: the `buildArgs` / `listModels` / `fetchModels` closures, the
 * static `fallbackModels` slot (the caller supplies `models` separately),
 * and the probe-or-spawn-only fields `helpArgs`, `capabilityFlags`,
 * `fallbackBins`, `maxPromptArgBytes` and `env`. Everything else
 * (name, bin, streamFormat, reasoningOptions, ...) is kept as-is.
 */
function stripFns(
  def: RuntimeAgentDef,
): Omit<DetectedAgent, 'models' | 'available' | 'path' | 'version'> {
  // Underscore-prefixed bindings exist only to peel the fields off `def`;
  // none of them is read afterwards.
  const {
    buildArgs: _buildArgs,
    listModels: _listModels,
    fetchModels: _fetchModels,
    fallbackModels: _fallbackModels,
    helpArgs: _helpArgs,
    capabilityFlags: _capabilityFlags,
    fallbackBins: _fallbackBins,
    maxPromptArgBytes: _maxPromptArgBytes,
    env: _env,
    ...apiSafe
  } = def;
  return apiSafe;
}
/**
 * Probe every registered agent concurrently and return the results in
 * `AGENT_DEFS` order.
 *
 * @param configuredEnvByAgent - Map of agent id to that agent's configured
 *   env overrides; agents without an entry are probed with `{}`.
 */
export async function detectAgents(
  configuredEnvByAgent: Record<string, Record<string, string>> = {},
) {
  const pending = AGENT_DEFS.map((def) =>
    probe(def, configuredEnvByAgent?.[def.id] ?? {}),
  );
  const detected = await Promise.all(pending);
  // Refresh the model-validation cache with exactly what is about to be
  // shown to the UI, so /api/chat accepts any model the user could have
  // just picked (including ones that only appeared after a CLI re-auth).
  detected.forEach((agent) => {
    rememberLiveModels(agent.id, agent.models);
  });
  return detected;
}

View file

@ -0,0 +1,44 @@
import { expandConfiguredEnv } from './paths.js';
type RuntimeEnvMap = NodeJS.ProcessEnv | Record<string, string>;
// Build the env passed to spawn() for a given agent adapter.
//
// The claude adapter strips ANTHROPIC_API_KEY so Claude Code's own auth
// resolution (claude login / Pro/Max plan) wins instead of silently
// falling back to API-key billing whenever the daemon happened to be
// launched from a shell that exported the key for SDK or scripting use.
// See issue #398.
//
// However, when ANTHROPIC_BASE_URL is set the user is intentionally
// routing Claude Code to a custom endpoint (e.g. a Kimi/Moonshot proxy).
// In that case claude login is meaningless, so preserve the API key so
// the child can authenticate against the custom base URL.
//
// Windows env-var names are case-insensitive at the kernel level
// (`GetEnvironmentVariable`), but spreading `process.env` into a plain
// object loses Node's case-insensitive accessor — `Anthropic_Api_Key`
// would survive a literal `delete env.ANTHROPIC_API_KEY` and still reach
// the child. Iterate keys and compare case-insensitively to close that.
/**
 * Compose the environment handed to a spawned agent process.
 *
 * `configuredEnv` (after `expandConfiguredEnv`) is layered over `baseEnv`.
 * For every agent except `claude` that merged env is returned untouched.
 * For `claude`, every case-variant of `ANTHROPIC_API_KEY` is removed unless
 * some case-variant of `ANTHROPIC_BASE_URL` holds a non-blank string.
 *
 * @param agentId - Agent adapter id.
 * @param baseEnv - Environment to start from.
 * @param configuredEnv - User-configured overrides.
 * @returns A fresh env object; `baseEnv` itself is not mutated.
 */
export function spawnEnvForAgent(
  agentId: string,
  baseEnv: RuntimeEnvMap,
  configuredEnv: unknown = {},
): NodeJS.ProcessEnv {
  const env: NodeJS.ProcessEnv = {
    ...baseEnv,
    ...expandConfiguredEnv(configuredEnv),
  };
  if (agentId !== 'claude') return env;

  const names = Object.keys(env);
  // A custom base URL means the user routes Claude Code to their own
  // endpoint, so the API key must survive.
  for (const name of names) {
    if (name.toUpperCase() !== 'ANTHROPIC_BASE_URL') continue;
    const value = env[name];
    if (typeof value === 'string' && value.trim() !== '') return env;
  }
  // Compare case-insensitively: the spread copy has lost Node's
  // case-insensitive env accessor on Windows.
  for (const name of names) {
    if (name.toUpperCase() === 'ANTHROPIC_API_KEY') delete env[name];
  }
  return env;
}

View file

@ -0,0 +1,170 @@
import { accessSync, constants, existsSync, statSync } from 'node:fs';
import { delimiter } from 'node:path';
import path from 'node:path';
import { homedir } from 'node:os';
import { wellKnownUserToolchainBins } from '@open-design/platform';
import { expandHomePath } from './paths.js';
import type { RuntimeAgentDef } from './types.js';
// Agent id -> name of the configured-env key that may carry an explicit
// path to that agent's executable. Read by `configuredExecutableOverride`;
// agents without an entry here cannot be overridden that way.
const AGENT_BIN_ENV_KEYS = new Map<string, string>([
['claude', 'CLAUDE_BIN'],
['codex', 'CODEX_BIN'],
['copilot', 'COPILOT_BIN'],
['cursor-agent', 'CURSOR_AGENT_BIN'],
['deepseek', 'DEEPSEEK_BIN'],
['devin', 'DEVIN_BIN'],
['gemini', 'GEMINI_BIN'],
['hermes', 'HERMES_BIN'],
['kimi', 'KIMI_BIN'],
['kiro', 'KIRO_BIN'],
['kilo', 'KILO_BIN'],
['opencode', 'OPENCODE_BIN'],
['pi', 'PI_BIN'],
['qoder', 'QODER_BIN'],
['qwen', 'QWEN_BIN'],
['vibe', 'VIBE_BIN'],
]);
// How long a computed toolchain-dir list is reused by `userToolchainDirs`.
const TOOLCHAIN_DIR_CACHE_TTL_MS = 5000;
// Cache state for `userToolchainDirs`: the home directory the cached list
// was computed for, the list itself, and the `Date.now()` stamp of when it
// was computed.
let cachedToolchainHome: string | null = null;
let cachedToolchainDirs: string[] | null = null;
let cachedToolchainDirsAt = 0;
/**
 * List well-known user-level toolchain `bin` directories, cached for
 * `TOOLCHAIN_DIR_CACHE_TTL_MS` per home directory.
 *
 * The home directory is `OD_AGENT_HOME` when set, otherwise `homedir()`.
 *
 * @returns Directory paths as produced by `wellKnownUserToolchainBins`.
 */
function userToolchainDirs() {
  const homeOverride = process.env.OD_AGENT_HOME;
  const home = homeOverride || homedir();
  const now = Date.now();
  if (
    cachedToolchainHome === home &&
    cachedToolchainDirs &&
    now - cachedToolchainDirsAt < TOOLCHAIN_DIR_CACHE_TTL_MS
  ) {
    return cachedToolchainDirs;
  }
  // When OD_AGENT_HOME is set, scope the search strictly to the override
  // home: skip Homebrew / /usr/local *and* pass an empty env so that a
  // developer or CI runner with NPM_CONFIG_PREFIX / npm_config_prefix
  // exported can't leak the real machine's <prefix>/bin into a sandboxed
  // detection run. Without this the agents.test.ts cases that build a
  // tmp home would be machine-environment-dependent.
  const dirs = wellKnownUserToolchainBins({
    home,
    includeSystemBins: process.platform !== 'win32' && !homeOverride,
    env: homeOverride ? {} : process.env,
  });
  // Commit the cache key, timestamp and value together, and only after the
  // scan succeeded. Stamping the key/time first would, if the scan threw,
  // leave the previous home's list cached under the new home for a full
  // TTL window.
  cachedToolchainHome = home;
  cachedToolchainDirsAt = now;
  cachedToolchainDirs = dirs;
  return dirs;
}
/**
 * Ordered, de-duplicated list of directories to search for agent binaries:
 * the entries of `PATH` first, followed by the user toolchain directories.
 * Empty entries are dropped; the first occurrence of a directory wins.
 */
function resolvePathDirs() {
  const fromPath = (process.env.PATH || '').split(delimiter);
  // GUI launchers (macOS .app bundles, Linux .desktop files) often start
  // with a minimal PATH. Include common user-level CLI install locations
  // so agent detection matches the user's shell-installed tools,
  // especially Node version managers.
  const fromToolchains = userToolchainDirs();
  // A Set keeps insertion order and ignores repeats, which is exactly the
  // "first occurrence wins" rule.
  const ordered = new Set<string>();
  for (const dir of [...fromPath, ...fromToolchains]) {
    if (dir) ordered.add(dir);
  }
  return [...ordered];
}
/**
 * Find the first regular file named `bin` across the search directories.
 *
 * On Windows each `PATHEXT` extension is tried in order for every
 * directory; elsewhere the bare name is used.
 *
 * Compared with a plain existence check this skips directories that
 * happen to share the binary's name, and it trims and drops empty
 * `PATHEXT` entries so a stray `;` can't match an extensionless file.
 *
 * @param bin - Binary name without directory.
 * @returns Full path of the first match, or null when none is found.
 */
export function resolveOnPath(bin: string): string | null {
  const defaultWindowsExts = ['.EXE', '.CMD', '.BAT'];
  let exts = [''];
  if (process.platform === 'win32') {
    const parsed = (process.env.PATHEXT || '')
      .split(';')
      .map((value) => value.trim())
      .filter(Boolean);
    exts = parsed.length > 0 ? parsed : defaultWindowsExts;
  }
  // `statSync` follows symlinks like `existsSync` does, and throws for
  // missing or unreadable entries — treat any failure as "not a match".
  const isRegularFile = (candidate: string): boolean => {
    try {
      return statSync(candidate).isFile();
    } catch {
      return false;
    }
  };
  for (const dir of resolvePathDirs()) {
    for (const ext of exts) {
      const full = path.join(dir, bin + ext);
      if (isRegularFile(full)) return full;
    }
  }
  return null;
}
/**
 * Decide from the file extension alone whether Windows would treat
 * `filePath` as executable: the extension must appear (case-insensitively)
 * in `PATHEXT`, or in `.EXE;.CMD;.BAT` when `PATHEXT` is unset or empty.
 * A path without an extension is never considered executable.
 */
function looksExecutableOnWindows(filePath: string): boolean {
  const fileExt = path.extname(filePath).trim().toUpperCase();
  if (fileExt === '') return false;
  const pathext = process.env.PATHEXT || '.EXE;.CMD;.BAT';
  for (const entry of pathext.split(';')) {
    const candidate = entry.trim().toUpperCase();
    // Blank entries (e.g. from a trailing `;`) can never match a
    // non-empty extension.
    if (candidate && candidate === fileExt) return true;
  }
  return false;
}
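// Agent executable resolution. A valid per-agent `*_BIN` override from the
// configured env (validated by `configuredExecutableOverride` below) takes
// precedence. Otherwise the PATH lookup tries `def.bin` first, then walks
// `def.fallbackBins` in order. The fallback list is used for agents whose
// forks ship under a different binary name but speak the exact same CLI
// (Claude Code → OpenClaude, issue #235). Resolution yields null when there
// is no usable override and no candidate is on PATH.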
/**
 * Validate an explicit executable path supplied through the agent's
 * `*_BIN` key in the configured env.
 *
 * The value is accepted only when it is a non-blank string that, after
 * `~` expansion, is an absolute path to an existing regular file which is
 * executable (X_OK on POSIX, a PATHEXT-style extension on Windows).
 *
 * @returns The expanded path, or null when the agent has no override key,
 *   no value is configured, or validation fails.
 */
function configuredExecutableOverride(
  def: RuntimeAgentDef,
  configuredEnv: Record<string, string> = {},
): string | null {
  const envKey = AGENT_BIN_ENV_KEYS.get(def?.id);
  if (!envKey) return null;

  const raw = configuredEnv?.[envKey];
  if (typeof raw !== 'string') return null;
  const trimmed = raw.trim();
  if (trimmed.length === 0) return null;

  const candidate = expandHomePath(trimmed);
  if (!path.isAbsolute(candidate)) return null;

  try {
    if (!statSync(candidate).isFile()) return null;
    if (process.platform !== 'win32') {
      // Throws when the file is not executable; handled below.
      accessSync(candidate, constants.X_OK);
      return candidate;
    }
    return looksExecutableOnWindows(candidate) ? candidate : null;
  } catch {
    // Missing file, permission error or failed X_OK check.
    return null;
  }
}
/**
 * Resolve the executable to spawn for an agent.
 *
 * Convenience wrapper over `inspectAgentExecutableResolution` that returns
 * only the selected path (or null when nothing resolves).
 */
export function resolveAgentExecutable(
  def: RuntimeAgentDef,
  configuredEnv: Record<string, string> = {},
): string | null {
  const { selectedPath } = inspectAgentExecutableResolution(def, configuredEnv);
  return selectedPath;
}
/**
 * Resolve an agent's executable and report how the decision was made.
 *
 * @returns
 *  - `configuredOverridePath`: validated `*_BIN` override, if any;
 *  - `pathResolvedPath`: first hit for `def.bin`, then each of
 *    `def.fallbackBins`, via `resolveOnPath`;
 *  - `selectedPath`: the override when present, otherwise the PATH hit.
 *  All three are null when `def` has no `bin`.
 */
export function inspectAgentExecutableResolution(
  def: RuntimeAgentDef,
  configuredEnv: Record<string, string> = {},
): {
  configuredOverridePath: string | null;
  pathResolvedPath: string | null;
  selectedPath: string | null;
} {
  if (!def?.bin) {
    return {
      configuredOverridePath: null,
      pathResolvedPath: null,
      selectedPath: null,
    };
  }

  const configuredOverridePath = configuredExecutableOverride(def, configuredEnv);

  const fallbackBins = Array.isArray(def.fallbackBins) ? def.fallbackBins : [];
  let pathResolvedPath: string | null = null;
  for (const bin of [def.bin, ...fallbackBins]) {
    pathResolvedPath = resolveOnPath(bin);
    // Stop at the first candidate that exists on the search path.
    if (pathResolvedPath) break;
  }

  const selectedPath = configuredOverridePath || pathResolvedPath;
  return { configuredOverridePath, pathResolvedPath, selectedPath };
}

View file

@ -0,0 +1,29 @@
import { execFile } from 'node:child_process';
import { promisify } from 'node:util';
import { createCommandInvocation } from '@open-design/platform';
import type { RuntimeExecOptions } from './types.js';
const execFileP = promisify(execFile);
/**
 * Run an agent binary to completion and collect its output.
 *
 * The command and args are first passed through `createCommandInvocation`,
 * and the resulting command, args and `windowsVerbatimArguments` flag are
 * what actually reach the promisified `execFile`.
 *
 * @param command - Executable to run.
 * @param args - Arguments for the executable.
 * @param options - `execFile` options; `env` is also forwarded to
 *   `createCommandInvocation` when provided.
 */
export function execAgentFile(
  command: string,
  args: string[],
  options: RuntimeExecOptions = {},
) {
  const { env } = options;
  // Only include `env` in the invocation request when the caller set one.
  const invocation = createCommandInvocation(
    env ? { command, args, env } : { command, args },
  );
  const { command: file, args: argv, windowsVerbatimArguments } = invocation;
  return execFileP(file, argv, { ...options, windowsVerbatimArguments });
}

View file

@ -0,0 +1,22 @@
import type { RuntimeAgentDef } from './types.js';
// Options for `buildLiveArtifactsMcpServersForAgent`.
type McpOptions = {
  // Set to false to suppress the server entry entirely (default: true).
  enabled?: boolean;
  // Executable that serves the MCP endpoint (default: 'od').
  command?: string;
  // Arguments placed before the fixed `mcp live-artifacts` pair.
  argsPrefix?: string[];
};

/**
 * Build the MCP server list to offer an agent for live artifacts.
 *
 * @returns A single `open-design-live-artifacts` server entry when the
 *   feature is enabled and the agent declares `mcpDiscovery: 'mature-acp'`;
 *   otherwise an empty array.
 */
export function buildLiveArtifactsMcpServersForAgent(
  def: RuntimeAgentDef,
  options: McpOptions = {},
) {
  const { enabled = true, command = 'od', argsPrefix = [] } = options;
  const supportsDiscovery = def?.mcpDiscovery === 'mature-acp';
  if (!enabled || !supportsDiscovery) return [];
  const args = [...argsPrefix, 'mcp', 'live-artifacts'];
  return [
    {
      name: 'open-design-live-artifacts',
      command,
      args,
      env: [],
    },
  ];
}

View file

@ -0,0 +1,93 @@
/**
 * HTTPS links for the web UI when an agent is unavailable. Keys match `AGENT_DEFS[].id`.
 *
 * Each entry may carry an `installUrl` and/or a `docsUrl`. Values are not
 * used raw: `installMetaForAgent` passes them through `sanitizeHttpsUrl`,
 * which drops anything that is not a parseable `https:` URL.
 */
const AGENT_INSTALL_LINKS: Record<
string,
{ installUrl?: string; docsUrl?: string }
> = {
claude: {
installUrl: 'https://docs.anthropic.com/en/docs/claude-code/setup',
docsUrl: 'https://docs.anthropic.com/en/docs/claude-code',
},
codex: {
installUrl: 'https://github.com/openai/codex',
docsUrl: 'https://developers.openai.com/codex',
},
devin: {
installUrl: 'https://cli.devin.ai/docs',
docsUrl: 'https://docs.devin.ai',
},
gemini: {
installUrl: 'https://github.com/google-gemini/gemini-cli',
docsUrl: 'https://github.com/google-gemini/gemini-cli/blob/main/README.md',
},
opencode: {
installUrl: 'https://opencode.ai/docs',
docsUrl: 'https://github.com/sst/opencode',
},
hermes: {
installUrl: 'https://github.com/nexu-io/open-design/blob/main/docs/agent-adapters.md',
docsUrl: 'https://hermes-agent.nousresearch.com/docs/',
},
kimi: {
installUrl: 'https://github.com/MoonshotAI/kimi-cli',
docsUrl: 'https://www.kimi.com/code/docs/en/kimi-cli/guides/getting-started.html',
},
'cursor-agent': {
installUrl: 'https://cursor.com/docs/cli/overview',
docsUrl: 'https://docs.cursor.com/en/cli/overview',
},
qwen: {
installUrl: 'https://github.com/QwenLM/qwen-code',
docsUrl: 'https://qwenlm.github.io/qwen-code-docs/en/index',
},
qoder: {
installUrl: 'https://qoder.com/download',
docsUrl: 'https://docs.qoder.com',
},
copilot: {
installUrl: 'https://github.com/github/copilot-cli',
docsUrl: 'https://docs.github.com/en/copilot/how-tos/use-copilot-extensions/use-in-cli',
},
pi: {
installUrl: 'https://github.com/nexu-io/open-design/blob/main/docs/agent-adapters.md',
docsUrl: 'https://github.com/badlogic/pi-mono/blob/main/packages/coding-agent/README.md',
},
kiro: {
installUrl: 'https://kiro.dev',
docsUrl: 'https://kiro.dev/docs/cli/',
},
kilo: {
installUrl: 'https://kilo.ai',
docsUrl: 'https://kilo.ai/docs/cli',
},
vibe: {
installUrl: 'https://docs.mistral.ai',
docsUrl: 'https://github.com/mistralai/vibe-acp',
},
deepseek: {
installUrl: 'https://github.com/deepseek-ai/DeepSeek-TUI',
docsUrl: 'https://github.com/deepseek-ai/DeepSeek-TUI/blob/main/README.md',
},
};
/**
 * Return the normalized form of `value` when it parses as an `https:` URL.
 *
 * @param value - Candidate URL text.
 * @returns `URL#toString()` of the parsed value, or undefined when the
 *   input is empty, unparseable, or uses any other protocol.
 */
function sanitizeHttpsUrl(value: string | undefined): string | undefined {
  if (!value) return undefined;
  let url: URL;
  try {
    url = new URL(value);
  } catch {
    // Not a valid absolute URL.
    return undefined;
  }
  if (url.protocol !== 'https:') return undefined;
  return url.toString();
}
/**
 * Look up the install/docs links for an agent.
 *
 * @param agentId - Agent id, matched against `AGENT_INSTALL_LINKS`.
 * @returns An object holding only the links that exist and pass
 *   `sanitizeHttpsUrl`; `{}` for an unknown agent.
 */
export function installMetaForAgent(
  agentId: string,
): { installUrl?: string; docsUrl?: string } {
  const links = AGENT_INSTALL_LINKS[agentId];
  if (!links) return {};

  const result: { installUrl?: string; docsUrl?: string } = {};
  const installUrl = sanitizeHttpsUrl(links.installUrl);
  if (installUrl) result.installUrl = installUrl;
  const docsUrl = sanitizeHttpsUrl(links.docsUrl);
  if (docsUrl) result.docsUrl = docsUrl;
  return result;
}

View file

@ -0,0 +1,47 @@
import type { RuntimeAgentDef, RuntimeModelOption } from './types.js';
// Placeholder model entry labelled "Default (CLI config)". `probe` falls
// back to a list containing only this entry when an agent is unavailable
// and its definition provides no `fallbackModels`.
export const DEFAULT_MODEL_OPTION: RuntimeModelOption = {
id: 'default',
label: 'Default (CLI config)',
};
// Daemon's /api/chat needs to validate the user's model pick against the
// list we last surfaced to the UI. We keep a per-agent cache of the most
// recent live list (refreshed every detectAgents() call) and additionally
// trust any value present in the static fallback. A model that's neither
// gets rejected so a stale or hostile value can't smuggle arbitrary flags.
//
// Shape: agent id -> set of model ids. Written by `rememberLiveModels`,
// read by `isKnownModel`.
const liveModelCache = new Map<string, Set<string>>();
/**
 * Replace the cached set of live model ids for an agent.
 *
 * Entries without a string `id` are skipped. A non-array `models` value
 * leaves the existing cache entry untouched.
 */
export function rememberLiveModels(agentId: string, models: RuntimeModelOption[]) {
  if (!Array.isArray(models)) return;
  const ids = new Set<string>();
  for (const model of models) {
    const id = model && model.id;
    if (typeof id === 'string') ids.add(id);
  }
  liveModelCache.set(agentId, ids);
}
/**
 * Check whether `modelId` is one the daemon has offered for this agent.
 *
 * @returns true when the id is in the agent's cached live list or in its
 *   static `fallbackModels`; false otherwise, including for an empty,
 *   null or undefined id.
 */
export function isKnownModel(def: RuntimeAgentDef, modelId: string | null | undefined) {
  if (!modelId) return false;
  if (liveModelCache.get(def.id)?.has(modelId)) return true;
  if (!Array.isArray(def.fallbackModels)) return false;
  for (const option of def.fallbackModels) {
    if (option.id === modelId) return true;
  }
  return false;
}
// Permit user-typed model ids that didn't appear in either the live
// listing or the static fallback (e.g. the user is on a brand-new model
// the CLI's `models` command hasn't surfaced yet). The CLI gets the value
// as a child-process arg — not a shell string — so injection isn't a
// concern, but we still reject anything that could be misread as a flag
// by a downstream CLI or that contains whitespace / control chars.
/**
 * Validate a user-typed model id that is not in any known list.
 *
 * Accepted ids are 1-200 characters after trimming, start with an ASCII
 * letter or digit, and continue with letters, digits or `. _ / : @ -`.
 * That rules out leading dashes (flag look-alikes), whitespace and
 * control characters.
 *
 * @returns The trimmed id, or null when the input is not a string or
 *   fails validation.
 */
export function sanitizeCustomModel(id: string | null | undefined) {
  if (typeof id !== 'string') return null;
  const candidate = id.trim();
  const lengthOk = candidate.length > 0 && candidate.length <= 200;
  const shapeOk = /^[A-Za-z0-9][A-Za-z0-9._/:@-]*$/.test(candidate);
  return lengthOk && shapeOk ? candidate : null;
}

View file

@ -0,0 +1,20 @@
import path from 'node:path';
import { homedir } from 'node:os';
/**
 * Normalize a user-configured env map.
 *
 * Only string-valued entries are kept, and each value is passed through
 * `expandHomePath`. Anything that is not a non-null object yields `{}`.
 */
export function expandConfiguredEnv(configuredEnv: unknown): Record<string, string> {
  if (!configuredEnv || typeof configuredEnv !== 'object') return {};
  const expanded: Record<string, string> = {};
  for (const entry of Object.entries(configuredEnv)) {
    const [name, raw] = entry;
    if (typeof raw === 'string') {
      expanded[name] = expandHomePath(raw);
    }
  }
  return expanded;
}
/**
 * Expand a leading `~` to the current user's home directory.
 *
 * Handles exactly `~`, `~/...` and `~\...`; any other value (including
 * `~user/...`) is returned unchanged.
 */
export function expandHomePath(value: string): string {
  if (value === '~') return homedir();
  const hasHomePrefix = value.startsWith('~/') || value.startsWith('~\\');
  // slice(2) drops the `~` and the separator that follows it.
  return hasHomePrefix ? path.join(homedir(), value.slice(2)) : value;
}

View file

@ -0,0 +1,212 @@
import type { RuntimeAgentDef, RuntimePromptBudgetError } from './types.js';
/**
 * Guard for adapters that pass the prompt as a command-line argument.
 *
 * @param def - Agent definition; only definitions with a numeric
 *   `maxPromptArgBytes` are checked.
 * @param composed - The composed prompt; non-string values count as 0
 *   bytes.
 * @returns null when the prompt's UTF-8 byte length is within the limit
 *   (or no limit applies), otherwise an `AGENT_PROMPT_TOO_LARGE` error
 *   carrying the measured `bytes` and the `limit`.
 */
export function checkPromptArgvBudget(
  def: RuntimeAgentDef | null | undefined,
  composed: unknown,
): RuntimePromptBudgetError | null {
  if (!def) return null;
  const limit = def.maxPromptArgBytes;
  if (typeof limit !== 'number') return null;

  const text = typeof composed === 'string' ? composed : '';
  const bytes = Buffer.byteLength(text, 'utf8');
  if (bytes <= limit) return null;

  const explanation = `${def.name} requires the prompt as a command-line argument and this run's composed prompt exceeds the safe size (${bytes} > ${limit} bytes). `;
  const advice =
    'Reduce the selected skills/design-system context, shorten the conversation, or pick an adapter with stdin support.';
  return {
    code: 'AGENT_PROMPT_TOO_LARGE',
    message: explanation + advice,
    bytes,
    limit,
  };
}
// Mirror of packages/platform's `quoteWindowsCommandArg`, kept local so
// `checkWindowsCmdShimCommandLineBudget` can run on macOS/Linux against
// a fake `.cmd` path in tests without forking on `process.platform`.
// Must stay byte-for-byte identical to the platform copy — the helper's
// whole point is to compute the exact `cmd.exe /d /s /c "<inner>"` line
// the spawn path will produce on Windows. The `%` → `"^%"` substitution
// neutralizes cmd.exe's percent-expansion for prompts that ride argv
// (DeepSeek TUI today): `%name%` pairs would otherwise be expanded from
// the daemon environment before the child reads them, leaking secrets
// like `%DEEPSEEK_API_KEY%` whenever the prompt mentions an env-var name.
/**
 * Quote one argument the way the `.cmd` / `.bat` shim path does.
 *
 * Values without whitespace or any of `" & < > | ^ %` pass through
 * unchanged. Otherwise each `"` is doubled, each `%` becomes `"^%"`, and
 * the result is wrapped in double quotes. `null` / `undefined` are treated
 * as the empty string.
 */
function quoteForWindowsCmdShim(value: unknown): string {
  const text = String(value ?? '');
  const needsQuoting = /[\s"&<>|^%]/.test(text);
  if (!needsQuoting) return text;
  const body = text.replace(/"/g, '""').replace(/%/g, '"^%"');
  return '"' + body + '"';
}
// Mirror of libuv's `quote_cmd_arg` (process-stdio.c), the exact rule
// Node uses on Windows when it composes a CreateProcess command line for
// a direct executable spawn (not a `.cmd` / `.bat` shim, which goes
// through `quoteForWindowsCmdShim` above). Each embedded `"` becomes
// `\"`, every backslash that ends up adjacent to a quote (or to the
// closing wrap quote) gets doubled, and an arg with whitespace or a
// quote is wrapped in outer `"..."`. Kept local so the budget check
// works on macOS/Linux test hosts against a fake `C:\…\foo.exe` path.
/**
 * Quote one argument for a direct-executable Windows command line.
 *
 * Rules implemented here:
 *  - an empty value becomes `""`;
 *  - a value with no whitespace and no `"` is returned unchanged;
 *  - anything else is wrapped in `"..."`, where a run of n backslashes
 *    directly before a `"` is emitted as 2n+1 backslashes followed by the
 *    quote, a run of n backslashes at the very end is emitted as 2n, and
 *    every other backslash run is copied as-is.
 * `null` / `undefined` are treated as the empty string.
 */
function quoteForWindowsDirectExe(value: unknown): string {
  const text = String(value ?? '');
  if (text === '') return '""';
  if (!/[\s"]/.test(text)) return text;

  const pieces: string[] = ['"'];
  // Backslashes seen but not yet emitted; how they are written depends on
  // what follows them.
  let pending = 0;
  for (const ch of text) {
    switch (ch) {
      case '\\':
        pending += 1;
        break;
      case '"':
        // Double the run, then add one more backslash to escape the quote.
        pieces.push('\\'.repeat(pending * 2 + 1), '"');
        pending = 0;
        break;
      default:
        pieces.push('\\'.repeat(pending), ch);
        pending = 0;
    }
  }
  // A trailing run sits right before the closing wrap quote: double it.
  pieces.push('\\'.repeat(pending * 2), '"');
  return pieces.join('');
}
// Windows' CreateProcess caps `lpCommandLine` at 32_767 chars. Going
// through a `.cmd` / `.bat` shim adds a `cmd.exe /d /s /c "<inner>"`
// wrapper, and `quoteForWindowsCmdShim` doubles every embedded `"` plus
// wraps any whitespace/special-char arg in outer quotes — so a prompt
// well under `maxPromptArgBytes` can still expand past the kernel cap
// once it's run through the shim. Leave headroom for any per-CLI flag
// the adapter might tack on at exec time and for cmd.exe's own framing.
// Maximum command-line length, in characters, that the budget checks treat
// as the hard cap.
const WINDOWS_CREATE_PROCESS_LIMIT = 32_767;
// Safety margin subtracted from the cap; both Windows budget checks compare
// against (limit - headroom).
const WINDOWS_CREATE_PROCESS_HEADROOM = 256;
// Post-buildArgs guard for argv-bound adapters whose binary resolves to
// a Windows `.cmd` / `.bat` shim. Computes the exact command line shape
// `createCommandInvocation` (in packages/platform) hands to `spawn` —
// `cmd.exe /d /s /c "<quoted command + quoted args>"` — and refuses the
// run when that line would exceed the CreateProcess limit (less a small
// headroom). Returns the same `AGENT_PROMPT_TOO_LARGE` shape as
// `checkPromptArgvBudget` so the SSE error path in `/api/chat` doesn't
// have to special-case it.
//
// No-op when:
// - the adapter doesn't declare `maxPromptArgBytes` (stdin adapters
// never go through this path);
// - the resolved binary isn't a `.cmd` / `.bat` (POSIX hosts and
// direct `.exe` resolutions on Windows skip the cmd.exe wrap);
// - the assembled line fits comfortably under the kernel cap.
//
// Pure: takes `resolvedBin` explicitly so a test on macOS can pass a
// fake `C:\\…\\deepseek.cmd` path and exercise the same math the daemon
// would run on Windows.
/**
 * Command-line length guard for argv-bound adapters that resolve to a
 * Windows `.cmd` / `.bat` shim.
 *
 * @param def - Agent definition; ignored unless it declares a numeric
 *   `maxPromptArgBytes`.
 * @param resolvedBin - Resolved executable path; ignored unless it is a
 *   string ending in `.bat` or `.cmd` (case-insensitive).
 * @param args - Final argv; a non-array is treated as no arguments.
 * @returns null when the check does not apply or the assembled
 *   `cmd.exe /d /s /c "<inner>"` line fits within the safe limit,
 *   otherwise an `AGENT_PROMPT_TOO_LARGE` error with `commandLineLength`
 *   and `limit`.
 */
export function checkWindowsCmdShimCommandLineBudget(
  def: RuntimeAgentDef | null | undefined,
  resolvedBin: unknown,
  args: unknown,
): RuntimePromptBudgetError | null {
  if (!def || typeof def.maxPromptArgBytes !== 'number') return null;
  const isCmdShim =
    typeof resolvedBin === 'string' && /\.(bat|cmd)$/i.test(resolvedBin);
  if (!isCmdShim) return null;

  const argv = Array.isArray(args) ? args : [];
  const quotedParts = [resolvedBin, ...argv].map(quoteForWindowsCmdShim);
  // Assemble the same shape that is spawned and measure it directly:
  // framing prefix, opening quote, quoted command + args, closing quote.
  const commandLine = `cmd.exe /d /s /c "${quotedParts.join(' ')}"`;
  const commandLineLength = commandLine.length;
  const safeLimit =
    WINDOWS_CREATE_PROCESS_LIMIT - WINDOWS_CREATE_PROCESS_HEADROOM;
  if (commandLineLength <= safeLimit) return null;

  const message = [
    `${def.name} on Windows runs through a .cmd shim and this run's prompt would expand past the CreateProcess command-line limit `,
    `after cmd.exe quote-doubling (${commandLineLength} > ${safeLimit} chars). `,
    'Reduce quote-heavy content in the selected skills/design-system context, shorten the conversation, or pick an adapter with stdin support.',
  ].join('');
  return {
    code: 'AGENT_PROMPT_TOO_LARGE',
    message,
    commandLineLength,
    limit: safeLimit,
  };
}
// Heuristic: does `resolvedBin` look like a Windows path? Used by the
// direct-exe guard so a test on a POSIX host can drive a fake
// `C:\…\foo.exe` path through the same math the daemon would run on
// Windows, while still skipping POSIX-shaped paths (which never go
// through CreateProcess).
/**
 * Shape test for Windows paths: true for a drive-letter path (`C:\...` or
 * `C:/...`) or a UNC path (leading `\\`). Non-strings and the empty string
 * are never Windows paths.
 */
function looksLikeWindowsPath(p: unknown): boolean {
  if (typeof p !== 'string' || p === '') return false;
  const isUnc = p.startsWith('\\\\');
  const hasDriveRoot = /^[a-zA-Z]:[\\/]/.test(p);
  return hasDriveRoot || isUnc;
}
// Companion to `checkWindowsCmdShimCommandLineBudget` for argv-bound
// adapters whose binary resolves directly to a Windows executable
// (a cargo-installed `deepseek.exe`, a hand-built release, or any other
// non-shim install path). `createCommandInvocation` does *not* wrap the
// call in `cmd.exe /d /s /c "<inner>"` for those — but Node/libuv still
// composes a CreateProcess `lpCommandLine` by walking each argv entry
// through `quote_cmd_arg`, which doubles backslashes adjacent to quotes
// and escapes every embedded `"` as `\"`. A quote-heavy prompt that fits
// under the raw `maxPromptArgBytes` budget can therefore still expand
// past the kernel's 32_767-char `lpCommandLine` cap on a direct `.exe`
// spawn, surfacing as a generic `spawn ENAMETOOLONG` instead of the
// adapter-named `AGENT_PROMPT_TOO_LARGE` the budget guard exists to
// emit. Returns the same error shape as the cmd-shim guard so the SSE
// error path in `/api/chat` doesn't have to special-case it.
//
// No-op when:
// - the adapter doesn't declare `maxPromptArgBytes` (stdin adapters
// never go through this path);
// - the resolved binary is a `.cmd` / `.bat` shim — that's handled by
// `checkWindowsCmdShimCommandLineBudget` so we don't double-emit;
// - the resolved binary is not a Windows path (no CreateProcess
// command-line shape to budget);
// - the assembled command line fits under the safe limit.
//
// Pure: takes `resolvedBin` and `args` explicitly so a test on macOS can
// pass a fake `C:\…\deepseek.exe` and exercise the same math the daemon
// would run on Windows. The libuv quoting math lives in
// `quoteForWindowsDirectExe` above.
/**
 * Command-line length guard for argv-bound adapters that resolve directly
 * to a Windows executable (not a `.cmd` / `.bat` shim).
 *
 * @param def - Agent definition; ignored unless it declares a numeric
 *   `maxPromptArgBytes`.
 * @param resolvedBin - Resolved executable path; ignored when it is not a
 *   non-empty string, ends in `.bat` / `.cmd`, or is not Windows-shaped.
 * @param args - Final argv; a non-array is treated as no arguments.
 * @returns null when the check does not apply or the quoted, space-joined
 *   command line fits within the safe limit, otherwise an
 *   `AGENT_PROMPT_TOO_LARGE` error with `commandLineLength` and `limit`.
 */
export function checkWindowsDirectExeCommandLineBudget(
  def: RuntimeAgentDef | null | undefined,
  resolvedBin: unknown,
  args: unknown,
): RuntimePromptBudgetError | null {
  if (!def || typeof def.maxPromptArgBytes !== 'number') return null;
  if (typeof resolvedBin !== 'string' || resolvedBin.length === 0) return null;
  // Shims belong to the cmd-shim guard, and POSIX-shaped paths have no
  // concatenated command line to budget, so only Windows-shaped non-shim
  // paths go further.
  const isShim = /\.(bat|cmd)$/i.test(resolvedBin);
  if (isShim || !looksLikeWindowsPath(resolvedBin)) return null;

  const argv = Array.isArray(args) ? args : [];
  const quotedParts = [resolvedBin, ...argv].map(quoteForWindowsDirectExe);
  const commandLineLength = quotedParts.join(' ').length;
  const safeLimit =
    WINDOWS_CREATE_PROCESS_LIMIT - WINDOWS_CREATE_PROCESS_HEADROOM;
  if (commandLineLength <= safeLimit) return null;

  const message = [
    `${def.name} on Windows builds a CreateProcess command line and this run's prompt would expand past the limit `,
    `after libuv quote-escaping (${commandLineLength} > ${safeLimit} chars). `,
    'Reduce quote-heavy content in the selected skills/design-system context, shorten the conversation, or pick an adapter with stdin support.',
  ].join('');
  return {
    code: 'AGENT_PROMPT_TOO_LARGE',
    message,
    commandLineLength,
    limit: safeLimit,
  };
}

View file

@ -0,0 +1,48 @@
import { claudeAgentDef } from './defs/claude.js';
import { codexAgentDef } from './defs/codex.js';
import { devinAgentDef } from './defs/devin.js';
import { geminiAgentDef } from './defs/gemini.js';
import { opencodeAgentDef } from './defs/opencode.js';
import { hermesAgentDef } from './defs/hermes.js';
import { kimiAgentDef } from './defs/kimi.js';
import { cursorAgentDef } from './defs/cursor-agent.js';
import { qwenAgentDef } from './defs/qwen.js';
import { qoderAgentDef } from './defs/qoder.js';
import { copilotAgentDef } from './defs/copilot.js';
import { piAgentDef } from './defs/pi.js';
import { kiroAgentDef } from './defs/kiro.js';
import { kiloAgentDef } from './defs/kilo.js';
import { vibeAgentDef } from './defs/vibe.js';
import { deepseekAgentDef } from './defs/deepseek.js';
import type { RuntimeAgentDef } from './types.js';
// Registry of every supported agent definition. `getAgentDef` looks
// definitions up by `id`, and `detectAgents` probes them with
// `Promise.all` over this array, so detection results come back in this
// order.
export const AGENT_DEFS: RuntimeAgentDef[] = [
claudeAgentDef,
codexAgentDef,
devinAgentDef,
geminiAgentDef,
opencodeAgentDef,
hermesAgentDef,
kimiAgentDef,
cursorAgentDef,
qwenAgentDef,
qoderAgentDef,
copilotAgentDef,
piAgentDef,
kiroAgentDef,
kiloAgentDef,
vibeAgentDef,
deepseekAgentDef,
];
// Import-time sanity check: two definitions sharing an id would make
// id-based lookups ambiguous, so fail loudly as soon as the module loads.
const ids = new Set<string>();
for (const { id } of AGENT_DEFS) {
  if (ids.has(id)) throw new Error(`Duplicate agent definition id: ${id}`);
  ids.add(id);
}
/**
 * Look up an agent definition by id.
 *
 * @returns The first definition in `AGENT_DEFS` whose `id` matches, or
 *   null when there is none.
 */
export function getAgentDef(id: string): RuntimeAgentDef | null {
  for (const def of AGENT_DEFS) {
    if (def.id === id) return def;
  }
  return null;
}

View file

@ -0,0 +1,12 @@
import { getAgentDef } from './registry.js';
import { resolveAgentExecutable } from './executables.js';
// Resolve the absolute path of an agent's binary on the current PATH.
// Used by the chat handler so spawn() gets the same executable that
// detection reported as available — fixes Windows ENOENT when the bare
// bin name isn't on the child process's PATH (issue #10).
/**
 * Resolve the executable path for the agent with the given id.
 *
 * @param id - Agent id.
 * @param configuredEnv - Per-agent configured env, forwarded to
 *   `resolveAgentExecutable`.
 * @returns The resolved path, or null when the id is unknown, the
 *   definition has no `bin`, or nothing resolves.
 */
export function resolveAgentBin(id: string, configuredEnv: Record<string, string> = {}) {
  const def = getAgentDef(id);
  return def?.bin ? resolveAgentExecutable(def, configuredEnv) : null;
}

View file

@ -0,0 +1,90 @@
import type { ExecFileOptions } from 'node:child_process';
// Environment map accepted by runtime helpers: either Node's process env
// shape or a plain string-to-string record.
export type RuntimeEnv = NodeJS.ProcessEnv | Record<string, string>;
// One selectable model: `id` is the value validated and passed on, `label`
// is the text shown for it.
export type RuntimeModelOption = {
id: string;
label: string;
};
// Reasoning choices share the id/label shape of model options.
export type RuntimeReasoningOption = RuntimeModelOption;
// Per-run selections handed to `buildArgs` (4th argument).
export type RuntimeBuildOptions = {
model?: string | null;
reasoning?: string | null;
};
// Per-run context handed to `buildArgs` (5th argument).
export type RuntimeContext = {
cwd?: string;
};
// Capability key -> whether the installed CLI's help output mentioned the
// corresponding flag (filled in by the help probe).
export type RuntimeCapabilityMap = Record<string, boolean>;
// Declarative "run this command and parse its stdout" model listing.
// `timeoutMs` defaults to 5000 at the call site; a null or empty `parse`
// result makes the caller fall back to the static model list.
export type RuntimeListModels = {
args: string[];
timeoutMs?: number;
parse: (stdout: string) => RuntimeModelOption[] | null;
};
// Error shape returned by the prompt budget checks. `bytes` is set by the
// raw prompt-size check; `commandLineLength` is set by the two Windows
// command-line checks. `limit` is the threshold that was exceeded, in the
// same unit as whichever measurement is present.
export type RuntimePromptBudgetError = {
code: 'AGENT_PROMPT_TOO_LARGE';
message: string;
bytes?: number;
commandLineLength?: number;
limit: number;
};
// Full definition of one agent CLI adapter.
export type RuntimeAgentDef = {
// Unique key; used for registry lookup, `*_BIN` override keys and install links.
id: string;
// Human-readable name; appears in budget error messages.
name: string;
// Primary binary name looked up on the search path.
bin: string;
// argv used by the version probe.
versionArgs: string[];
// Static model list used when live listing is unavailable or empty.
fallbackModels: RuntimeModelOption[];
// Builds the argv for a run.
buildArgs: (
prompt: string,
imagePaths: string[],
extraAllowedDirs?: string[],
options?: RuntimeBuildOptions,
runtimeContext?: RuntimeContext,
) => string[];
// Identifier of the CLI's output stream format (e.g. 'acp-json-rpc', 'pi-rpc').
streamFormat: string;
// Alternative binary names tried in order when `bin` is not found.
fallbackBins?: string[];
// argv for the help probe; only used together with `capabilityFlags`.
helpArgs?: string[];
// Flag substring -> capability key recorded when help output contains the flag.
capabilityFlags?: Record<string, string>;
// When true the prompt is delivered on stdin and must not appear in argv.
promptViaStdin?: boolean;
// NOTE(review): consumer not visible here — presumably selects the stdout event parser; confirm.
eventParser?: string;
// Extra env vars layered over `process.env` when probing this agent.
env?: Record<string, string>;
// Declarative model listing command (see `RuntimeListModels`).
listModels?: RuntimeListModels;
// Custom async model discovery.
// NOTE(review): the caller is only partly visible here — confirm precedence over `listModels`.
fetchModels?: (
resolvedBin: string,
env: RuntimeEnv,
) => Promise<RuntimeModelOption[] | null>;
// Reasoning levels offered for this agent; kept in the API response.
reasoningOptions?: RuntimeReasoningOption[];
// NOTE(review): presumably whether image file paths can be passed to the CLI — confirm.
supportsImagePaths?: boolean;
// Max UTF-8 prompt size for adapters that pass the prompt in argv; enables the budget checks.
maxPromptArgBytes?: number;
// MCP discovery mode; 'mature-acp' enables the live-artifacts MCP server entry.
mcpDiscovery?: string;
// Optional links; in detection results the per-agent install-link values are spread
// after these and therefore win.
installUrl?: string;
docsUrl?: string;
};
// Detection result for one agent: the definition minus its closures and
// probe-or-spawn-only fields (the same set `stripFns` removes), plus the
// probe outcome.
export type DetectedAgent = Omit<
RuntimeAgentDef,
| 'buildArgs'
| 'listModels'
| 'fetchModels'
| 'fallbackModels'
| 'helpArgs'
| 'capabilityFlags'
| 'fallbackBins'
| 'maxPromptArgBytes'
| 'env'
> & {
// Models to offer: the live list, or a fallback list.
models: RuntimeModelOption[];
// Whether an executable was resolved for this agent.
available: boolean;
// Resolved executable path; set only when available.
path?: string;
// Version text read from the version probe; null when the probe failed.
version?: string | null;
};
// Options accepted by `execAgentFile`: Node's `execFile` options with `env`
// typed as a process-env map.
export type RuntimeExecOptions = ExecFileOptions & {
env?: NodeJS.ProcessEnv;
};

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,591 @@
import { test } from 'vitest';
import {
assert, claude, codex, copilot, cursorAgent, deepseek, devin, detectAgents, gemini, join, kilo, kiro, mkdtempSync, pi, qoder, rmSync, spawnEnvForAgent, tmpdir, vibe, writeFileSync, chmodSync,
} from './helpers/test-helpers.js';
import type { TestAgentDef } from './helpers/test-helpers.js';
// Pins the complete cursor-agent argv for an empty prompt with a cwd in the
// runtime context: there is no prompt entry and no `-` placeholder, and the
// cwd is forwarded as `--workspace`.
test('cursor-agent args deliver prompts via stdin without passing a literal dash prompt', () => {
const args = cursorAgent.buildArgs(
'',
[],
[],
{},
{ cwd: '/tmp/od-project' },
);
assert.deepEqual(args, [
'--print',
'--output-format',
'stream-json',
'--stream-partial-output',
'--force',
'--trust',
'--workspace',
'/tmp/od-project',
]);
});
// Copilot reads the prompt from stdin when `-p` is omitted entirely
// (upstream copilot-cli issue #1046, confirmed working as
// `echo "..." | copilot --model <id>`). The earlier `-p -` attempt
// was a dead end because Copilot takes `-` as a literal one-character
// prompt; omitting `-p` is a separate code path that does delegate to
// stdin under a non-TTY pipe. Pin `promptViaStdin: true` and the
// stdin-only argv shape so a future refactor can't silently bring
// `-p <prompt>` back and reintroduce the Windows ENAMETOOLONG
// regression (issue #705).
test('copilot delivers the prompt via stdin (no -p, no prompt body in argv)', () => {
const prompt = 'design a landing page';
const baseArgs = copilot.buildArgs(prompt, [], [], {});
// The adapter must advertise stdin delivery.
assert.equal(copilot.promptViaStdin, true);
assert.ok(
!baseArgs.includes('-p'),
'copilot argv must not include -p; the prompt rides stdin',
);
assert.ok(
!baseArgs.includes(prompt),
'copilot argv must not include the prompt body; it rides stdin',
);
// Exact baseline argv when no model or extra dirs are supplied.
assert.deepEqual(baseArgs, [
'--allow-all-tools',
'--output-format',
'json',
]);
});
// With a model and two extra dirs, the expected argv is the base flags,
// then `--model <id>`, then one `--add-dir <path>` pair per directory in
// input order — still without `-p` or the prompt text.
test('copilot args append model and extra dirs after the base flags without reintroducing -p', () => {
const prompt = 'design a landing page';
const args = copilot.buildArgs(
prompt,
[],
['/tmp/od-skills', '/tmp/od-design-systems'],
{ model: 'claude-sonnet-4.6' },
);
assert.ok(!args.includes('-p'));
assert.ok(!args.includes(prompt));
assert.deepEqual(args, [
'--allow-all-tools',
'--output-format',
'json',
'--model',
'claude-sonnet-4.6',
'--add-dir',
'/tmp/od-skills',
'--add-dir',
'/tmp/od-design-systems',
]);
});
// Feeds deliberately malformed `extraAllowedDirs` (empty string, null,
// undefined) through a cast and expects exactly one `--add-dir` pair, for
// the single valid path.
test('copilot drops empty / non-string entries from extraAllowedDirs without reintroducing -p', () => {
const prompt = 'design a landing page';
const args = copilot.buildArgs(
prompt,
[],
['', null, '/tmp/od-skills', undefined] as unknown as string[],
{},
);
assert.ok(!args.includes('-p'));
// Only the one valid path survives.
const addDirIndex = args.indexOf('--add-dir');
assert.equal(args[addDirIndex + 1], '/tmp/od-skills');
assert.equal(args.filter((a) => a === '--add-dir').length, 1);
});
// Counterpart of the Claude Code 200_000-char synthetic-prompt guard. A
// composed prompt can exceed the Windows CreateProcess command-line cap
// (~32 KB direct, ~8 KB through a `.cmd` shim), so no argv entry may ever
// carry the prompt body. This structural check keeps the issue #705 fix
// from regressing unnoticed.
test('copilot flags promptViaStdin and never embeds the prompt in argv', () => {
  assert.equal(copilot.promptViaStdin, true);

  const hugePrompt = 'x'.repeat(200_000);
  const argv = copilot.buildArgs(hugePrompt, [], [], {});

  assert.ok(Array.isArray(argv), 'copilot.buildArgs must return argv');
  assert.equal(argv.includes(hugePrompt), false, 'prompt must not appear in argv');
  argv.forEach((entry) => {
    assert.ok(
      typeof entry === 'string' && entry.length < 1000,
      `no argv entry should carry the prompt body (saw length ${entry.length})`,
    );
  });
});
// Kiro's argv is just the `acp` subcommand; its stream format is ACP JSON-RPC.
test('kiro args use acp subcommand for json-rpc streaming', () => {
  const argv = kiro.buildArgs('', [], [], {});

  assert.deepEqual(argv, ['acp']);
  assert.equal(kiro.streamFormat, 'acp-json-rpc');
});
// Devin passes its permission / workspace-trust flags ahead of the `acp`
// subcommand.
test('devin args use acp subcommand for json-rpc streaming', () => {
  const expectedArgv = [
    '--permission-mode',
    'dangerous',
    '--respect-workspace-trust',
    'false',
    'acp',
  ];

  assert.deepEqual(devin.buildArgs('', [], [], {}), expectedArgv);
  assert.equal(devin.streamFormat, 'acp-json-rpc');
});
// Pi always starts in `--mode rpc`; a model maps to `--model` and a
// reasoning level maps to `--thinking`.
test('pi args use rpc mode without --no-session and append model/thinking options', () => {
  const rpcOnly = pi.buildArgs('', [], [], {}, {});
  assert.deepEqual(rpcOnly, ['--mode', 'rpc']);
  assert.ok(!rpcOnly.includes('--no-session'), 'pi must not pass --no-session');

  assert.equal(pi.promptViaStdin, true);
  assert.equal(pi.streamFormat, 'pi-rpc');
  assert.equal(pi.supportsImagePaths, true);

  const modelArgv = pi.buildArgs('', [], [], { model: 'anthropic/claude-sonnet-4-5' }, {});
  assert.deepEqual(modelArgv, ['--mode', 'rpc', '--model', 'anthropic/claude-sonnet-4-5']);

  const thinkingArgv = pi.buildArgs('', [], [], { reasoning: 'high' }, {});
  assert.deepEqual(thinkingArgv, ['--mode', 'rpc', '--thinking', 'high']);
});
// Each extra allowed directory becomes its own `--append-system-prompt` pair.
test('pi args forward extraAllowedDirs as --append-system-prompt flags', () => {
  const extraDirs = ['/tmp/skills', '/tmp/design-systems'];
  const argv = pi.buildArgs('', [], extraDirs, {}, {});

  const expectedArgv = [
    '--mode',
    'rpc',
    '--append-system-prompt',
    '/tmp/skills',
    '--append-system-prompt',
    '/tmp/design-systems',
  ];
  assert.deepEqual(argv, expectedArgv);
});
// A relative entry between two absolute ones must vanish from argv.
test('pi args filter relative paths from extraAllowedDirs', () => {
  const mixedDirs = ['/tmp/skills', 'relative/path', '/tmp/design-systems'];
  const argv = pi.buildArgs('', [], mixedDirs, {}, {});

  // Only the absolute paths are expected to survive.
  const expectedArgv = [
    '--mode',
    'rpc',
    '--append-system-prompt',
    '/tmp/skills',
    '--append-system-prompt',
    '/tmp/design-systems',
  ];
  assert.deepEqual(argv, expectedArgv);
});
// Flag order when everything is supplied: mode, model, thinking, then dirs.
test('pi args combine model, thinking, and extraAllowedDirs', () => {
  const options = { model: 'openai/gpt-5', reasoning: 'medium' };
  const argv = pi.buildArgs('', [], ['/tmp/skills'], options, {});

  const expectedArgv = [
    '--mode',
    'rpc',
    '--model',
    'openai/gpt-5',
    '--thinking',
    'medium',
    '--append-system-prompt',
    '/tmp/skills',
  ];
  assert.deepEqual(argv, expectedArgv);
});
// Workspace trust is conveyed through the adapter's env, not a
// `--skip-trust` argv flag.
test('gemini args avoid version-fragile trust flags', () => {
  const argv = gemini.buildArgs('', [], [], {});

  assert.deepEqual(argv, ['--output-format', 'stream-json', '--yolo']);
  assert.equal(argv.includes('--skip-trust'), false);
  assert.deepEqual(gemini.env, { GEMINI_CLI_TRUST_WORKSPACE: 'true' });
});
// A chosen model is appended as `--model <id>` after the base flags.
test('gemini args preserve custom model selection', () => {
  const argv = gemini.buildArgs('', [], [], { model: 'gemini-2.5-pro' });
  const expectedArgv = [
    '--output-format',
    'stream-json',
    '--yolo',
    '--model',
    'gemini-2.5-pro',
  ];

  assert.deepEqual(argv, expectedArgv);
});
test('gemini picker exposes the Gemini 3 previews and 2.5 family in priority order', () => {
  // Freeze both the contents and the order of the picker so a later edit to
  // AGENT_DEFS cannot silently reshape the Settings UI. Gemini also accepts
  // arbitrary custom ids, so a regression here slips past manual QA
  // especially easily. Issue #981.
  const pickerIds = gemini.fallbackModels.map((model) => model.id);
  const expectedIds = [
    'default',
    'gemini-3-pro-preview',
    'gemini-3-flash-preview',
    'gemini-2.5-pro',
    'gemini-2.5-flash',
    'gemini-2.5-flash-lite',
  ];

  assert.deepEqual(pickerIds, expectedIds);
});
// Static metadata of the Qoder adapter: display name, binary, version probe,
// stdin delivery, stream format, and the tier-style fallback model list.
test('qoder entry uses qodercli with stream-json stdin delivery and tier model hints', () => {
  assert.equal(qoder.name, 'Qoder CLI');
  assert.equal(qoder.bin, 'qodercli');
  assert.deepEqual(qoder.versionArgs, ['--version']);
  assert.equal(qoder.promptViaStdin, true);
  assert.equal(qoder.streamFormat, 'qoder-stream-json');

  const tierIds = qoder.fallbackModels.map((model) => model.id);
  assert.deepEqual(tierIds, [
    'default',
    'lite',
    'efficient',
    'auto',
    'performance',
    'ultimate',
  ]);
});
// Full Qoder argv: print mode, cwd via `-w`, model, absolute add-dirs, and
// image attachments. The prompt, invalid dirs and relative dirs never appear.
test('qoder args use non-interactive print mode with cwd, model, and add-dir', () => {
  const promptText = 'prompt must not appear in argv';
  const imagePaths = ['/tmp/uploads/logo.png', '/tmp/uploads/hero concept.png'];
  const candidateDirs = [
    '/repo/skills',
    '',
    null as unknown as string,
    './relative-skills',
    'relative-design-systems',
    '/repo/design-systems',
  ];

  const argv = qoder.buildArgs(
    promptText,
    imagePaths,
    candidateDirs,
    { model: 'performance' },
    { cwd: '/tmp/od-project' },
  );

  const expectedArgv = [
    '-p',
    '--output-format',
    'stream-json',
    '--yolo',
    '-w',
    '/tmp/od-project',
    '--model',
    'performance',
    '--add-dir',
    '/repo/skills',
    '--add-dir',
    '/repo/design-systems',
    '--attachment',
    '/tmp/uploads/logo.png',
    '--attachment',
    '/tmp/uploads/hero concept.png',
  ];
  assert.deepEqual(argv, expectedArgv);
  assert.equal(argv.includes('prompt must not appear in argv'), false);
  assert.equal(argv.includes('./relative-skills'), false);
  assert.equal(argv.includes('relative-design-systems'), false);
});
// The 'default' model and a missing cwd must not produce `--model` / `-w`.
test('qoder args omit default model and cwd when absent', () => {
  const argv = qoder.buildArgs('', [], [], { model: 'default' }, {});
  const baseFlags = ['-p', '--output-format', 'stream-json', '--yolo'];

  assert.deepEqual(argv, baseFlags);
  assert.equal(argv.includes('--model'), false);
  assert.equal(argv.includes('-w'), false);
});
// When every candidate directory is invalid, no `--add-dir` flag is emitted.
test('qoder args omit empty, non-string, and relative add-dir entries', () => {
  const invalidDirs = [
    '',
    null as unknown as string,
    undefined as unknown as string,
    42 as unknown as string,
    './skills',
    'design-systems',
  ];
  const argv = qoder.buildArgs('', [], invalidDirs);

  assert.equal(argv.includes('--add-dir'), false);
});
// Of the seven candidate image paths only the single absolute one may reach
// argv as an `--attachment`.
test('qoder args omit empty, non-string, and relative image attachment entries', () => {
  const args = qoder.buildArgs('', [
    '',
    null as unknown as string,
    undefined as unknown as string,
    42 as unknown as string,
    './uploads/logo.png',
    'uploads/hero.png',
    '/tmp/uploads/logo.png',
  ], []);
  // The count is a scalar, so compare it with assert.equal like the other
  // flag-count assertions in this file.
  assert.equal(
    args.filter((arg) => arg === '--attachment').length,
    1,
  );
  assert.ok(args.includes('/tmp/uploads/logo.png'));
  assert.equal(args.includes('./uploads/logo.png'), false);
  assert.equal(args.includes('uploads/hero.png'), false);
});
// The Qoder access token present in the daemon env is passed through to the
// spawned process unchanged, together with the unrelated variables.
test('qoder adapter inherits QODER_PERSONAL_ACCESS_TOKEN from daemon env', () => {
  const daemonEnv = {
    QODER_PERSONAL_ACCESS_TOKEN: 'qoder-pat',
    PATH: '/usr/bin',
    OD_DAEMON_URL: 'http://127.0.0.1:7456',
  };
  const spawnEnv = spawnEnvForAgent('qoder', daemonEnv);

  assert.equal(spawnEnv.QODER_PERSONAL_ACCESS_TOKEN, 'qoder-pat');
  assert.equal(spawnEnv.PATH, '/usr/bin');
  assert.equal(spawnEnv.OD_DAEMON_URL, 'http://127.0.0.1:7456');
});
// The adapter definition itself must not hard-code the access token.
test('qoder adapter does not define static secret env', () => {
  const withOptionalEnv = qoder as TestAgentDef & { env?: Record<string, string> };

  assert.equal(withOptionalEnv.env?.QODER_PERSONAL_ACCESS_TOKEN, undefined);
});
// With an empty directory as both PATH and agent home, qoder must still be
// listed (unavailable) with its binary name and fallback model ids.
test('detectAgents keeps qoder unavailable with fallback metadata when qodercli is missing', async () => {
  const dir = mkdtempSync(join(tmpdir(), 'od-agents-empty-'));
  // Snapshot the process-wide variables this test overrides; otherwise the
  // soon-to-be-deleted temp dir stays in PATH / OD_AGENT_HOME for every
  // test that runs afterwards.
  const savedEnv: Record<string, string | undefined> = {
    OD_AGENT_HOME: process.env.OD_AGENT_HOME,
    PATH: process.env.PATH,
  };
  try {
    process.env.OD_AGENT_HOME = dir;
    process.env.PATH = dir;
    const agents = await detectAgents();
    const detected = agents.find((agent) => agent.id === 'qoder');
    assert.ok(detected);
    assert.equal(detected.available, false);
    assert.equal(detected.bin, 'qodercli');
    assert.deepEqual(detected.models.map((m: { id: string }) => m.id), [
      'default',
      'lite',
      'efficient',
      'auto',
      'performance',
      'ultimate',
    ]);
  } finally {
    for (const [key, value] of Object.entries(savedEnv)) {
      // Assigning `undefined` to process.env stores the string "undefined",
      // so a variable that was unset before must be deleted instead.
      if (value === undefined) delete process.env[key];
      else process.env[key] = value;
    }
    rmSync(dir, { recursive: true, force: true });
  }
});
test('kiro fetchModels falls back to fallbackModels when detection fails', async () => {
  // fetchModels rejects for a binary that does not exist; the daemon's
  // probe() is what catches that and switches to fallbackModels.
  assert.ok(kiro.fetchModels, 'kiro must define fetchModels');

  const outcome = await kiro
    .fetchModels('/nonexistent/kiro-cli', {})
    .catch(() => null);
  assert.equal(outcome, null);

  assert.ok(Array.isArray(kiro.fallbackModels));
  const firstFallback = kiro.fallbackModels[0];
  assert.ok(firstFallback);
  assert.equal(firstFallback.id, 'default');
});
// Kilo's argv is just the `acp` subcommand; its stream format is ACP JSON-RPC.
test('kilo args use acp subcommand for json-rpc streaming', () => {
  const argv = kilo.buildArgs('', [], [], {});

  assert.deepEqual(argv, ['acp']);
  assert.equal(kilo.streamFormat, 'acp-json-rpc');
});
// A missing binary makes fetchModels reject; the static fallback list must
// then hold exactly one entry, 'default'.
test('kilo fetchModels falls back to fallbackModels when detection fails', async () => {
  assert.ok(kilo.fetchModels, 'kilo must define fetchModels');

  const outcome = await kilo.fetchModels('/nonexistent/kilo', {}).catch(() => null);
  assert.equal(outcome, null);

  assert.ok(Array.isArray(kilo.fallbackModels));
  const firstFallback = kilo.fallbackModels[0];
  assert.ok(firstFallback);
  assert.equal(firstFallback.id, 'default');
  assert.equal(kilo.fallbackModels.length, 1);
});
// ---- reasoning-effort clamp ------------------------------------------------
// clampCodexReasoning is exercised through the public buildArgs surface so
// the helper can stay non-exported. What the codex CLI (and ultimately
// OpenAI) sees is the wire-level `-c model_reasoning_effort="..."` flag.
test('codex buildArgs clamps reasoning effort per model', () => {
  // Each row: [model, requested reasoning, expected wire-level effort].
  const cases: Array<[string | undefined, string, string]> = [
    // gpt-5.5 family (plus unknown / 'default', which are treated as 5.5):
    // minimal -> low, everything else passes through.
    [undefined, 'minimal', 'low'],
    ['default', 'minimal', 'low'],
    ['gpt-5.2', 'minimal', 'low'],
    ['gpt-5.3', 'minimal', 'low'],
    ['gpt-5.4', 'minimal', 'low'],
    ['gpt-5.5', 'minimal', 'low'],
    ['gpt-5.5', 'low', 'low'],
    ['gpt-5.5', 'medium', 'medium'],
    ['gpt-5.5', 'high', 'high'],
    ['vendor/gpt-5.5-foo', 'minimal', 'low'], // path-style id
    // gpt-5.1: xhigh is not supported, everything else passes through.
    ['gpt-5.1', 'xhigh', 'high'],
    ['gpt-5.1', 'high', 'high'],
    // gpt-5.1-codex-mini: only medium / high are allowed.
    ['gpt-5.1-codex-mini', 'minimal', 'medium'],
    ['gpt-5.1-codex-mini', 'low', 'medium'],
    ['gpt-5.1-codex-mini', 'medium', 'medium'],
    ['gpt-5.1-codex-mini', 'high', 'high'],
    ['gpt-5.1-codex-mini', 'xhigh', 'high'],
    // Unknown / future families pass through; an API error is the signal
    // that clampCodexReasoning needs a new rule.
    ['gpt-6', 'minimal', 'minimal'],
  ];

  cases.forEach(([model, reasoning, expected]) => {
    // Leave the `model` key out entirely when the row has no model.
    const options = model === undefined ? { reasoning } : { model, reasoning };
    const args = codex.buildArgs('', [], [], options, { cwd: '/tmp/od-project' });
    const wireFlag = `model_reasoning_effort="${expected}"`;

    assert.ok(
      args.includes(wireFlag),
      `(model=${model ?? '<none>'}, reasoning=${reasoning}) → expected ${expected}; args=${JSON.stringify(args)}`,
    );
  });
});
// Reasoning 'default' means no `model_reasoning_effort=` override at all.
test('codex buildArgs omits model_reasoning_effort when reasoning is "default"', () => {
  const argv = codex.buildArgs(
    '',
    [],
    [],
    { reasoning: 'default' },
    { cwd: '/tmp/od-project' },
  );
  const carriesEffortOverride = argv.some(
    (entry) => typeof entry === 'string' && entry.startsWith('model_reasoning_effort='),
  );

  assert.equal(carriesEffortOverride, false);
});
test('claude flags promptViaStdin and never embeds the prompt in argv', () => {
  // A composed prompt (system prompt + design system + skill body + user
  // message) routinely exceeds Linux MAX_ARG_STRLEN (~128 KB) and the
  // Windows CreateProcess command-line cap (~32 KB direct, ~8 KB via .cmd
  // shim). The prompt therefore travels on stdin rather than argv, and the
  // assertions below guard that contract.
  assert.equal(claude.promptViaStdin, true);

  const hugePrompt = 'x'.repeat(200_000);
  const argv = claude.buildArgs(hugePrompt, [], [], {}, { cwd: '/tmp/od-project' });

  assert.ok(Array.isArray(argv), 'claude.buildArgs must return argv');
  assert.equal(argv.includes(hugePrompt), false, 'prompt must not appear in argv');
  argv.forEach((entry) => {
    assert.ok(
      typeof entry === 'string' && entry.length < 1000,
      `no argv entry should carry the prompt body (saw length ${entry.length})`,
    );
  });

  // `-p` (print mode) has to stay: without it claude opens an interactive
  // REPL, and the daemon has no TTY to offer it.
  assert.ok(argv.includes('-p'), 'claude argv must include -p');
});
// ---- Claude Code --add-dir capability (issue #430) -------------------------
// Skill seeds (`skills/<id>/assets/template.html`) and design-system specs
// (`design-systems/<id>/DESIGN.md`) sit outside the project cwd. Claude
// Code's directory access policy blocks reads outside the working directory
// unless `--add-dir` is given. The original bug: we probed the global
// `claude --help` for `--add-dir`, but the flag is only listed by
// `claude -p --help`.
test('claude buildArgs passes --add-dir when dirs are supplied (issue #430, probing-failed baseline)', () => {
  // State before any capability probe has run: agentCapabilities has no
  // entry -> buildArgs sees `caps = {}` -> caps.addDir is undefined ->
  // undefined !== false -> true. The same holds when probing threw (timeout,
  // binary not found, non-zero exit from --help). Dirs are dropped only when
  // probing explicitly read --help and found no --add-dir.
  const extraDirs = ['/repo/skills', '/repo/design-systems'];
  const argv = claude.buildArgs('', [], extraDirs, {});

  const addDirAt = argv.indexOf('--add-dir');
  assert.ok(addDirAt >= 0, '--add-dir must be present by default (safe baseline)');
  assert.equal(argv[addDirAt + 1], '/repo/skills');
  assert.equal(argv[addDirAt + 2], '/repo/design-systems');

  // Flag ordering: --add-dir has to precede --permission-mode.
  const permissionModeAt = argv.indexOf('--permission-mode');
  assert.ok(
    addDirAt < permissionModeAt,
    `--add-dir (index ${addDirAt}) should appear before --permission-mode (index ${permissionModeAt})`,
  );
});
test('claude buildArgs drops empty / null dirs but keeps valid ones (issue #430 edge case)', () => {
  const dirtyDirs = ['', null, '/repo/skills', undefined] as unknown as string[];
  const argv = claude.buildArgs('', [], dirtyDirs, {});

  const addDirAt = argv.indexOf('--add-dir');
  assert.ok(addDirAt >= 0, '--add-dir should survive filter');
  // The single valid path directly follows the flag.
  assert.equal(argv[addDirAt + 1], '/repo/skills');
  // One flag carrying N arguments, never several --add-dir flags.
  assert.equal(argv.filter((entry) => entry === '--add-dir').length, 1);
  // None of the invalid inputs may leak into argv.
  assert.equal(argv.includes(''), false);
  assert.equal(argv.includes(null as unknown as string), false);
  assert.equal(argv.includes(undefined as unknown as string), false);
});
// The capability probe must run `claude -p --help`, because that is where
// `--add-dir` is listed.
test('claude helpArgs probes the -p subcommand where --add-dir lives (issue #430 root cause)', () => {
  const expectedHelpArgs = ['-p', '--help'];

  assert.deepEqual(
    claude.helpArgs,
    expectedHelpArgs,
    `claude.helpArgs must be ['-p', '--help'], not just ['--help'], because --add-dir lives under the -p subcommand. Probing global help never finds it! Got: ${JSON.stringify(claude.helpArgs)}`,
  );
});

View file

@ -0,0 +1,407 @@
import { test } from 'vitest';
import { homedir } from 'node:os';
import {
assert, chmodSync, detectAgents, inspectAgentExecutableResolution, join, minimalAgentDef, mkdirSync, mkdtempSync, opencode, resolveAgentExecutable, rmSync, spawnEnvForAgent, tmpdir, withEnvSnapshot, withPlatform, writeFileSync,
} from './helpers/test-helpers.js';
// Issue #398: Claude Code picks ANTHROPIC_API_KEY over `claude login`
// credentials and silently bills API usage. The claude adapter therefore
// drops the key so the user's subscription is used.
test('spawnEnvForAgent strips ANTHROPIC_API_KEY for the claude adapter', () => {
  const daemonEnv = {
    ANTHROPIC_API_KEY: 'sk-leak',
    PATH: '/usr/bin',
    OD_DAEMON_URL: 'http://127.0.0.1:7456',
  };
  const spawnEnv = spawnEnvForAgent('claude', daemonEnv);

  assert.equal('ANTHROPIC_API_KEY' in spawnEnv, false);
  assert.equal(spawnEnv.PATH, '/usr/bin');
  assert.equal(spawnEnv.OD_DAEMON_URL, 'http://127.0.0.1:7456');
});
// User-configured env is merged in, and the API key is still stripped.
test('spawnEnvForAgent applies configured Claude Code env before auth stripping', () => {
  const daemonEnv = { ANTHROPIC_API_KEY: 'sk-leak', PATH: '/usr/bin' };
  const configuredEnv = { CLAUDE_CONFIG_DIR: '/Users/test/.claude-2' };

  const spawnEnv = spawnEnvForAgent('claude', daemonEnv, configuredEnv);

  assert.equal(spawnEnv.CLAUDE_CONFIG_DIR, '/Users/test/.claude-2');
  assert.equal('ANTHROPIC_API_KEY' in spawnEnv, false);
  assert.equal(spawnEnv.PATH, '/usr/bin');
});
// Configured Codex variables show up in the result but never in the base
// object that was passed in.
test('spawnEnvForAgent applies configured Codex env without mutating the base env', () => {
  const base = { PATH: '/usr/bin' };
  const configuredEnv = {
    CODEX_HOME: '/Users/test/.codex-alt',
    CODEX_BIN: '/Users/test/bin/codex',
  };

  const spawnEnv = spawnEnvForAgent('codex', base, configuredEnv);

  assert.equal(spawnEnv.CODEX_HOME, '/Users/test/.codex-alt');
  assert.equal(spawnEnv.CODEX_BIN, '/Users/test/bin/codex');
  assert.equal(spawnEnv.PATH, '/usr/bin');
  assert.equal('CODEX_HOME' in base, false);
  assert.equal('CODEX_BIN' in base, false);
});
// A leading `~` in configured values expands to the current user's home
// directory, both as a path prefix and as a bare `~`.
test('spawnEnvForAgent expands configured env home paths', () => {
  const configuredEnv = { CODEX_HOME: '~/.codex-alt', CODEX_CACHE: '~' };
  const spawnEnv = spawnEnvForAgent('codex', { PATH: '/usr/bin' }, configuredEnv);

  assert.equal(spawnEnv.CODEX_HOME, join(homedir(), '.codex-alt'));
  assert.equal(spawnEnv.CODEX_CACHE, homedir());
  assert.equal(spawnEnv.PATH, '/usr/bin');
});
// With an empty PATH, the absolute executable named by CODEX_BIN is what
// the resolver returns.
test('resolveAgentExecutable prefers a configured CODEX_BIN override over PATH resolution', () => {
  const sandbox = mkdtempSync(join(tmpdir(), 'od-codex-bin-'));
  try {
    return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
      const overridePath = join(sandbox, 'codex-custom');
      writeFileSync(overridePath, '#!/bin/sh\nexit 0\n');
      chmodSync(overridePath, 0o755);

      process.env.PATH = '';
      process.env.OD_AGENT_HOME = sandbox;

      const codexDef = minimalAgentDef({ id: 'codex', bin: 'codex' });
      const resolved = resolveAgentExecutable(codexDef, { CODEX_BIN: overridePath });
      assert.equal(resolved, overridePath);
    });
  } finally {
    rmSync(sandbox, { recursive: true, force: true });
  }
});
// When both a CODEX_BIN override and a `codex` binary on PATH exist, the
// inspection result must list each one separately and select the override.
test('inspectAgentExecutableResolution reports configured and PATH Codex binaries separately', () => {
const dir = mkdtempSync(join(tmpdir(), 'od-codex-bin-inspect-'));
try {
return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
// Two executables in the same temp dir: the explicit override and the
// plain `codex` that PATH lookup should find.
const configured = join(dir, 'codex-custom');
const fallback = join(dir, 'codex');
writeFileSync(configured, '#!/bin/sh\nexit 0\n');
writeFileSync(fallback, '#!/bin/sh\nexit 0\n');
chmodSync(configured, 0o755);
chmodSync(fallback, 0o755);
process.env.PATH = dir;
process.env.OD_AGENT_HOME = dir;
const resolution = inspectAgentExecutableResolution(
minimalAgentDef({ id: 'codex', bin: 'codex' }),
{ CODEX_BIN: configured },
);
assert.deepEqual(resolution, {
configuredOverridePath: configured,
pathResolvedPath: fallback,
selectedPath: configured,
});
});
} finally {
// Remove the temp dir whether or not the assertions passed.
rmSync(dir, { recursive: true, force: true });
}
});
// Table-driven check that each listed adapter honours its own `<NAME>_BIN`
// override variable.
test('resolveAgentExecutable supports configured binary overrides for non-Codex adapters', () => {
// Each row: [agent id, binary name, override env key].
const cases: Array<[string, string, string]> = [
['claude', 'claude', 'CLAUDE_BIN'],
['gemini', 'gemini', 'GEMINI_BIN'],
['opencode', 'opencode', 'OPENCODE_BIN'],
['cursor-agent', 'cursor-agent', 'CURSOR_AGENT_BIN'],
['qwen', 'qwen', 'QWEN_BIN'],
['qoder', 'qodercli', 'QODER_BIN'],
['copilot', 'copilot', 'COPILOT_BIN'],
['deepseek', 'deepseek', 'DEEPSEEK_BIN'],
];
const dir = mkdtempSync(join(tmpdir(), 'od-agent-bin-overrides-'));
try {
return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
// Empty PATH: the only way to resolve a binary is through the override.
process.env.PATH = '';
process.env.OD_AGENT_HOME = dir;
for (const [id, binName, envKey] of cases) {
const configured = join(dir, `${binName}-custom`);
writeFileSync(configured, '#!/bin/sh\nexit 0\n');
chmodSync(configured, 0o755);
const resolved = resolveAgentExecutable(
minimalAgentDef({ id, bin: binName }),
{ [envKey]: configured },
);
assert.equal(resolved, configured, `expected ${id} to use ${envKey}`);
}
});
} finally {
rmSync(dir, { recursive: true, force: true });
}
});
// With both `opencode-cli` and `opencode` on PATH the CLI build wins; once
// it is deleted the resolver falls back to `opencode`.
test('resolveAgentExecutable prefers opencode-cli before desktop opencode fallback', () => {
  const sandbox = mkdtempSync(join(tmpdir(), 'od-opencode-cli-'));
  try {
    return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
      const cliBin = join(sandbox, 'opencode-cli');
      const desktopBin = join(sandbox, 'opencode');
      writeFileSync(cliBin, '#!/bin/sh\nexit 0\n');
      writeFileSync(desktopBin, '#!/bin/sh\nexit 0\n');
      chmodSync(cliBin, 0o755);
      chmodSync(desktopBin, 0o755);

      process.env.PATH = sandbox;
      process.env.OD_AGENT_HOME = sandbox;

      assert.equal(resolveAgentExecutable(opencode), cliBin);

      rmSync(cliBin, { force: true });
      assert.equal(resolveAgentExecutable(opencode), desktopBin);
    });
  } finally {
    rmSync(sandbox, { recursive: true, force: true });
  }
});
// Even with no agent binary installed, detection results must carry the
// install / docs URLs from the runtime metadata.
test('detectAgents includes sanitized install and docs metadata from split runtime metadata', async () => {
  const sandbox = mkdtempSync(join(tmpdir(), 'od-agent-install-meta-'));
  try {
    return await withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], async () => {
      process.env.PATH = sandbox;
      process.env.OD_AGENT_HOME = sandbox;

      const agents = await detectAgents();

      const qoder = agents.find((agent) => agent.id === 'qoder');
      assert.ok(qoder);
      assert.equal(qoder.available, false);
      assert.equal(qoder.installUrl, 'https://qoder.com/download');
      assert.equal(qoder.docsUrl, 'https://docs.qoder.com/');

      const deepseek = agents.find((agent) => agent.id === 'deepseek');
      assert.ok(deepseek);
      assert.equal(
        deepseek.docsUrl,
        'https://github.com/deepseek-ai/DeepSeek-TUI/blob/main/README.md',
      );
    });
  } finally {
    rmSync(sandbox, { recursive: true, force: true });
  }
});
// A relative CODEX_BIN must be ignored even when it names an executable in
// the current working directory; with an empty PATH the result is null.
test('resolveAgentExecutable ignores relative CODEX_BIN overrides', () => {
  const sandbox = mkdtempSync(join(tmpdir(), 'od-codex-bin-rel-'));
  const previousCwd = process.cwd();
  try {
    return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
      const relativeName = 'codex-custom';
      const absolutePath = join(sandbox, relativeName);
      writeFileSync(absolutePath, '#!/bin/sh\nexit 0\n');
      chmodSync(absolutePath, 0o755);

      // Enter the sandbox so the relative name would resolve if it were honoured.
      process.chdir(sandbox);
      process.env.PATH = '';
      process.env.OD_AGENT_HOME = sandbox;

      const codexDef = minimalAgentDef({ id: 'codex', bin: 'codex' });
      const resolved = resolveAgentExecutable(codexDef, { CODEX_BIN: relativeName });
      assert.equal(resolved, null);
    });
  } finally {
    process.chdir(previousCwd);
    rmSync(sandbox, { recursive: true, force: true });
  }
});
// An override pointing at a directory, or (outside Windows) at a file
// without the execute bit, must be rejected; with an empty PATH the
// resolver then returns null.
test('resolveAgentExecutable ignores configured binary overrides that are not executable files', () => {
const dir = mkdtempSync(join(tmpdir(), 'od-agent-bin-invalid-'));
try {
return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
const directoryOverride = join(dir, 'as-directory');
mkdirSync(directoryOverride);
const fileOverride = join(dir, 'not-executable');
writeFileSync(fileOverride, '#!/bin/sh\nexit 0\n');
// Mode 0o644 clears the execute bits; this step is skipped on win32.
if (process.platform !== 'win32') chmodSync(fileOverride, 0o644);
process.env.PATH = '';
process.env.OD_AGENT_HOME = dir;
assert.equal(
resolveAgentExecutable(minimalAgentDef({ id: 'codex', bin: 'codex' }), { CODEX_BIN: directoryOverride }),
null,
);
// The non-executable-file case is asserted only off Windows, matching
// the chmod guard above.
if (process.platform !== 'win32') {
assert.equal(
resolveAgentExecutable(minimalAgentDef({ id: 'codex', bin: 'codex' }), { CODEX_BIN: fileOverride }),
null,
);
}
});
} finally {
rmSync(dir, { recursive: true, force: true });
}
});
// Under a win32 platform override, a CODEX_BIN whose extension (`.txt`) is
// not listed in PATHEXT is ignored and the `codex.CMD` found on PATH is
// returned instead.
test('resolveAgentExecutable ignores Windows CODEX_BIN overrides without executable PATHEXT extension', () => {
const dir = mkdtempSync(join(tmpdir(), 'od-agent-bin-win-invalid-'));
try {
return withEnvSnapshot(['PATH', 'PATHEXT', 'OD_AGENT_HOME'], () => {
const invalidOverride = join(dir, 'codex-custom.txt');
const fallback = join(dir, 'codex.CMD');
writeFileSync(invalidOverride, '@echo off\r\nexit /b 0\r\n');
writeFileSync(fallback, '@echo off\r\nexit /b 0\r\n');
process.env.PATH = dir;
process.env.PATHEXT = '.EXE;.CMD;.BAT';
process.env.OD_AGENT_HOME = dir;
// NOTE(review): withPlatform presumably makes the resolver see 'win32'
// for the duration of the callback — confirm against the helper.
const resolved = withPlatform('win32', () =>
resolveAgentExecutable(
minimalAgentDef({ id: 'codex', bin: 'codex' }),
{ CODEX_BIN: invalidOverride },
),
);
assert.equal(resolved, fallback);
});
} finally {
rmSync(dir, { recursive: true, force: true });
}
});
// Under a win32 platform override, a CODEX_BIN ending in an extension
// listed in PATHEXT (`.CMD`) is accepted even though PATH is empty.
test('resolveAgentExecutable accepts Windows CODEX_BIN overrides with executable PATHEXT extension', () => {
const dir = mkdtempSync(join(tmpdir(), 'od-agent-bin-win-valid-'));
try {
return withEnvSnapshot(['PATH', 'PATHEXT', 'OD_AGENT_HOME'], () => {
const configured = join(dir, 'codex-custom.CMD');
writeFileSync(configured, '@echo off\r\nexit /b 0\r\n');
process.env.PATH = '';
process.env.PATHEXT = '.EXE;.CMD;.BAT';
process.env.OD_AGENT_HOME = dir;
// NOTE(review): withPlatform presumably makes the resolver see 'win32'
// for the duration of the callback — confirm against the helper.
const resolved = withPlatform('win32', () =>
resolveAgentExecutable(
minimalAgentDef({ id: 'codex', bin: 'codex' }),
{ CODEX_BIN: configured },
),
);
assert.equal(resolved, configured);
});
} finally {
rmSync(dir, { recursive: true, force: true });
}
});
// Installs a fake `claude` whose `--version` output echoes
// CLAUDE_CONFIG_DIR, then checks that the per-agent env passed to
// detectAgents is visible to the probe (it comes back as the version).
test('detectAgents applies configured env while probing the CLI', async () => {
const dir = mkdtempSync(join(tmpdir(), 'od-agent-env-'));
try {
await withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], async () => {
const bin = join(dir, process.platform === 'win32' ? 'claude.cmd' : 'claude');
// Both stub variants print CLAUDE_CONFIG_DIR for `--version` and the
// two capability flags when the first argument is `-p`.
if (process.platform === 'win32') {
writeFileSync(
bin,
'@echo off\r\nif "%~1"=="--version" (\r\n echo %CLAUDE_CONFIG_DIR%\r\n exit /b 0\r\n)\r\nif "%~1"=="-p" (\r\n echo --add-dir --include-partial-messages\r\n exit /b 0\r\n)\r\nexit /b 0\r\n',
);
} else {
writeFileSync(
bin,
'#!/bin/sh\nif [ "$1" = "--version" ]; then echo "$CLAUDE_CONFIG_DIR"; exit 0; fi\nif [ "$1" = "-p" ]; then echo "--add-dir --include-partial-messages"; exit 0; fi\nexit 0\n',
);
chmodSync(bin, 0o755);
}
process.env.PATH = dir;
process.env.OD_AGENT_HOME = dir;
const agents = await detectAgents({
claude: { CLAUDE_CONFIG_DIR: '/tmp/claude-config-probe' },
});
const detected = agents.find((agent) => agent.id === 'claude');
assert.equal(detected?.available, true);
// The reported version is the stub's echo of the configured variable.
assert.equal(detected?.version, '/tmp/claude-config-probe');
});
} finally {
rmSync(dir, { recursive: true, force: true });
}
});
// Windows treats env-var names case-insensitively at the kernel level, but
// spreading process.env into a plain object drops Node's case-insensitive
// accessor. A key spelled `Anthropic_Api_Key` would then survive a literal
// `delete env.ANTHROPIC_API_KEY` and still reach Claude Code on Windows.
test('spawnEnvForAgent strips ANTHROPIC_API_KEY case-insensitively for the claude adapter', () => {
  const daemonEnv = {
    Anthropic_Api_Key: 'sk-mixed-case',
    anthropic_api_key: 'sk-lower-case',
    PATH: '/usr/bin',
  };
  const spawnEnv = spawnEnvForAgent('claude', daemonEnv);

  const survivingKeys = Object.keys(spawnEnv).filter(
    (key) => key.toUpperCase() === 'ANTHROPIC_API_KEY',
  );
  assert.deepEqual(survivingKeys, []);
  assert.equal(spawnEnv.PATH, '/usr/bin');
});
// Only the claude adapter strips the key; these adapters keep it.
test('spawnEnvForAgent preserves ANTHROPIC_API_KEY for non-claude adapters', () => {
  const otherAgentIds = ['codex', 'gemini', 'opencode', 'devin'];

  otherAgentIds.forEach((agentId) => {
    const spawnEnv = spawnEnvForAgent(agentId, {
      ANTHROPIC_API_KEY: 'sk-keep',
      PATH: '/usr/bin',
    });
    assert.equal(
      spawnEnv.ANTHROPIC_API_KEY,
      'sk-keep',
      `expected ${agentId} to preserve ANTHROPIC_API_KEY`,
    );
  });
});
// A non-empty ANTHROPIC_BASE_URL means the key is kept for the claude
// adapter rather than stripped.
test('spawnEnvForAgent preserves ANTHROPIC_API_KEY when ANTHROPIC_BASE_URL is set', () => {
  const daemonEnv = {
    ANTHROPIC_API_KEY: 'sk-kimi',
    ANTHROPIC_BASE_URL: 'https://api.moonshot.cn/v1',
    PATH: '/usr/bin',
  };
  const spawnEnv = spawnEnvForAgent('claude', daemonEnv);

  assert.equal(spawnEnv.ANTHROPIC_API_KEY, 'sk-kimi');
  assert.equal(spawnEnv.ANTHROPIC_BASE_URL, 'https://api.moonshot.cn/v1');
  assert.equal(spawnEnv.PATH, '/usr/bin');
});
// An empty ANTHROPIC_BASE_URL does not count as set, so the key is stripped.
test('spawnEnvForAgent strips ANTHROPIC_API_KEY when ANTHROPIC_BASE_URL is empty', () => {
  const daemonEnv = {
    ANTHROPIC_API_KEY: 'sk-leak',
    ANTHROPIC_BASE_URL: '',
    PATH: '/usr/bin',
  };
  const spawnEnv = spawnEnvForAgent('claude', daemonEnv);

  assert.equal('ANTHROPIC_API_KEY' in spawnEnv, false);
  assert.equal(spawnEnv.PATH, '/usr/bin');
});
// A whitespace-only ANTHROPIC_BASE_URL is treated like an empty one.
test('spawnEnvForAgent strips ANTHROPIC_API_KEY when ANTHROPIC_BASE_URL is whitespace', () => {
  const daemonEnv = {
    ANTHROPIC_API_KEY: 'sk-leak',
    ANTHROPIC_BASE_URL: ' ',
    PATH: '/usr/bin',
  };
  const spawnEnv = spawnEnvForAgent('claude', daemonEnv);

  assert.equal('ANTHROPIC_API_KEY' in spawnEnv, false);
  assert.equal(spawnEnv.PATH, '/usr/bin');
});
// The returned env is a new object; the caller's object keeps its key.
test('spawnEnvForAgent does not mutate the input env', () => {
  const inputEnv = { ANTHROPIC_API_KEY: 'sk-leak', PATH: '/usr/bin' };
  const spawnEnv = spawnEnvForAgent('claude', inputEnv);

  assert.equal(inputEnv.ANTHROPIC_API_KEY, 'sk-leak');
  assert.notEqual(spawnEnv, inputEnv);
});

View file

@ -0,0 +1,307 @@
import { test } from 'vitest';
import {
assert, chmodSync, claude, gemini, join, minimalAgentDef, mkdirSync, mkdtempSync, resolveAgentExecutable, rmSync, tmpdir, withEnvSnapshot, withPlatform, writeFileSync,
} from './helpers/test-helpers.js';
// Filesystem-backed cases are registered through `fsTest`, which is
// `test.skip` on win32 and plain `test` everywhere else.
const fsTest = process.platform === 'win32' ? test.skip : test;
// ---- OpenClaude fallback (issue #235) -------------------------------------
// OpenClaude (https://github.com/Gitlawb/openclaude) is a Claude Code fork
// shipped under a different binary name with an argv-compatible CLI. A user
// who only has `openclaude` on PATH should be detected as the Claude Code
// agent without writing a wrapper script. This works through the
// `fallbackBins` array on the Claude AGENT_DEF, which
// `resolveAgentExecutable` consumes.
test('claude entry declares openclaude as a fallback bin (issue #235)', () => {
  const declaredFallbacks = claude.fallbackBins;

  assert.ok(
    Array.isArray(declaredFallbacks),
    'claude.fallbackBins must be an array',
  );
  assert.ok(
    declaredFallbacks.includes('openclaude'),
    `claude.fallbackBins must include 'openclaude'; got ${JSON.stringify(claude.fallbackBins)}`,
  );
});
// resolveAgentExecutable touches the filesystem via existsSync; on
// Windows resolveOnPath also walks PATHEXT extensions, which our fixture
// files don't carry. Skip the filesystem-backed cases there — the
// declarative `fallbackBins`-on-claude assertion above still runs on
// every platform and is what catches regressions in the AGENT_DEF.
fsTest(
  'resolveAgentExecutable prefers def.bin over fallbackBins when bin is on PATH',
  () => {
    const dir = mkdtempSync(join(tmpdir(), 'od-agents-resolve-'));
    try {
      // Scope the PATH / OD_AGENT_HOME overrides with withEnvSnapshot so the
      // soon-to-be-deleted temp dir does not stay in the environment of
      // later tests.
      return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
        // Both the primary binary and the fallback are installed.
        writeFileSync(join(dir, 'claude'), '');
        writeFileSync(join(dir, 'openclaude'), '');
        chmodSync(join(dir, 'claude'), 0o755);
        chmodSync(join(dir, 'openclaude'), 0o755);
        process.env.OD_AGENT_HOME = dir;
        process.env.PATH = dir;
        const resolved = resolveAgentExecutable(minimalAgentDef({
          bin: 'claude',
          fallbackBins: ['openclaude'],
        }));
        assert.equal(resolved, join(dir, 'claude'));
      });
    } finally {
      rmSync(dir, { recursive: true, force: true });
    }
  },
);
// With only the fallback binary installed, resolution must land on it.
fsTest(
  'resolveAgentExecutable falls back through fallbackBins when def.bin is missing',
  () => {
    const dir = mkdtempSync(join(tmpdir(), 'od-agents-resolve-'));
    try {
      // Scope the PATH / OD_AGENT_HOME overrides with withEnvSnapshot so
      // they do not leak into later tests.
      return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
        // Only `openclaude` is installed (Claude Code fork-only setup).
        writeFileSync(join(dir, 'openclaude'), '');
        chmodSync(join(dir, 'openclaude'), 0o755);
        process.env.OD_AGENT_HOME = dir;
        process.env.PATH = dir;
        const resolved = resolveAgentExecutable(minimalAgentDef({
          bin: 'claude',
          fallbackBins: ['openclaude'],
        }));
        assert.equal(resolved, join(dir, 'openclaude'));
      });
    } finally {
      rmSync(dir, { recursive: true, force: true });
    }
  },
);
// An empty search directory yields null: neither the primary binary nor the
// fallback exists.
fsTest(
  'resolveAgentExecutable returns null when neither def.bin nor any fallback is on PATH',
  () => {
    const dir = mkdtempSync(join(tmpdir(), 'od-agents-resolve-'));
    try {
      // Scope the PATH / OD_AGENT_HOME overrides with withEnvSnapshot so
      // they do not leak into later tests.
      return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
        process.env.OD_AGENT_HOME = dir;
        process.env.PATH = dir;
        const resolved = resolveAgentExecutable(minimalAgentDef({
          bin: 'claude',
          fallbackBins: ['openclaude'],
        }));
        assert.equal(resolved, null);
      });
    } finally {
      rmSync(dir, { recursive: true, force: true });
    }
  },
);
// A binary installed under the mise-managed node `bin` directory inside the
// agent home must be found even though PATH only has system directories.
fsTest(
  'resolveAgentExecutable searches mise node bins when PATH is minimal',
  () => {
    const home = mkdtempSync(join(tmpdir(), 'od-agents-home-'));
    try {
      // Scope the PATH / OD_AGENT_HOME overrides with withEnvSnapshot so
      // they do not leak into later tests.
      return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
        const dir = join(
          home,
          '.local',
          'share',
          'mise',
          'installs',
          'node',
          '24.14.1',
          'bin',
        );
        mkdirSync(dir, { recursive: true });
        writeFileSync(join(dir, 'codex'), '');
        chmodSync(join(dir, 'codex'), 0o755);
        process.env.OD_AGENT_HOME = home;
        process.env.PATH = '/usr/bin:/bin';
        const resolved = resolveAgentExecutable(minimalAgentDef({
          bin: 'codex',
        }));
        assert.equal(resolved, join(dir, 'codex'));
      });
    } finally {
      rmSync(home, { recursive: true, force: true });
    }
  },
);
fsTest(
  'resolveAgentExecutable still resolves agents without a fallbackBins field',
  () => {
    // Guard against a regression that would require every AGENT_DEF to
    // declare fallbackBins. Most agents (codex / gemini / opencode / ...)
    // only have a single binary name and must keep working unchanged.
    const dir = mkdtempSync(join(tmpdir(), 'od-agents-resolve-'));
    try {
      // Scope the env overrides with withEnvSnapshot so they do not leak
      // into later tests.
      return withEnvSnapshot(['PATH', 'OD_AGENT_HOME'], () => {
        writeFileSync(join(dir, 'codex'), '');
        chmodSync(join(dir, 'codex'), 0o755);
        // Pin the agent home explicitly, as the sibling cases do, so the
        // result does not depend on whatever an earlier test (or the real
        // user home) left in OD_AGENT_HOME.
        process.env.OD_AGENT_HOME = dir;
        process.env.PATH = dir;
        const resolved = resolveAgentExecutable(minimalAgentDef({ bin: 'codex' }));
        assert.equal(resolved, join(dir, 'codex'));
      });
    } finally {
      rmSync(dir, { recursive: true, force: true });
    }
  },
);
// Issue #442: GUI-launched daemons (Finder/Dock on macOS, .desktop on Linux)
// inherit a stripped PATH that doesn't include the user's npm global prefix.
// Most third-party "fix npm EACCES without sudo" tutorials configure
// `~/.npm-global` as the prefix, so any CLI installed via `npm i -g <cli>`
// lives at `~/.npm-global/bin/<cli>`. The daemon must search there even when
// the inherited PATH only carries `/usr/bin:/bin:...`.
fsTest(
  'resolveAgentExecutable searches ~/.npm-global/bin under a minimal GUI-launched PATH (issue #442)',
  () => {
    const fakeHome = mkdtempSync(join(tmpdir(), 'od-agents-npm-global-'));
    try {
      const npmGlobalBin = join(fakeHome, '.npm-global', 'bin');
      const geminiPath = join(npmGlobalBin, 'gemini');
      mkdirSync(npmGlobalBin, { recursive: true });
      writeFileSync(geminiPath, '');
      chmodSync(geminiPath, 0o755);
      process.env.OD_AGENT_HOME = fakeHome;
      // Stand-in for the stripped PATH a GUI-launched app inherits: system
      // directories only, with no `~/.npm-global/bin` or `/opt/homebrew/bin`.
      process.env.PATH = '/usr/bin:/bin';
      const agentDef = minimalAgentDef({ bin: 'gemini' });
      assert.equal(resolveAgentExecutable(agentDef), geminiPath);
    } finally {
      rmSync(fakeHome, { recursive: true, force: true });
    }
  },
);
// Same root cause as #442 but for the second-most-common alternative
// non-canonical npm prefix shipped in older "fix sudo-free npm" guides.
fsTest(
  'resolveAgentExecutable also searches ~/.npm-packages/bin (alt npm prefix)',
  () => {
    const fakeHome = mkdtempSync(join(tmpdir(), 'od-agents-npm-packages-'));
    try {
      // Executable `gemini` stub under the alternative npm prefix.
      const npmPackagesBin = join(fakeHome, '.npm-packages', 'bin');
      const geminiPath = join(npmPackagesBin, 'gemini');
      mkdirSync(npmPackagesBin, { recursive: true });
      writeFileSync(geminiPath, '');
      chmodSync(geminiPath, 0o755);
      process.env.OD_AGENT_HOME = fakeHome;
      process.env.PATH = '/usr/bin:/bin';
      const agentDef = minimalAgentDef({ bin: 'gemini' });
      assert.equal(resolveAgentExecutable(agentDef), geminiPath);
    } finally {
      rmSync(fakeHome, { recursive: true, force: true });
    }
  },
);
fsTest(
  'resolveAgentExecutable searches ~/.vite-plus/bin under a minimal GUI-launched PATH (vp global install)',
  () => {
    const fakeHome = mkdtempSync(join(tmpdir(), 'od-agents-vp-home-'));
    try {
      // `vp-cli-probe` is a made-up binary name, placed only under the
      // sandboxed `.vite-plus/bin` directory.
      const vitePlusBin = join(fakeHome, '.vite-plus', 'bin');
      const probePath = join(vitePlusBin, 'vp-cli-probe');
      mkdirSync(vitePlusBin, { recursive: true });
      writeFileSync(probePath, '');
      chmodSync(probePath, 0o755);
      process.env.OD_AGENT_HOME = fakeHome;
      process.env.PATH = '/usr/bin:/bin';
      const agentDef = minimalAgentDef({ bin: 'vp-cli-probe' });
      assert.equal(resolveAgentExecutable(agentDef), probePath);
    } finally {
      rmSync(fakeHome, { recursive: true, force: true });
    }
  },
);
fsTest(
  'resolveAgentExecutable honors $VP_HOME/bin when the custom Vite+ home is outside PATH',
  () => {
    const vpHome = mkdtempSync(join(tmpdir(), 'od-agents-vp-custom-'));
    try {
      const dir = join(vpHome, 'bin');
      const probePath = join(dir, 'vp-cli-probe');
      mkdirSync(dir, { recursive: true });
      writeFileSync(probePath, '');
      chmodSync(probePath, 0o755);
      // The "$VP_HOME leakage" test in this file shows that a set
      // OD_AGENT_HOME hides $VP_HOME/bin from resolution. Clear any value
      // inherited from the developer's shell or CI runner so this positive
      // test does not depend on the host environment. The shared afterEach
      // hook puts the original value back.
      delete process.env.OD_AGENT_HOME;
      process.env.PATH = '/usr/bin:/bin';
      process.env.VP_HOME = vpHome;
      const resolved = resolveAgentExecutable(minimalAgentDef({ bin: 'vp-cli-probe' }));
      assert.equal(resolved, probePath);
    } finally {
      rmSync(vpHome, { recursive: true, force: true });
    }
  },
);
// Test isolation: when OD_AGENT_HOME points at a sandbox, an exported
// $NPM_CONFIG_PREFIX / $npm_config_prefix on the developer's or CI
// runner's environment must not leak a real <prefix>/bin into the
// sandboxed search list. Otherwise an agent installed by the host
// machine could satisfy a "not on PATH" assertion in the sandbox and
// make detection tests environment-dependent. Raised in PR review on
// #442 (review comment by @mrcfps on apps/daemon/src/agents.ts:742).
fsTest(
  'OD_AGENT_HOME isolates resolution from $NPM_CONFIG_PREFIX leakage',
  () => {
    const sandboxHome = mkdtempSync(join(tmpdir(), 'od-agents-sandbox-'));
    const hostPrefix = mkdtempSync(join(tmpdir(), 'od-agents-real-prefix-'));
    const hostPrefixBin = join(hostPrefix, 'bin');
    try {
      // The sandbox stays empty: no gemini exists under OD_AGENT_HOME.
      // The "host" prefix does hold a gemini stub and NPM_CONFIG_PREFIX
      // points at it, imitating a developer's real global npm install.
      const hostGemini = join(hostPrefixBin, 'gemini');
      mkdirSync(hostPrefixBin, { recursive: true });
      writeFileSync(hostGemini, '');
      chmodSync(hostGemini, 0o755);
      process.env.OD_AGENT_HOME = sandboxHome;
      process.env.PATH = '/usr/bin:/bin';
      process.env.NPM_CONFIG_PREFIX = hostPrefix;
      const resolved = resolveAgentExecutable(minimalAgentDef({ bin: 'gemini' }));
      assert.equal(
        resolved,
        null,
        `OD_AGENT_HOME sandbox must not see the real $NPM_CONFIG_PREFIX bin; got ${resolved}`,
      );
    } finally {
      // NPM_CONFIG_PREFIX is intentionally not deleted here. The shared
      // afterEach hook restores it to its pre-test value (or removes it if
      // it was unset); an unconditional delete would wipe a value the
      // developer or CI runner had exported and leak into later tests in
      // the same Vitest worker.
      for (const tempDir of [sandboxHome, hostPrefix]) {
        rmSync(tempDir, { recursive: true, force: true });
      }
    }
  },
);
fsTest(
  'OD_AGENT_HOME isolates resolution from $VP_HOME leakage',
  () => {
    const sandboxHome = mkdtempSync(join(tmpdir(), 'od-agents-vp-sandbox-'));
    const hostVpHome = mkdtempSync(join(tmpdir(), 'od-agents-vp-real-home-'));
    const hostVpBin = join(hostVpHome, 'bin');
    try {
      // Only the "host" Vite+ home contains the probe binary; the sandbox
      // that OD_AGENT_HOME points at is empty.
      const hostProbe = join(hostVpBin, 'vp-cli-probe');
      mkdirSync(hostVpBin, { recursive: true });
      writeFileSync(hostProbe, '');
      chmodSync(hostProbe, 0o755);
      process.env.OD_AGENT_HOME = sandboxHome;
      process.env.PATH = '/usr/bin:/bin';
      process.env.VP_HOME = hostVpHome;
      const resolved = resolveAgentExecutable(minimalAgentDef({ bin: 'vp-cli-probe' }));
      const failureHint = `OD_AGENT_HOME sandbox must not see the real $VP_HOME bin; got ${resolved}`;
      assert.equal(resolved, null, failureHint);
    } finally {
      for (const tempDir of [sandboxHome, hostVpHome]) {
        rmSync(tempDir, { recursive: true, force: true });
      }
    }
  },
);

View file

@ -0,0 +1,187 @@
import { afterEach } from 'vitest';
import assert from 'node:assert/strict';
import {
chmodSync,
mkdirSync,
mkdtempSync,
rmSync,
writeFileSync,
} from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
AGENT_DEFS,
buildLiveArtifactsMcpServersForAgent,
checkPromptArgvBudget,
checkWindowsCmdShimCommandLineBudget,
checkWindowsDirectExeCommandLineBudget,
detectAgents,
inspectAgentExecutableResolution,
resolveAgentExecutable,
spawnEnvForAgent,
} from '../../../src/agents.js';
import type { RuntimeAgentDef } from '../../../src/runtimes/types.js';
export {
assert,
AGENT_DEFS,
buildLiveArtifactsMcpServersForAgent,
checkPromptArgvBudget,
checkWindowsCmdShimCommandLineBudget,
checkWindowsDirectExeCommandLineBudget,
chmodSync,
detectAgents,
inspectAgentExecutableResolution,
join,
mkdirSync,
mkdtempSync,
resolveAgentExecutable,
rmSync,
spawnEnvForAgent,
tmpdir,
writeFileSync,
};
export type TestAgentDef = RuntimeAgentDef;
/**
 * Looks up a registered agent definition by id.
 *
 * @param id - Agent id to search for in `AGENT_DEFS`.
 * @returns The first definition whose `id` matches.
 * @throws AssertionError when no definition carries that id.
 */
export function requireAgent(id: string): TestAgentDef {
  const match = AGENT_DEFS.find(({ id: candidateId }) => candidateId === id);
  assert.ok(match, `missing agent definition for ${id}`);
  return match;
}
/**
 * Builds a complete agent definition for tests from a partial override.
 *
 * Only `bin` is required. `id`, `name`, `versionArgs`, `fallbackModels`,
 * `buildArgs` and `streamFormat` fall back to minimal defaults whenever the
 * override omits them or supplies `undefined`/`null`. Every other key of
 * `partial` (for example `fallbackBins`) is copied through unchanged.
 *
 * @param partial - Fields to override; must include `bin`.
 * @returns A definition with every defaulted field populated.
 */
export function minimalAgentDef(
  partial: Pick<TestAgentDef, 'bin'> & Partial<TestAgentDef>,
): TestAgentDef {
  const { bin } = partial;
  return {
    // Spread the overrides first. If they came last, a key explicitly set
    // to `undefined` would overwrite the fallback computed below and leave
    // a hole in the returned definition.
    ...partial,
    id: partial.id ?? `test-${bin}`,
    name: partial.name ?? bin,
    bin,
    versionArgs: partial.versionArgs ?? ['--version'],
    fallbackModels: partial.fallbackModels ?? [{ id: 'default', label: 'Default' }],
    buildArgs: partial.buildArgs ?? (() => []),
    streamFormat: partial.streamFormat ?? 'plain',
  };
}
// Pre-resolved agent definitions shared by the agent test files. Each lookup
// runs when this module is first imported, and `requireAgent` asserts, so a
// missing id fails at import time with "missing agent definition for <id>".
export const codex = requireAgent('codex');
export const hermes = requireAgent('hermes');
export const kimi = requireAgent('kimi');
export const copilot = requireAgent('copilot');
export const cursorAgent = requireAgent('cursor-agent');
export const kiro = requireAgent('kiro');
export const kilo = requireAgent('kilo');
export const vibe = requireAgent('vibe');
export const claude = requireAgent('claude');
export const devin = requireAgent('devin');
export const pi = requireAgent('pi');
export const deepseek = requireAgent('deepseek');
export const gemini = requireAgent('gemini');
export const qoder = requireAgent('qoder');
export const opencode = requireAgent('opencode');
// DeepSeek's argv byte budget, read once at import time. The assertion fails
// the import if the definition stops declaring `maxPromptArgBytes`, and it
// also narrows the exported value so tests need no undefined checks.
export const deepseekMaxPromptArgBytes = (() => {
  const budget = deepseek.maxPromptArgBytes;
  assert.ok(
    budget !== undefined,
    'deepseek must define maxPromptArgBytes for argv budget tests',
  );
  return budget;
})();
// Snapshots of the process-global state that tests mutate, captured once when
// this module is loaded. The afterEach hook below writes them back after
// every test. An environment variable that was unset at load time is
// recorded as `undefined`.
const originalDisablePlugins = process.env.OD_CODEX_DISABLE_PLUGINS;
const originalPath = process.env.PATH;
const originalHome = process.env.HOME;
const originalAgentHome = process.env.OD_AGENT_HOME;
const originalDaemonUrl = process.env.OD_DAEMON_URL;
const originalToolToken = process.env.OD_TOOL_TOKEN;
const originalNpmConfigPrefix = process.env.NPM_CONFIG_PREFIX;
const originalPathExt = process.env.PATHEXT;
const originalVpHome = process.env.VP_HOME;
const originalFetch = globalThis.fetch;
// Full property descriptor, so the platform override can be undone exactly.
const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, 'platform');
// Puts back every piece of process-global state the agent tests may touch
// (environment variables, `globalThis.fetch`, `process.platform`), so one
// test's changes do not carry over into the next.
afterEach(() => {
  // Assigning `undefined` to a `process.env` key stores the literal string
  // "undefined", so a variable that was unset at load time has to be deleted
  // rather than assigned. PATH goes through the same helper as the others.
  const restoreEnv = (key: string, original: string | undefined): void => {
    if (original == null) {
      delete process.env[key];
    } else {
      process.env[key] = original;
    }
  };
  restoreEnv('OD_CODEX_DISABLE_PLUGINS', originalDisablePlugins);
  restoreEnv('PATH', originalPath);
  restoreEnv('HOME', originalHome);
  restoreEnv('OD_AGENT_HOME', originalAgentHome);
  restoreEnv('OD_DAEMON_URL', originalDaemonUrl);
  restoreEnv('OD_TOOL_TOKEN', originalToolToken);
  restoreEnv('NPM_CONFIG_PREFIX', originalNpmConfigPrefix);
  restoreEnv('PATHEXT', originalPathExt);
  restoreEnv('VP_HOME', originalVpHome);
  // Undo any fetch stub a test installed.
  globalThis.fetch = originalFetch;
  // Undo any `process.platform` override.
  if (originalPlatformDescriptor) {
    Object.defineProperty(process, 'platform', originalPlatformDescriptor);
  }
});
/**
 * Runs `run` while `process.platform` reports `platform`, then puts the
 * previous value back.
 *
 * The previous property descriptor is restored when `run` returns, when it
 * throws, or (if it returns a promise) when that promise settles, so the
 * override never outlives the callback.
 *
 * @param platform - Platform string to expose through `process.platform`.
 * @param run - Callback executed under the override.
 * @returns Whatever `run` returns.
 */
export function withPlatform<T>(platform: NodeJS.Platform, run: () => T): T {
  const previous = Object.getOwnPropertyDescriptor(process, 'platform');
  const restore = (): void => {
    if (previous) {
      Object.defineProperty(process, 'platform', previous);
    } else {
      // No own property existed before the override, so remove ours.
      Reflect.deleteProperty(process, 'platform');
    }
  };
  Object.defineProperty(process, 'platform', {
    configurable: true,
    value: platform,
  });
  let result: T;
  try {
    result = run();
  } catch (error) {
    restore();
    throw error;
  }
  if (result instanceof Promise) {
    // Keep the override in place until the async callback has finished.
    return result.finally(restore) as T;
  }
  restore();
  return result;
}
/**
 * Runs `run` and afterwards resets the listed environment variables to the
 * values they had before the call.
 *
 * Variables that were unset beforehand are deleted again. The reset happens
 * right after a synchronous return or throw; when `run` returns a promise it
 * is deferred until that promise settles.
 *
 * @param keys - Names of the environment variables to protect.
 * @param run - Callback that may freely mutate those variables.
 * @returns The value or promise produced by `run`.
 */
export function withEnvSnapshot<T>(
  keys: readonly string[],
  run: () => T | Promise<T>,
): T | Promise<T> {
  const saved = new Map(
    keys.map((name): [string, string | undefined] => [name, process.env[name]]),
  );
  const resetEnv = (): void => {
    saved.forEach((previous, name) => {
      if (previous == null) {
        delete process.env[name];
        return;
      }
      process.env[name] = previous;
    });
  };
  // Set once an async result has taken over responsibility for the reset.
  let deferred = false;
  try {
    const outcome = run();
    if (outcome instanceof Promise) {
      deferred = true;
      return outcome.finally(resetEnv);
    }
    return outcome;
  } finally {
    if (!deferred) {
      resetEnv();
    }
  }
}

View file

@ -0,0 +1,162 @@
import { test } from 'vitest';
import { createLiveArtifactsMcpTools, handleLiveArtifactsMcpRequest } from '../../src/mcp-live-artifacts-server.js';
import { AGENT_DEFS, assert, buildLiveArtifactsMcpServersForAgent, hermes, kimi } from './helpers/test-helpers.js';
test('live artifact MCP discovery is limited to mature ACP agents', () => {
  // The server list expected for agents that get live-artifact MCP discovery.
  const expectedServers = [
    {
      name: 'open-design-live-artifacts',
      command: 'od',
      args: ['mcp', 'live-artifacts'],
      env: [],
    },
  ];
  assert.deepEqual(buildLiveArtifactsMcpServersForAgent(hermes), expectedServers);
  assert.deepEqual(buildLiveArtifactsMcpServersForAgent(kimi), expectedServers);
  // Every other registered agent must get no MCP servers.
  const enabledIds = new Set(['hermes', 'kimi']);
  const otherAgents = AGENT_DEFS.filter((agent) => !enabledIds.has(agent.id));
  for (const agent of otherAgents) {
    assert.deepEqual(buildLiveArtifactsMcpServersForAgent(agent), []);
  }
});
test('live artifact MCP discovery is disabled when run-scoped tool auth is unavailable', () => {
  // `enabled: false` must suppress discovery even for an agent that
  // normally receives the server entry.
  const servers = buildLiveArtifactsMcpServersForAgent(hermes, { enabled: false });
  assert.deepEqual(servers, []);
});
test('live artifact MCP discovery can use daemon-resolved CLI command', () => {
  // Options as the daemon would pass them: run the CLI script through the
  // current Node binary instead of the bare `od` command.
  const cliScript = '/workspace/apps/daemon/dist/cli.js';
  const options = {
    command: process.execPath,
    argsPrefix: [cliScript],
  } as unknown as Parameters<typeof buildLiveArtifactsMcpServersForAgent>[1];
  const servers = buildLiveArtifactsMcpServersForAgent(hermes, options);
  assert.deepEqual(servers, [
    {
      name: 'open-design-live-artifacts',
      command: process.execPath,
      args: [cliScript, 'mcp', 'live-artifacts'],
      env: [],
    },
  ]);
});
test('MCP-capable agents can discover equivalent live artifact and connector tools', async () => {
  const tools = createLiveArtifactsMcpTools();
  const toolNames = tools.map((tool) => tool.name);
  assert.deepEqual(toolNames, [
    'live_artifacts_create',
    'live_artifacts_list',
    'live_artifacts_update',
    'live_artifacts_refresh',
    'connectors_list',
    'connectors_execute',
  ]);
  // Every tool documents its CLI equivalent and takes an object payload.
  for (const { description, inputSchema } of tools) {
    assert.equal(typeof description, 'string');
    assert.match(description, /POSIX equivalent: `"\$OD_NODE_BIN" "\$OD_BIN" tools /u);
    assert.equal(inputSchema.type, 'object');
  }

  // JSON-RPC handshake: `initialize` reports the server name and capabilities.
  const initializeRequest = { jsonrpc: '2.0', id: 1, method: 'initialize', params: {} };
  const initialized = (await handleLiveArtifactsMcpRequest(initializeRequest)) as {
    result: { serverInfo: { name: string }; capabilities: unknown };
  };
  assert.equal(initialized.result.serverInfo.name, 'open-design-live-artifacts');
  assert.deepEqual(initialized.result.capabilities, { tools: {} });

  // `tools/list` must advertise the same tools, in the same order.
  const listRequest = { jsonrpc: '2.0', id: 2, method: 'tools/list', params: {} };
  const listed = (await handleLiveArtifactsMcpRequest(listRequest)) as {
    result: { tools: Array<{ name: string }> };
  };
  assert.deepEqual(listed.result.tools.map((tool) => tool.name), tools.map((tool) => tool.name));

  // Pin the input property names of the tools whose payload shape matters.
  const createTool = tools.find((tool) => tool.name === 'live_artifacts_create')!;
  const updateTool = tools.find((tool) => tool.name === 'live_artifacts_update')!;
  const connectorsListTool = tools.find((tool) => tool.name === 'connectors_list')!;
  const sortedPropertyNames = (schema: { properties?: unknown }): string[] =>
    Object.keys(schema.properties as Record<string, unknown>).sort();
  const createNames = sortedPropertyNames(createTool.inputSchema);
  const updateNames = sortedPropertyNames(updateTool.inputSchema);
  const connectorsListNames = sortedPropertyNames(connectorsListTool.inputSchema);
  assert.deepEqual(createNames, ['input', 'provenanceJson', 'templateHtml']);
  assert.deepEqual(updateNames, ['artifactId', 'input', 'provenanceJson', 'templateHtml']);
  assert.deepEqual(connectorsListNames, ['useCase']);
});
test('live artifact MCP connector list forwards daily digest use case to daemon tools', async () => {
  process.env.OD_DAEMON_URL = 'http://127.0.0.1:17456/base';
  process.env.OD_TOOL_TOKEN = 'test-tool-token';
  // Replace fetch with a recorder that answers with an empty connector list.
  const recorded: Array<{ url: string; init: RequestInit | undefined }> = [];
  globalThis.fetch = async (url, init) => {
    recorded.push({ url: String(url), init });
    return new Response(JSON.stringify({ connectors: [] }), { status: 200 });
  };
  const request = {
    jsonrpc: '2.0',
    id: 5,
    method: 'tools/call',
    params: { name: 'connectors_list', arguments: { useCase: 'personal_daily_digest' } },
  };
  const response = (await handleLiveArtifactsMcpRequest(request)) as { error?: unknown };
  assert.equal(response.error, undefined);
  assert.equal(recorded.length, 1);
  const [onlyCall] = recorded;
  assert.ok(onlyCall);
  // The `/base` path of the daemon URL is kept and the use case travels as
  // a query parameter.
  assert.equal(
    onlyCall.url,
    'http://127.0.0.1:17456/base/api/tools/connectors/list?useCase=personal_daily_digest',
  );
});
test('live artifact MCP create forwards input and artifact payload fields to daemon tools', async () => {
  process.env.OD_DAEMON_URL = 'http://127.0.0.1:17456';
  process.env.OD_TOOL_TOKEN = 'test-tool-token';
  // Recording fetch stub that answers with a created artifact.
  const recorded: Array<{ url: string; init: RequestInit | undefined }> = [];
  globalThis.fetch = async (url, init) => {
    recorded.push({ url: String(url), init });
    return new Response(JSON.stringify({ artifact: { id: 'artifact-1' } }), { status: 200 });
  };
  const payload = {
    input: { title: 'Demo', preview: { type: 'html', entry: 'index.html' } },
    templateHtml: '<h1>{{data.title}}</h1>',
    provenanceJson: { source: { type: 'mcp-test' } },
  };
  const request = {
    jsonrpc: '2.0',
    id: 3,
    method: 'tools/call',
    params: { name: 'live_artifacts_create', arguments: { ...payload } },
  };
  const response = (await handleLiveArtifactsMcpRequest(request)) as { error?: unknown };
  assert.equal(response.error, undefined);
  assert.equal(recorded.length, 1);
  const [onlyCall] = recorded;
  assert.ok(onlyCall);
  assert.ok(onlyCall.init);
  assert.equal(onlyCall.url, 'http://127.0.0.1:17456/api/tools/live-artifacts/create');
  // The request body must carry all three fields unchanged.
  assert.deepEqual(JSON.parse(onlyCall.init.body as string), payload);
});
test('live artifact MCP update preserves nested input and artifact payload fields', async () => {
  process.env.OD_DAEMON_URL = 'http://127.0.0.1:17456';
  process.env.OD_TOOL_TOKEN = 'test-tool-token';
  // Recording fetch stub that answers with the updated artifact.
  const recorded: Array<{ url: string; init: RequestInit | undefined }> = [];
  globalThis.fetch = async (url, init) => {
    recorded.push({ url: String(url), init });
    return new Response(JSON.stringify({ artifact: { id: 'artifact-1', title: 'Updated' } }), { status: 200 });
  };
  const payload = {
    artifactId: 'artifact-1',
    input: { title: 'Updated', pinned: true },
    templateHtml: '<p>{{data.value}}</p>',
    provenanceJson: { source: { type: 'mcp-update-test' } },
  };
  const request = {
    jsonrpc: '2.0',
    id: 4,
    method: 'tools/call',
    params: { name: 'live_artifacts_update', arguments: { ...payload } },
  };
  const response = (await handleLiveArtifactsMcpRequest(request)) as { error?: unknown };
  assert.equal(response.error, undefined);
  assert.equal(recorded.length, 1);
  const [onlyCall] = recorded;
  assert.ok(onlyCall);
  assert.ok(onlyCall.init);
  assert.equal(onlyCall.url, 'http://127.0.0.1:17456/api/tools/live-artifacts/update');
  // The artifact id and the nested payload fields must arrive unchanged.
  assert.deepEqual(JSON.parse(onlyCall.init.body as string), payload);
});

View file

@ -0,0 +1,454 @@
import { test } from 'vitest';
import {
assert, checkPromptArgvBudget, checkWindowsCmdShimCommandLineBudget, checkWindowsDirectExeCommandLineBudget, claude, deepseek, deepseekMaxPromptArgBytes, vibe,
} from './helpers/test-helpers.js';
import type { TestAgentDef } from './helpers/test-helpers.js';
// DeepSeek TUI's exec subcommand requires the prompt as a positional
// argument (no `-` stdin sentinel; clap declares `prompt: String` as a
// required field). `--auto` enables agentic mode with auto-approval —
// the daemon runs every CLI without a TTY, so the interactive approval
// prompt would hang the run.
test('deepseek args use exec --auto and append prompt as positional', () => {
  const prompt = 'write hello world';
  // With no extra options the prompt is the last, positional argument.
  const builtArgs = deepseek.buildArgs(prompt, [], [], {});
  assert.deepEqual(builtArgs, ['exec', '--auto', prompt]);
  assert.equal(deepseek.streamFormat, 'plain');
});
test('deepseek args inject --model when the user picks one', () => {
  const model = 'deepseek-v4-pro';
  // `--model <id>` goes ahead of the positional prompt.
  const expected = ['exec', '--auto', '--model', model, 'hi'];
  assert.deepEqual(deepseek.buildArgs('hi', [], [], { model }), expected);
});
test('deepseek args omit --model when model is "default"', () => {
  // The placeholder id "default" must not be passed on as a flag.
  const builtArgs = deepseek.buildArgs('hi', [], [], { model: 'default' });
  const hasModelFlag = builtArgs.includes('--model');
  assert.equal(hasModelFlag, false);
});
// DeepSeek's exec mode requires the prompt as a positional argv arg
// (no `-` stdin sentinel upstream), so a sufficiently large composed
// prompt — system text + history + skills/design-system content + the
// user message — could blow Windows' ~32 KB CreateProcess command-line
// limit (or Linux MAX_ARG_STRLEN on extreme edges) and surface as a
// generic spawn ENAMETOOLONG / E2BIG instead of a DeepSeek-specific,
// user-actionable message. The adapter declares `maxPromptArgBytes` so
// /api/chat can fail fast with guidance ("reduce skills/design context
// or use an adapter with stdin support") before calling `spawn`. Pin
// the field so removing it can't silently regress the guard.
test('deepseek declares a conservative argv-byte budget for the prompt', () => {
  const budget = deepseekMaxPromptArgBytes;
  assert.equal(
    typeof budget,
    'number',
    'deepseek must set maxPromptArgBytes so the spawn path can pre-flight oversized prompts before hitting CreateProcess / E2BIG',
  );
  // The budget must be positive and strictly below 32_768.
  const withinCap = budget > 0 && budget < 32_768;
  assert.ok(
    withinCap,
    `deepseekMaxPromptArgBytes must stay strictly under the Windows CreateProcess limit (~32 KB); got ${budget}`,
  );
});
// Regression: composed prompts larger than the deepseek argv budget
// (chosen as a conservative under-Windows-CreateProcess size) must
// trip `checkPromptArgvBudget` with the DeepSeek-named, actionable
// `AGENT_PROMPT_TOO_LARGE` payload the chat handler emits over SSE,
// while normal-sized prompts must pass through cleanly so the chat
// happy path keeps working. This exercises the same pure helper the
// `/api/chat` spawn path uses, so removing the guard or letting the
// budget drift over the Windows limit fails this test before any
// real spawn would surface a generic ENAMETOOLONG / E2BIG.
test('checkPromptArgvBudget flags oversized DeepSeek prompts and lets short prompts through', () => {
  const budget = deepseekMaxPromptArgBytes;

  // One byte over the budget: the guard must report a structured error.
  const flagged = checkPromptArgvBudget(deepseek, 'x'.repeat(budget + 1));
  assert.ok(flagged, 'oversized prompts must trip the argv-byte guard');
  assert.equal(flagged.code, 'AGENT_PROMPT_TOO_LARGE');
  assert.equal(flagged.limit, budget);
  assert.equal(flagged.bytes, budget + 1);
  for (const expectedText of [/DeepSeek/, /command-line argument/, /stdin support/]) {
    assert.match(flagged.message, expectedText);
  }

  // A short prompt passes (null result).
  assert.equal(checkPromptArgvBudget(deepseek, 'hello'), null);

  // Boundary: a prompt of exactly `budget` bytes also passes, so the guard
  // fires only when the size strictly exceeds the budget.
  const exactlyAtBudget = 'x'.repeat(budget);
  assert.equal(checkPromptArgvBudget(deepseek, exactlyAtBudget), null);

  // Multi-byte input: the repeat count stays below `budget` characters, yet
  // at 3 UTF-8 bytes per character the prompt exceeds it in bytes. A helper
  // that counted characters instead of bytes would let this through.
  const cjkCharCount = Math.ceil(budget / 3) + 1;
  const cjkFlagged = checkPromptArgvBudget(deepseek, '汉'.repeat(cjkCharCount));
  assert.ok(cjkFlagged, 'byte-counted UTF-8 prompts must also trip the guard');
  assert.equal(cjkFlagged.code, 'AGENT_PROMPT_TOO_LARGE');
});
// Adapters that ship the prompt over stdin (every other code agent
// today) don't declare `maxPromptArgBytes` and must skip the guard
// entirely — applying it to them would refuse perfectly valid huge
// prompts those CLIs handle just fine via stdin.
test('checkPromptArgvBudget is a no-op for adapters without maxPromptArgBytes', () => {
  // Claude declares no argv budget, so even a 100_000-character prompt must
  // pass the guard.
  assert.equal(claude.maxPromptArgBytes, undefined);
  const hugePrompt = 'x'.repeat(100_000);
  const verdict = checkPromptArgvBudget(claude, hugePrompt);
  assert.equal(verdict, null);
});
// On Windows an npm-installed `deepseek` resolves to a `.cmd` shim and
// the spawn path wraps the call in `cmd.exe /d /s /c "<inner>"`, with
// every embedded `"` doubled by `quoteWindowsCommandArg`. A prompt that
// fits under the raw `maxPromptArgBytes` budget but is heavy on quote
// characters (code blocks, JSON-shaped skill seeds) can therefore still
// expand past CreateProcess's 32_767-char `lpCommandLine` cap — surfacing
// as a generic spawn ENAMETOOLONG instead of the actionable DeepSeek-
// named error the budget guard was meant to provide. The post-buildArgs
// check `checkWindowsCmdShimCommandLineBudget` computes the would-be
// command line length using the same quoting math the platform layer
// uses on Windows, so a quote-heavy prompt under the byte budget still
// fails with `AGENT_PROMPT_TOO_LARGE` before spawn.
test('checkWindowsCmdShimCommandLineBudget flags quote-heavy prompts that expand past CreateProcess limit', () => {
  // A prompt made only of `"` characters, 100 bytes below the raw budget.
  const quoteCount = deepseekMaxPromptArgBytes - 100;
  const quoteOnlyPrompt = '"'.repeat(quoteCount);

  // Precondition: the raw-byte guard accepts this prompt, so any rejection
  // below comes from the cmd-shim check alone.
  const rawVerdict = checkPromptArgvBudget(deepseek, quoteOnlyPrompt);
  assert.equal(
    rawVerdict,
    null,
    'quote-heavy prompt under the raw byte budget must pass the pre-buildArgs guard',
  );

  // npm-style `.cmd` shim location on Windows.
  const shimPath = 'C:\\Users\\Tester\\AppData\\Roaming\\npm\\deepseek.cmd';
  const builtArgs = deepseek.buildArgs(quoteOnlyPrompt, [], [], {});
  const flagged = checkWindowsCmdShimCommandLineBudget(deepseek, shimPath, builtArgs);
  assert.ok(
    flagged,
    'quote-heavy prompt that doubles past the CreateProcess cap must trip the cmd-shim guard',
  );
  assert.equal(flagged.code, 'AGENT_PROMPT_TOO_LARGE');

  // The reported projection must exceed the guard's own limit, and that
  // limit must stay below 32_768.
  const commandLineLength = flagged.commandLineLength;
  assert.ok(commandLineLength !== undefined);
  assert.ok(
    commandLineLength > flagged.limit,
    `commandLineLength (${commandLineLength}) must exceed limit (${flagged.limit})`,
  );
  assert.ok(
    flagged.limit < 32_768,
    'guard must keep its safe limit strictly under the documented Windows CreateProcess cap',
  );
  for (const expectedText of [/DeepSeek/, /cmd\.exe quote-doubling/, /stdin support/]) {
    assert.match(flagged.message, expectedText);
  }
});
test('checkWindowsCmdShimCommandLineBudget lets ordinary prompts through .cmd resolutions', () => {
  // Same `.cmd` shim path as the quote-heavy case, but with a short plain
  // prompt: the guard must return null.
  const shimPath = 'C:\\Users\\Tester\\AppData\\Roaming\\npm\\deepseek.cmd';
  const builtArgs = deepseek.buildArgs('write hello world', [], [], {});
  const verdict = checkWindowsCmdShimCommandLineBudget(deepseek, shimPath, builtArgs);
  assert.equal(verdict, null);
});
test('checkWindowsCmdShimCommandLineBudget is a no-op for non-.cmd resolutions', () => {
  // The cmd-shim guard must ignore binaries that are not `.cmd` shims: a
  // POSIX path and a direct Windows `.exe`. The prompt is plain `x`
  // characters so this test exercises only the path filter and not the
  // quote handling covered by the direct-exe guard's tests.
  const builtArgs = deepseek.buildArgs('x'.repeat(20_000), [], [], {});
  const nonShimPaths = [
    '/usr/local/bin/deepseek',
    'C:\\Program Files\\DeepSeek\\deepseek.exe',
  ];
  for (const resolvedBin of nonShimPaths) {
    assert.equal(
      checkWindowsCmdShimCommandLineBudget(deepseek, resolvedBin, builtArgs),
      null,
    );
  }
});
// Security regression: cmd.exe runs percent-expansion on the inner line
// of `cmd /s /c "..."` regardless of quote state, so a `.cmd` shim spawn
// whose argv carries an attacker-influenced `%DEEPSEEK_API_KEY%` substring
// would otherwise let cmd substitute the daemon's env value into the
// prompt before the child ran. The cmd-shim quoting in agents.ts (which
// the budget guard uses to compute the projected line) must mirror the
// platform fix: each `%` is wrapped in `"^%"` so cmd's `^` escape makes
// the next `%` literal while `CommandLineToArgvW` concatenates the quote
// segments back into the original arg byte-for-byte. The budget math
// reflects the longer projected line; pinning the projection here means a
// regression that drops the `%` escape would surface as a budget mismatch
// (or, worse, as cmd silently expanding the env var on a real Windows
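// run). NOTE(review): the prompt below is only 3600 chars (200 x 18) and the
// sole assertion is a `null` result, so the test is not sized against the
// cmd-shim limit; it shows the escaped projection still fits under the
// budget rather than checking the projected length itself.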
test('checkWindowsCmdShimCommandLineBudget projects the %var% escape into the command line length', () => {
  // 200 copies of `%DEEPSEEK_API_KEY%`: 3600 raw characters containing 400
  // `%` signs.
  const envReference = '%DEEPSEEK_API_KEY%';
  const percentHeavyPrompt = envReference.repeat(200);

  // The raw-byte guard accepts the prompt.
  assert.equal(checkPromptArgvBudget(deepseek, percentHeavyPrompt), null);

  const shimPath = 'C:\\Users\\Tester\\AppData\\Roaming\\npm\\deepseek.cmd';
  const builtArgs = deepseek.buildArgs(percentHeavyPrompt, [], [], {});
  const verdict = checkWindowsCmdShimCommandLineBudget(deepseek, shimPath, builtArgs);

  // The cmd-shim guard accepts it too.
  // NOTE(review): a null result only shows the projected line fits under
  // the limit. A projection without the `%` escape would be shorter and
  // would also return null, so this assertion alone does not detect a
  // dropped escape. Consider asserting on the projected length.
  assert.equal(verdict, null);
});
test('checkWindowsCmdShimCommandLineBudget no-ops when resolvedBin is null or adapter has no budget', () => {
  // Unresolved binary: the guard returns null.
  const unresolvedVerdict = checkWindowsCmdShimCommandLineBudget(deepseek, null, []);
  assert.equal(unresolvedVerdict, null);

  // An adapter with no `maxPromptArgBytes` is skipped even when the
  // resolved path is a `.cmd` shim.
  const noBudgetVerdict = checkWindowsCmdShimCommandLineBudget(
    claude,
    'C:\\fake\\claude.cmd',
    [],
  );
  assert.equal(noBudgetVerdict, null);
});
// Companion to the cmd-shim guard for non-shim Windows installs (e.g. a
// cargo-built `deepseek.exe` rather than the npm `.cmd` shim). The
// cmd-shim guard early-returns on `.exe` paths because those skip the
// `cmd.exe /d /s /c` wrap, but Node/libuv still composes a
// CreateProcess `lpCommandLine` by walking each argv element through
// `quote_cmd_arg` — every embedded `"` becomes `\"`, backslashes
// adjacent to a quote get doubled. A quote-heavy prompt that fits under
// `maxPromptArgBytes` can therefore still expand past the 32_767-char
// kernel cap on a direct `.exe` spawn. The new guard recomputes the
// would-be command line using the exact libuv math so those users hit
// the same actionable `AGENT_PROMPT_TOO_LARGE` instead of a generic
// `spawn ENAMETOOLONG`.
test('checkWindowsDirectExeCommandLineBudget flags quote-heavy prompts on a direct .exe resolution', () => {
  // A prompt made only of `"` characters, 100 bytes below the raw budget.
  const quoteCount = deepseekMaxPromptArgBytes - 100;
  const quoteOnlyPrompt = '"'.repeat(quoteCount);

  // Precondition: the raw-byte guard accepts this prompt, so any rejection
  // below comes from the direct-exe check alone.
  const rawVerdict = checkPromptArgvBudget(deepseek, quoteOnlyPrompt);
  assert.equal(
    rawVerdict,
    null,
    'quote-heavy prompt under the raw byte budget must pass the pre-buildArgs guard',
  );

  // Non-shim install: a `.exe` under Program Files (the path has a space).
  const exePath = 'C:\\Program Files\\DeepSeek\\deepseek.exe';
  const builtArgs = deepseek.buildArgs(quoteOnlyPrompt, [], [], {});
  const flagged = checkWindowsDirectExeCommandLineBudget(deepseek, exePath, builtArgs);
  assert.ok(
    flagged,
    'quote-heavy prompt that expands past the CreateProcess cap on a direct .exe spawn must trip the guard',
  );
  assert.equal(flagged.code, 'AGENT_PROMPT_TOO_LARGE');

  // The reported projection must exceed the guard's own limit, and that
  // limit must stay below 32_768.
  const commandLineLength = flagged.commandLineLength;
  assert.ok(commandLineLength !== undefined);
  assert.ok(
    commandLineLength > flagged.limit,
    `commandLineLength (${commandLineLength}) must exceed limit (${flagged.limit})`,
  );
  assert.ok(
    flagged.limit < 32_768,
    'guard must keep its safe limit strictly under the documented Windows CreateProcess cap',
  );
  for (const expectedText of [/DeepSeek/, /libuv quote-escaping/, /stdin support/]) {
    assert.match(flagged.message, expectedText);
  }
});
test('checkWindowsDirectExeCommandLineBudget lets ordinary prompts through .exe resolutions', () => {
  // Direct `.exe` install with a short plain prompt: the guard must
  // return null.
  const exePath = 'C:\\Program Files\\DeepSeek\\deepseek.exe';
  const builtArgs = deepseek.buildArgs('write hello world', [], [], {});
  const verdict = checkWindowsDirectExeCommandLineBudget(deepseek, exePath, builtArgs);
  assert.equal(verdict, null);
});
test('checkWindowsDirectExeCommandLineBudget no-ops on .cmd / .bat resolutions and POSIX paths', () => {
  // Same quote-heavy argv for every case: if the direct-exe guard looked
  // at any of these paths it would have plenty to complain about.
  const quoteHeavyArgs = deepseek.buildArgs(
    '"'.repeat(deepseekMaxPromptArgBytes - 100),
    [],
    [],
    {},
  );

  const resolutionsOutsideThisGuard = [
    // `.cmd` / `.bat` shims belong to the cmd-shim guard; answering here as
    // well would report the same oversized prompt twice.
    'C:\\Users\\Tester\\AppData\\Roaming\\npm\\deepseek.cmd',
    'C:\\Users\\Tester\\AppData\\Roaming\\npm\\deepseek.bat',
    // POSIX paths are not spawned through Windows' CreateProcess, so there
    // is no concatenated command line to measure; oversized argv on those
    // hosts is left to the pre-buildArgs `checkPromptArgvBudget`.
    '/usr/local/bin/deepseek',
    '/home/dev/.cargo/bin/deepseek',
  ];

  for (const resolvedBin of resolutionsOutsideThisGuard) {
    assert.equal(
      checkWindowsDirectExeCommandLineBudget(deepseek, resolvedBin, quoteHeavyArgs),
      null,
    );
  }
});
test('checkWindowsDirectExeCommandLineBudget no-ops when resolvedBin is null/empty or adapter has no budget', () => {
  // No resolved binary (null or empty string): the guard has nothing to
  // measure and must stay silent so the unavailable-agent handling
  // elsewhere is the one that reports the problem.
  const withoutBin = checkWindowsDirectExeCommandLineBudget(deepseek, null, []);
  assert.equal(withoutBin, null);
  const withEmptyBin = checkWindowsDirectExeCommandLineBudget(deepseek, '', []);
  assert.equal(withEmptyBin, null);

  // An adapter that declares no `maxPromptArgBytes` (prompt delivered via
  // stdin) is skipped even when the resolved path is a Windows `.exe`.
  const withStdinAdapter = checkWindowsDirectExeCommandLineBudget(
    claude,
    'C:\\fake\\claude.exe',
    [],
  );
  assert.equal(withStdinAdapter, null);
});
// The two post-buildArgs guards are deliberately exclusive: the
// cmd-shim guard owns `.cmd` / `.bat` (cmd.exe quote-doubling math),
// the direct-exe guard owns everything else on Windows (libuv
// quote-escaping math). For any single resolved bin, at most one
// should ever fire — otherwise an oversized prompt would emit two
// SSE error events back to back. Pin both branches with a quote-heavy
// prompt that's over the kernel cap under either quoting rule.
test('cmd-shim and direct-exe guards are mutually exclusive on a single resolution', () => {
  // One oversized, quote-heavy argv checked against both guards for each
  // kind of resolved binary: exactly one guard may answer per path.
  const oversizedArgs = deepseek.buildArgs(
    '"'.repeat(deepseekMaxPromptArgBytes - 100),
    [],
    [],
    {},
  );

  // `.cmd` shim: the cmd-shim guard fires, the direct-exe guard stays quiet.
  const shimBin = 'C:\\Users\\Tester\\AppData\\Roaming\\npm\\deepseek.cmd';
  const shimGuardOnShim = checkWindowsCmdShimCommandLineBudget(deepseek, shimBin, oversizedArgs);
  assert.ok(shimGuardOnShim);
  const exeGuardOnShim = checkWindowsDirectExeCommandLineBudget(deepseek, shimBin, oversizedArgs);
  assert.equal(exeGuardOnShim, null);

  // Direct `.exe`: the roles are reversed.
  const directBin = 'C:\\Program Files\\DeepSeek\\deepseek.exe';
  const shimGuardOnExe = checkWindowsCmdShimCommandLineBudget(deepseek, directBin, oversizedArgs);
  assert.equal(shimGuardOnExe, null);
  const exeGuardOnExe = checkWindowsDirectExeCommandLineBudget(deepseek, directBin, oversizedArgs);
  assert.ok(exeGuardOnExe);
});
test('deepseek entry does not advertise deepseek-tui as a fallback bin', () => {
  // `deepseek` is the dispatcher that understands `exec` / `--auto`, while
  // `deepseek-tui` is the companion binary it launches. A host with only
  // `deepseek-tui` is not a supported install, and that binary does not
  // accept `exec --auto <prompt>`. Listing it in `fallbackBins` would make
  // the agent look available and then fail on the first chat run. This
  // test pins the absence so the fallback cannot reappear without a
  // matching buildArgs branch and test.
  const { fallbackBins } = deepseek as TestAgentDef & { fallbackBins?: string[] };
  const declaresFallbackBins = Array.isArray(fallbackBins) && fallbackBins.length > 0;
  assert.equal(
    declaresFallbackBins,
    false,
    `deepseek must not declare fallbackBins until the deepseek-tui-only invocation is implemented and tested; got ${JSON.stringify(fallbackBins)}`,
  );
});
test('vibe args use empty array for acp-json-rpc streaming', () => {
  // The vibe adapter is expected to build no argv at all and to declare
  // the `acp-json-rpc` stream format.
  const builtArgs = vibe.buildArgs('', [], [], {});
  const expectedArgs: string[] = [];
  assert.deepEqual(builtArgs, expectedArgs);
  assert.equal(vibe.streamFormat, 'acp-json-rpc');
});
test('vibe fetchModels falls back to fallbackModels when detection fails', async () => {
  assert.ok(vibe.fetchModels, 'vibe must define fetchModels');

  // Pointing fetchModels at a binary that does not exist is expected to
  // reject; the rejection is mapped to `null` here so it can be asserted.
  // Callers that hit this failure are meant to use `fallbackModels`.
  const pendingModels = vibe.fetchModels('/nonexistent/vibe-acp', {});
  const fetched = await pendingModels.catch(() => null);
  assert.equal(fetched, null);

  // The static fallback list must exist and start with the `default` entry.
  assert.ok(Array.isArray(vibe.fallbackModels));
  const firstFallback = vibe.fallbackModels[0];
  assert.ok(firstFallback);
  assert.equal(firstFallback.id, 'default');
});

View file

@ -0,0 +1,185 @@
import { test } from 'vitest';
import {
AGENT_DEFS, assert, chmodSync, codex, detectAgents, join, mkdtempSync, rmSync, tmpdir, writeFileSync,
} from './helpers/test-helpers.js';
test('AGENT_DEFS ids are unique', () => {
  // Walk the registry once, recording every id that has already been seen.
  // Each repeated occurrence is reported, in registry order.
  const seen = new Set<string>();
  const dupes: string[] = [];
  for (const { id } of AGENT_DEFS) {
    if (seen.has(id)) {
      dupes.push(id);
    } else {
      seen.add(id);
    }
  }
  assert.deepEqual(dupes, [], `duplicate agent ids: ${JSON.stringify(dupes)}`);
});
test('codex args disable plugins when OD_CODEX_DISABLE_PLUGINS is 1', () => {
  // With the env knob set to exactly "1", the argv must begin with the
  // standard exec/sandbox prefix followed by `--disable plugins`.
  process.env.OD_CODEX_DISABLE_PLUGINS = '1';
  const expectedPrefix = [
    'exec',
    '--json',
    '--skip-git-repo-check',
    '--sandbox',
    'workspace-write',
    '-c',
    'sandbox_workspace_write.network_access=true',
    '--disable',
    'plugins',
  ];
  const builtArgs = codex.buildArgs('', [], [], {}, { cwd: '/tmp/od-project' });
  assert.deepEqual(builtArgs.slice(0, expectedPrefix.length), expectedPrefix);
});
test('codex args use workspace-write sandbox instead of deprecated full-auto', () => {
  // Start from a clean env so the plugin knob cannot influence the argv.
  delete process.env.OD_CODEX_DISABLE_PLUGINS;
  const builtArgs = codex.buildArgs('', [], [], {}, { cwd: '/tmp/od-project' });

  // The deprecated flag must not appear anywhere in the argv ...
  const usesFullAuto = builtArgs.includes('--full-auto');
  assert.equal(usesFullAuto, false);

  // ... and the argv must open with the explicit sandbox selection.
  const expectedPrefix = [
    'exec',
    '--json',
    '--skip-git-repo-check',
    '--sandbox',
    'workspace-write',
  ];
  assert.deepEqual(builtArgs.slice(0, expectedPrefix.length), expectedPrefix);
});
test('codex args keep plugins enabled when OD_CODEX_DISABLE_PLUGINS is unset', () => {
  // Without the env knob, neither half of `--disable plugins` may appear.
  delete process.env.OD_CODEX_DISABLE_PLUGINS;
  const builtArgs = codex.buildArgs('', [], [], {}, { cwd: '/tmp/od-project' });
  for (const forbiddenToken of ['--disable', 'plugins']) {
    assert.equal(builtArgs.includes(forbiddenToken), false);
  }
});
test('codex args keep plugins enabled when OD_CODEX_DISABLE_PLUGINS is not 1', () => {
  // Only the exact value "1" enables the knob; a truthy-looking value such
  // as "true" must leave plugins enabled.
  process.env.OD_CODEX_DISABLE_PLUGINS = 'true';
  const builtArgs = codex.buildArgs('', [], [], {}, { cwd: '/tmp/od-project' });
  for (const forbiddenToken of ['--disable', 'plugins']) {
    assert.equal(builtArgs.includes(forbiddenToken), false);
  }
});
test('codex model picker includes current OpenAI choices in priority order', async () => {
  // Picker order is part of the contract: `default` first, then models in
  // priority order.
  const expectedModels = [
    'default',
    'gpt-5.5',
    'gpt-5.4',
    'gpt-5.4-mini',
    'gpt-5.3-codex',
    'gpt-5.1',
    'gpt-5.1-codex-mini',
    'gpt-5-codex',
    'gpt-5',
    'o3',
    'o4-mini',
  ];
  assert.deepEqual(codex.fallbackModels.map((m) => m.id), expectedModels);
  assert.ok(codex.reasoningOptions, 'codex must define reasoningOptions');
  assert.deepEqual(codex.reasoningOptions.map((o) => o.id), [
    'default',
    'none',
    'minimal',
    'low',
    'medium',
    'high',
    'xhigh',
  ]);

  // A selected model and reasoning effort must both reach the argv.
  const args = codex.buildArgs(
    '',
    [],
    [],
    { model: 'gpt-5.5', reasoning: 'xhigh' },
    { cwd: '/tmp/od-project' },
  );
  assert.ok(args.includes('--model'));
  assert.ok(args.includes('gpt-5.5'));
  assert.ok(args.includes('model_reasoning_effort="xhigh"'));

  // Detection is exercised against a fake `codex` script that only answers
  // `--version`. The test points PATH and OD_AGENT_HOME at a temp dir, so
  // both are snapshotted here and restored in `finally`. Otherwise later
  // tests could inherit a PATH that points at a deleted directory. This is
  // done locally rather than relying on a shared restore hook.
  const savedEnv: ReadonlyArray<readonly [string, string | undefined]> = [
    ['OD_AGENT_HOME', process.env.OD_AGENT_HOME],
    ['PATH', process.env.PATH],
  ];
  const dir = mkdtempSync(join(tmpdir(), 'od-agents-codex-models-'));
  try {
    const codexBin = join(dir, 'codex');
    writeFileSync(
      codexBin,
      '#!/bin/sh\nif [ "$1" = "--version" ]; then echo "codex 1.0.0"; exit 0; fi\nexit 0\n',
    );
    chmodSync(codexBin, 0o755);
    process.env.OD_AGENT_HOME = dir;
    process.env.PATH = dir;
    const agents = await detectAgents();
    const detected = agents.find((agent) => agent.id === 'codex');
    assert.ok(detected);
    assert.equal(detected.available, true);
    assert.equal(detected.version, 'codex 1.0.0');
    assert.deepEqual(detected.models.map((m: { id: string }) => m.id), expectedModels);
  } finally {
    // Restore the environment before removing the directory it pointed at.
    // Assigning `undefined` to process.env would store the string
    // "undefined", so variables that were unset are deleted instead.
    for (const [key, value] of savedEnv) {
      if (value === undefined) {
        delete process.env[key];
      } else {
        process.env[key] = value;
      }
    }
    rmSync(dir, { recursive: true, force: true });
  }
});
test('codex picker includes gpt-5.1 model family', () => {
  // Both members of the gpt-5.1 family must be offered by the picker.
  const pickerIds = codex.fallbackModels.map((entry) => entry.id);
  for (const requiredId of ['gpt-5.1', 'gpt-5.1-codex-mini']) {
    assert.equal(pickerIds.some((id) => id === requiredId), true);
  }
});
// Recent Codex CLI versions reject a bare `-` argv sentinel; passing it
// alongside the stdin pipe causes `error: unexpected argument '-' found`
// and exit code 2 before any prompt is read. We deliver the prompt via
// stdin pipe alone (gated by `promptViaStdin: true`). Regression test for #237.
test('codex args do not include the literal `-` stdin sentinel (regression of #237)', () => {
  const spawnContext = { cwd: '/tmp/od-project' };
  // True when the argv contains a bare `-` token.
  const hasStdinSentinel = (argv: readonly string[]): boolean => argv.includes('-');

  // Plugins enabled: default options, explicit model, explicit reasoning.
  delete process.env.OD_CODEX_DISABLE_PLUGINS;
  const baseArgs = codex.buildArgs('', [], [], {}, spawnContext);
  assert.equal(hasStdinSentinel(baseArgs), false);

  const withModel = codex.buildArgs('', [], [], { model: 'gpt-5-codex' }, spawnContext);
  assert.equal(hasStdinSentinel(withModel), false);

  const withReasoning = codex.buildArgs('', [], [], { reasoning: 'high' }, spawnContext);
  assert.equal(hasStdinSentinel(withReasoning), false);

  // Plugins disabled: the extra `--disable plugins` pair must not bring the
  // sentinel back either.
  process.env.OD_CODEX_DISABLE_PLUGINS = '1';
  const withDisablePlugins = codex.buildArgs('', [], [], {}, spawnContext);
  assert.equal(hasStdinSentinel(withDisablePlugins), false);
});
test('codex args pass valid extraAllowedDirs with repeatable --add-dir flags', () => {
  delete process.env.OD_CODEX_DISABLE_PLUGINS;

  // Deliberately malformed input: empty, null and undefined entries are
  // mixed in with two usable directories. The cast gets the bad entries
  // past the type checker so runtime filtering is what gets tested.
  const extraAllowedDirs = [
    '/repo/skills',
    '',
    null,
    '/tmp/codex/generated_images',
    undefined,
  ] as unknown as string[];
  const args = codex.buildArgs('', [], extraAllowedDirs, {}, { cwd: '/tmp/od-project' });

  // Collect every `--add-dir` flag together with the token that follows it.
  const addDirTokens: string[] = [];
  args.forEach((arg, index) => {
    const isFlag = arg === '--add-dir';
    const followsFlag = args[index - 1] === '--add-dir';
    if (isFlag || followsFlag) {
      addDirTokens.push(arg);
    }
  });

  assert.deepEqual(addDirTokens, [
    '--add-dir',
    '/repo/skills',
    '--add-dir',
    '/tmp/codex/generated_images',
  ]);
});

View file

@ -0,0 +1,210 @@
---
id: 20260509-agents-ts-split
name: Agents Ts Split
status: implemented
created: '2026-05-09'
---
## Overview
### Goals
- Split `agents.ts` and `agents.test.ts`.
- During the split, keep the `agents.ts` name unchanged, but use `runtimes` as the new directory name so the agent terminology can gradually migrate to runtimes later.
- Because test coverage is limited, keep the split refactor primarily as code movement rather than logic rewriting. If any logic truly needs to be rewritten, leave it for later to reduce the overall change risk.
## Research
### Existing System
- `apps/daemon/src/agents.ts` is currently both the public facade and the implementation collection for daemon agent adapters. It exports `AGENT_DEFS`, agent detection, binary resolution, MCP helpers, prompt budget helpers, spawn env helpers, and model validation helpers. Source: `apps/daemon/src/agents.ts:157,970-983,1094-1111,1132-1433`
- `AGENT_DEFS` is a linear array that currently contains adapter definitions for Claude, Codex, Devin, Gemini, OpenCode, Hermes, Kimi, Cursor Agent, Qwen, Qoder, Copilot, Pi, Kiro, Kilo, Vibe, DeepSeek, and others. Source: `apps/daemon/src/agents.ts:157-812`
- Agent definitions inline adapter-specific CLI protocols, model fallbacks, reasoning options, stream formats, fallback binaries, MCP discovery, and env knobs. Source: `apps/daemon/src/agents.ts:157-260,420-467,549-760,812-899`
- Executable resolution shares `AGENT_BIN_ENV_KEYS`, PATH/toolchain directory discovery, configured binary overrides, fallback binaries, and Windows PATHEXT handling. Source: `apps/daemon/src/agents.ts:91-109,900-983`
- The detection flow reuses executable resolution, agent env, version probing, help capability probing, and model fetching, and refreshes the live model cache. Source: `apps/daemon/src/agents.ts:985-1105,1394-1419`
- MCP live artifacts are currently generated by `buildLiveArtifactsMcpServersForAgent`, which creates the `od mcp live-artifacts` server config when `def.mcpDiscovery === 'mature-acp'`. Source: `apps/daemon/src/agents.ts:1111-1121`
- Prompt budget helpers cover the raw prompt byte budget for argv-bound adapters, the Windows `.cmd/.bat` shim command-line budget, and the Windows direct `.exe` command-line budget. Source: `apps/daemon/src/agents.ts:1123-1330`
- The spawn env helper merges configured env, expands `~`, and handles Claude Code's `ANTHROPIC_API_KEY`/`ANTHROPIC_BASE_URL` strategy case-insensitively. Source: `apps/daemon/src/agents.ts:1342-1392`
- `apps/daemon/tests/agents.test.ts` imports facade exports through `../src/agents.js` and centrally tests the registry, per-agent args, MCP, executable resolution, env, and prompt budget. Source: `apps/daemon/tests/agents.test.ts:12-21,134-220,315-379,1060-1369,1812-2109`
- The top of the test file centrally maintains agent fixtures, env snapshots, a `globalThis.fetch` snapshot, the `process.platform` descriptor, and `afterEach` restoration. Source: `apps/daemon/tests/agents.test.ts:24-124`
- Other daemon modules use the facade directly through `./agents.js` / `../src/agents.js`: the server uses the MCP helper and spawn env, the connection test uses spawn env, and the chat-route test uses `getAgentDef`. Source: `apps/daemon/src/server.ts:22-32,5298,5576`; `apps/daemon/src/connectionTest.ts:26-27,1009`; `apps/daemon/tests/chat-route.test.ts:25`
- Repository guidance still points CLI/agent argument changes to `apps/daemon/src/agents.ts` and the matching parser tests; app tests should remain in `apps/<app>/tests/`. Source: `apps/AGENTS.md:12-24`
### Available Approaches
- **Pure migration with facade**: Keep `apps/daemon/src/agents.ts` as the facade for existing imports, split the implementation internally, and let existing tests continue reading from `../src/agents.js` in the first phase. Source: `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:160-175,227-249`
- **Domain module split**: Split the executable resolver, env, MCP, prompt budget, registry, models, and per-adapter defs into independent modules, following the target structure listed in the report. Source: `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:177-206`
- **Test split by responsibility**: Split the single `agents.test.ts` file into defs, per-adapter args, executables, env, MCP, and prompt-budget test files, and extract env restore and tmp executable fixture helpers. Source: `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:208-266`
- **Registry stabilization**: Later, have `defs/index.ts` export by sorted agent id, and have `registry.ts` aggregate the array and enforce id uniqueness. Source: `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:268-277`
- **No facade migration**: Directly changing caller imports has limited viability because `server.ts`, `connectionTest.ts`, `chat-route.test.ts`, and existing agent tests all depend on the `agents.js` facade. Source: `apps/daemon/src/server.ts:22-32`; `apps/daemon/src/connectionTest.ts:26-27`; `apps/daemon/tests/chat-route.test.ts:25`; `apps/daemon/tests/agents.test.ts:12-21`
### Constraints & Dependencies
- The Zest spec requires keeping the `agents.ts` name, using `runtimes` for the new directory name, and gradually renaming the agents concept to runtimes later. Source: `specs/change/20260509-agents-ts-split/spec.md:12-14`
- The current source uses `.js` import specifiers, so split TypeScript files must preserve the ESM import suffix convention. Source: `apps/daemon/src/agents.ts:12-13`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:292-293`
- `agentCapabilities` is a module-level cache, and `buildArgs` reads capability flags written during detection. Source: `apps/daemon/src/agents.ts:29-35,198-217,1043-1061`
- The executable resolver has module-level `cachedToolchainDirs`/toolchain path behavior, with tests covering OD_AGENT_HOME, NPM_CONFIG_PREFIX, VP_HOME, PATHEXT, fallbackBins, and configured `*_BIN` overrides. Source: `apps/daemon/src/agents.ts:900-983`; `apps/daemon/tests/agents.test.ts:1060-1361,1858-2004`
- `fetchModels`/`probe` have intentional fallback behavior: model listing and version/help probing failures keep fallback models or availability state. Source: `apps/daemon/src/agents.ts:985-1069`
- Test changes should stay in `apps/daemon/tests/`, while `src/` remains source-only. Source: `apps/AGENTS.md:14-24`
- Verification commands should use daemon-scoped checks: `pnpm --filter @open-design/daemon typecheck` and `pnpm --filter @open-design/daemon test`. Source: `apps/AGENTS.md:39-46`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:244-249,262-266`
- The main risks listed in the report are module initialization order, circular dependencies, ESM suffixes, test isolation, and export compatibility. Source: `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:278-299`
### Key References
- `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:10-28` - merge conflict counts and registry conflict pattern.
- `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:41-80` - executable/env conflict surfaces.
- `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:82-123` - MCP, argv/stdin tests, fixture conflict surfaces.
- `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:158-277` - proposed staged split.
- `apps/daemon/src/agents.ts:157-1433` - current implementation surface.
- `apps/daemon/tests/agents.test.ts:1-2109` - current concentrated test surface.
## Design
### Architecture Overview
```mermaid
flowchart TD
Facade["apps/daemon/src/agents.ts\npublic compatibility facade"]
Registry["runtimes/registry.ts\nAGENT_DEFS + getAgentDef"]
Defs["runtimes/defs/*.ts\nper-runtime definitions"]
Detection["runtimes/detection.ts\ndetect/probe/fetch models"]
Executables["runtimes/executables.ts\nPATH/PATHEXT/*_BIN/fallback bins"]
Env["runtimes/env.ts\nspawn env + configured env"]
Models["runtimes/models.ts\ndefaults + live model cache"]
Capabilities["runtimes/capabilities.ts\nshared capability cache"]
Mcp["runtimes/mcp.ts\nlive artifacts MCP"]
PromptBudget["runtimes/prompt-budget.ts\nargv/windows budget guards"]
Facade --> Registry
Facade --> Detection
Facade --> Executables
Facade --> Env
Facade --> Models
Facade --> Mcp
Facade --> PromptBudget
Registry --> Defs
Defs --> Models
Defs --> Capabilities
Detection --> Registry
Detection --> Executables
Detection --> Env
Detection --> Models
Detection --> Capabilities
```
### Change Scope
- Area: `apps/daemon/src/agents.ts` becomes a thin public facade that re-exports the existing API surface, keeping current imports from `./agents.js` and `../src/agents.js` stable. Impact: internal file movement without caller migration in this change. Source: `apps/daemon/src/server.ts:22-32,5298,5576`; `apps/daemon/src/connectionTest.ts:26-27,1009`; `apps/daemon/tests/chat-route.test.ts:25`; `apps/daemon/tests/agents.test.ts:12-21`
- Area: new `apps/daemon/src/runtimes/` modules own the moved implementation. Impact: the new directory name follows the spec's runtimes naming direction while preserving the old facade name. Source: `specs/change/20260509-agents-ts-split/spec.md:12-14`
- Area: adapter definitions move from the monolithic `AGENT_DEFS` array to per-runtime definition files under `runtimes/defs/`. Impact: merge conflicts shrink to individual runtime files while registry behavior stays centralized. Source: `apps/daemon/src/agents.ts:157-812`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:10-28,177-206`
- Area: tests move from one concentrated `agents.test.ts` into responsibility-based files under `apps/daemon/tests/runtimes/` plus shared helpers. Impact: test ownership stays in daemon tests and source remains source-only. Source: `apps/daemon/tests/agents.test.ts:1-2109`; `apps/AGENTS.md:14-24`
- Area: no database, API contract, generated artifact, or rollout migration surface. Impact: validation is daemon typecheck and daemon tests. Source: `apps/AGENTS.md:39-46`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:244-266`
### Design Decisions
- Decision: keep `apps/daemon/src/agents.ts` as the only compatibility facade and move implementation to `apps/daemon/src/runtimes/`. Source: `specs/change/20260509-agents-ts-split/spec.md:12-14`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:160-206`
- Decision: preserve current public export names from the facade, including registry, detection, executable resolution, MCP, prompt budget, env, and model helpers. Source: `apps/daemon/src/agents.ts:970-983,1094-1111,1132-1433`; `apps/daemon/tests/agents.test.ts:12-21`
- Decision: keep phase 1 and phase 2 as code movement and test movement, with no behavioral rewrites or runtime-order changes. Source: `specs/change/20260509-agents-ts-split/spec.md:12-14`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:160-175,208-266`
- Decision: keep `AGENT_DEFS` aggregation centralized in `runtimes/registry.ts`, importing individual `runtimes/defs/*.ts` definitions in the existing order. Source: `apps/daemon/src/agents.ts:157-812`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:268-277`
- Decision: isolate singleton mutable state in dedicated helper modules: `capabilities.ts` for `agentCapabilities`, `executables.ts` for toolchain directory cache, and `models.ts` for live model cache. Source: `apps/daemon/src/agents.ts:29-35,900-983,1394-1433`
- Decision: enforce dependency direction from facade to domain modules, registry to defs, detection to helpers, and helpers away from the facade/registry unless explicitly required. Source: `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:278-299`
- Decision: preserve ESM `.js` import specifiers in every new TypeScript import. Source: `apps/daemon/src/agents.ts:12-13`; `specs/change/20260509-agents-ts-split/spec.md:43-50`
- Decision: split tests by behavior while continuing to import through `../../src/agents.js` where compatibility is the behavior under test. Source: `apps/daemon/tests/agents.test.ts:12-21`; `apps/daemon/tests/agents.test.ts:134-220,315-379,1060-1369,1812-2109`
### Why this design
- It reduces merge conflicts at the adapter-definition and test-responsibility seams identified in the report while keeping the public facade stable.
- It follows the runtimes naming direction immediately, so later terminology migration does not start with a newly created `src/agents/` implementation tree.
- It minimizes behavior risk by moving stateful helpers as single modules instead of duplicating or rewriting them.
- It keeps compatibility tests pointed at the facade, so missing exports and accidental caller breakage fail early.
### Test Strategy
- Registry and facade: verify exported definitions, ids, lookup behavior, and current compatibility imports through `../../src/agents.js`. Source: `apps/daemon/tests/agents.test.ts:12-21,134-220`
- Adapter args: split argv/stdin/acp/runtime argument assertions into focused files while preserving existing fixtures. Source: `apps/daemon/tests/agents.test.ts:315-379`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:208-266`
- Executables: preserve coverage for configured `*_BIN`, PATH lookup, fallback binaries, toolchain dirs, OD_AGENT_HOME, NPM_CONFIG_PREFIX, VP_HOME, Windows PATHEXT, and missing executable cases. Source: `apps/daemon/src/agents.ts:900-983`; `apps/daemon/tests/agents.test.ts:1060-1361,1858-2004`
- Env: preserve configured env merge, `~` expansion, and Claude Code API key/base URL case-insensitive handling. Source: `apps/daemon/src/agents.ts:1342-1392`
- Detection and models: preserve probing, help capability flags, fetch model fallback behavior, and live model cache updates. Source: `apps/daemon/src/agents.ts:985-1105,1394-1419`
- MCP and prompt budget: preserve mature ACP MCP live artifacts behavior and argv/Windows command-line budget checks. Source: `apps/daemon/src/agents.ts:1111-1330`
- Validation: run `pnpm --filter @open-design/daemon typecheck` and `pnpm --filter @open-design/daemon test` after each implementation step. Source: `apps/AGENTS.md:39-46`; `specs/change/20260509-agents-ts-split/agents-merge-conflict-report.md:244-266`
### Pseudocode
Flow:
1. Create `runtimes/` helper modules for models, capabilities, invocation, paths, executables, env, MCP, prompt budget, detection, registry, and definitions.
2. Move unchanged code blocks from `agents.ts` into the matching modules.
3. Update imports with `.js` suffixes and keep singleton state in one owning module.
4. Replace `agents.ts` contents with facade exports matching the previous public API.
5. Split `agents.test.ts` into focused `apps/daemon/tests/runtimes/*.test.ts` files and shared helpers.
6. Run daemon typecheck/tests, then address only movement-related failures.
### File Structure
- `apps/daemon/src/agents.ts` - stable public facade re-exporting existing daemon runtime helpers.
- `apps/daemon/src/runtimes/types.ts` - shared runtime definition and helper types moved from the monolith.
- `apps/daemon/src/runtimes/models.ts` - default model option, model validation helpers, live model cache.
- `apps/daemon/src/runtimes/capabilities.ts` - shared capability cache used by detection and runtime args.
- `apps/daemon/src/runtimes/invocation.ts` - process invocation wrapper around `execAgentFile`.
- `apps/daemon/src/runtimes/paths.ts` - home expansion and path utilities.
- `apps/daemon/src/runtimes/executables.ts` - executable resolution, PATH scanning, PATHEXT, fallback bins, toolchain dirs.
- `apps/daemon/src/runtimes/env.ts` - `spawnEnvForAgent` and configured environment handling.
- `apps/daemon/src/runtimes/mcp.ts` - live artifacts MCP server construction.
- `apps/daemon/src/runtimes/prompt-budget.ts` - prompt argv and Windows command-line budget checks.
- `apps/daemon/src/runtimes/detection.ts` - runtime detection, probing, help capability discovery, model fetching.
- `apps/daemon/src/runtimes/resolution.ts` - `resolveAgentBin` glue from registry to executable resolver.
- `apps/daemon/src/runtimes/registry.ts` - `AGENT_DEFS`, `getAgentDef`, id uniqueness guard.
- `apps/daemon/src/runtimes/defs/*.ts` - per-runtime definitions moved from the current `AGENT_DEFS` array.
- `apps/daemon/tests/runtimes/*.test.ts` - split daemon runtime tests by responsibility.
- `apps/daemon/tests/runtimes/helpers/*.ts` - shared env, fetch, platform, executable fixture helpers extracted from the current monolith.
### Interfaces / APIs
- `apps/daemon/src/agents.ts` continues to export the current public API used by daemon callers and tests.
- New `runtimes/*` modules are internal daemon implementation modules; external app/package imports should continue through `agents.ts` unless a future spec promotes a dedicated contract.
- Test helpers remain under `apps/daemon/tests/runtimes/helpers/` and are not imported by app source.
### Edge Cases
- Preserve current `AGENT_DEFS` order during the initial split so UI order and existing assertions stay stable.
- Move caches, do not duplicate them, for `agentCapabilities`, toolchain dirs, and live models.
- Keep intentional fallback behavior for failed version/help/model probing.
- Keep Windows-specific PATHEXT and command-line budget paths covered after module movement.
- Keep env restoration, `globalThis.fetch` restoration, and `process.platform` descriptor restoration shared across split tests.
## Plan
- [x] Step 1: Establish runtime module skeleton and facade
- [x] Substep 1.1 Implement: create `apps/daemon/src/runtimes/` modules and move shared types/constants/helpers without behavior changes.
- [x] Substep 1.2 Implement: replace `apps/daemon/src/agents.ts` with compatibility exports for the existing API surface.
- [x] Substep 1.3 Verify: run daemon typecheck and the split runtime tests against the facade.
- [x] Step 2: Split runtime definitions and registry
- [x] Substep 2.1 Implement: move each `AGENT_DEFS` entry into `runtimes/defs/*.ts` while preserving order.
- [x] Substep 2.2 Implement: centralize aggregation and lookup in `runtimes/registry.ts` with an id uniqueness guard.
- [x] Substep 2.3 Verify: run registry, args, detection, and daemon typecheck coverage.
- [x] Step 3: Split tests by responsibility
- [x] Substep 3.1 Implement: extract shared env/fetch/platform/tmp executable helpers under `apps/daemon/tests/runtimes/helpers/`.
- [x] Substep 3.2 Implement: split `agents.test.ts` into registry, args, executables, env, detection, MCP, and prompt-budget test files.
- [x] Substep 3.3 Verify: run `pnpm --filter @open-design/daemon test` and ensure split tests still import compatibility APIs through the facade where relevant.
- [x] Step 4: Stabilize edge cases and review boundaries
- [x] Substep 4.1 Implement: fix movement-only circular imports, `.js` import suffixes, and singleton ownership issues found by validation.
- [x] Substep 4.2 Verify: run `pnpm --filter @open-design/daemon typecheck` and `pnpm --filter @open-design/daemon test`.
- [x] Substep 4.3 Verify: review changed files against app test placement and facade compatibility boundaries.
## Notes
<!-- Optional sections — add what's relevant. -->
### Implementation
- Split `apps/daemon/src/agents.ts` into a thin facade over `apps/daemon/src/runtimes/*` modules.
- Moved adapter definitions into `apps/daemon/src/runtimes/defs/*.ts` and preserved registry order in `apps/daemon/src/runtimes/registry.ts`.
- Kept singleton ownership in dedicated modules: capabilities cache, executable toolchain-dir cache, and live model cache.
- Split daemon agent tests into `apps/daemon/tests/runtimes/*.test.ts` with shared helpers under `apps/daemon/tests/runtimes/helpers/`.
- Fixed configured-env `~` expansion after review and added split-test coverage for home-path expansion.
### Verification
- `pnpm --filter @open-design/daemon typecheck`
- `pnpm --filter @open-design/daemon exec vitest run -c vitest.config.ts tests/runtimes`
- `pnpm --filter @open-design/daemon exec vitest run -c vitest.config.ts tests/chat-route.test.ts` ✅ passes when rerun in isolation, after `tests/chat-route.test.ts` failed once (flaky) during a full-suite run.
- `pnpm --filter @open-design/daemon test` ⚠️ runtime split tests passed; the full suite still fails on pre-existing, unrelated assertions in `tests/finalize-design.test.ts`, where resolved artifact names include long relative temp paths.