open-design/apps/daemon/tests/runtimes/helpers/test-helpers.ts
Sriram Sivakumar 0bd07b2a3d
fix(daemon): grok-build — pass prompt inline as -p value, drop stdin (#2259)
* fix(daemon): grok-build runtime — pass prompt inline as -p value, drop stdin

Grok Build CLI 0.1.212 enforces `-p, --single <PROMPT>` as a value-requiring
flag — invoking with bare `-p` and piping the prompt to stdin now fails with:

  error: a value is required for '--single <PROMPT>' but none was supplied

The previous runtime def used `promptViaStdin: true` + `buildArgs` returning
`['-p']`, which only worked against earlier grok builds that read the prompt
from stdin when `-p` had no inline value.

This change inlines the prompt as the `-p` argument value and flips
`promptViaStdin: false`. Linux `MAX_ARG_STRLEN` (128 KB) is enough headroom
for typical Open Design prompts; if we ever hit `E2BIG` on a very large
brief, a follow-up could shell out to `--prompt-file <tempfile>`.

Verified against grok 0.1.212 (b7b8204a4) — single-turn invocations now
return clean text replies instead of exit 2.

* fix(daemon): declare grok-build argv prompt budget + regression coverage

@mrcfps' review on #2259 flagged that moving the Grok Build adapter from
the (no-longer-working) stdin path to argv would regress oversized
composed prompts from the actionable AGENT_PROMPT_TOO_LARGE error we
already emit for DeepSeek to a raw spawn ENAMETOOLONG / E2BIG instead.
Fixed by mirroring the DeepSeek argv-budget shape:

- grok-build.ts: `maxPromptArgBytes: 30_000` (same headroom as DeepSeek,
  ~2.7 KB under the Windows CreateProcess 32_767-char cap) so
  `checkPromptArgvBudget` pre-flights composed prompts (system + history
  + skills + design-system content + user message) before spawn.
- prompt-budget.ts: Grok-Build-specific message — names the `-p /
  --single` flag, the xAI CLI 0.1.212+ behavior change, and points the
  user at stdin-capable adapters (claude / codex / hermes) when they
  need to ship large local context.
- Tests: 3 new vitest cases in prompt-budget.test.ts — pin the budget
  field, exercise the strict-overrun + at-limit + CJK byte-count guards
  exactly like the DeepSeek regression set, and assert the Grok-named
  diagnostic copy. New `grokBuild` + `grokBuildMaxPromptArgBytes`
  helpers exported alongside the existing `deepseek*` ones.

All 23 prompt-budget tests pass locally (`pnpm exec vitest run
tests/runtimes/prompt-budget.test.ts`).

---------

Co-authored-by: Sriram Sivakumar <sriram155@gmail.com>
Co-authored-by: Siri-Ray <2667192167@qq.com>
2026-05-29 08:45:57 +00:00

202 lines
5.7 KiB
TypeScript

import { afterEach } from 'vitest';
import assert from 'node:assert/strict';
import {
chmodSync,
mkdirSync,
mkdtempSync,
rmSync,
writeFileSync,
} from 'node:fs';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
AGENT_DEFS,
applyAgentLaunchEnv,
buildLiveArtifactsMcpServersForAgent,
checkPromptArgvBudget,
checkWindowsCmdShimCommandLineBudget,
checkWindowsDirectExeCommandLineBudget,
detectAgents,
inspectAgentExecutableResolution,
resolveAgentLaunch,
resolveAgentExecutable,
spawnEnvForAgent,
} from '../../../src/agents.js';
import type { RuntimeAgentDef } from '../../../src/runtimes/types.js';
// Shared re-exports: everything below is imported at the top of this module
// and passed straight through, grouped by where it comes from.

// node:assert/strict
export { assert };

// node:fs
export { chmodSync, mkdirSync, mkdtempSync, rmSync, writeFileSync };

// node:os and node:path
export { tmpdir };
export { join };

// ../../../src/agents.js
export {
  AGENT_DEFS,
  applyAgentLaunchEnv,
  buildLiveArtifactsMcpServersForAgent,
  checkPromptArgvBudget,
  checkWindowsCmdShimCommandLineBudget,
  checkWindowsDirectExeCommandLineBudget,
  detectAgents,
  inspectAgentExecutableResolution,
  resolveAgentExecutable,
  resolveAgentLaunch,
  spawnEnvForAgent,
};
/** Alias for `RuntimeAgentDef` (imported from `src/runtimes/types.js`) used by the helpers in this module. */
export type TestAgentDef = RuntimeAgentDef;
/**
 * Looks up an agent definition in `AGENT_DEFS` by its `id`.
 *
 * @param id - Identifier to match against each definition's `id`.
 * @returns The first definition whose `id` is strictly equal to `id`.
 * @throws AssertionError with the message `missing agent definition for <id>`
 *   when no definition matches.
 */
export function requireAgent(id: string): TestAgentDef {
  let match: TestAgentDef | undefined;
  for (const candidate of AGENT_DEFS) {
    if (candidate.id === id) {
      // First hit wins, mirroring a left-to-right search.
      match = candidate;
      break;
    }
  }
  assert.ok(match, `missing agent definition for ${id}`);
  return match;
}
/**
 * Builds a complete agent definition for tests from a partial one.
 *
 * Only `bin` is required. `id`, `name`, `versionArgs`, `fallbackModels`,
 * `buildArgs` and `streamFormat` receive defaults when they are absent or
 * explicitly `undefined`; every other supplied field is copied through
 * unchanged.
 *
 * Defaults:
 * - `id`: `test-<bin>`
 * - `name`: `bin`
 * - `versionArgs`: `['--version']`
 * - `fallbackModels`: `[{ id: 'default', label: 'Default' }]`
 * - `buildArgs`: a function returning an empty array
 * - `streamFormat`: `'plain'`
 *
 * @param partial - Fields to set on the definition; must include `bin`.
 * @returns A definition with all of the defaulted fields populated.
 */
export function minimalAgentDef(
  partial: Pick<TestAgentDef, 'bin'> & Partial<TestAgentDef>,
): TestAgentDef {
  const { bin, ...rest } = partial;
  return {
    // Caller-supplied fields go first so that the defaulted fields below take
    // precedence. Spreading `rest` last would let an explicitly `undefined`
    // property (legal under `Partial<>`) overwrite the default just computed.
    ...rest,
    id: rest.id ?? `test-${bin}`,
    name: rest.name ?? bin,
    bin,
    versionArgs: rest.versionArgs ?? ['--version'],
    fallbackModels: rest.fallbackModels ?? [{ id: 'default', label: 'Default' }],
    buildArgs: rest.buildArgs ?? (() => []),
    streamFormat: rest.streamFormat ?? 'plain',
  };
}
// Agent definitions looked up by id from AGENT_DEFS. Every lookup runs when
// this module is evaluated, so an id with no matching definition makes
// `requireAgent` throw during import rather than inside an individual test.
export const codex = requireAgent('codex');
export const hermes = requireAgent('hermes');
export const kimi = requireAgent('kimi');
export const copilot = requireAgent('copilot');
export const cursorAgent = requireAgent('cursor-agent');
export const kiro = requireAgent('kiro');
export const kilo = requireAgent('kilo');
export const vibe = requireAgent('vibe');
export const claude = requireAgent('claude');
export const devin = requireAgent('devin');
export const pi = requireAgent('pi');
export const deepseek = requireAgent('deepseek');
export const gemini = requireAgent('gemini');
export const qoder = requireAgent('qoder');
export const qwen = requireAgent('qwen');
export const opencode = requireAgent('opencode');
export const grokBuild = requireAgent('grok-build');
export const aider = requireAgent('aider');
export const antigravity = requireAgent('antigravity');
/**
 * Reads `maxPromptArgBytes` from an agent definition, failing if it is not set.
 *
 * @param agent - Definition to read the budget from.
 * @param agentLabel - Agent name placed at the start of the failure message.
 * @returns The definition's `maxPromptArgBytes`, narrowed to exclude `undefined`.
 * @throws AssertionError when `maxPromptArgBytes` is `undefined`.
 */
function requireMaxPromptArgBytes(agent: TestAgentDef, agentLabel: string) {
  const { maxPromptArgBytes } = agent;
  assert.ok(
    maxPromptArgBytes !== undefined,
    `${agentLabel} must define maxPromptArgBytes for argv budget tests`,
  );
  return maxPromptArgBytes;
}

/** `maxPromptArgBytes` declared by the `deepseek` definition; asserted to be defined at import time. */
export const deepseekMaxPromptArgBytes = requireMaxPromptArgBytes(deepseek, 'deepseek');

/** `maxPromptArgBytes` declared by the `grok-build` definition; asserted to be defined at import time. */
export const grokBuildMaxPromptArgBytes = requireMaxPromptArgBytes(grokBuild, 'grok-build');
/**
 * Environment variables that are put back after every test, in the order
 * they are restored.
 */
const restoredEnvKeys = [
  'OD_CODEX_DISABLE_PLUGINS',
  'PATH',
  'HOME',
  'OD_AGENT_HOME',
  'OD_DAEMON_URL',
  'OD_TOOL_TOKEN',
  'NPM_CONFIG_PREFIX',
  'PATHEXT',
  'VP_HOME',
] as const;

/** Value of each tracked variable (or `undefined` if unset) when this module was evaluated. */
const originalEnv = new Map<string, string | undefined>(
  restoredEnvKeys.map((key): [string, string | undefined] => [key, process.env[key]]),
);
/** `globalThis.fetch` as it was when this module was evaluated. */
const originalFetch = globalThis.fetch;
/** Own property descriptor of `process.platform` when this module was evaluated. */
const originalPlatformDescriptor = Object.getOwnPropertyDescriptor(process, 'platform');

/**
 * Puts a single environment variable back to its snapshotted state.
 *
 * A variable that was originally unset is deleted instead of assigned:
 * `process.env` coerces assigned values to strings, so writing `undefined`
 * would leave the literal text "undefined" behind.
 */
function restoreEnvVar(key: string, value: string | undefined): void {
  if (value == null) {
    delete process.env[key];
  } else {
    process.env[key] = value;
  }
}

// Undo per-test mutations of the tracked environment variables, the global
// `fetch`, and `process.platform` after every test in suites that import
// this module.
afterEach(() => {
  for (const [key, value] of originalEnv) {
    restoreEnvVar(key, value);
  }
  globalThis.fetch = originalFetch;
  // Only redefine the property when a descriptor was captured at import time.
  if (originalPlatformDescriptor) {
    Object.defineProperty(process, 'platform', originalPlatformDescriptor);
  }
});
/**
 * Redefines `process.platform` to `platform` and then invokes `run`.
 *
 * The override is not undone here: it is still in place when this function
 * returns or when `run` throws. In this module the `afterEach` hook
 * reinstates the original property descriptor.
 *
 * @param platform - Value that `process.platform` should report.
 * @param run - Callback executed while the override is active.
 * @returns Whatever `run` returns.
 */
export function withPlatform<T>(platform: NodeJS.Platform, run: () => T): T {
  // `configurable: true` keeps the property redefinable for later overrides
  // and for the restore step.
  const override: PropertyDescriptor = { value: platform, configurable: true };
  Object.defineProperty(process, 'platform', override);
  return run();
}
/**
 * Runs `run` and afterwards puts the listed environment variables back to the
 * values they had before the call.
 *
 * Variables that were unset beforehand are deleted again. Restoration happens:
 * - immediately, before rethrowing, when `run` throws synchronously;
 * - once the promise settles, when `run` returns a `Promise`;
 * - immediately after `run` returns, for any other return value.
 *
 * @param keys - Names of the `process.env` entries to snapshot and restore.
 * @param run - Callback that may mutate those entries.
 * @returns The value returned by `run`, or a promise that settles the same way
 *   after the environment has been restored.
 */
export function withEnvSnapshot<T>(
  keys: readonly string[],
  run: () => T | Promise<T>,
): T | Promise<T> {
  const entries = keys.map(
    (key): [string, string | undefined] => [key, process.env[key]],
  );
  const before = new Map(entries);

  const rollback = (): void => {
    for (const key of keys) {
      const previous = before.get(key);
      if (previous == null) {
        delete process.env[key];
        continue;
      }
      process.env[key] = previous;
    }
  };

  let outcome: T | Promise<T>;
  try {
    outcome = run();
  } catch (error) {
    // Synchronous failure: restore first, then surface the original error.
    rollback();
    throw error;
  }

  if (!(outcome instanceof Promise)) {
    rollback();
    return outcome;
  }
  // Asynchronous work: defer the restore until the promise settles.
  return outcome.finally(rollback);
}