diff --git a/apps/daemon/src/connectionTest.ts b/apps/daemon/src/connectionTest.ts index 6a911ed50..41865cc7b 100644 --- a/apps/daemon/src/connectionTest.ts +++ b/apps/daemon/src/connectionTest.ts @@ -54,7 +54,9 @@ import { validateBaseUrl, type AgentTestRequest, type BaseUrlValidationResult, + type ConnectionTestDiagnostics, type ConnectionTestKind, + type ConnectionTestPhase, type ConnectionTestProtocol, type ConnectionTestResponse, type ParsedBaseUrl, @@ -1184,6 +1186,7 @@ async function testAgentConnectionInternal( model, agentName: input.agentId, detail: `Unknown agent id: ${input.agentId}`, + diagnostics: { phase: 'binary_resolution' }, }; } const configuredAgentEnv = agentCliEnvForAgent( @@ -1199,6 +1202,7 @@ async function testAgentConnectionInternal( latencyMs: Date.now() - start, model, agentName: def.name, + diagnostics: { phase: 'binary_resolution' }, }; } @@ -1210,7 +1214,37 @@ async function testAgentConnectionInternal( let abortHandler: (() => void) | null = null; const sink = createAgentSink(); - const resultFromAgentText = (text: string): ConnectionTestResponse => { + // Phase tracker for structured diagnostics (#2248). The order matches + // the lifecycle: binary_resolution → spawn → connection_smoke_test → + // output_parse. Each result helper below stamps the *current* phase + // into the response so consumers don't have to scrape `detail` to + // know how far the test got. Phase is mutated at the points where + // the daemon meaningfully advances (just before spawn, when the + // child first produces stdout, etc.) — not on every event. + let phase: ConnectionTestPhase = 'binary_resolution'; + const buildDiagnostics = ( + overrides: Partial = {}, + ): ConnectionTestDiagnostics => { + const rawStderr = sink.getStderrTail().trim(); + const rawStdout = sink.getRawStdoutTail().trim(); + // `exactOptionalPropertyTypes: true` means we can't pass `undefined` + // to an optional field directly — conditionally spread instead so + // empty values just don't appear in the response. + return { + phase, + ...(executableResolution.launchPath + ? { binaryPath: executableResolution.launchPath } + : {}), + ...(rawStderr ? { stderrTail: redactSecrets(rawStderr) } : {}), + ...(rawStdout ? { stdoutTail: redactSecrets(rawStdout) } : {}), + ...overrides, + }; + }; + + const resultFromAgentText = ( + text: string, + exit?: { code: number | null; signal: NodeJS.Signals | null }, + ): ConnectionTestResponse => { const latencyMs = Date.now() - start; const rawSample = truncateSample(text); const sample = redactSecrets(rawSample); @@ -1226,6 +1260,10 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail, + diagnostics: buildDiagnostics({ + phase: 'output_parse', + ...(exit ? { exitCode: exit.code, signal: exit.signal } : {}), + }), }; } if (!isSmokeOkReply(text)) { @@ -1234,6 +1272,14 @@ async function testAgentConnectionInternal( ); } console.log(`[test:agent] ${def.name} → ok in ${(latencyMs / 1000).toFixed(1)}s`); + // resultFromChildExit can route ACP forced shutdown (code === null, + // signal === 'SIGTERM' + acpCleanCompletion) through this success + // helper. Hard-coding `exitCode: 0` would silently overwrite the + // SIGTERM signal and violate the raw code/signal contract in + // packages/contracts/src/api/connectionTest.ts. Pass through the + // real `winner.code` / `winner.signal` when the caller has them and + // only synthesize `exitCode: 0` when no exit context is available + // (theoretical text-without-exit path). return { ok: true, kind: 'success', @@ -1241,6 +1287,11 @@ async function testAgentConnectionInternal( model, agentName: def.name, sample, + diagnostics: buildDiagnostics( + exit + ? { phase: 'connection_smoke_test', exitCode: exit.code, signal: exit.signal } + : { phase: 'connection_smoke_test', exitCode: 0 }, + ), }; }; @@ -1259,6 +1310,7 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail: auth.message ?? cursorAuthGuidance(), + diagnostics: buildDiagnostics(), }; } if (detail && isLikelyModelErrorText(detail)) { @@ -1272,6 +1324,7 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail, + diagnostics: buildDiagnostics({ phase: 'output_parse' }), }; } console.warn( @@ -1284,6 +1337,7 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail, + diagnostics: buildDiagnostics(), }; }; @@ -1298,6 +1352,7 @@ async function testAgentConnectionInternal( latencyMs, model, agentName: def.name, + diagnostics: buildDiagnostics(), }; }; @@ -1313,6 +1368,10 @@ async function testAgentConnectionInternal( ); } catch (err) { const detail = err instanceof Error ? err.message : String(err); + // buildArgs runs *after* binary resolution but *before* spawn, so + // phase is still 'binary_resolution' here. Stamp diagnostics so the + // contract advertised in packages/contracts/src/api/connectionTest.ts + // ("Always set on local agent test responses") actually holds. return { ok: false, kind: 'agent_spawn_failed', @@ -1320,6 +1379,7 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail: redactSecrets(detail), + diagnostics: buildDiagnostics(), }; } const stdinMode = @@ -1335,6 +1395,17 @@ async function testAgentConnectionInternal( const env = applyAgentLaunchEnv(baseEnv, executableResolution); const auth = await probeAgentAuthStatus(input.agentId, executableResolution.launchPath, env); if (auth?.status === 'missing') { + // Preflight auth probe runs after binary resolution but before the + // smoke spawn — phase is still 'binary_resolution'. The smoke + // sink is empty here (no spawn happened), so the probe itself is + // the only source of stderr/stdout/exit context. Fold what the + // probe captured into the diagnostics block; `...overrides` in + // buildDiagnostics() lets these win over the empty sink tails. + const probeOverrides: Partial = {}; + if (auth.stdoutTail) probeOverrides.stdoutTail = redactSecrets(auth.stdoutTail); + if (auth.stderrTail) probeOverrides.stderrTail = redactSecrets(auth.stderrTail); + if (auth.exitCode !== undefined) probeOverrides.exitCode = auth.exitCode; + if (auth.signal !== undefined) probeOverrides.signal = auth.signal; return { ok: false, kind: 'agent_auth_required', @@ -1342,6 +1413,7 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail: auth.message ?? cursorAuthGuidance(), + diagnostics: buildDiagnostics(probeOverrides), }; } const invocation = createCommandInvocation({ @@ -1349,6 +1421,12 @@ async function testAgentConnectionInternal( args, env, }); + // We are about to hand off to child_process.spawn(). Any failure + // from here on (ENOENT, bad argv, non-zero exit) belongs to the + // 'spawn' phase rather than 'binary_resolution', so flip the tracker + // *before* spawning. resultFromAgentText flips it again to + // 'connection_smoke_test' / 'output_parse' once we get text out. + phase = 'spawn'; child = spawn(invocation.command, invocation.args, { env, stdio: [stdinMode, 'pipe', 'pipe'], @@ -1402,6 +1480,9 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail: `${detail}${guidance}`, + diagnostics: buildDiagnostics({ + phase: isMissing ? 'binary_resolution' : 'spawn', + }), }; } @@ -1431,10 +1512,11 @@ async function testAgentConnectionInternal( (winner.code === 0 && !winner.signal) || acpForcedShutdown; if (buffered) { const rawSample = truncateSample(buffered); + const exitInfo = { code: winner.code, signal: winner.signal }; if (rawSample && isLikelyModelErrorText(rawSample)) { - return resultFromAgentText(buffered); + return resultFromAgentText(buffered, exitInfo); } - if (exitedCleanly) return resultFromAgentText(buffered); + if (exitedCleanly) return resultFromAgentText(buffered, exitInfo); } const stderrTail = sink.getStderrTail().trim(); const rawStdoutTail = sink.getRawStdoutTail().trim(); @@ -1459,6 +1541,11 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail: auth.message ?? cursorAuthGuidance(), + diagnostics: buildDiagnostics({ + phase: 'connection_smoke_test', + exitCode: winner.code, + signal: winner.signal, + }), }; } const claudeDiagnostic = diagnoseClaudeCliFailure({ @@ -1480,6 +1567,11 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail: claudeDiagnostic.detail, + diagnostics: buildDiagnostics({ + phase: 'spawn', + exitCode: winner.code, + signal: winner.signal, + }), }; } const detail = redactSecrets( @@ -1504,6 +1596,11 @@ async function testAgentConnectionInternal( agentName: def.name, detail: `${detail || 'Agent exited without producing assistant text'}${guidance}`, + diagnostics: buildDiagnostics({ + phase: buffered ? 'output_parse' : 'spawn', + exitCode: winner.code, + signal: winner.signal, + }), }; }; @@ -1560,6 +1657,10 @@ async function testAgentConnectionInternal( return resultFromChildExit(winner); } catch (err) { const detail = err instanceof Error ? err.message : String(err); + // Outer catch — the failure may have happened at any phase between + // binary_resolution and output_parse, so stamp the current phase as + // observed. buildDiagnostics is defined in the enclosing scope and + // is safe to call here. return { ok: false, kind: 'agent_spawn_failed', @@ -1567,6 +1668,7 @@ async function testAgentConnectionInternal( model, agentName: def.name, detail: redactSecrets(detail), + diagnostics: buildDiagnostics(), }; } finally { if (timer) clearTimeout(timer); diff --git a/apps/daemon/src/runtimes/auth.ts b/apps/daemon/src/runtimes/auth.ts index ad0ba63cf..9b4eb129a 100644 --- a/apps/daemon/src/runtimes/auth.ts +++ b/apps/daemon/src/runtimes/auth.ts @@ -4,6 +4,16 @@ import type { RuntimeEnv } from './types.js'; export type AgentAuthProbeResult = { status: 'ok' | 'missing' | 'unknown'; message?: string; + // Output captured from the probe child process (e.g. + // `cursor-agent status`). Exposed so callers like the connection + // test layer can fold the probe's own stderr/exit context into their + // structured diagnostics — the probe runs before the smoke spawn, + // so without this the diagnostics block would otherwise drop the + // probe output entirely. + stdoutTail?: string; + stderrTail?: string; + exitCode?: number | null; + signal?: string | null; }; const CURSOR_AUTH_GUIDANCE = @@ -66,6 +76,30 @@ export function classifyAgentAuthFailure( return null; } +// Tail length matches the smoke-test sink so the diagnostics block +// stays compact when it folds probe output back into its overrides. +const PROBE_TAIL_BYTES = 400; + +function tailString(value: unknown): string | undefined { + if (typeof value !== 'string') return undefined; + const trimmed = value.trim(); + if (!trimmed) return undefined; + return trimmed.length > PROBE_TAIL_BYTES ? trimmed.slice(-PROBE_TAIL_BYTES) : trimmed; +} + +function withProbeTails( + base: AgentAuthProbeResult, + stdoutText: string, + stderrText: string, +): AgentAuthProbeResult { + const result: AgentAuthProbeResult = { ...base }; + const stdoutTail = tailString(stdoutText); + const stderrTail = tailString(stderrText); + if (stdoutTail) result.stdoutTail = stdoutTail; + if (stderrTail) result.stderrTail = stderrTail; + return result; +} + export async function probeAgentAuthStatus( agentId: string, resolvedBin: string, @@ -78,27 +112,54 @@ export async function probeAgentAuthStatus( timeout: 5000, maxBuffer: 1024 * 1024, }); - const output = `${stdout ?? ''}\n${stderr ?? ''}`; + const stdoutText = typeof stdout === 'string' ? stdout : ''; + const stderrText = typeof stderr === 'string' ? stderr : ''; + const output = `${stdoutText}\n${stderrText}`; if (isCursorAuthFailureText(output)) { - return { status: 'missing', message: cursorAuthGuidance() }; + return withProbeTails( + { status: 'missing', message: cursorAuthGuidance(), exitCode: 0, signal: null }, + stdoutText, + stderrText, + ); } return { status: 'ok' }; } catch (error) { const err = error as NodeJS.ErrnoException & { stdout?: unknown; stderr?: unknown; + code?: string | number; + signal?: string; }; - const output = [ - err.message, - typeof err.stdout === 'string' ? err.stdout : '', - typeof err.stderr === 'string' ? err.stderr : '', - ].join('\n'); + const stdoutText = typeof err.stdout === 'string' ? err.stdout : ''; + const stderrText = typeof err.stderr === 'string' ? err.stderr : ''; + const output = [err.message, stdoutText, stderrText].join('\n'); + // util.promisify(execFile) attaches `code` and `signal` to the + // rejection error. `code` may be a number (real non-zero exit) or + // a Node ErrnoException string ("ENOENT"); only the numeric form + // is meaningful as an exit code. + const numericExit = typeof err.code === 'number' ? err.code : null; + const childSignal = typeof err.signal === 'string' ? err.signal : null; if (isCursorAuthFailureText(output)) { - return { status: 'missing', message: cursorAuthGuidance() }; + return withProbeTails( + { + status: 'missing', + message: cursorAuthGuidance(), + exitCode: numericExit, + signal: childSignal, + }, + stdoutText, + stderrText, + ); } - return { - status: 'unknown', - message: 'Cursor Agent authentication status could not be verified with `cursor-agent status`.', - }; + return withProbeTails( + { + status: 'unknown', + message: 'Cursor Agent authentication status could not be verified with `cursor-agent status`.', + exitCode: numericExit, + signal: childSignal, + }, + stdoutText, + stderrText, + ); } } diff --git a/apps/daemon/tests/connection-test.test.ts b/apps/daemon/tests/connection-test.test.ts index a9ed8a6ff..c034eb2bc 100644 --- a/apps/daemon/tests/connection-test.test.ts +++ b/apps/daemon/tests/connection-test.test.ts @@ -2470,6 +2470,141 @@ setInterval(() => {}, 1000); }); expect(res.status).toBe(400); }); + + // Regression coverage for #2248: the daemon must return structured + // diagnostics next to the existing `kind`/`detail` strings so Settings + // and CLI consumers don't have to scrape the human-readable detail + // line to know what phase failed, which binary path was used, or what + // the child's exit metadata was. The legacy fields stay unchanged so + // older clients keep rendering. + it('attaches structured diagnostics on Claude smoke-test success (#2248)', async () => { + await withFakeClaude( + ` +let input = ''; +process.stdin.setEncoding('utf8'); +process.stdin.on('data', (chunk) => { input += chunk; }); +process.stdin.on('end', () => { + try { + JSON.parse(input.trim()); + console.log(JSON.stringify({ + type: 'assistant', + message: { + id: 'msg_1', + content: [{ type: 'text', text: 'ok' }], + stop_reason: 'end_turn', + }, + })); + } catch (err) { + console.error(err instanceof Error ? err.message : String(err)); + process.exit(1); + } +}); +`, + async () => { + const result = await testAgentConnection({ agentId: 'claude' }); + + expect(result).toMatchObject({ ok: true, kind: 'success' }); + expect(result.diagnostics).toBeDefined(); + expect(result.diagnostics?.phase).toBe('connection_smoke_test'); + // The binary path is whatever fake bin the test harness installed + // on PATH (a temp directory). All we want here is that the + // daemon actually fills it in, not that it matches an exact path. + expect(typeof result.diagnostics?.binaryPath).toBe('string'); + expect(result.diagnostics?.binaryPath ?? '').toMatch(/claude/); + expect(result.diagnostics?.exitCode).toBe(0); + }, + ); + }); + + it('attaches structured diagnostics on Claude exit-failed (#2248)', async () => { + await withFakeClaude( + `console.error('boom-on-stderr'); process.exit(7);`, + async () => { + const result = await testAgentConnection({ agentId: 'claude' }); + + expect(result.ok).toBe(false); + // Back-compat: existing kind + detail keep their shape. + expect(typeof result.kind).toBe('string'); + expect(typeof result.detail).toBe('string'); + // New: structured fields are attached. + expect(result.diagnostics).toBeDefined(); + expect(result.diagnostics?.phase).toBe('spawn'); + expect(result.diagnostics?.exitCode).toBe(7); + expect(result.diagnostics?.stderrTail ?? '').toContain('boom-on-stderr'); + expect(result.diagnostics?.binaryPath ?? '').toMatch(/claude/); + }, + ); + }); + + it('reports an early-phase diagnostics block when the agent CLI is missing (#2248)', async () => { + // Clear PATH so the daemon cannot locate `claude`. We restore the + // env in `finally` to avoid leaking the empty PATH to later tests. + // Depending on whether the resolver short-circuits or the spawn + // itself ENOENTs, the kind may be agent_not_installed or + // agent_spawn_failed and the phase may be 'binary_resolution' or + // 'spawn'. Both are valid "we never reached the smoke test" shapes + // — the actionable bit for the UI is that diagnostics arrived at + // all and that the phase is one of the two early values. + const oldPath = process.env.PATH; + process.env.PATH = ''; + try { + const result = await testAgentConnection({ agentId: 'claude' }); + expect(result.ok).toBe(false); + expect(['agent_not_installed', 'agent_spawn_failed']).toContain(result.kind); + expect(result.diagnostics).toBeDefined(); + expect(['binary_resolution', 'spawn']).toContain(result.diagnostics?.phase); + } finally { + process.env.PATH = oldPath; + } + }); + + it('attaches diagnostics when the preflight auth probe reports missing auth (#2248)', async () => { + // Cursor Agent's preflight `cursor-agent status` check rejects the + // smoke run before the daemon ever spawns the smoke prompt. The + // initial #2248 pass forgot to stamp diagnostics on that return + // path, which contradicted the "Always set on local agent test + // responses" contract in packages/contracts. Lock the contract, + // and additionally lock the probe's own stderr/exit metadata — + // without those, the diagnostics block would drop the only context + // a caller has on a missing-auth failure (no smoke spawn ever ran, + // so the smoke sink is empty). + await withFakeCursorAgent( + ` +const args = process.argv.slice(2); +if (args[0] === '--version') { + console.log('2026.05.07-test'); + process.exit(0); +} +if (args[0] === 'models') { + console.log('auto'); + process.exit(0); +} +if (args[0] === 'status') { + console.error('Not logged in'); + process.exit(1); +} +console.error('smoke prompt should not run when status reports missing auth'); +process.exit(1); +`, + async () => { + const result = await testAgentConnection({ agentId: 'cursor-agent' }); + expect(result).toMatchObject({ + ok: false, + kind: 'agent_auth_required', + }); + expect(result.diagnostics).toBeDefined(); + // Preflight runs after binary resolution but before the smoke + // spawn, so phase should still be 'binary_resolution'. + expect(result.diagnostics?.phase).toBe('binary_resolution'); + expect(result.diagnostics?.binaryPath ?? '').toMatch(/cursor-agent/); + // The probe child wrote "Not logged in" on stderr and exited + // 1; both must propagate into diagnostics so Settings/CLI can + // render the structured auth-failure context. + expect(result.diagnostics?.stderrTail ?? '').toContain('Not logged in'); + expect(result.diagnostics?.exitCode).toBe(1); + }, + ); + }); }); describe('connection test helpers', () => { diff --git a/packages/contracts/src/api/connectionTest.ts b/packages/contracts/src/api/connectionTest.ts index 51f4451b7..12e77a0a2 100644 --- a/packages/contracts/src/api/connectionTest.ts +++ b/packages/contracts/src/api/connectionTest.ts @@ -139,6 +139,45 @@ export type ConnectionTestKind = | 'agent_spawn_failed' | 'unknown'; +// Phase markers describing how far the local agent connection test +// progressed before it produced its result. Used inside +// `ConnectionTestResponse.diagnostics.phase` and intended to be stable +// across daemon versions so Settings UI and CLI consumers can render +// phase-aware copy without re-deriving it from the free-form `detail` +// string. See issue #2248. +export type ConnectionTestPhase = + | 'binary_resolution' + | 'version_probe' + | 'model_list' + | 'spawn' + | 'connection_smoke_test' + | 'output_parse'; + +export interface ConnectionTestDiagnostics { + // How far the test progressed before producing the result. Always + // set on local agent test responses. + phase: ConnectionTestPhase; + // Absolute filesystem path of the executable the daemon actually + // attempted to run, when resolution succeeded. + binaryPath?: string; + // Best-effort version string captured during `version_probe`. Null + // when the CLI exposes no machine-parseable version output. + binaryVersion?: string | null; + // Child process exit metadata. Both fields keep the raw `code` / + // `signal` shape from `child_process` so consumers can distinguish + // a clean non-zero exit from a SIGTERM teardown. `signal` is typed as + // `string | null` (not `NodeJS.Signals`) so the generated `.d.ts` + // stays browser-safe — the daemon writes one of the + // `NodeJS.Signals` literals here but consumers never need to import + // ambient Node namespaces just to read an HTTP response shape. + exitCode?: number | null; + signal?: string | null; + // Last ~400 bytes of the child's streams, already passed through + // the daemon's secret redactor. + stdoutTail?: string; + stderrTail?: string; +} + export type ConnectionTestProtocol = 'anthropic' | 'openai' | 'azure' | 'google' | 'ollama' | 'senseaudio'; export interface ProviderTestRequest { @@ -183,4 +222,11 @@ export interface ConnectionTestResponse { detectedExecutablePath?: string; usedExecutablePath?: string; usedExecutableSource?: 'configured' | 'path' | 'fallback_invalid' | 'fallback_failed'; + // Structured diagnostics for the local agent connection test path + // (#2248). Optional and additive: existing consumers that only read + // `kind` and `detail` keep working unchanged. Populated on local + // agent test responses — including early failures that never reach + // the spawn step (unknown agent id, unresolved binary, preflight + // auth probe). Provider tests omit it. + diagnostics?: ConnectionTestDiagnostics; }