This commit is contained in:
YOMXXX 2026-05-31 04:51:34 +00:00 committed by GitHub
commit f53f1d2399
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 350 additions and 11 deletions

View file

@ -67,4 +67,14 @@ export const copilotAgentDef = {
},
promptViaStdin: true,
streamFormat: 'copilot-stream-json',
// GitHub Copilot's deck-generation and large-prompt turns go silent
// (no stdout, no streamed events) for stretches that exceed the
// 10-minute global default — the model is still working but the
// CLI does not emit keepalive frames. The default watchdog used to
// kill those runs as `stalled` even though the agent was healthy
// (issue #2467: "GitHub Copilot agent getting stuck after 10 mins
// and few seconds"). 30 minutes gives the heavy turns room to land
// while still bounding genuine hangs; operators can override via
// `OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS` if they want it tighter.
inactivityTimeoutMs: 30 * 60 * 1000,
} satisfies RuntimeAgentDef;

View file

@ -207,6 +207,9 @@ function stripFns(
// `fallbackModels` slot here too. `helpArgs` / `capabilityFlags` /
// `fallbackBins` / `maxPromptArgBytes` / `env` are probe-or-spawn-only
// metadata and shouldn't bleed into the API response either.
// `inactivityTimeoutMs` is a spawn-time hint for the chat-run watchdog
// and is not part of the public AgentInfo contract — strip it here so
// the runtime registry stays the only consumer.
const {
buildArgs,
listModels,
@ -218,6 +221,7 @@ function stripFns(
versionProbeTimeoutMs,
maxPromptArgBytes,
env,
inactivityTimeoutMs,
...rest
} = def;
return rest;

View file

@ -162,6 +162,14 @@ export type RuntimeAgentDef = {
// present in the daemon's `process.env`; Settings-UI per-agent env
// values only reach the spawned child and are NOT consulted here.
defaultModelEnvVar?: string;
// Agent-recommended override for the chat-run inactivity watchdog.
// The watchdog observes child stdout/stderr/SSE activity, not real
// CPU progress, so agents whose CLIs go silent for long stretches
// during legitimate work (e.g. Copilot's deck-generation thinking
// phase from #2467) need a longer ceiling than the 10-minute global
// default. Operators can still override per-process via
// `OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS` — that env wins.
inactivityTimeoutMs?: number;
};
export type DetectedAgent = Omit<
@ -176,6 +184,13 @@ export type DetectedAgent = Omit<
| 'versionProbeTimeoutMs'
| 'maxPromptArgBytes'
| 'env'
// `inactivityTimeoutMs` is a spawn-time-only hint consumed by the
// chat-run watchdog. It is not part of the public `/api/agents`
// contract (`packages/contracts/src/api/registry.ts#AgentInfo`), so
// omitting it here keeps the daemon response aligned with that
// shared web/CLI shape — agents pick it up by reading the runtime
// def directly, the registry payload stays unchanged.
| 'inactivityTimeoutMs'
> & {
models: RuntimeModelOption[];
modelsSource: RuntimeModelSource;

View file

@ -3743,16 +3743,58 @@ const MAX_CHAT_RUN_INACTIVITY_TIMEOUT_MS = 24 * 60 * 60 * 1000;
// always means the agent is winding down or hanging. See #1451.
const DEFAULT_CHAT_RUN_ARTIFACT_QUIET_PERIOD_MS = 60 * 1000;
function resolveChatRunInactivityTimeoutMs() {
const raw = Number(process.env.OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS);
// This watchdog observes child stdout/stderr/SSE activity, not real CPU or
// filesystem progress. Keep the default long enough for agents that spend
// several minutes silently writing large artifacts.
if (!Number.isFinite(raw)) return DEFAULT_CHAT_RUN_INACTIVITY_TIMEOUT_MS;
// Node clamps delays larger than a signed 32-bit integer down to 1ms, which
// makes an oversized override fail almost immediately while reporting a huge
// timeout. Keep explicit overrides bounded to a practical, timer-safe value.
return Math.min(MAX_CHAT_RUN_INACTIVITY_TIMEOUT_MS, Math.max(0, Math.floor(raw)));
// Resolve the chat-run inactivity watchdog ceiling. Priority order:
// 1. `OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS` (operator escape hatch).
// 2. The agent runtime def's `inactivityTimeoutMs` recommendation —
// lets agents whose CLIs go silent for long stretches during
// legitimate work (e.g. Copilot from #2467) raise the ceiling
// without every operator having to set an env var.
// 3. The 10-minute global default.
//
// The env path is lenient (silently normalizes / falls back) because it
// comes from a runtime knob an operator can mis-type at any time. The
// def path is strict (throws on non-finite or negative) because the
// value lives in checked-in source — a typo like `inactivityTimeoutMs: -1`
// should crash loudly at chat-run time rather than silently disable the
// watchdog for that agent. Both paths still pass through the 24-hour
// clamp because Node silently downgrades signed-32-bit-overflowing
// setTimeout delays to 1ms.
//
// Order matters: validate the def hint *before* checking the env
// override. Otherwise a finite env value would hide a bad checked-in
// value (e.g. `inactivityTimeoutMs: -1`) from ever tripping the
// fast-fail — the typo could sit unnoticed in source until someone
// removed the override. Validation now runs on every call regardless
// of which branch ultimately wins.
export function assertValidRuntimeDefInactivityTimeoutMs(agentDefault?: number): void {
// Strict checked-in-config guard for `RuntimeAgentDef.inactivityTimeoutMs`.
// Exported (and called by `resolveChatRunInactivityTimeoutMs` below)
// so that chat-run startup can invoke this immediately after the
// runtime def is selected — before any filesystem or
// prompt-building work happens — and abort with a loud RangeError
// when a checked-in def carries an invalid value. That keeps a
// typo from leaving partial setup state (e.g. a `.mcp.json` write
// / unlink, a freshly-composed system prompt) on disk when the
// run then aborts later at watchdog-arm time.
if (agentDefault === undefined) return;
if (!Number.isFinite(agentDefault) || agentDefault < 0 || !Number.isInteger(agentDefault)) {
throw new RangeError(
`RuntimeAgentDef.inactivityTimeoutMs must be a non-negative integer, got ${String(agentDefault)}. ` +
'Fix the runtime def — invalid values used to silently disable the watchdog.',
);
}
}
export function resolveChatRunInactivityTimeoutMs(agentDefault?: number) {
assertValidRuntimeDefInactivityTimeoutMs(agentDefault);
const env = Number(process.env.OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS);
if (Number.isFinite(env)) {
return Math.min(MAX_CHAT_RUN_INACTIVITY_TIMEOUT_MS, Math.max(0, Math.floor(env)));
}
if (agentDefault !== undefined) {
return Math.min(MAX_CHAT_RUN_INACTIVITY_TIMEOUT_MS, agentDefault);
}
return DEFAULT_CHAT_RUN_INACTIVITY_TIMEOUT_MS;
}
// Resolve the post-artifact quiet-period window. Same clamp as the outer
@ -10750,6 +10792,30 @@ export async function startServer({
);
if (!def.bin)
return design.runs.fail(run, 'AGENT_UNAVAILABLE', 'agent has no binary');
// Validate the checked-in `inactivityTimeoutMs` hint immediately
// after the runtime def is selected and before any side-effectful
// setup (auto-memory extract, `.mcp.json` write/unlink,
// composeSystemPrompt, prompt persistence). A bad def value would
// otherwise abort the run only at watchdog-arm time, after that
// setup has already mutated local state, leaving confusing partial
// residue behind (issue #2467 review on PR #2579).
//
// Catch is intentionally narrowed to `RangeError`, the only kind
// `assertValidRuntimeDefInactivityTimeoutMs` is allowed to throw
// for invalid checked-in values. Anything else (a regression that
// makes the helper throw on a valid value, an unrelated bug
// introduced while touching this path) should bubble up to the
// outer chat-run starter — which surfaces it as
// `AGENT_EXECUTION_FAILED` — rather than being misreported as
// "the runtime def is bad" and burying the real failure.
try {
assertValidRuntimeDefInactivityTimeoutMs(def.inactivityTimeoutMs);
} catch (err) {
if (err instanceof RangeError) {
return design.runs.fail(run, 'AGENT_RUNTIME_DEF_INVALID', err.message);
}
throw err;
}
const safeCommentAttachments =
normalizeCommentAttachments(commentAttachments);
if (
@ -11537,7 +11603,7 @@ export async function startServer({
// here; on this branch `send` was hoisted into the AMR preflight
// earlier, so we keep only the new `runStartTimeMs` declaration.
const runStartTimeMs = Date.now();
const inactivityTimeoutMs = resolveChatRunInactivityTimeoutMs();
const inactivityTimeoutMs = resolveChatRunInactivityTimeoutMs(def.inactivityTimeoutMs);
const artifactQuietPeriodMs = resolveChatRunArtifactQuietPeriodMs();
const inactivityKillGraceMs = 3_000;
let inactivityTimer = null;

View file

@ -0,0 +1,218 @@
/**
* Per-agent inactivity-timeout resolution (#2467).
*
* The chat-run inactivity watchdog defaults to 10 minutes. Some agents
* (GitHub Copilot CLI) genuinely stay silent for longer than that on
* heavy deck-generation turns the model is still working but emits
* no stdout, so the watchdog used to kill the run as `stalled` even
* though the agent was healthy.
*
* Runtime defs can now advertise a recommended `inactivityTimeoutMs`,
* and the resolver merges it under the env override:
*
* OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS highest priority (operator override)
* def.inactivityTimeoutMs next (agent-specific recommendation)
* DEFAULT_CHAT_RUN_INACTIVITY_TIMEOUT_MS (10 min) global default
*/
import { afterEach, describe, expect, it } from 'vitest';
import {
assertValidRuntimeDefInactivityTimeoutMs,
resolveChatRunInactivityTimeoutMs,
} from '../src/server.js';
import { copilotAgentDef } from '../src/runtimes/defs/copilot.js';
const ENV_KEY = 'OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS';
const TEN_MINUTES_MS = 10 * 60 * 1000;
const THIRTY_MINUTES_MS = 30 * 60 * 1000;
const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000;
describe('resolveChatRunInactivityTimeoutMs', () => {
const originalEnv = process.env[ENV_KEY];
afterEach(() => {
if (originalEnv === undefined) {
delete process.env[ENV_KEY];
} else {
process.env[ENV_KEY] = originalEnv;
}
});
it('returns the 10-minute global default when no def hint and no env override are set', () => {
delete process.env[ENV_KEY];
expect(resolveChatRunInactivityTimeoutMs()).toBe(TEN_MINUTES_MS);
});
it('uses the def-level hint when env is unset', () => {
delete process.env[ENV_KEY];
expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(THIRTY_MINUTES_MS);
});
it('lets the env override take precedence over the def hint (operator escape hatch)', () => {
// Operators must be able to shrink or lengthen the watchdog for any
// agent without editing source — diagnosing flaky CLIs, taming runaway
// sessions, etc.
process.env[ENV_KEY] = '900000'; // 15 min
expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(900_000);
});
it('falls back to the def hint when the env value is not a finite number', () => {
process.env[ENV_KEY] = 'not-a-number';
expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(THIRTY_MINUTES_MS);
});
it('still honors env=0 to disable the watchdog entirely', () => {
// Existing behavior the watchdog code already supports — preserve it
// even when an agent def would otherwise contribute a larger value.
process.env[ENV_KEY] = '0';
expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(0);
});
it('clamps an oversized env override to the 24-hour ceiling so Node does not fire the timer immediately', () => {
process.env[ENV_KEY] = String(TWENTY_FOUR_HOURS_MS * 100);
expect(resolveChatRunInactivityTimeoutMs()).toBe(TWENTY_FOUR_HOURS_MS);
});
it('clamps an oversized def hint to the 24-hour ceiling for the same reason', () => {
delete process.env[ENV_KEY];
expect(resolveChatRunInactivityTimeoutMs(TWENTY_FOUR_HOURS_MS * 100)).toBe(TWENTY_FOUR_HOURS_MS);
});
it('throws RangeError on a non-finite def hint — runtime defs are checked-in source and a typo must fail loudly, not silently disable the watchdog', () => {
// The previous draft normalized NaN/Infinity to the global default,
// which made a runtime def like `inactivityTimeoutMs: Number.NaN`
// look fine in source review while quietly losing the agent-specific
// ceiling at runtime. The reviewer-correctness fix: fast-fail on the
// checked-in side so the bug surfaces in dev, not in production logs.
delete process.env[ENV_KEY];
expect(() => resolveChatRunInactivityTimeoutMs(Number.NaN)).toThrow(
/RuntimeAgentDef\.inactivityTimeoutMs/,
);
});
it('throws RangeError on a negative def hint — `-1` would silently disable the agent-specific watchdog otherwise', () => {
delete process.env[ENV_KEY];
expect(() => resolveChatRunInactivityTimeoutMs(-1)).toThrow(
/must be a non-negative integer/,
);
});
it('throws RangeError on a fractional def hint — Math.floor would mask a wrong-units typo (e.g. seconds instead of ms)', () => {
// A def value like `inactivityTimeoutMs: 30` (seconds, written
// forgetting the unit is ms) is a real footgun; refusing
// non-integer floats keeps such typos from getting silently
// floored to a 0ms or 30ms timer. Operators can still pass
// anything via the env override.
delete process.env[ENV_KEY];
expect(() => resolveChatRunInactivityTimeoutMs(60.5)).toThrow(
/must be a non-negative integer/,
);
});
it('keeps the env override lenient when its value is bogus and no def hint is provided (falls back to the global default)', () => {
// env comes from operator-supplied configuration that can be
// mistyped at any time. Unlike the def path it must not crash the
// chat run — fall back to the 10-minute default instead.
process.env[ENV_KEY] = 'not-a-number';
expect(resolveChatRunInactivityTimeoutMs()).toBe(TEN_MINUTES_MS);
});
it('still throws on an invalid def hint when a finite env override is also set — the env must not hide a checked-in typo', () => {
// Reviewer-correctness fix: an earlier ordering placed the env
// early-return before the def validation, so a bad runtime def
// could sit unnoticed in source whenever an operator had set
// OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS. The fast-fail now runs first
// and catches the typo regardless of which branch eventually
// wins.
process.env[ENV_KEY] = '15000';
expect(() => resolveChatRunInactivityTimeoutMs(-1)).toThrow(
/must be a non-negative integer/,
);
});
it('returns the env override when both the env and a valid def hint are provided (env wins as operator escape hatch)', () => {
// Sanity: validation order does not change priority — env still
// wins when both are valid.
process.env[ENV_KEY] = '15000';
expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(15_000);
});
});
describe('copilotAgentDef.inactivityTimeoutMs', () => {
it('ships a 30-minute inactivity hint so Copilot silent-thinking phases do not trip the default watchdog (#2467)', () => {
expect(copilotAgentDef.inactivityTimeoutMs).toBe(THIRTY_MINUTES_MS);
});
});
describe('assertValidRuntimeDefInactivityTimeoutMs (#2579 fast-fail at def-select time)', () => {
// Reviewer-correctness fix (#2579 non-blocking, round 3): the
// strict checked-in-config check must run *immediately* after the
// runtime def is selected, before any side-effectful setup
// (`.mcp.json` write/unlink, prompt composition, file writes) so
// that an invalid checked-in `inactivityTimeoutMs` aborts the run
// before leaving partial state behind. The assert helper is the
// pure entry point chat-run startup can call without also touching
// env-resolution side effects.
it('returns silently when the def hint is omitted (no-op for adapters without the field)', () => {
expect(() => assertValidRuntimeDefInactivityTimeoutMs()).not.toThrow();
expect(() => assertValidRuntimeDefInactivityTimeoutMs(undefined)).not.toThrow();
});
it('returns silently for a valid non-negative integer (e.g. Copilot 30 min)', () => {
expect(() => assertValidRuntimeDefInactivityTimeoutMs(THIRTY_MINUTES_MS)).not.toThrow();
expect(() => assertValidRuntimeDefInactivityTimeoutMs(0)).not.toThrow();
});
it('throws on NaN / Infinity — bad checked-in source must surface, not be silently normalized', () => {
expect(() => assertValidRuntimeDefInactivityTimeoutMs(Number.NaN)).toThrow(
/must be a non-negative integer/,
);
expect(() => assertValidRuntimeDefInactivityTimeoutMs(Number.POSITIVE_INFINITY)).toThrow(
/must be a non-negative integer/,
);
});
it('throws on a negative def value (typo footgun)', () => {
expect(() => assertValidRuntimeDefInactivityTimeoutMs(-1)).toThrow(
/must be a non-negative integer/,
);
});
it('throws on a fractional def value (wrong-units typo, e.g. seconds instead of ms)', () => {
expect(() => assertValidRuntimeDefInactivityTimeoutMs(60.5)).toThrow(
/must be a non-negative integer/,
);
});
it('throws specifically `RangeError` for every invalid def value — the chat-run startup wrapper relies on this to narrow its catch and only flag AGENT_RUNTIME_DEF_INVALID for genuine validation failures (#2579 round-5 review)', () => {
// The wrapper in server.ts now does:
// try { assertValidRuntimeDefInactivityTimeoutMs(...) }
// catch (err) {
// if (err instanceof RangeError) → AGENT_RUNTIME_DEF_INVALID
// else throw err
// }
// If the helper ever starts throwing TypeError / generic Error
// (e.g. an unrelated bug inside it, or a refactor that wraps the
// RangeError), the wrapper would either misroute the failure
// (the old `err instanceof Error` catch did) or skip the
// structured fail entirely. Pin the error class so the contract
// between helper and wrapper cannot drift silently.
for (const bad of [Number.NaN, Number.POSITIVE_INFINITY, Number.NEGATIVE_INFINITY, -1, 60.5]) {
expect(() => assertValidRuntimeDefInactivityTimeoutMs(bad)).toThrow(RangeError);
}
});
it('runs independently of the env override — an env value cannot mask the typo', () => {
const originalEnv = process.env[ENV_KEY];
process.env[ENV_KEY] = '15000';
try {
expect(() => assertValidRuntimeDefInactivityTimeoutMs(-1)).toThrow(
/must be a non-negative integer/,
);
} finally {
if (originalEnv === undefined) delete process.env[ENV_KEY];
else process.env[ENV_KEY] = originalEnv;
}
});
});

View file

@ -29,6 +29,14 @@ export const API_ERROR_CODES = [
// `server.ts::abortForRoleMarker` alongside the existing
// `fabricated_role_marker` warning event. Retryable.
'ROLE_MARKER_HALLUCINATION',
// The selected runtime agent def (apps/daemon/src/runtimes/defs/*) has
// a checked-in field that fails strict source-config validation — e.g.
// a non-integer, NaN, Infinity, or negative `inactivityTimeoutMs`
// (issue #2467 review on PR #2579). The bug is in the source file;
// the operator cannot recover the run, the daemon must abort it and
// surface the def-correctness error so it shows up in dev rather
// than silently disabling the agent-specific watchdog.
'AGENT_RUNTIME_DEF_INVALID',
'PROJECT_NOT_FOUND',
// Handoff (`POST /api/projects/:id/handoff`): the requested conversation
// is not in the project, or has no messages to synthesize a handoff from.

View file

@ -0,0 +1,18 @@
import { describe, expect, it } from 'vitest';
import { API_ERROR_CODES, type ApiErrorCode } from '../src/errors';
describe('shared API error codes', () => {
it('exposes AGENT_RUNTIME_DEF_INVALID for runtime-def validation failures', () => {
// Chat-run startup emits this code through the shared SSE/status error
// envelopes when a checked-in runtime def is invalid. Keeping the
// assertion in the contracts package ensures contract-only refactors
// cannot drop the literal without this package's own test lane failing.
expect(API_ERROR_CODES).toContain('AGENT_RUNTIME_DEF_INVALID');
});
it('keeps AGENT_RUNTIME_DEF_INVALID assignable to ApiErrorCode', () => {
const code: ApiErrorCode = 'AGENT_RUNTIME_DEF_INVALID';
expect(code).toBe('AGENT_RUNTIME_DEF_INVALID');
});
});