Merge 5346fd7484 into 333a62cda6

2026-05-31 19:04:39 +07:00 · 2026-05-31 04:51:34 +00:00 · 2026-05-31 04:51:34 +00:00 · f53f1d2399
commit f53f1d2399
parent 333a62cda6 5346fd7484
7 changed files with 350 additions and 11 deletions
--- a/apps/daemon/src/runtimes/defs/copilot.ts
+++ b/apps/daemon/src/runtimes/defs/copilot.ts
@ -67,4 +67,14 @@ export const copilotAgentDef = {
    },
    promptViaStdin: true,
    streamFormat: 'copilot-stream-json',
+    // GitHub Copilot's deck-generation and large-prompt turns go silent
+    // (no stdout, no streamed events) for stretches that exceed the
+    // 10-minute global default — the model is still working but the
+    // CLI does not emit keepalive frames. The default watchdog used to
+    // kill those runs as `stalled` even though the agent was healthy
+    // (issue #2467: "GitHub Copilot agent getting stuck after 10 mins
+    // and few seconds"). 30 minutes gives the heavy turns room to land
+    // while still bounding genuine hangs; operators can override via
+    // `OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS` if they want it tighter.
+    inactivityTimeoutMs: 30 * 60 * 1000,
 } satisfies RuntimeAgentDef;
--- a/apps/daemon/src/runtimes/detection.ts
+++ b/apps/daemon/src/runtimes/detection.ts
@ -207,6 +207,9 @@ function stripFns(
  // `fallbackModels` slot here too. `helpArgs` / `capabilityFlags` /
  // `fallbackBins` / `maxPromptArgBytes` / `env` are probe-or-spawn-only
  // metadata and shouldn't bleed into the API response either.
+  // `inactivityTimeoutMs` is a spawn-time hint for the chat-run watchdog
+  // and is not part of the public AgentInfo contract — strip it here so
+  // the runtime registry stays the only consumer.
  const {
    buildArgs,
    listModels,
@ -218,6 +221,7 @@ function stripFns(
    versionProbeTimeoutMs,
    maxPromptArgBytes,
    env,
+    inactivityTimeoutMs,
    ...rest
  } = def;
  return rest;
--- a/apps/daemon/src/runtimes/types.ts
+++ b/apps/daemon/src/runtimes/types.ts
@ -162,6 +162,14 @@ export type RuntimeAgentDef = {
  // present in the daemon's `process.env`; Settings-UI per-agent env
  // values only reach the spawned child and are NOT consulted here.
  defaultModelEnvVar?: string;
+  // Agent-recommended override for the chat-run inactivity watchdog.
+  // The watchdog observes child stdout/stderr/SSE activity, not real
+  // CPU progress, so agents whose CLIs go silent for long stretches
+  // during legitimate work (e.g. Copilot's deck-generation thinking
+  // phase from #2467) need a longer ceiling than the 10-minute global
+  // default. Operators can still override per-process via
+  // `OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS` — that env wins.
+  inactivityTimeoutMs?: number;
 };

 export type DetectedAgent = Omit<
@ -176,6 +184,13 @@ export type DetectedAgent = Omit<
  | 'versionProbeTimeoutMs'
  | 'maxPromptArgBytes'
  | 'env'
+  // `inactivityTimeoutMs` is a spawn-time-only hint consumed by the
+  // chat-run watchdog. It is not part of the public `/api/agents`
+  // contract (`packages/contracts/src/api/registry.ts#AgentInfo`), so
+  // omitting it here keeps the daemon response aligned with that
+  // shared web/CLI shape — agents pick it up by reading the runtime
+  // def directly, the registry payload stays unchanged.
+  | 'inactivityTimeoutMs'
 > & {
  models: RuntimeModelOption[];
  modelsSource: RuntimeModelSource;
--- a/apps/daemon/src/server.ts
+++ b/apps/daemon/src/server.ts
@ -3743,16 +3743,58 @@ const MAX_CHAT_RUN_INACTIVITY_TIMEOUT_MS = 24 * 60 * 60 * 1000;
 // always means the agent is winding down or hanging. See #1451.
 const DEFAULT_CHAT_RUN_ARTIFACT_QUIET_PERIOD_MS = 60 * 1000;

-function resolveChatRunInactivityTimeoutMs() {
-  const raw = Number(process.env.OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS);
-  // This watchdog observes child stdout/stderr/SSE activity, not real CPU or
-  // filesystem progress. Keep the default long enough for agents that spend
-  // several minutes silently writing large artifacts.
-  if (!Number.isFinite(raw)) return DEFAULT_CHAT_RUN_INACTIVITY_TIMEOUT_MS;
-  // Node clamps delays larger than a signed 32-bit integer down to 1ms, which
-  // makes an oversized override fail almost immediately while reporting a huge
-  // timeout. Keep explicit overrides bounded to a practical, timer-safe value.
-  return Math.min(MAX_CHAT_RUN_INACTIVITY_TIMEOUT_MS, Math.max(0, Math.floor(raw)));
+// Resolve the chat-run inactivity watchdog ceiling. Priority order:
+//   1. `OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS` (operator escape hatch).
+//   2. The agent runtime def's `inactivityTimeoutMs` recommendation —
+//      lets agents whose CLIs go silent for long stretches during
+//      legitimate work (e.g. Copilot from #2467) raise the ceiling
+//      without every operator having to set an env var.
+//   3. The 10-minute global default.
+//
+// The env path is lenient (silently normalizes / falls back) because it
+// comes from a runtime knob an operator can mis-type at any time. The
+// def path is strict (throws on non-finite or negative) because the
+// value lives in checked-in source — a typo like `inactivityTimeoutMs: -1`
+// should crash loudly at chat-run time rather than silently disable the
+// watchdog for that agent. Both paths still pass through the 24-hour
+// clamp because Node silently downgrades signed-32-bit-overflowing
+// setTimeout delays to 1ms.
+//
+// Order matters: validate the def hint *before* checking the env
+// override. Otherwise a finite env value would hide a bad checked-in
+// value (e.g. `inactivityTimeoutMs: -1`) from ever tripping the
+// fast-fail — the typo could sit unnoticed in source until someone
+// removed the override. Validation now runs on every call regardless
+// of which branch ultimately wins.
+export function assertValidRuntimeDefInactivityTimeoutMs(agentDefault?: number): void {
+  // Strict checked-in-config guard for `RuntimeAgentDef.inactivityTimeoutMs`.
+  // Exported (and called by `resolveChatRunInactivityTimeoutMs` below)
+  // so that chat-run startup can invoke this immediately after the
+  // runtime def is selected — before any filesystem or
+  // prompt-building work happens — and abort with a loud RangeError
+  // when a checked-in def carries an invalid value. That keeps a
+  // typo from leaving partial setup state (e.g. a `.mcp.json` write
+  // / unlink, a freshly-composed system prompt) on disk when the
+  // run then aborts later at watchdog-arm time.
+  if (agentDefault === undefined) return;
+  if (!Number.isFinite(agentDefault) || agentDefault < 0 || !Number.isInteger(agentDefault)) {
+    throw new RangeError(
+      `RuntimeAgentDef.inactivityTimeoutMs must be a non-negative integer, got ${String(agentDefault)}. ` +
+        'Fix the runtime def — invalid values used to silently disable the watchdog.',
+    );
+  }
+}
+
+export function resolveChatRunInactivityTimeoutMs(agentDefault?: number) {
+  assertValidRuntimeDefInactivityTimeoutMs(agentDefault);
+  const env = Number(process.env.OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS);
+  if (Number.isFinite(env)) {
+    return Math.min(MAX_CHAT_RUN_INACTIVITY_TIMEOUT_MS, Math.max(0, Math.floor(env)));
+  }
+  if (agentDefault !== undefined) {
+    return Math.min(MAX_CHAT_RUN_INACTIVITY_TIMEOUT_MS, agentDefault);
+  }
+  return DEFAULT_CHAT_RUN_INACTIVITY_TIMEOUT_MS;
 }

 // Resolve the post-artifact quiet-period window. Same clamp as the outer
@ -10750,6 +10792,30 @@ export async function startServer({
      );
    if (!def.bin)
      return design.runs.fail(run, 'AGENT_UNAVAILABLE', 'agent has no binary');
+    // Validate the checked-in `inactivityTimeoutMs` hint immediately
+    // after the runtime def is selected and before any side-effectful
+    // setup (auto-memory extract, `.mcp.json` write/unlink,
+    // composeSystemPrompt, prompt persistence). A bad def value would
+    // otherwise abort the run only at watchdog-arm time, after that
+    // setup has already mutated local state, leaving confusing partial
+    // residue behind (issue #2467 review on PR #2579).
+    //
+    // Catch is intentionally narrowed to `RangeError`, the only kind
+    // `assertValidRuntimeDefInactivityTimeoutMs` is allowed to throw
+    // for invalid checked-in values. Anything else (a regression that
+    // makes the helper throw on a valid value, an unrelated bug
+    // introduced while touching this path) should bubble up to the
+    // outer chat-run starter — which surfaces it as
+    // `AGENT_EXECUTION_FAILED` — rather than being misreported as
+    // "the runtime def is bad" and burying the real failure.
+    try {
+      assertValidRuntimeDefInactivityTimeoutMs(def.inactivityTimeoutMs);
+    } catch (err) {
+      if (err instanceof RangeError) {
+        return design.runs.fail(run, 'AGENT_RUNTIME_DEF_INVALID', err.message);
+      }
+      throw err;
+    }
    const safeCommentAttachments =
      normalizeCommentAttachments(commentAttachments);
    if (
@ -11537,7 +11603,7 @@ export async function startServer({
    // here; on this branch `send` was hoisted into the AMR preflight
    // earlier, so we keep only the new `runStartTimeMs` declaration.
    const runStartTimeMs = Date.now();
-    const inactivityTimeoutMs = resolveChatRunInactivityTimeoutMs();
+    const inactivityTimeoutMs = resolveChatRunInactivityTimeoutMs(def.inactivityTimeoutMs);
    const artifactQuietPeriodMs = resolveChatRunArtifactQuietPeriodMs();
    const inactivityKillGraceMs = 3_000;
    let inactivityTimer = null;
--- a/apps/daemon/tests/chat-run-inactivity-timeout.test.ts
+++ b/apps/daemon/tests/chat-run-inactivity-timeout.test.ts
@ -0,0 +1,218 @@
+/**
+ * Per-agent inactivity-timeout resolution (#2467).
+ *
+ * The chat-run inactivity watchdog defaults to 10 minutes. Some agents
+ * (GitHub Copilot CLI) genuinely stay silent for longer than that on
+ * heavy deck-generation turns — the model is still working but emits
+ * no stdout, so the watchdog used to kill the run as `stalled` even
+ * though the agent was healthy.
+ *
+ * Runtime defs can now advertise a recommended `inactivityTimeoutMs`,
+ * and the resolver merges it under the env override:
+ *
+ *   OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS   highest priority (operator override)
+ *   def.inactivityTimeoutMs             next (agent-specific recommendation)
+ *   DEFAULT_CHAT_RUN_INACTIVITY_TIMEOUT_MS (10 min)   global default
+ */
+
+import { afterEach, describe, expect, it } from 'vitest';
+import {
+  assertValidRuntimeDefInactivityTimeoutMs,
+  resolveChatRunInactivityTimeoutMs,
+} from '../src/server.js';
+import { copilotAgentDef } from '../src/runtimes/defs/copilot.js';
+
+const ENV_KEY = 'OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS';
+const TEN_MINUTES_MS = 10 * 60 * 1000;
+const THIRTY_MINUTES_MS = 30 * 60 * 1000;
+const TWENTY_FOUR_HOURS_MS = 24 * 60 * 60 * 1000;
+
+describe('resolveChatRunInactivityTimeoutMs', () => {
+  const originalEnv = process.env[ENV_KEY];
+
+  afterEach(() => {
+    if (originalEnv === undefined) {
+      delete process.env[ENV_KEY];
+    } else {
+      process.env[ENV_KEY] = originalEnv;
+    }
+  });
+
+  it('returns the 10-minute global default when no def hint and no env override are set', () => {
+    delete process.env[ENV_KEY];
+    expect(resolveChatRunInactivityTimeoutMs()).toBe(TEN_MINUTES_MS);
+  });
+
+  it('uses the def-level hint when env is unset', () => {
+    delete process.env[ENV_KEY];
+    expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(THIRTY_MINUTES_MS);
+  });
+
+  it('lets the env override take precedence over the def hint (operator escape hatch)', () => {
+    // Operators must be able to shrink or lengthen the watchdog for any
+    // agent without editing source — diagnosing flaky CLIs, taming runaway
+    // sessions, etc.
+    process.env[ENV_KEY] = '900000'; // 15 min
+    expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(900_000);
+  });
+
+  it('falls back to the def hint when the env value is not a finite number', () => {
+    process.env[ENV_KEY] = 'not-a-number';
+    expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(THIRTY_MINUTES_MS);
+  });
+
+  it('still honors env=0 to disable the watchdog entirely', () => {
+    // Existing behavior the watchdog code already supports — preserve it
+    // even when an agent def would otherwise contribute a larger value.
+    process.env[ENV_KEY] = '0';
+    expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(0);
+  });
+
+  it('clamps an oversized env override to the 24-hour ceiling so Node does not fire the timer immediately', () => {
+    process.env[ENV_KEY] = String(TWENTY_FOUR_HOURS_MS * 100);
+    expect(resolveChatRunInactivityTimeoutMs()).toBe(TWENTY_FOUR_HOURS_MS);
+  });
+
+  it('clamps an oversized def hint to the 24-hour ceiling for the same reason', () => {
+    delete process.env[ENV_KEY];
+    expect(resolveChatRunInactivityTimeoutMs(TWENTY_FOUR_HOURS_MS * 100)).toBe(TWENTY_FOUR_HOURS_MS);
+  });
+
+  it('throws RangeError on a non-finite def hint — runtime defs are checked-in source and a typo must fail loudly, not silently disable the watchdog', () => {
+    // The previous draft normalized NaN/Infinity to the global default,
+    // which made a runtime def like `inactivityTimeoutMs: Number.NaN`
+    // look fine in source review while quietly losing the agent-specific
+    // ceiling at runtime. The reviewer-correctness fix: fast-fail on the
+    // checked-in side so the bug surfaces in dev, not in production logs.
+    delete process.env[ENV_KEY];
+    expect(() => resolveChatRunInactivityTimeoutMs(Number.NaN)).toThrow(
+      /RuntimeAgentDef\.inactivityTimeoutMs/,
+    );
+  });
+
+  it('throws RangeError on a negative def hint — `-1` would silently disable the agent-specific watchdog otherwise', () => {
+    delete process.env[ENV_KEY];
+    expect(() => resolveChatRunInactivityTimeoutMs(-1)).toThrow(
+      /must be a non-negative integer/,
+    );
+  });
+
+  it('throws RangeError on a fractional def hint — Math.floor would mask a wrong-units typo (e.g. seconds instead of ms)', () => {
+    // A def value like `inactivityTimeoutMs: 30` (seconds, written
+    // forgetting the unit is ms) is a real footgun; refusing
+    // non-integer floats keeps such typos from getting silently
+    // floored to a 0ms or 30ms timer. Operators can still pass
+    // anything via the env override.
+    delete process.env[ENV_KEY];
+    expect(() => resolveChatRunInactivityTimeoutMs(60.5)).toThrow(
+      /must be a non-negative integer/,
+    );
+  });
+
+  it('keeps the env override lenient when its value is bogus and no def hint is provided (falls back to the global default)', () => {
+    // env comes from operator-supplied configuration that can be
+    // mistyped at any time. Unlike the def path it must not crash the
+    // chat run — fall back to the 10-minute default instead.
+    process.env[ENV_KEY] = 'not-a-number';
+    expect(resolveChatRunInactivityTimeoutMs()).toBe(TEN_MINUTES_MS);
+  });
+
+  it('still throws on an invalid def hint when a finite env override is also set — the env must not hide a checked-in typo', () => {
+    // Reviewer-correctness fix: an earlier ordering placed the env
+    // early-return before the def validation, so a bad runtime def
+    // could sit unnoticed in source whenever an operator had set
+    // OD_CHAT_RUN_INACTIVITY_TIMEOUT_MS. The fast-fail now runs first
+    // and catches the typo regardless of which branch eventually
+    // wins.
+    process.env[ENV_KEY] = '15000';
+    expect(() => resolveChatRunInactivityTimeoutMs(-1)).toThrow(
+      /must be a non-negative integer/,
+    );
+  });
+
+  it('returns the env override when both the env and a valid def hint are provided (env wins as operator escape hatch)', () => {
+    // Sanity: validation order does not change priority — env still
+    // wins when both are valid.
+    process.env[ENV_KEY] = '15000';
+    expect(resolveChatRunInactivityTimeoutMs(THIRTY_MINUTES_MS)).toBe(15_000);
+  });
+});
+
+describe('copilotAgentDef.inactivityTimeoutMs', () => {
+  it('ships a 30-minute inactivity hint so Copilot silent-thinking phases do not trip the default watchdog (#2467)', () => {
+    expect(copilotAgentDef.inactivityTimeoutMs).toBe(THIRTY_MINUTES_MS);
+  });
+});
+
+describe('assertValidRuntimeDefInactivityTimeoutMs (#2579 fast-fail at def-select time)', () => {
+  // Reviewer-correctness fix (#2579 non-blocking, round 3): the
+  // strict checked-in-config check must run *immediately* after the
+  // runtime def is selected, before any side-effectful setup
+  // (`.mcp.json` write/unlink, prompt composition, file writes) so
+  // that an invalid checked-in `inactivityTimeoutMs` aborts the run
+  // before leaving partial state behind. The assert helper is the
+  // pure entry point chat-run startup can call without also touching
+  // env-resolution side effects.
+
+  it('returns silently when the def hint is omitted (no-op for adapters without the field)', () => {
+    expect(() => assertValidRuntimeDefInactivityTimeoutMs()).not.toThrow();
+    expect(() => assertValidRuntimeDefInactivityTimeoutMs(undefined)).not.toThrow();
+  });
+
+  it('returns silently for a valid non-negative integer (e.g. Copilot 30 min)', () => {
+    expect(() => assertValidRuntimeDefInactivityTimeoutMs(THIRTY_MINUTES_MS)).not.toThrow();
+    expect(() => assertValidRuntimeDefInactivityTimeoutMs(0)).not.toThrow();
+  });
+
+  it('throws on NaN / Infinity — bad checked-in source must surface, not be silently normalized', () => {
+    expect(() => assertValidRuntimeDefInactivityTimeoutMs(Number.NaN)).toThrow(
+      /must be a non-negative integer/,
+    );
+    expect(() => assertValidRuntimeDefInactivityTimeoutMs(Number.POSITIVE_INFINITY)).toThrow(
+      /must be a non-negative integer/,
+    );
+  });
+
+  it('throws on a negative def value (typo footgun)', () => {
+    expect(() => assertValidRuntimeDefInactivityTimeoutMs(-1)).toThrow(
+      /must be a non-negative integer/,
+    );
+  });
+
+  it('throws on a fractional def value (wrong-units typo, e.g. seconds instead of ms)', () => {
+    expect(() => assertValidRuntimeDefInactivityTimeoutMs(60.5)).toThrow(
+      /must be a non-negative integer/,
+    );
+  });
+
+  it('throws specifically `RangeError` for every invalid def value — the chat-run startup wrapper relies on this to narrow its catch and only flag AGENT_RUNTIME_DEF_INVALID for genuine validation failures (#2579 round-5 review)', () => {
+    // The wrapper in server.ts now does:
+    //   try { assertValidRuntimeDefInactivityTimeoutMs(...) }
+    //   catch (err) {
+    //     if (err instanceof RangeError) → AGENT_RUNTIME_DEF_INVALID
+    //     else throw err
+    //   }
+    // If the helper ever starts throwing TypeError / generic Error
+    // (e.g. an unrelated bug inside it, or a refactor that wraps the
+    // RangeError), the wrapper would either misroute the failure
+    // (the old `err instanceof Error` catch did) or skip the
+    // structured fail entirely. Pin the error class so the contract
+    // between helper and wrapper cannot drift silently.
+    for (const bad of [Number.NaN, Number.POSITIVE_INFINITY, Number.NEGATIVE_INFINITY, -1, 60.5]) {
+      expect(() => assertValidRuntimeDefInactivityTimeoutMs(bad)).toThrow(RangeError);
+    }
+  });
+
+  it('runs independently of the env override — an env value cannot mask the typo', () => {
+    const originalEnv = process.env[ENV_KEY];
+    process.env[ENV_KEY] = '15000';
+    try {
+      expect(() => assertValidRuntimeDefInactivityTimeoutMs(-1)).toThrow(
+        /must be a non-negative integer/,
+      );
+    } finally {
+      if (originalEnv === undefined) delete process.env[ENV_KEY];
+      else process.env[ENV_KEY] = originalEnv;
+    }
+  });
+});
--- a/packages/contracts/src/errors.ts
+++ b/packages/contracts/src/errors.ts
@ -29,6 +29,14 @@ export const API_ERROR_CODES = [
  // `server.ts::abortForRoleMarker` alongside the existing
  // `fabricated_role_marker` warning event. Retryable.
  'ROLE_MARKER_HALLUCINATION',
+  // The selected runtime agent def (apps/daemon/src/runtimes/defs/*) has
+  // a checked-in field that fails strict source-config validation — e.g.
+  // a non-integer, NaN, Infinity, or negative `inactivityTimeoutMs`
+  // (issue #2467 review on PR #2579). The bug is in the source file;
+  // the operator cannot recover the run, the daemon must abort it and
+  // surface the def-correctness error so it shows up in dev rather
+  // than silently disabling the agent-specific watchdog.
+  'AGENT_RUNTIME_DEF_INVALID',
  'PROJECT_NOT_FOUND',
  // Handoff (`POST /api/projects/:id/handoff`): the requested conversation
  // is not in the project, or has no messages to synthesize a handoff from.
--- a/packages/contracts/tests/error-codes.test.ts
+++ b/packages/contracts/tests/error-codes.test.ts
@ -0,0 +1,18 @@
+import { describe, expect, it } from 'vitest';
+
+import { API_ERROR_CODES, type ApiErrorCode } from '../src/errors';
+
+describe('shared API error codes', () => {
+  it('exposes AGENT_RUNTIME_DEF_INVALID for runtime-def validation failures', () => {
+    // Chat-run startup emits this code through the shared SSE/status error
+    // envelopes when a checked-in runtime def is invalid. Keeping the
+    // assertion in the contracts package ensures contract-only refactors
+    // cannot drop the literal without this package's own test lane failing.
+    expect(API_ERROR_CODES).toContain('AGENT_RUNTIME_DEF_INVALID');
+  });
+
+  it('keeps AGENT_RUNTIME_DEF_INVALID assignable to ApiErrorCode', () => {
+    const code: ApiErrorCode = 'AGENT_RUNTIME_DEF_INVALID';
+    expect(code).toBe('AGENT_RUNTIME_DEF_INVALID');
+  });
+});