mirror of
https://github.com/nexu-io/open-design.git
synced 2026-06-01 03:14:35 +07:00
* fix(daemon): restore full assistant turn after mid-flight reload reattach When a daemon run is in progress and the browser reloads, the client reattaches and the artifact recovers, but the restored chat turn drops assistant text, thinking events, and producedFiles. Three independent defects combine to cause this: 1. The reattach onDone never populated producedFiles. The pre-turn file snapshot used as the diff baseline lived only in a closure. Now it is persisted on the assistant message as preTurnFileNames so the reattach path can rebuild the diff after reload. 2. The SSE replay used a strict `>` cursor compare. A client that had already persisted lastRunEventId equal to the final event id received zero replay events on terminal-run reattach, fell into the status-only REST fallback, and never fired a clean onDone. The server now replays the final buffered event on terminal-run reattach when the cursor is at or past the end, so the client always sees a terminal signal. 3. The text buffer flushed on visibilitychange but not on pagehide. Hard reloads on browsers where visibilitychange does not fire before teardown could lose the last ~250ms of streamed text from the persisted message. A pagehide listener now flushes synchronously. Refactor: extracted computeProducedFiles helper so the send and reattach flows share the diff logic and cannot drift apart again. Tests: - apps/web/tests/components/ProjectView.reattach-restore.test.tsx covers: reattach onDone populates producedFiles from preTurnFileNames; reattach reaches succeeded via SSE even when only the end event replays; computeProducedFiles unit cases. - apps/daemon/tests/runs.test.ts adds replay-cursor coverage for both the terminal-replay safety branch and the no-duplicate normal branch. * fix(daemon): persist preTurnFileNames end-to-end on the messages table Review on #2383 caught that `ChatMessage.preTurnFileNames` (added in packages/contracts) had no daemon-side persistence: the messages schema, upsertMessage, and normalizeMessage all ignored the field. saveMessage() would PUT the field, the daemon would silently drop it, and a real page reload would read a row without `preTurnFileNames`, so the reattach onDone fell back to `new Set(nextFiles.map(...))` and still missed files produced earlier in the turn. This commit closes the round trip: - New `pre_turn_file_names_json TEXT` column on the messages table, with a forward-compatible ALTER for existing databases (same pattern as agent_id / feedback_json / run_status). - Both upsertMessage branches (UPDATE and INSERT) now serialize m.preTurnFileNames into the new column. - listMessages, the post-upsert readback SELECT, and normalizeMessage surface the column back to callers. Round-trip tests in apps/daemon/tests/db-pre-turn-file-names.test.ts cover: write+listMessages, the UPDATE upsert path preserving the baseline, and a legacy-row case returning undefined. * fix(web): preserve terminal status + full multi-file diff on reattach Two correctness issues caught in review of the prior reattach commits: 1. The reattach onDone path hard-coded `runStatus: 'succeeded'`, which overwrote a 'failed' or 'canceled' status that the replayed terminal event had already recorded via onRunStatus. Restored messages would come back as success even when the run had actually failed or been canceled. Now derives the final status from `prev.runStatus` via the existing `resolveSucceededRunStatus` helper, mirroring the send path at line 2333. 2. When `findExistingArtifactProjectFile()` recovered an existing on-disk artifact, the produced-files list was replaced with that single file, dropping any other files the turn had created earlier. Now always computes the full diff against `preTurnFileNames`, then appends the recovered artifact only if it isn't already in that set. Extracted as `mergeRecoveredArtifact(diff, recovered)` so the logic is a unit-testable invariant. Tests in ProjectView.reattach-restore.test.tsx: - mergeRecoveredArtifact: three cases (recovered appended to pre-files, no duplication when already in the diff, passthrough on no recovery). - reattach failed-status: onRunStatus('failed') → onDone → final saveMessage has runStatus 'failed', not 'succeeded'. - reattach canceled-status: same shape for cancellation. * fix(web): force keepalive PUT on pagehide so the last buffered chunk survives reload Review on #2383 caught that onPageHide() only called flush(), which updates React state then schedules persistSoon() — a 500ms debounce. On a hard reload the page tears down before that timer fires, so the final ~250ms of streamed text never reaches the daemon. Threaded a new flushAndPersistNow() callback through createBufferedTextUpdates(). Both buffer call sites (send-path + reattach-path) supply it backed by persistMessageById(id, { keepalive: true }). saveMessage in state/projects.ts forwards the new SaveMessageOptions.keepalive flag onto fetch's keepalive option, which the browser honors specifically for unload-time requests. onPageHide now calls flush() followed by flushAndPersistNow?.(), so: - flush() pushes the buffered delta into React state synchronously - the immediate persistMessageById then PUTs the updated message with keepalive:true, surviving document teardown Regression test in ProjectView.reattach-restore.test.tsx: stream a delta, dispatch pagehide, assert saveMessage was called with the flushed content AND { keepalive: true } before the 500ms debounce would otherwise have fired.
203 lines
6.6 KiB
TypeScript
203 lines
6.6 KiB
TypeScript
import { EventEmitter } from 'node:events';
|
|
import { describe, expect, it, vi } from 'vitest';
|
|
|
|
import { createChatRunService } from '../src/runs.js';
|
|
|
|
describe('chat run service shutdown', () => {
|
|
it('retains structured error details on failed run status bodies', async () => {
|
|
const runs = createRuns();
|
|
const run = runs.create({ projectId: 'project-1', conversationId: 'conv-1' });
|
|
|
|
const wait = runs.wait(run);
|
|
runs.emit(run, 'error', {
|
|
message: 'Agent stalled without emitting any new output for 1s.',
|
|
error: {
|
|
code: 'AGENT_EXECUTION_FAILED',
|
|
message: 'Agent stalled without emitting any new output for 1s.',
|
|
retryable: true,
|
|
},
|
|
});
|
|
runs.finish(run, 'failed', 1, null);
|
|
|
|
expect(runs.statusBody(run)).toMatchObject({
|
|
status: 'failed',
|
|
errorCode: 'AGENT_EXECUTION_FAILED',
|
|
error: 'Agent stalled without emitting any new output for 1s.',
|
|
});
|
|
await expect(wait).resolves.toMatchObject({
|
|
status: 'failed',
|
|
errorCode: 'AGENT_EXECUTION_FAILED',
|
|
error: 'Agent stalled without emitting any new output for 1s.',
|
|
});
|
|
});
|
|
|
|
it('filters active runs by conversation within the same project', () => {
|
|
const runs = createRuns();
|
|
const runA = runs.create({ projectId: 'project-1', conversationId: 'conv-a' });
|
|
const runB = runs.create({ projectId: 'project-1', conversationId: 'conv-b' });
|
|
runA.status = 'running';
|
|
runB.status = 'running';
|
|
|
|
expect(
|
|
runs.list({ projectId: 'project-1', conversationId: 'conv-b', status: 'active' }),
|
|
).toEqual([runB]);
|
|
});
|
|
|
|
it('cancels active runs and terminates their child process during daemon shutdown', async () => {
|
|
const runs = createRuns();
|
|
const child = new FakeChildProcess({ closeOn: 'SIGTERM' });
|
|
const run = runs.create({ projectId: 'project-1', conversationId: 'conv-1' });
|
|
run.status = 'running';
|
|
(run as any).child = child;
|
|
|
|
const wait = runs.wait(run);
|
|
await runs.shutdownActive({ graceMs: 10 });
|
|
|
|
expect(child.signals).toEqual(['SIGTERM']);
|
|
expect(run.status).toBe('canceled');
|
|
expect(run.cancelRequested).toBe(true);
|
|
expect(run.signal).toBe('SIGTERM');
|
|
await expect(wait).resolves.toMatchObject({ status: 'canceled', signal: 'SIGTERM' });
|
|
expect(run.events.at(-1)).toMatchObject({
|
|
event: 'end',
|
|
data: { status: 'canceled', signal: 'SIGTERM' },
|
|
});
|
|
});
|
|
|
|
it('escalates to SIGKILL when a child ignores the shutdown SIGTERM grace window', async () => {
|
|
const runs = createRuns();
|
|
const child = new FakeChildProcess({ closeOn: 'SIGKILL' });
|
|
const run = runs.create();
|
|
run.status = 'running';
|
|
(run as any).child = child;
|
|
|
|
await runs.shutdownActive({ graceMs: 1 });
|
|
|
|
expect(child.signals).toEqual(['SIGTERM', 'SIGKILL']);
|
|
expect(run.status).toBe('canceled');
|
|
});
|
|
|
|
it('uses adapter abort before process signals for ACP-style runs', async () => {
|
|
const runs = createRuns();
|
|
const child = new FakeChildProcess({ closeOn: 'SIGTERM' });
|
|
const abort = vi.fn();
|
|
const run = runs.create();
|
|
run.status = 'running';
|
|
(run as any).child = child;
|
|
(run as any).acpSession = { abort };
|
|
|
|
await runs.shutdownActive({ graceMs: 10 });
|
|
|
|
expect(abort).toHaveBeenCalledTimes(1);
|
|
expect(child.signals).toEqual(['SIGTERM']);
|
|
expect(run.status).toBe('canceled');
|
|
});
|
|
});
|
|
|
|
describe('chat run service stream replay', () => {
|
|
it('always replays the final event when a reattaching client cursor is at the end of a terminal run', () => {
|
|
const sendCalls: Array<{ event: string; data: unknown; id: number }> = [];
|
|
const endCalls: number[] = [];
|
|
const runs = createChatRunService({
|
|
createSseResponse: () => ({
|
|
send: vi.fn((event: string, data: unknown, id: number) => {
|
|
sendCalls.push({ event, data, id });
|
|
return true;
|
|
}),
|
|
end: vi.fn(() => endCalls.push(1)),
|
|
cleanup: vi.fn(),
|
|
}),
|
|
createSseErrorPayload: (code: string, message: string) => ({ error: { code, message } }),
|
|
shutdownGraceMs: 10,
|
|
ttlMs: 60_000,
|
|
});
|
|
|
|
const run = runs.create({ projectId: 'p', conversationId: 'c' }) as any;
|
|
runs.emit(run, 'stdout', { text: 'hello' });
|
|
runs.finish(run, 'succeeded', 0, null);
|
|
|
|
const finalEventId = run.events.at(-1).id;
|
|
const fakeReq = {
|
|
get: () => null,
|
|
query: { after: String(finalEventId) },
|
|
} as never;
|
|
const fakeRes = { on: () => {} } as never;
|
|
|
|
sendCalls.length = 0;
|
|
runs.stream(run, fakeReq, fakeRes);
|
|
|
|
expect(sendCalls.length).toBeGreaterThanOrEqual(1);
|
|
expect(sendCalls.at(-1)?.event).toBe('end');
|
|
expect(endCalls.length).toBe(1);
|
|
});
|
|
|
|
it('does not duplicate events when the cursor sits before the final event', () => {
|
|
const sendCalls: Array<{ event: string; data: unknown; id: number }> = [];
|
|
const runs = createChatRunService({
|
|
createSseResponse: () => ({
|
|
send: vi.fn((event: string, data: unknown, id: number) => {
|
|
sendCalls.push({ event, data, id });
|
|
return true;
|
|
}),
|
|
end: vi.fn(),
|
|
cleanup: vi.fn(),
|
|
}),
|
|
createSseErrorPayload: (code: string, message: string) => ({ error: { code, message } }),
|
|
shutdownGraceMs: 10,
|
|
ttlMs: 60_000,
|
|
});
|
|
|
|
const run = runs.create() as any;
|
|
runs.emit(run, 'stdout', { text: 'a' });
|
|
runs.emit(run, 'stdout', { text: 'b' });
|
|
runs.finish(run, 'succeeded', 0, null);
|
|
|
|
const cursor = run.events[0].id;
|
|
runs.stream(
|
|
run,
|
|
{ get: () => null, query: { after: String(cursor) } } as never,
|
|
{ on: () => {} } as never,
|
|
);
|
|
|
|
expect(sendCalls.map((c) => c.id)).toEqual(
|
|
run.events.filter((e: { id: number }) => e.id > cursor).map((e: { id: number }) => e.id),
|
|
);
|
|
});
|
|
});
|
|
|
|
function createRuns() {
|
|
return createChatRunService({
|
|
createSseResponse: () => ({
|
|
send: vi.fn(() => true),
|
|
end: vi.fn(),
|
|
cleanup: vi.fn(),
|
|
}),
|
|
createSseErrorPayload: (code: string, message: string) => ({ error: { code, message } }),
|
|
shutdownGraceMs: 10,
|
|
ttlMs: 60_000,
|
|
});
|
|
}
|
|
|
|
class FakeChildProcess extends EventEmitter {
|
|
exitCode: number | null = null;
|
|
signalCode: string | null = null;
|
|
killed = false;
|
|
signals: string[] = [];
|
|
|
|
constructor(private readonly options: { closeOn: 'SIGTERM' | 'SIGKILL' }) {
|
|
super();
|
|
}
|
|
|
|
kill(signal: string): boolean {
|
|
this.killed = true;
|
|
this.signals.push(signal);
|
|
if (signal === this.options.closeOn) {
|
|
this.signalCode = signal;
|
|
queueMicrotask(() => {
|
|
this.emit('exit', null, signal);
|
|
this.emit('close', null, signal);
|
|
});
|
|
}
|
|
return true;
|
|
}
|
|
}
|