import { describe, expect, it, vi } from 'vitest';
import {
FORM_ANSWERED_GENERIC_OVERRIDE,
composeChatUserRequestForAgent,
createFinalizedMessageTelemetryReporter,
shouldReportRunCompletedFromMessage,
telemetryPromptFromRunRequest,
} from '../src/server.js';
describe('Langfuse message finalization gate', () => {
// Shared fixture: an assistant message tied to run `run-1` whose persisted
// `runStatus` is already 'succeeded'. Tests spread it and override single
// fields rather than mutating it.
const terminalMessage = {
id: 'assistant-1',
role: 'assistant',
content: 'final answer',
runId: 'run-1',
runStatus: 'succeeded',
};
it('does not report when only terminal runStatus has been persisted', () => {
  // The second argument is a plain copy of the message: terminal status is
  // present, but there is no `telemetryFinalized` marker.
  const unmarkedCopy = { ...terminalMessage };

  const shouldReport = shouldReportRunCompletedFromMessage(terminalMessage, unmarkedCopy);

  expect(shouldReport).toBe(false);
});
it('reports only on the final telemetry-marked message write', () => {
  // Same message, but this time the second argument carries the
  // `telemetryFinalized` marker (plus an empty `producedFiles` list).
  const finalizedWrite = {
    ...terminalMessage,
    producedFiles: [],
    telemetryFinalized: true,
  };

  const shouldReport = shouldReportRunCompletedFromMessage(terminalMessage, finalizedWrite);

  expect(shouldReport).toBe(true);
});
it('ignores non-terminal run statuses even if marked finalized', () => {
  // A run that is still 'running' must not be reported, marker or not.
  const runningMessage = { ...terminalMessage, runStatus: 'running' };
  const finalizedMarker = { telemetryFinalized: true };

  const shouldReport = shouldReportRunCompletedFromMessage(runningMessage, finalizedMarker);

  expect(shouldReport).toBe(false);
});
it('uses the explicit current prompt for telemetry instead of the full transcript', () => {
  // First argument is the rendered multi-turn transcript; second is the
  // explicit prompt for the current turn only.
  const transcript =
    '## user\npre-consent brief\n\n## assistant\ndraft\n\n## user\npost-consent revision';
  const currentPrompt = 'post-consent revision';

  const telemetryPrompt = telemetryPromptFromRunRequest(transcript, currentPrompt);

  expect(telemetryPrompt).toBe('post-consent revision');
});
it('falls back to the legacy message when currentPrompt is absent', () => {
  // With no explicit current prompt, the first argument is returned as-is.
  const legacyMessage = 'legacy prompt';

  const telemetryPrompt = telemetryPromptFromRunRequest(legacyMessage, undefined);

  expect(telemetryPrompt).toBe('legacy prompt');
});
it('promotes discovery form answers above the transcript with a build-now instruction', () => {
  const currentPrompt = [
    '[form answers \u2014 discovery]',
    '- output: Dashboard / tool UI',
    '- brand: Pick a direction for me [value: pick_direction]',
  ].join('\n');
  // The transcript must be a single-line string literal: a raw line break
  // inside quotes is a syntax error and stops the whole file from loading.
  // NOTE(review): the assistant turn is modelled as the turn-1 discovery form
  // ask; none of the assertions below depend on its exact content — confirm
  // the markup against the real fixture.
  const transcript =
    '## user\ninitial brief\n\n## assistant\n<question-form id="discovery">...</question-form>';

  const prompt = composeChatUserRequestForAgent(transcript, currentPrompt);

  expect(prompt).toContain('## Latest user turn - form answers submitted');
  expect(prompt).toContain(currentPrompt);
  expect(prompt).toContain('The user has answered the discovery form.');
  expect(prompt).toContain('For Branch B answers, build now instead of asking another brief.');

  // Ordering check: the form answers must appear before the transcript header.
  const answersIndex = prompt.indexOf(currentPrompt);
  const transcriptHeaderIndex = prompt.indexOf('## Full conversation transcript');
  expect(transcriptHeaderIndex).toBeGreaterThan(answersIndex);
});
it('task-type form answers trigger the build transition just like discovery', () => {
  const transcript = '## user\ninitial brief';
  const formAnswers = '[form answers - task-type]\n- taskType: Slide deck';

  const prompt = composeChatUserRequestForAgent(transcript, formAnswers);

  // The task-type form gets the build-now wording, not the generic one.
  expect(prompt).toContain('The user has answered the task-type form.');
  expect(prompt).toContain('build now instead of asking another brief');
  expect(prompt).not.toContain('Treat these form answers as the active user turn');
});
it('unknown form ids get the generic transition without forcing the build', () => {
  const transcript = '## user\ninitial brief';
  const formAnswers = '[form answers - preferences]\n- theme: dark';

  const prompt = composeChatUserRequestForAgent(transcript, formAnswers);

  // An unrecognised form id gets the generic wording and no build-now push.
  expect(prompt).toContain('The user has answered the preferences form.');
  expect(prompt).toContain('Treat these form answers as the active user turn');
  expect(prompt).not.toContain('build now instead of asking another brief');
});
// `agy -c` carries its own conversation memory, so packing the
// rendered web transcript (the `## user` / `## assistant` blocks)
// into the user request duplicates context the upstream CLI already
// has — AND the embedded copy includes the literal `<question-form>`
// markup the agent emitted on turn 1, which the model then re-emits
// on turn 2, looking like the discovery form loop never breaks.
// With `skipTranscript: true`, only the latest user turn ships and
// the misleading "## Full conversation transcript" header is dropped.
it('drops the transcript and transcript header when skipTranscript is true', () => {
  const currentPrompt = [
    '[form answers — discovery]',
    '- output: Dashboard / tool UI',
    '- brand: Pick a direction for me [value: pick_direction]',
  ].join('\n');
  // Opening tag (with id) of the form the assistant emitted on turn 1.
  // NOTE(review): tag name/attribute reconstructed from the surrounding
  // comments — confirm it matches the markup the agent really emits.
  const priorFormOpeningTag = '<question-form id="discovery"';
  const transcript = [
    '## user',
    '初始需求',
    '',
    '## assistant',
    `${priorFormOpeningTag}>…</question-form>`,
    '',
    '## user',
    currentPrompt,
  ].join('\n');

  const prompt = composeChatUserRequestForAgent(transcript, currentPrompt, {
    skipTranscript: true,
  });

  // The form-answer transition still fires — that drives RULE 2 / 3.
  expect(prompt).toContain('The user has answered the discovery form.');
  // The latest user turn is preserved verbatim.
  expect(prompt).toContain(currentPrompt);
  // The transcript header is dropped — it was misleading because the
  // body underneath is no longer a transcript.
  expect(prompt).not.toContain('## Full conversation transcript');
  // The prior assistant turn's `<question-form id="discovery">` markup must
  // NOT leak in — that's the form-loop regression we're guarding.
  // (The transition block legitimately mentions "<question-form>"
  // in prose, so the assertion targets the opening tag the prior
  // turn carried, not the bare substring.) The needle must be non-empty:
  // every string contains '', so `not.toContain('')` could never pass.
  expect(prompt).not.toContain(priorFormOpeningTag);
  expect(prompt).not.toContain('## assistant');
});
// The aggressive form-answered OVERRIDE block is what tells weak
// plain agents (GPT-OSS-120B Medium, Gemini 3.5 Flash) to skip
// RULE 1's form example on follow-up turns. We pin the trigger
// condition AND the specific anti-patterns the literal carries,
// because silently weakening any of them — e.g. dropping the
// markdown-fence ban or the "subagents stopped" hallucination ban —
// reintroduces the form-echo regression we hit in PR #3157 on GPT-OSS.
it('FORM_ANSWERED_SYSTEM_OVERRIDE pins the anti-patterns weak plain agents need spelled out', async () => {
  const { FORM_ANSWERED_SYSTEM_OVERRIDE } = await import('../src/server.js');

  // Headline must call out that this is a follow-up turn, not turn 1.
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('## OVERRIDE — form already answered');
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('turn 2 or later');

  // RULE 1 stays in the prompt so turn 1 can still emit a valid form;
  // OVERRIDE just demotes it to documentation for follow-up turns.
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('Treat RULE 1\nas read-only documentation');

  // Forbidden anti-patterns observed in real captures.
  // NOTE(review): the `<question-form>` tag name is reconstructed from the
  // neighbouring comments — confirm against the literal in src/server.js.
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('`<question-form>` tag of any id');
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('```json fenced block');
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('Form-asking prose');
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('"subagents stopped"');

  // Required path: route to RULE 2 / RULE 3 so the model still
  // emits the `<artifact>` block on the same turn.
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('RULE 2');
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('RULE 3');
  // A bare pair of backticks would also match the ```json fence above, so
  // the needle names the tag explicitly.
  // NOTE(review): `<artifact>` is reconstructed — confirm the exact spelling.
  expect(FORM_ANSWERED_SYSTEM_OVERRIDE).toContain('`<artifact>`');
});
it('FORM_ANSWERED_GENERIC_OVERRIDE is used for non-discovery/task-type form ids', () => {
  // Non-build-transition forms should get a smaller override that only
  // suppresses re-asking — not the RULE 2 / RULE 3 / artifact directive.
  expect(FORM_ANSWERED_GENERIC_OVERRIDE).toContain('## OVERRIDE — form already answered');
  expect(FORM_ANSWERED_GENERIC_OVERRIDE).toContain('turn 2 or later');
  expect(FORM_ANSWERED_GENERIC_OVERRIDE).toContain('Do not ask the same form again');

  // Must NOT contain the artifact-build directive that only applies to
  // discovery / task-type — sending it for an unrelated form id would give
  // the model contradictory instructions.
  expect(FORM_ANSWERED_GENERIC_OVERRIDE).not.toContain('RULE 2');
  expect(FORM_ANSWERED_GENERIC_OVERRIDE).not.toContain('RULE 3');
  // Name the tag explicitly; a bare pair of backticks says nothing about
  // the artifact directive.
  // NOTE(review): `<artifact>` is reconstructed from the comments above —
  // confirm the exact spelling against src/server.js.
  expect(FORM_ANSWERED_GENERIC_OVERRIDE).not.toContain('`<artifact>`');
});
it('FORM_ANSWERED_SYSTEM_OVERRIDE only fires through composeChatUserRequestForAgent\'s transition gate', () => {
  // Defense-in-depth check: a turn that is NOT a form-answer follow-up
  // (no `[form answers — …]` header in `currentPrompt`) must not
  // surface any of the OVERRIDE language, even when `message` carries
  // a transcript that mentions question-form. Otherwise we'd suppress
  // the legitimate turn-1 form ask.
  // NOTE(review): the assistant turn's markup is reconstructed so the
  // transcript really does mention question-form — confirm the exact tag.
  const transcript =
    '## user\n初始需求\n\n## assistant\n<question-form id="discovery">...</question-form>';
  const currentPrompt = '继续做点修改';

  // Nothing here is awaited, so the callback is intentionally synchronous.
  const prompt = composeChatUserRequestForAgent(transcript, currentPrompt);

  expect(prompt).not.toContain('OVERRIDE — form already answered');
  expect(prompt).not.toContain('Treat RULE 1');
});
it('also drops the transcript on a non-form turn when skipTranscript is true', () => {
  // Without a form-answer transition the function used to hand back
  // `message` untouched. With skipTranscript the body has to come from
  // `currentPrompt`, so a follow-up `agy -c` turn doesn't carry a duplicate
  // copy of the transcript.
  const currentPrompt = '第二轮 follow-up';
  const transcript = '## user\n第一轮\n\n## assistant\n回答\n\n## user\n第二轮 follow-up';

  const withoutTranscript = composeChatUserRequestForAgent(transcript, currentPrompt, {
    skipTranscript: true,
  });
  expect(withoutTranscript).toBe(currentPrompt);

  // Default behaviour stays as it was (backward compatibility for every
  // adapter that doesn't set resumesSessionViaCli).
  const withTranscript = composeChatUserRequestForAgent(transcript, currentPrompt);
  expect(withTranscript).toBe(transcript);
});
it('invokes Langfuse reporting once when the final message write is marked', () => {
  const run = {
    id: 'run-1',
    projectId: 'project-1',
    conversationId: 'conv-1',
    assistantMessageId: 'assistant-1',
    status: 'succeeded',
    createdAt: 1,
    updatedAt: 2,
    events: [],
  };
  const report = vi.fn();
  const lookupRun = vi.fn(() => run);

  const reporter = createFinalizedMessageTelemetryReporter({
    design: { runs: { get: lookupRun } },
    db: 'db',
    dataDir: '/tmp/od-data',
    reportedRuns: new Set(),
    getAppVersion: () => ({ version: '0.7.0', channel: 'beta', packaged: true }),
    report,
  });

  // Deliver the same finalized write twice, building fresh argument objects
  // each time; only the first delivery should reach `report`.
  const deliverFinalizedWrite = () =>
    reporter({ ...terminalMessage, endedAt: 1234 }, { telemetryFinalized: true });
  deliverFinalizedWrite();
  deliverFinalizedWrite();

  expect(report).toHaveBeenCalledTimes(1);
  expect(report).toHaveBeenCalledWith({
    db: 'db',
    dataDir: '/tmp/od-data',
    run,
    persistedRunStatus: 'succeeded',
    persistedEndedAt: 1234,
    appVersion: { version: '0.7.0', channel: 'beta', packaged: true },
  });
});
});