mirror of
https://github.com/nexu-io/open-design.git
synced 2026-06-01 03:14:35 +07:00
fix(web): complete finished tool calls missing results (#1240)
This commit is contained in:
parent
e254d1280b
commit
e859c31574
6 changed files with 396 additions and 48 deletions
|
|
@ -73,6 +73,9 @@ export function AssistantMessage({
|
|||
const produced = message.producedFiles ?? [];
|
||||
const roleLabel = assistantRoleLabel(message, t);
|
||||
const unfinishedTodos = streaming ? [] : unfinishedTodosFromEvents(events);
|
||||
const runSucceeded =
|
||||
!streaming &&
|
||||
(message.runStatus === "succeeded" || (!message.runStatus && !!message.endedAt));
|
||||
const canContinueTodos =
|
||||
!streaming &&
|
||||
!!isLast &&
|
||||
|
|
@ -125,6 +128,7 @@ export function AssistantMessage({
|
|||
key={i}
|
||||
items={b.items}
|
||||
runStreaming={streaming}
|
||||
runSucceeded={runSucceeded}
|
||||
projectFileNames={projectFileNames}
|
||||
onRequestOpenFile={onRequestOpenFile}
|
||||
/>
|
||||
|
|
@ -605,11 +609,13 @@ interface ToolItem {
|
|||
function ToolGroupCard({
|
||||
items,
|
||||
runStreaming,
|
||||
runSucceeded,
|
||||
projectFileNames,
|
||||
onRequestOpenFile,
|
||||
}: {
|
||||
items: ToolItem[];
|
||||
runStreaming: boolean;
|
||||
runSucceeded: boolean;
|
||||
projectFileNames?: Set<string>;
|
||||
onRequestOpenFile?: (name: string) => void;
|
||||
}) {
|
||||
|
|
@ -624,14 +630,15 @@ function ToolGroupCard({
|
|||
use={items[0]!.use}
|
||||
result={items[0]!.result}
|
||||
runStreaming={runStreaming}
|
||||
runSucceeded={runSucceeded}
|
||||
projectFileNames={projectFileNames}
|
||||
onRequestOpenFile={onRequestOpenFile}
|
||||
/>
|
||||
);
|
||||
}
|
||||
|
||||
const summary = summarizeGroup(items, t);
|
||||
const running = items.some((it) => !it.result);
|
||||
const summary = summarizeGroup(items, t, runStreaming, runSucceeded);
|
||||
const running = runStreaming && items.some((it) => !it.result);
|
||||
return (
|
||||
<div className="action-card">
|
||||
<button
|
||||
|
|
@ -658,6 +665,7 @@ function ToolGroupCard({
|
|||
use={it.use}
|
||||
result={it.result}
|
||||
runStreaming={runStreaming}
|
||||
runSucceeded={runSucceeded}
|
||||
projectFileNames={projectFileNames}
|
||||
onRequestOpenFile={onRequestOpenFile}
|
||||
/>
|
||||
|
|
@ -670,13 +678,17 @@ function ToolGroupCard({
|
|||
|
||||
function summarizeGroup(
|
||||
items: ToolItem[],
|
||||
t: (k: keyof Dict, vars?: Record<string, string | number>) => string
|
||||
t: (k: keyof Dict, vars?: Record<string, string | number>) => string,
|
||||
runStreaming: boolean,
|
||||
runSucceeded: boolean
|
||||
): { label: string; icon: string } {
|
||||
// All items share a tool family because the grouper only merges by name.
|
||||
const name = items[0]?.use.name ?? "";
|
||||
const family = toolFamily(name);
|
||||
const icon = familyIcon(family);
|
||||
const verbs = items.map((it) => verbForState(it, t));
|
||||
const verbs = items.map((it) =>
|
||||
verbForState(it, t, runStreaming, runSucceeded)
|
||||
);
|
||||
// Roll the verbs into a comma-list with deduplicated last-state. So three
|
||||
// edits whose results are all 'Done' render as "Editing ×3, Done"; mixed
|
||||
// states render as "Editing, Reading, Done".
|
||||
|
|
@ -733,9 +745,15 @@ function countLabel(
|
|||
return n > 1 ? `${verb} ×${n}` : verb;
|
||||
}
|
||||
|
||||
function verbForState(it: ToolItem, t: (k: keyof Dict) => string): string {
|
||||
if (!it.result) return t("assistant.verbRunning");
|
||||
if (it.result.isError) return t("tool.error");
|
||||
function verbForState(
|
||||
it: ToolItem,
|
||||
t: (k: keyof Dict) => string,
|
||||
runStreaming = false,
|
||||
runSucceeded = false
|
||||
): string {
|
||||
if (!it.result && runStreaming) return t("assistant.verbRunning");
|
||||
if (!it.result && !runSucceeded) return t("tool.error");
|
||||
if (it.result?.isError) return t("tool.error");
|
||||
return t("tool.done");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -18,9 +18,11 @@ interface Props {
|
|||
use: Extract<AgentEvent, { kind: 'tool_use' }>;
|
||||
result?: Extract<AgentEvent, { kind: 'tool_result' }> | undefined;
|
||||
// True while the parent run is still streaming. Forwarded to registered
|
||||
// renderers via `status` so they can distinguish "executing" (run alive)
|
||||
// from "inProgress" (run dead before result arrived).
|
||||
// renderers via `status` so they can show live execution.
|
||||
runStreaming?: boolean;
|
||||
// True when the parent run reached a successful terminal status. Missing
|
||||
// tool results in successful completed turns are rendered as done.
|
||||
runSucceeded?: boolean;
|
||||
// Set of file names that exist in the project folder. When the tool's
|
||||
// `file_path`/`path` argument's basename appears in this set we surface
|
||||
// an "open" button on the card. Pass `undefined` to skip the existence
|
||||
|
|
@ -35,10 +37,13 @@ export function ToolCard({
|
|||
use,
|
||||
result,
|
||||
runStreaming,
|
||||
runSucceeded,
|
||||
projectFileNames,
|
||||
onRequestOpenFile,
|
||||
}: Props) {
|
||||
const name = use.name;
|
||||
const isStreaming = runStreaming ?? false;
|
||||
const isSucceeded = runSucceeded ?? false;
|
||||
const custom = getToolRenderer(name);
|
||||
if (custom) {
|
||||
// A misbehaving third-party renderer must not take down the whole
|
||||
|
|
@ -46,26 +51,26 @@ export function ToolCard({
|
|||
// built-in family card. (React's own error boundaries still cover
|
||||
// throws raised inside the returned tree once it's mounted.)
|
||||
try {
|
||||
const node = custom(toRenderProps(use, result, runStreaming ?? false));
|
||||
const node = custom(toRenderProps(use, result, isStreaming, isSucceeded));
|
||||
if (node !== undefined && node !== null && node !== false) return <>{node}</>;
|
||||
} catch (err) {
|
||||
console.error(`[ToolCard] custom renderer for "${name}" threw; falling back`, err);
|
||||
}
|
||||
}
|
||||
const ctx: FileToolCtx = { projectFileNames, onRequestOpenFile };
|
||||
if (name === 'TodoWrite' || name === 'todowrite') return <TodoCard input={use.input} />;
|
||||
if (name === 'TodoWrite' || name === 'todowrite') return <TodoCard input={use.input} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
|
||||
if (name === 'Write' || name === 'write' || name === 'create_file')
|
||||
return <FileWriteCard input={use.input} result={result} ctx={ctx} />;
|
||||
return <FileWriteCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} ctx={ctx} />;
|
||||
if (name === 'Edit' || name === 'str_replace_edit')
|
||||
return <FileEditCard input={use.input} result={result} ctx={ctx} />;
|
||||
return <FileEditCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} ctx={ctx} />;
|
||||
if (name === 'Read' || name === 'read_file')
|
||||
return <FileReadCard input={use.input} result={result} ctx={ctx} />;
|
||||
if (name === 'Bash') return <BashCard input={use.input} result={result} />;
|
||||
if (name === 'Glob' || name === 'list_files') return <GlobCard input={use.input} result={result} />;
|
||||
if (name === 'Grep') return <GrepCard input={use.input} result={result} />;
|
||||
return <FileReadCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} ctx={ctx} />;
|
||||
if (name === 'Bash') return <BashCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
|
||||
if (name === 'Glob' || name === 'list_files') return <GlobCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
|
||||
if (name === 'Grep') return <GrepCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
|
||||
if (name === 'WebFetch' || name === 'web_fetch') return <WebFetchCard input={use.input} />;
|
||||
if (name === 'WebSearch' || name === 'web_search') return <WebSearchCard input={use.input} />;
|
||||
return <GenericCard name={name} input={use.input} result={result} />;
|
||||
return <GenericCard name={name} input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
|
||||
}
|
||||
|
||||
interface FileToolCtx {
|
||||
|
|
@ -94,10 +99,10 @@ function OpenInTabButton({ filePath, ctx }: { filePath: string; ctx: FileToolCtx
|
|||
);
|
||||
}
|
||||
|
||||
function TodoCard({ input }: { input: unknown }) {
|
||||
function TodoCard({ input, runStreaming, runSucceeded }: { input: unknown; runStreaming: boolean; runSucceeded: boolean }) {
|
||||
const t = useT();
|
||||
const todos = parseTodoWriteInput(input);
|
||||
if (todos.length === 0) return <GenericCard name="TodoWrite" input={input} />;
|
||||
if (todos.length === 0) return <GenericCard name="TodoWrite" input={input} runStreaming={runStreaming} runSucceeded={runSucceeded} />;
|
||||
const done = todos.filter((todo) => todo.status === 'completed').length;
|
||||
return (
|
||||
<div className="op-card op-todo">
|
||||
|
|
@ -127,10 +132,14 @@ function TodoCard({ input }: { input: unknown }) {
|
|||
function FileWriteCard({
|
||||
input,
|
||||
result,
|
||||
runStreaming,
|
||||
runSucceeded,
|
||||
ctx,
|
||||
}: {
|
||||
input: unknown;
|
||||
result?: Props['result'];
|
||||
runStreaming: boolean;
|
||||
runSucceeded: boolean;
|
||||
ctx: FileToolCtx;
|
||||
}) {
|
||||
const t = useT();
|
||||
|
|
@ -146,7 +155,7 @@ function FileWriteCard({
|
|||
{lines !== null ? (
|
||||
<span className="op-meta">{t('tool.lines', { n: lines })}</span>
|
||||
) : null}
|
||||
<ResultBadge result={result} />
|
||||
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
|
||||
<OpenInTabButton filePath={file} ctx={ctx} />
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -156,10 +165,14 @@ function FileWriteCard({
|
|||
function FileEditCard({
|
||||
input,
|
||||
result,
|
||||
runStreaming,
|
||||
runSucceeded,
|
||||
ctx,
|
||||
}: {
|
||||
input: unknown;
|
||||
result?: Props['result'];
|
||||
runStreaming: boolean;
|
||||
runSucceeded: boolean;
|
||||
ctx: FileToolCtx;
|
||||
}) {
|
||||
const t = useT();
|
||||
|
|
@ -182,7 +195,7 @@ function FileEditCard({
|
|||
<span className="op-meta">
|
||||
{editCount} {editCount === 1 ? t('tool.changeSingular') : t('tool.changePlural')}
|
||||
</span>
|
||||
<ResultBadge result={result} />
|
||||
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
|
||||
<OpenInTabButton filePath={file} ctx={ctx} />
|
||||
</div>
|
||||
</div>
|
||||
|
|
@ -192,10 +205,14 @@ function FileEditCard({
|
|||
function FileReadCard({
|
||||
input,
|
||||
result,
|
||||
runStreaming,
|
||||
runSucceeded,
|
||||
ctx,
|
||||
}: {
|
||||
input: unknown;
|
||||
result?: Props['result'];
|
||||
runStreaming: boolean;
|
||||
runSucceeded: boolean;
|
||||
ctx: FileToolCtx;
|
||||
}) {
|
||||
const t = useT();
|
||||
|
|
@ -207,14 +224,14 @@ function FileReadCard({
|
|||
<span className="op-icon op-icon-read" aria-hidden>↗</span>
|
||||
<span className="op-title">{t('tool.read')}</span>
|
||||
<code className="op-path">{file}</code>
|
||||
<ResultBadge result={result} />
|
||||
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
|
||||
<OpenInTabButton filePath={file} ctx={ctx} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function BashCard({ input, result }: { input: unknown; result?: Props['result'] }) {
|
||||
function BashCard({ input, result, runStreaming, runSucceeded }: { input: unknown; result?: Props['result']; runStreaming: boolean; runSucceeded: boolean }) {
|
||||
const t = useT();
|
||||
const obj = (input ?? {}) as { command?: string; description?: string };
|
||||
const command = obj.command ?? '';
|
||||
|
|
@ -226,7 +243,7 @@ function BashCard({ input, result }: { input: unknown; result?: Props['result']
|
|||
<span className="op-icon" aria-hidden>$</span>
|
||||
<span className="op-title">{t('tool.bash')}</span>
|
||||
{desc ? <span className="op-meta op-desc">{desc}</span> : null}
|
||||
<ResultBadge result={result} />
|
||||
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
|
||||
{result && result.content ? (
|
||||
<button className="op-toggle" onClick={() => setOpen((o) => !o)}>
|
||||
{open ? t('tool.hide') : t('tool.output')}
|
||||
|
|
@ -241,7 +258,7 @@ function BashCard({ input, result }: { input: unknown; result?: Props['result']
|
|||
);
|
||||
}
|
||||
|
||||
function GlobCard({ input, result }: { input: unknown; result?: Props['result'] }) {
|
||||
function GlobCard({ input, result, runStreaming, runSucceeded }: { input: unknown; result?: Props['result']; runStreaming: boolean; runSucceeded: boolean }) {
|
||||
const t = useT();
|
||||
const obj = (input ?? {}) as { pattern?: string; path?: string };
|
||||
return (
|
||||
|
|
@ -253,13 +270,13 @@ function GlobCard({ input, result }: { input: unknown; result?: Props['result']
|
|||
{obj.path ? (
|
||||
<span className="op-meta">{t('tool.in', { path: obj.path })}</span>
|
||||
) : null}
|
||||
<ResultBadge result={result} />
|
||||
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function GrepCard({ input, result }: { input: unknown; result?: Props['result'] }) {
|
||||
function GrepCard({ input, result, runStreaming, runSucceeded }: { input: unknown; result?: Props['result']; runStreaming: boolean; runSucceeded: boolean }) {
|
||||
const t = useT();
|
||||
const obj = (input ?? {}) as { pattern?: string; path?: string; glob?: string };
|
||||
return (
|
||||
|
|
@ -271,7 +288,7 @@ function GrepCard({ input, result }: { input: unknown; result?: Props['result']
|
|||
{obj.path ? (
|
||||
<span className="op-meta">{t('tool.in', { path: obj.path })}</span>
|
||||
) : null}
|
||||
<ResultBadge result={result} />
|
||||
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
|
|
@ -309,10 +326,14 @@ function GenericCard({
|
|||
name,
|
||||
input,
|
||||
result,
|
||||
runStreaming,
|
||||
runSucceeded,
|
||||
}: {
|
||||
name: string;
|
||||
input: unknown;
|
||||
result?: Props['result'];
|
||||
runStreaming: boolean;
|
||||
runSucceeded: boolean;
|
||||
}) {
|
||||
const summary = describeInput(input);
|
||||
return (
|
||||
|
|
@ -321,16 +342,17 @@ function GenericCard({
|
|||
<span className="op-icon" aria-hidden>·</span>
|
||||
<span className="op-title">{name}</span>
|
||||
{summary ? <span className="op-meta">{truncate(summary, 200)}</span> : null}
|
||||
<ResultBadge result={result} />
|
||||
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
|
||||
</div>
|
||||
</div>
|
||||
);
|
||||
}
|
||||
|
||||
function ResultBadge({ result }: { result?: Props['result'] }) {
|
||||
function ResultBadge({ result, runStreaming, runSucceeded }: { result?: Props['result']; runStreaming: boolean; runSucceeded: boolean }) {
|
||||
const t = useT();
|
||||
if (!result) return <span className="op-status op-status-running">{t('tool.running')}</span>;
|
||||
if (result.isError) return <span className="op-status op-status-error">{t('tool.error')}</span>;
|
||||
if (!result && runStreaming) return <span className="op-status op-status-running">{t('tool.running')}</span>;
|
||||
if (!result && !runSucceeded) return <span className="op-status op-status-error">{t('tool.error')}</span>;
|
||||
if (result?.isError) return <span className="op-status op-status-error">{t('tool.error')}</span>;
|
||||
return <span className="op-status op-status-ok">{t('tool.done')}</span>;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -26,6 +26,8 @@ export interface ToolRenderProps {
|
|||
name: string;
|
||||
args: unknown;
|
||||
result: string | undefined;
|
||||
// Mirrors tool_result.isError. Terminal failures without a tool_result are
|
||||
// surfaced through `status: 'error'`.
|
||||
isError: boolean;
|
||||
}
|
||||
|
||||
|
|
@ -85,38 +87,35 @@ export function clearToolRenderers(): void {
|
|||
}
|
||||
|
||||
/**
|
||||
* Map an in-flight tool call to AG-UI's four-state lifecycle.
|
||||
* Map a tool call to AG-UI's lifecycle status.
|
||||
*
|
||||
* - `error` — tool returned with `isError`
|
||||
* - `complete` — tool returned cleanly
|
||||
* - `executing` — no result yet, run still streaming
|
||||
* - `inProgress` — no result yet, run finished (rare: agent crashed
|
||||
* mid-call). Distinct so renderers can surface a
|
||||
* different affordance ("interrupted") than the
|
||||
* live-spinner state.
|
||||
* - `complete` — no result yet, run finished. Some stored assistant
|
||||
* turns can be missing a tool_result even though the run
|
||||
* succeeded, so renderers should show the completed turn.
|
||||
*
|
||||
* The split between `inProgress` and `executing` is the same one
|
||||
* CopilotKit exposes: in their world, `inProgress` = streaming args,
|
||||
* `executing` = handler running. We don't currently receive partial
|
||||
* tool_use args from the daemon, so the two states collapse onto the
|
||||
* "run alive vs. run dead" axis instead. Either way, renderers that
|
||||
* want a single "loading" state can treat both identically.
|
||||
* - `error` — no result after a failed or canceled terminal run.
|
||||
*/
|
||||
export function deriveToolStatus(
|
||||
result: ToolResult | undefined,
|
||||
runStreaming: boolean,
|
||||
runSucceeded = false,
|
||||
): ToolStatus {
|
||||
if (result) return result.isError ? 'error' : 'complete';
|
||||
return runStreaming ? 'executing' : 'inProgress';
|
||||
if (runStreaming) return 'executing';
|
||||
return runSucceeded ? 'complete' : 'error';
|
||||
}
|
||||
|
||||
export function toRenderProps(
|
||||
use: ToolUse,
|
||||
result: ToolResult | undefined,
|
||||
runStreaming: boolean,
|
||||
runSucceeded = false,
|
||||
): ToolRenderProps {
|
||||
return {
|
||||
status: deriveToolStatus(result, runStreaming),
|
||||
status: deriveToolStatus(result, runStreaming, runSucceeded),
|
||||
name: use.name,
|
||||
args: use.input,
|
||||
result: result?.content,
|
||||
|
|
|
|||
161
apps/web/tests/components/assistant-message-tool-status.test.tsx
Normal file
161
apps/web/tests/components/assistant-message-tool-status.test.tsx
Normal file
|
|
@ -0,0 +1,161 @@
|
|||
// @vitest-environment jsdom
|
||||
|
||||
import { cleanup, render, screen } from '@testing-library/react';
|
||||
import { afterEach, describe, expect, it } from 'vitest';
|
||||
import { AssistantMessage } from '../../src/components/AssistantMessage';
|
||||
import type { AgentEvent, ChatMessage } from '../../src/types';
|
||||
|
||||
function messageWithEvents(events: AgentEvent[]): ChatMessage {
|
||||
return {
|
||||
id: 'assistant-1',
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
events,
|
||||
startedAt: 1_000,
|
||||
endedAt: 3_000,
|
||||
runStatus: 'succeeded',
|
||||
};
|
||||
}
|
||||
|
||||
describe('AssistantMessage tool status', () => {
|
||||
afterEach(() => cleanup());
|
||||
|
||||
it('shows Done for a completed run tool use that has no tool result', () => {
|
||||
const { container } = render(
|
||||
<AssistantMessage
|
||||
message={messageWithEvents([
|
||||
{
|
||||
kind: 'tool_use',
|
||||
id: 'tool-1',
|
||||
name: 'Bash',
|
||||
input: { command: 'pnpm guard', description: 'Run guard' },
|
||||
},
|
||||
])}
|
||||
streaming={false}
|
||||
projectId="project-1"
|
||||
/>,
|
||||
);
|
||||
|
||||
expect(container.querySelector('.op-status-ok')?.textContent).toMatch(/^done$/i);
|
||||
expect(container.querySelector('.op-status-running')).toBeNull();
|
||||
});
|
||||
|
||||
it('keeps legacy completed messages without runStatus as Done', () => {
|
||||
const { container } = render(
|
||||
<AssistantMessage
|
||||
message={{
|
||||
...messageWithEvents([
|
||||
{
|
||||
kind: 'tool_use',
|
||||
id: 'tool-1',
|
||||
name: 'Bash',
|
||||
input: { command: 'pnpm guard', description: 'Execute guard' },
|
||||
},
|
||||
]),
|
||||
runStatus: undefined,
|
||||
}}
|
||||
streaming={false}
|
||||
projectId="project-1"
|
||||
/>,
|
||||
);
|
||||
|
||||
expect(container.querySelector('.op-status-ok')?.textContent).toMatch(/^done$/i);
|
||||
expect(container.querySelector('.op-status-running')).toBeNull();
|
||||
});
|
||||
|
||||
it('shows Done in a grouped completed run when tool results are missing', () => {
|
||||
const { container } = render(
|
||||
<AssistantMessage
|
||||
message={messageWithEvents([
|
||||
{
|
||||
kind: 'tool_use',
|
||||
id: 'tool-1',
|
||||
name: 'Bash',
|
||||
input: { command: 'pnpm guard', description: 'Execute guard' },
|
||||
},
|
||||
{
|
||||
kind: 'tool_use',
|
||||
id: 'tool-2',
|
||||
name: 'Bash',
|
||||
input: { command: 'pnpm typecheck', description: 'Execute typecheck' },
|
||||
},
|
||||
])}
|
||||
streaming={false}
|
||||
projectId="project-1"
|
||||
/>,
|
||||
);
|
||||
|
||||
expect(container.querySelector('.action-card-toggle.running')).toBeNull();
|
||||
expect(screen.getByRole('button', { name: /Done/i })).toBeTruthy();
|
||||
});
|
||||
|
||||
it('does not show Done when a failed run is missing a tool result', () => {
|
||||
const { container } = render(
|
||||
<AssistantMessage
|
||||
message={{
|
||||
...messageWithEvents([
|
||||
{
|
||||
kind: 'tool_use',
|
||||
id: 'tool-1',
|
||||
name: 'Bash',
|
||||
input: { command: 'pnpm guard', description: 'Execute guard' },
|
||||
},
|
||||
]),
|
||||
runStatus: 'failed',
|
||||
}}
|
||||
streaming={false}
|
||||
projectId="project-1"
|
||||
/>,
|
||||
);
|
||||
|
||||
expect(container.querySelector('.op-status-error')?.textContent).toMatch(/^error$/i);
|
||||
expect(container.querySelector('.op-status-ok')).toBeNull();
|
||||
});
|
||||
|
||||
it('does not show Done when a canceled run is missing a tool result', () => {
|
||||
const { container } = render(
|
||||
<AssistantMessage
|
||||
message={{
|
||||
...messageWithEvents([
|
||||
{
|
||||
kind: 'tool_use',
|
||||
id: 'tool-1',
|
||||
name: 'Bash',
|
||||
input: { command: 'pnpm guard', description: 'Execute guard' },
|
||||
},
|
||||
]),
|
||||
runStatus: 'canceled',
|
||||
}}
|
||||
streaming={false}
|
||||
projectId="project-1"
|
||||
/>,
|
||||
);
|
||||
|
||||
expect(container.querySelector('.op-status-error')?.textContent).toMatch(/^error$/i);
|
||||
expect(container.querySelector('.op-status-ok')).toBeNull();
|
||||
});
|
||||
|
||||
it('keeps Running for a streaming tool use that has no tool result', () => {
|
||||
const { container } = render(
|
||||
<AssistantMessage
|
||||
message={{
|
||||
...messageWithEvents([
|
||||
{
|
||||
kind: 'tool_use',
|
||||
id: 'tool-1',
|
||||
name: 'Bash',
|
||||
input: { command: 'pnpm guard', description: 'Run guard' },
|
||||
},
|
||||
]),
|
||||
endedAt: undefined,
|
||||
runStatus: 'running',
|
||||
}}
|
||||
streaming
|
||||
projectId="project-1"
|
||||
/>,
|
||||
);
|
||||
|
||||
expect(container.querySelector('.op-status-running')?.textContent).toBe('running…');
|
||||
expect(screen.queryByText('Done')).toBeNull();
|
||||
});
|
||||
});
|
||||
|
|
@ -33,8 +33,12 @@ describe('deriveToolStatus', () => {
|
|||
expect(deriveToolStatus(undefined, true)).toBe('executing');
|
||||
});
|
||||
|
||||
it('returns "inProgress" when the run died before the tool returned', () => {
|
||||
expect(deriveToolStatus(undefined, false)).toBe('inProgress');
|
||||
it('returns "complete" when a successful run finished without a tool result', () => {
|
||||
expect(deriveToolStatus(undefined, false, true)).toBe('complete');
|
||||
});
|
||||
|
||||
it('returns "error" when a failed or canceled run finished without a tool result', () => {
|
||||
expect(deriveToolStatus(undefined, false, false)).toBe('error');
|
||||
});
|
||||
|
||||
it('returns "complete" on a clean tool result', () => {
|
||||
|
|
@ -66,6 +70,13 @@ describe('toRenderProps', () => {
|
|||
expect(props.result).toBeUndefined();
|
||||
expect(props.isError).toBe(false);
|
||||
});
|
||||
|
||||
it('marks missing results complete only for successful terminal runs', () => {
|
||||
const u = use({ city: 'SF' }, 'get_weather');
|
||||
|
||||
expect(toRenderProps(u, undefined, false, true).status).toBe('complete');
|
||||
expect(toRenderProps(u, undefined, false, false).status).toBe('error');
|
||||
});
|
||||
});
|
||||
|
||||
describe('tool renderer registry', () => {
|
||||
|
|
|
|||
137
specs/change/20260511-issue-145-unit-test-reproduction/spec.md
Normal file
137
specs/change/20260511-issue-145-unit-test-reproduction/spec.md
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
---
|
||||
id: 20260511-issue-145-unit-test-reproduction
|
||||
name: Issue 145 Unit Test Reproduction
|
||||
status: implemented
|
||||
created: '2026-05-11'
|
||||
---
|
||||
|
||||
## Overview
|
||||
|
||||
### Problem Statement
|
||||
- GitHub issue 145 reports that artifact generation can complete while the chat task/tool status remains visually stuck on "running".
|
||||
- The current discussion narrowed reproduction to a unit-level case: an assistant message has ended successfully, but a `tool_use` event has no paired `tool_result`.
|
||||
|
||||
### Goal
|
||||
- Add a web unit/component test that reproduces the issue deterministically.
|
||||
- Prefer a focused `AssistantMessage` rendering test over e2e because the failure is a UI state derivation bug.
|
||||
|
||||
### Scope
|
||||
- Cover the case `streaming={false}` + `endedAt`/succeeded message + missing `tool_result`.
|
||||
- Verify the current behavior exposes `Running`, then use the test as the regression target for the fix.
|
||||
|
||||
### Success Criteria
|
||||
- The test fails against the current buggy behavior when written with the desired expectation: completed message should not show a running tool state.
|
||||
- After the implementation fix, the same test passes.
|
||||
|
||||
## Research
|
||||
|
||||
### Existing System
|
||||
- `AssistantMessage` builds render blocks from `message.events` and passes `streaming` to tool groups; `endedAt` is only used by the footer. Source: `apps/web/src/components/AssistantMessage.tsx:68-75,124-130,151-157`.
|
||||
- `ProjectView` marks an assistant message complete on successful stream completion by setting `endedAt`, resolving run status to succeeded, and setting `streaming` false. Source: `apps/web/src/components/ProjectView.tsx:1250-1268`.
|
||||
- Web SSE handling calls `handlers.onDone(acc)` after receiving or recovering a terminal succeeded status. Source: `apps/web/src/providers/daemon.ts:338-371`.
|
||||
- Daemon run completion emits an `end` event and closes SSE clients, but this run-level completion path does not synthesize missing tool results. Source: `apps/daemon/src/runs.ts:72-80`.
|
||||
- Tool group summaries currently treat any tool item without `result` as running. Source: `apps/web/src/components/AssistantMessage.tsx:633-639,736-739`.
|
||||
- Individual tool badges currently treat missing `result` as `op-status-running`. Source: `apps/web/src/components/ToolCard.tsx:330-334`.
|
||||
|
||||
### Available Approaches
|
||||
- **Component/unit reproduction**: render `AssistantMessage` with a completed assistant message containing `tool_use` without `tool_result`. This directly targets the UI state derivation. Source: `apps/web/src/components/AssistantMessage.tsx:56-67,736-739`; `apps/web/src/components/ToolCard.tsx:330-334`.
|
||||
- **Provider-level reproduction**: simulate SSE `tool_use -> end` and assert the provider calls `onDone` while events remain unpaired. This proves the upstream state is possible, while the visible bug still needs component coverage. Source: `apps/web/src/providers/daemon.ts:338-371,440-449`.
|
||||
- **E2E reproduction**: mock or force a daemon stream sequence `tool_use -> end`; this covers the full UI but requires more harness control. Source: `apps/daemon/src/runs.ts:49-56,72-80`; `apps/web/src/providers/daemon.ts:338-371`.
|
||||
|
||||
### Constraints & Dependencies
|
||||
- App tests belong under `apps/web/tests/`, not under `apps/web/src/`. Source: `apps/AGENTS.md#Test layout`.
|
||||
- The repository prefers package-scoped validation, including `pnpm --filter @open-design/web test`. Source: `AGENTS.md#Common commands`.
|
||||
- The current requested scope is reproduction via unit test; implementation fix can follow after the failing/pinning test is in place. Source: conversation.
|
||||
|
||||
### Key References
|
||||
- `apps/web/src/components/AssistantMessage.tsx:633-639,736-739` - missing tool result maps to running state.
|
||||
- `apps/web/src/components/ToolCard.tsx:330-334` - missing result renders the running badge.
|
||||
- `apps/web/src/components/ProjectView.tsx:1250-1268` - message completion is tracked separately from tool result pairing.
|
||||
- `apps/daemon/src/runs.ts:72-80` - run end is emitted at the daemon level.
|
||||
|
||||
## Design
|
||||
|
||||
### Architecture Overview
|
||||
|
||||
```mermaid
|
||||
flowchart LR
|
||||
Fixture[Completed assistant message fixture] --> Render[Render AssistantMessage]
|
||||
Render --> ToolGroup[ToolGroupCard / ToolCard]
|
||||
ToolGroup --> Assertion[Assert visible tool status]
|
||||
```
|
||||
|
||||
### Change Scope
|
||||
- Area: web component tests. Impact: deterministic regression coverage for issue 145 without requiring daemon or Playwright orchestration. Source: `apps/web/src/components/AssistantMessage.tsx:56-67`; `apps/AGENTS.md#Test layout`.
|
||||
- Area: assistant message/tool rendering. Impact: test exercises the current `tool_use` without `tool_result` path. Source: `apps/web/src/components/AssistantMessage.tsx:763-817`; `apps/web/src/components/ToolCard.tsx:330-334`.
|
||||
|
||||
### Design Decisions
|
||||
- Decision: Add a component test for `AssistantMessage` rather than an e2e test as the first reproduction. Source: `apps/web/src/components/AssistantMessage.tsx:736-739`; `apps/web/src/components/ToolCard.tsx:330-334`.
|
||||
- Decision: Use a fixture with `streaming={false}`, `endedAt`, `runStatus: 'succeeded'`, and a single `tool_use` event missing `tool_result`. Source: `apps/web/src/components/ProjectView.tsx:1250-1268`; `apps/web/src/components/AssistantMessage.tsx:785-788`.
|
||||
- Decision: Place the test under `apps/web/tests/components/`, matching existing component test ownership. Source: `apps/AGENTS.md#Test layout`; `apps/web/tests/components/assistant-message-unfinished-todos.test.tsx`.
|
||||
- Decision: Write the desired post-fix assertion: completed assistant messages should not present a missing result as a running tool state. Source: `apps/web/src/components/AssistantMessage.tsx:151-157,736-739`; `apps/web/src/components/ToolCard.tsx:330-334`.
|
||||
|
||||
### Why this design
|
||||
- The bug is produced by UI state derivation: message completion and tool result pairing are tracked independently.
|
||||
- A component test can reproduce the precise state directly and runs faster than an e2e flow.
|
||||
- Provider or e2e coverage can be added later after the UI regression test pins the expected behavior.
|
||||
|
||||
### Test Strategy
|
||||
- Add a Vitest/Testing Library component test in `apps/web/tests/components/assistant-message-tool-status.test.tsx`. Validation: render a completed assistant message with an unpaired `Write` tool and assert the completed message does not show `Running`. Source: `apps/web/src/components/AssistantMessage.tsx:56-67`; `apps/web/src/components/ToolCard.tsx:330-334`.
|
||||
- Run `pnpm --filter @open-design/web test` after adding the test. Source: `AGENTS.md#Common commands`.
|
||||
|
||||
### Pseudocode
|
||||
|
||||
```ts
|
||||
render(
|
||||
<AssistantMessage
|
||||
message={{
|
||||
id: 'assistant-1',
|
||||
role: 'assistant',
|
||||
content: '',
|
||||
startedAt: 1000,
|
||||
endedAt: 2000,
|
||||
runStatus: 'succeeded',
|
||||
events: [
|
||||
{ kind: 'tool_use', id: 'tool-1', name: 'Write', input: { file_path: 'index.html', content: '<html>done</html>' } },
|
||||
],
|
||||
}}
|
||||
streaming={false}
|
||||
projectId="project-1"
|
||||
/>
|
||||
);
|
||||
|
||||
expect(screen.queryByText(/running/i)).toBeNull();
|
||||
expect(screen.getByText(/done/i)).toBeInTheDocument();
|
||||
```
|
||||
|
||||
### File Structure
|
||||
- `apps/web/tests/components/assistant-message-tool-status.test.tsx` - focused reproduction/regression test for completed messages with unpaired tool events.
|
||||
|
||||
## Plan
|
||||
|
||||
- [x] Step 1: Add issue 145 reproduction test
|
||||
- [x] Substep 1.1 Implement: create `assistant-message-tool-status.test.tsx` with the completed-message/unpaired-tool fixture.
|
||||
- [x] Substep 1.2 Verify: run the web component test and confirm it exposes the current bug with the desired assertion.
|
||||
- [x] Step 2: Apply UI status fix
|
||||
- [x] Substep 2.1 Implement: update assistant/tool rendering so completed runs do not display missing tool results as running.
|
||||
- [x] Substep 2.2 Verify: rerun the web test and confirm it passes.
|
||||
|
||||
## Notes
|
||||
|
||||
<!-- Optional sections — add what's relevant. -->
|
||||
|
||||
### Implementation
|
||||
|
||||
- `apps/web/tests/components/assistant-message-tool-status.test.tsx` - added issue 145 regression coverage for completed and streaming runs with missing `tool_result`.
|
||||
- `apps/web/src/components/AssistantMessage.tsx` - made grouped tool summary/running state depend on `runStreaming` so completed turns with missing results render as done.
|
||||
- `apps/web/src/components/ToolCard.tsx` - threaded `runStreaming` into built-in result badges and treated missing results as done only after the run has ended.
|
||||
- `apps/web/src/runtime/tool-renderers.ts` - aligned custom renderer status derivation with the same completed-run behavior.
|
||||
- `apps/web/tests/runtime/tool-renderers.test.tsx` - updated renderer status expectations for completed runs missing a result.
|
||||
- Follow-up review tightened the fallback so failed/canceled terminal runs with missing `tool_result` surface as error rather than done.
|
||||
|
||||
### Verification
|
||||
|
||||
- Reproduction confirmed before the fix: the new component test failed because a completed run without `tool_result` rendered `running…` instead of `Done`.
|
||||
- Passed: `pnpm --filter @open-design/web test -- tests/components/assistant-message-tool-status.test.tsx tests/runtime/tool-renderers.test.tsx` (`84` files, `751` tests).
|
||||
- Passed: `pnpm --filter @open-design/web typecheck`.
|
||||
- Final code review found no blocking issues.
|
||||
Loading…
Reference in a new issue