fix(web): complete finished tool calls missing results (#1240)

This commit is contained in:
nettee 2026-05-11 15:54:11 +08:00 committed by GitHub
parent e254d1280b
commit e859c31574
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 396 additions and 48 deletions

View file

@ -73,6 +73,9 @@ export function AssistantMessage({
const produced = message.producedFiles ?? [];
const roleLabel = assistantRoleLabel(message, t);
const unfinishedTodos = streaming ? [] : unfinishedTodosFromEvents(events);
const runSucceeded =
!streaming &&
(message.runStatus === "succeeded" || (!message.runStatus && !!message.endedAt));
const canContinueTodos =
!streaming &&
!!isLast &&
@ -125,6 +128,7 @@ export function AssistantMessage({
key={i}
items={b.items}
runStreaming={streaming}
runSucceeded={runSucceeded}
projectFileNames={projectFileNames}
onRequestOpenFile={onRequestOpenFile}
/>
@ -605,11 +609,13 @@ interface ToolItem {
function ToolGroupCard({
items,
runStreaming,
runSucceeded,
projectFileNames,
onRequestOpenFile,
}: {
items: ToolItem[];
runStreaming: boolean;
runSucceeded: boolean;
projectFileNames?: Set<string>;
onRequestOpenFile?: (name: string) => void;
}) {
@ -624,14 +630,15 @@ function ToolGroupCard({
use={items[0]!.use}
result={items[0]!.result}
runStreaming={runStreaming}
runSucceeded={runSucceeded}
projectFileNames={projectFileNames}
onRequestOpenFile={onRequestOpenFile}
/>
);
}
const summary = summarizeGroup(items, t);
const running = items.some((it) => !it.result);
const summary = summarizeGroup(items, t, runStreaming, runSucceeded);
const running = runStreaming && items.some((it) => !it.result);
return (
<div className="action-card">
<button
@ -658,6 +665,7 @@ function ToolGroupCard({
use={it.use}
result={it.result}
runStreaming={runStreaming}
runSucceeded={runSucceeded}
projectFileNames={projectFileNames}
onRequestOpenFile={onRequestOpenFile}
/>
@ -670,13 +678,17 @@ function ToolGroupCard({
function summarizeGroup(
items: ToolItem[],
t: (k: keyof Dict, vars?: Record<string, string | number>) => string
t: (k: keyof Dict, vars?: Record<string, string | number>) => string,
runStreaming: boolean,
runSucceeded: boolean
): { label: string; icon: string } {
// All items share a tool family because the grouper only merges by name.
const name = items[0]?.use.name ?? "";
const family = toolFamily(name);
const icon = familyIcon(family);
const verbs = items.map((it) => verbForState(it, t));
const verbs = items.map((it) =>
verbForState(it, t, runStreaming, runSucceeded)
);
// Roll the verbs into a comma-list with deduplicated last-state. So three
// edits whose results are all 'Done' render as "Editing ×3, Done"; mixed
// states render as "Editing, Reading, Done".
@ -733,9 +745,15 @@ function countLabel(
return n > 1 ? `${verb} ×${n}` : verb;
}
function verbForState(it: ToolItem, t: (k: keyof Dict) => string): string {
if (!it.result) return t("assistant.verbRunning");
if (it.result.isError) return t("tool.error");
function verbForState(
it: ToolItem,
t: (k: keyof Dict) => string,
runStreaming = false,
runSucceeded = false
): string {
if (!it.result && runStreaming) return t("assistant.verbRunning");
if (!it.result && !runSucceeded) return t("tool.error");
if (it.result?.isError) return t("tool.error");
return t("tool.done");
}

View file

@ -18,9 +18,11 @@ interface Props {
use: Extract<AgentEvent, { kind: 'tool_use' }>;
result?: Extract<AgentEvent, { kind: 'tool_result' }> | undefined;
// True while the parent run is still streaming. Forwarded to registered
// renderers via `status` so they can distinguish "executing" (run alive)
// from "inProgress" (run dead before result arrived).
// renderers via `status` so they can show live execution.
runStreaming?: boolean;
// True when the parent run reached a successful terminal status. Missing
// tool results in successful completed turns are rendered as done.
runSucceeded?: boolean;
// Set of file names that exist in the project folder. When the tool's
// `file_path`/`path` argument's basename appears in this set we surface
// an "open" button on the card. Pass `undefined` to skip the existence
@ -35,10 +37,13 @@ export function ToolCard({
use,
result,
runStreaming,
runSucceeded,
projectFileNames,
onRequestOpenFile,
}: Props) {
const name = use.name;
const isStreaming = runStreaming ?? false;
const isSucceeded = runSucceeded ?? false;
const custom = getToolRenderer(name);
if (custom) {
// A misbehaving third-party renderer must not take down the whole
@ -46,26 +51,26 @@ export function ToolCard({
// built-in family card. (React's own error boundaries still cover
// throws raised inside the returned tree once it's mounted.)
try {
const node = custom(toRenderProps(use, result, runStreaming ?? false));
const node = custom(toRenderProps(use, result, isStreaming, isSucceeded));
if (node !== undefined && node !== null && node !== false) return <>{node}</>;
} catch (err) {
console.error(`[ToolCard] custom renderer for "${name}" threw; falling back`, err);
}
}
const ctx: FileToolCtx = { projectFileNames, onRequestOpenFile };
if (name === 'TodoWrite' || name === 'todowrite') return <TodoCard input={use.input} />;
if (name === 'TodoWrite' || name === 'todowrite') return <TodoCard input={use.input} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
if (name === 'Write' || name === 'write' || name === 'create_file')
return <FileWriteCard input={use.input} result={result} ctx={ctx} />;
return <FileWriteCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} ctx={ctx} />;
if (name === 'Edit' || name === 'str_replace_edit')
return <FileEditCard input={use.input} result={result} ctx={ctx} />;
return <FileEditCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} ctx={ctx} />;
if (name === 'Read' || name === 'read_file')
return <FileReadCard input={use.input} result={result} ctx={ctx} />;
if (name === 'Bash') return <BashCard input={use.input} result={result} />;
if (name === 'Glob' || name === 'list_files') return <GlobCard input={use.input} result={result} />;
if (name === 'Grep') return <GrepCard input={use.input} result={result} />;
return <FileReadCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} ctx={ctx} />;
if (name === 'Bash') return <BashCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
if (name === 'Glob' || name === 'list_files') return <GlobCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
if (name === 'Grep') return <GrepCard input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
if (name === 'WebFetch' || name === 'web_fetch') return <WebFetchCard input={use.input} />;
if (name === 'WebSearch' || name === 'web_search') return <WebSearchCard input={use.input} />;
return <GenericCard name={name} input={use.input} result={result} />;
return <GenericCard name={name} input={use.input} result={result} runStreaming={isStreaming} runSucceeded={isSucceeded} />;
}
interface FileToolCtx {
@ -94,10 +99,10 @@ function OpenInTabButton({ filePath, ctx }: { filePath: string; ctx: FileToolCtx
);
}
function TodoCard({ input }: { input: unknown }) {
function TodoCard({ input, runStreaming, runSucceeded }: { input: unknown; runStreaming: boolean; runSucceeded: boolean }) {
const t = useT();
const todos = parseTodoWriteInput(input);
if (todos.length === 0) return <GenericCard name="TodoWrite" input={input} />;
if (todos.length === 0) return <GenericCard name="TodoWrite" input={input} runStreaming={runStreaming} runSucceeded={runSucceeded} />;
const done = todos.filter((todo) => todo.status === 'completed').length;
return (
<div className="op-card op-todo">
@ -127,10 +132,14 @@ function TodoCard({ input }: { input: unknown }) {
function FileWriteCard({
input,
result,
runStreaming,
runSucceeded,
ctx,
}: {
input: unknown;
result?: Props['result'];
runStreaming: boolean;
runSucceeded: boolean;
ctx: FileToolCtx;
}) {
const t = useT();
@ -146,7 +155,7 @@ function FileWriteCard({
{lines !== null ? (
<span className="op-meta">{t('tool.lines', { n: lines })}</span>
) : null}
<ResultBadge result={result} />
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
<OpenInTabButton filePath={file} ctx={ctx} />
</div>
</div>
@ -156,10 +165,14 @@ function FileWriteCard({
function FileEditCard({
input,
result,
runStreaming,
runSucceeded,
ctx,
}: {
input: unknown;
result?: Props['result'];
runStreaming: boolean;
runSucceeded: boolean;
ctx: FileToolCtx;
}) {
const t = useT();
@ -182,7 +195,7 @@ function FileEditCard({
<span className="op-meta">
{editCount} {editCount === 1 ? t('tool.changeSingular') : t('tool.changePlural')}
</span>
<ResultBadge result={result} />
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
<OpenInTabButton filePath={file} ctx={ctx} />
</div>
</div>
@ -192,10 +205,14 @@ function FileEditCard({
function FileReadCard({
input,
result,
runStreaming,
runSucceeded,
ctx,
}: {
input: unknown;
result?: Props['result'];
runStreaming: boolean;
runSucceeded: boolean;
ctx: FileToolCtx;
}) {
const t = useT();
@ -207,14 +224,14 @@ function FileReadCard({
<span className="op-icon op-icon-read" aria-hidden></span>
<span className="op-title">{t('tool.read')}</span>
<code className="op-path">{file}</code>
<ResultBadge result={result} />
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
<OpenInTabButton filePath={file} ctx={ctx} />
</div>
</div>
);
}
function BashCard({ input, result }: { input: unknown; result?: Props['result'] }) {
function BashCard({ input, result, runStreaming, runSucceeded }: { input: unknown; result?: Props['result']; runStreaming: boolean; runSucceeded: boolean }) {
const t = useT();
const obj = (input ?? {}) as { command?: string; description?: string };
const command = obj.command ?? '';
@ -226,7 +243,7 @@ function BashCard({ input, result }: { input: unknown; result?: Props['result']
<span className="op-icon" aria-hidden>$</span>
<span className="op-title">{t('tool.bash')}</span>
{desc ? <span className="op-meta op-desc">{desc}</span> : null}
<ResultBadge result={result} />
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
{result && result.content ? (
<button className="op-toggle" onClick={() => setOpen((o) => !o)}>
{open ? t('tool.hide') : t('tool.output')}
@ -241,7 +258,7 @@ function BashCard({ input, result }: { input: unknown; result?: Props['result']
);
}
function GlobCard({ input, result }: { input: unknown; result?: Props['result'] }) {
function GlobCard({ input, result, runStreaming, runSucceeded }: { input: unknown; result?: Props['result']; runStreaming: boolean; runSucceeded: boolean }) {
const t = useT();
const obj = (input ?? {}) as { pattern?: string; path?: string };
return (
@ -253,13 +270,13 @@ function GlobCard({ input, result }: { input: unknown; result?: Props['result']
{obj.path ? (
<span className="op-meta">{t('tool.in', { path: obj.path })}</span>
) : null}
<ResultBadge result={result} />
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
</div>
</div>
);
}
function GrepCard({ input, result }: { input: unknown; result?: Props['result'] }) {
function GrepCard({ input, result, runStreaming, runSucceeded }: { input: unknown; result?: Props['result']; runStreaming: boolean; runSucceeded: boolean }) {
const t = useT();
const obj = (input ?? {}) as { pattern?: string; path?: string; glob?: string };
return (
@ -271,7 +288,7 @@ function GrepCard({ input, result }: { input: unknown; result?: Props['result']
{obj.path ? (
<span className="op-meta">{t('tool.in', { path: obj.path })}</span>
) : null}
<ResultBadge result={result} />
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
</div>
</div>
);
@ -309,10 +326,14 @@ function GenericCard({
name,
input,
result,
runStreaming,
runSucceeded,
}: {
name: string;
input: unknown;
result?: Props['result'];
runStreaming: boolean;
runSucceeded: boolean;
}) {
const summary = describeInput(input);
return (
@ -321,16 +342,17 @@ function GenericCard({
<span className="op-icon" aria-hidden>·</span>
<span className="op-title">{name}</span>
{summary ? <span className="op-meta">{truncate(summary, 200)}</span> : null}
<ResultBadge result={result} />
<ResultBadge result={result} runStreaming={runStreaming} runSucceeded={runSucceeded} />
</div>
</div>
);
}
function ResultBadge({ result }: { result?: Props['result'] }) {
function ResultBadge({ result, runStreaming, runSucceeded }: { result?: Props['result']; runStreaming: boolean; runSucceeded: boolean }) {
const t = useT();
if (!result) return <span className="op-status op-status-running">{t('tool.running')}</span>;
if (result.isError) return <span className="op-status op-status-error">{t('tool.error')}</span>;
if (!result && runStreaming) return <span className="op-status op-status-running">{t('tool.running')}</span>;
if (!result && !runSucceeded) return <span className="op-status op-status-error">{t('tool.error')}</span>;
if (result?.isError) return <span className="op-status op-status-error">{t('tool.error')}</span>;
return <span className="op-status op-status-ok">{t('tool.done')}</span>;
}

View file

@ -26,6 +26,8 @@ export interface ToolRenderProps {
name: string;
args: unknown;
result: string | undefined;
// Mirrors tool_result.isError. Terminal failures without a tool_result are
// surfaced through `status: 'error'`.
isError: boolean;
}
@ -85,38 +87,35 @@ export function clearToolRenderers(): void {
}
/**
* Map an in-flight tool call to AG-UI's four-state lifecycle.
* Map a tool call to AG-UI's lifecycle status.
*
* - `error` tool returned with `isError`
* - `complete` tool returned cleanly
* - `executing` no result yet, run still streaming
* - `inProgress` no result yet, run finished (rare: agent crashed
* mid-call). Distinct so renderers can surface a
* different affordance ("interrupted") than the
* live-spinner state.
* - `complete` no result yet, run finished. Some stored assistant
* turns can be missing a tool_result even though the run
* succeeded, so renderers should show the completed turn.
*
* The split between `inProgress` and `executing` is the same one
* CopilotKit exposes: in their world, `inProgress` = streaming args,
* `executing` = handler running. We don't currently receive partial
* tool_use args from the daemon, so the two states collapse onto the
* "run alive vs. run dead" axis instead. Either way, renderers that
* want a single "loading" state can treat both identically.
* - `error` no result after a failed or canceled terminal run.
*/
export function deriveToolStatus(
result: ToolResult | undefined,
runStreaming: boolean,
runSucceeded = false,
): ToolStatus {
if (result) return result.isError ? 'error' : 'complete';
return runStreaming ? 'executing' : 'inProgress';
if (runStreaming) return 'executing';
return runSucceeded ? 'complete' : 'error';
}
export function toRenderProps(
use: ToolUse,
result: ToolResult | undefined,
runStreaming: boolean,
runSucceeded = false,
): ToolRenderProps {
return {
status: deriveToolStatus(result, runStreaming),
status: deriveToolStatus(result, runStreaming, runSucceeded),
name: use.name,
args: use.input,
result: result?.content,

View file

@ -0,0 +1,161 @@
// @vitest-environment jsdom
import { cleanup, render, screen } from '@testing-library/react';
import { afterEach, describe, expect, it } from 'vitest';
import { AssistantMessage } from '../../src/components/AssistantMessage';
import type { AgentEvent, ChatMessage } from '../../src/types';
function messageWithEvents(events: AgentEvent[]): ChatMessage {
return {
id: 'assistant-1',
role: 'assistant',
content: '',
events,
startedAt: 1_000,
endedAt: 3_000,
runStatus: 'succeeded',
};
}
describe('AssistantMessage tool status', () => {
afterEach(() => cleanup());
it('shows Done for a completed run tool use that has no tool result', () => {
const { container } = render(
<AssistantMessage
message={messageWithEvents([
{
kind: 'tool_use',
id: 'tool-1',
name: 'Bash',
input: { command: 'pnpm guard', description: 'Run guard' },
},
])}
streaming={false}
projectId="project-1"
/>,
);
expect(container.querySelector('.op-status-ok')?.textContent).toMatch(/^done$/i);
expect(container.querySelector('.op-status-running')).toBeNull();
});
it('keeps legacy completed messages without runStatus as Done', () => {
const { container } = render(
<AssistantMessage
message={{
...messageWithEvents([
{
kind: 'tool_use',
id: 'tool-1',
name: 'Bash',
input: { command: 'pnpm guard', description: 'Execute guard' },
},
]),
runStatus: undefined,
}}
streaming={false}
projectId="project-1"
/>,
);
expect(container.querySelector('.op-status-ok')?.textContent).toMatch(/^done$/i);
expect(container.querySelector('.op-status-running')).toBeNull();
});
it('shows Done in a grouped completed run when tool results are missing', () => {
const { container } = render(
<AssistantMessage
message={messageWithEvents([
{
kind: 'tool_use',
id: 'tool-1',
name: 'Bash',
input: { command: 'pnpm guard', description: 'Execute guard' },
},
{
kind: 'tool_use',
id: 'tool-2',
name: 'Bash',
input: { command: 'pnpm typecheck', description: 'Execute typecheck' },
},
])}
streaming={false}
projectId="project-1"
/>,
);
expect(container.querySelector('.action-card-toggle.running')).toBeNull();
expect(screen.getByRole('button', { name: /Done/i })).toBeTruthy();
});
it('does not show Done when a failed run is missing a tool result', () => {
const { container } = render(
<AssistantMessage
message={{
...messageWithEvents([
{
kind: 'tool_use',
id: 'tool-1',
name: 'Bash',
input: { command: 'pnpm guard', description: 'Execute guard' },
},
]),
runStatus: 'failed',
}}
streaming={false}
projectId="project-1"
/>,
);
expect(container.querySelector('.op-status-error')?.textContent).toMatch(/^error$/i);
expect(container.querySelector('.op-status-ok')).toBeNull();
});
it('does not show Done when a canceled run is missing a tool result', () => {
const { container } = render(
<AssistantMessage
message={{
...messageWithEvents([
{
kind: 'tool_use',
id: 'tool-1',
name: 'Bash',
input: { command: 'pnpm guard', description: 'Execute guard' },
},
]),
runStatus: 'canceled',
}}
streaming={false}
projectId="project-1"
/>,
);
expect(container.querySelector('.op-status-error')?.textContent).toMatch(/^error$/i);
expect(container.querySelector('.op-status-ok')).toBeNull();
});
it('keeps Running for a streaming tool use that has no tool result', () => {
const { container } = render(
<AssistantMessage
message={{
...messageWithEvents([
{
kind: 'tool_use',
id: 'tool-1',
name: 'Bash',
input: { command: 'pnpm guard', description: 'Run guard' },
},
]),
endedAt: undefined,
runStatus: 'running',
}}
streaming
projectId="project-1"
/>,
);
expect(container.querySelector('.op-status-running')?.textContent).toBe('running…');
expect(screen.queryByText('Done')).toBeNull();
});
});

View file

@ -33,8 +33,12 @@ describe('deriveToolStatus', () => {
expect(deriveToolStatus(undefined, true)).toBe('executing');
});
it('returns "inProgress" when the run died before the tool returned', () => {
expect(deriveToolStatus(undefined, false)).toBe('inProgress');
it('returns "complete" when a successful run finished without a tool result', () => {
expect(deriveToolStatus(undefined, false, true)).toBe('complete');
});
it('returns "error" when a failed or canceled run finished without a tool result', () => {
expect(deriveToolStatus(undefined, false, false)).toBe('error');
});
it('returns "complete" on a clean tool result', () => {
@ -66,6 +70,13 @@ describe('toRenderProps', () => {
expect(props.result).toBeUndefined();
expect(props.isError).toBe(false);
});
it('marks missing results complete only for successful terminal runs', () => {
const u = use({ city: 'SF' }, 'get_weather');
expect(toRenderProps(u, undefined, false, true).status).toBe('complete');
expect(toRenderProps(u, undefined, false, false).status).toBe('error');
});
});
describe('tool renderer registry', () => {

View file

@ -0,0 +1,137 @@
---
id: 20260511-issue-145-unit-test-reproduction
name: Issue 145 Unit Test Reproduction
status: implemented
created: '2026-05-11'
---
## Overview
### Problem Statement
- GitHub issue 145 reports that artifact generation can complete while the chat task/tool status remains visually stuck on "running".
- The current discussion narrowed reproduction to a unit-level case: an assistant message has ended successfully, but a `tool_use` event has no paired `tool_result`.
### Goal
- Add a web unit/component test that reproduces the issue deterministically.
- Prefer a focused `AssistantMessage` rendering test over e2e because the failure is a UI state derivation bug.
### Scope
- Cover the case `streaming={false}` + `endedAt`/succeeded message + missing `tool_result`.
- Verify the current behavior exposes `Running`, then use the test as the regression target for the fix.
### Success Criteria
- The test fails against the current buggy behavior when written with the desired expectation: completed message should not show a running tool state.
- After the implementation fix, the same test passes.
## Research
### Existing System
- `AssistantMessage` builds render blocks from `message.events` and passes `streaming` to tool groups; `endedAt` is only used by the footer. Source: `apps/web/src/components/AssistantMessage.tsx:68-75,124-130,151-157`.
- `ProjectView` marks an assistant message complete on successful stream completion by setting `endedAt`, resolving run status to succeeded, and setting `streaming` false. Source: `apps/web/src/components/ProjectView.tsx:1250-1268`.
- Web SSE handling calls `handlers.onDone(acc)` after receiving or recovering a terminal succeeded status. Source: `apps/web/src/providers/daemon.ts:338-371`.
- Daemon run completion emits an `end` event and closes SSE clients, but this run-level completion path does not synthesize missing tool results. Source: `apps/daemon/src/runs.ts:72-80`.
- Tool group summaries currently treat any tool item without `result` as running. Source: `apps/web/src/components/AssistantMessage.tsx:633-639,736-739`.
- Individual tool badges currently treat missing `result` as `op-status-running`. Source: `apps/web/src/components/ToolCard.tsx:330-334`.
### Available Approaches
- **Component/unit reproduction**: render `AssistantMessage` with a completed assistant message containing `tool_use` without `tool_result`. This directly targets the UI state derivation. Source: `apps/web/src/components/AssistantMessage.tsx:56-67,736-739`; `apps/web/src/components/ToolCard.tsx:330-334`.
- **Provider-level reproduction**: simulate SSE `tool_use -> end` and assert the provider calls `onDone` while events remain unpaired. This proves the upstream state is possible, while the visible bug still needs component coverage. Source: `apps/web/src/providers/daemon.ts:338-371,440-449`.
- **E2E reproduction**: mock or force a daemon stream sequence `tool_use -> end`; this covers the full UI but requires more harness control. Source: `apps/daemon/src/runs.ts:49-56,72-80`; `apps/web/src/providers/daemon.ts:338-371`.
### Constraints & Dependencies
- App tests belong under `apps/web/tests/`, not under `apps/web/src/`. Source: `apps/AGENTS.md#Test layout`.
- The repository prefers package-scoped validation, including `pnpm --filter @open-design/web test`. Source: `AGENTS.md#Common commands`.
- The current requested scope is reproduction via unit test; implementation fix can follow after the failing/pinning test is in place. Source: conversation.
### Key References
- `apps/web/src/components/AssistantMessage.tsx:633-639,736-739` - missing tool result maps to running state.
- `apps/web/src/components/ToolCard.tsx:330-334` - missing result renders the running badge.
- `apps/web/src/components/ProjectView.tsx:1250-1268` - message completion is tracked separately from tool result pairing.
- `apps/daemon/src/runs.ts:72-80` - run end is emitted at the daemon level.
## Design
### Architecture Overview
```mermaid
flowchart LR
Fixture[Completed assistant message fixture] --> Render[Render AssistantMessage]
Render --> ToolGroup[ToolGroupCard / ToolCard]
ToolGroup --> Assertion[Assert visible tool status]
```
### Change Scope
- Area: web component tests. Impact: deterministic regression coverage for issue 145 without requiring daemon or Playwright orchestration. Source: `apps/web/src/components/AssistantMessage.tsx:56-67`; `apps/AGENTS.md#Test layout`.
- Area: assistant message/tool rendering. Impact: test exercises the current `tool_use` without `tool_result` path. Source: `apps/web/src/components/AssistantMessage.tsx:763-817`; `apps/web/src/components/ToolCard.tsx:330-334`.
### Design Decisions
- Decision: Add a component test for `AssistantMessage` rather than an e2e test as the first reproduction. Source: `apps/web/src/components/AssistantMessage.tsx:736-739`; `apps/web/src/components/ToolCard.tsx:330-334`.
- Decision: Use a fixture with `streaming={false}`, `endedAt`, `runStatus: 'succeeded'`, and a single `tool_use` event missing `tool_result`. Source: `apps/web/src/components/ProjectView.tsx:1250-1268`; `apps/web/src/components/AssistantMessage.tsx:785-788`.
- Decision: Place the test under `apps/web/tests/components/`, matching existing component test ownership. Source: `apps/AGENTS.md#Test layout`; `apps/web/tests/components/assistant-message-unfinished-todos.test.tsx`.
- Decision: Write the desired post-fix assertion: completed assistant messages should not present a missing result as a running tool state. Source: `apps/web/src/components/AssistantMessage.tsx:151-157,736-739`; `apps/web/src/components/ToolCard.tsx:330-334`.
### Why this design
- The bug is produced by UI state derivation: message completion and tool result pairing are tracked independently.
- A component test can reproduce the precise state directly and runs faster than an e2e flow.
- Provider or e2e coverage can be added later after the UI regression test pins the expected behavior.
### Test Strategy
- Add a Vitest/Testing Library component test in `apps/web/tests/components/assistant-message-tool-status.test.tsx`. Validation: render a completed assistant message with an unpaired `Write` tool and assert the completed message does not show `Running`. Source: `apps/web/src/components/AssistantMessage.tsx:56-67`; `apps/web/src/components/ToolCard.tsx:330-334`.
- Run `pnpm --filter @open-design/web test` after adding the test. Source: `AGENTS.md#Common commands`.
### Pseudocode
```ts
render(
<AssistantMessage
message={{
id: 'assistant-1',
role: 'assistant',
content: '',
startedAt: 1000,
endedAt: 2000,
runStatus: 'succeeded',
events: [
{ kind: 'tool_use', id: 'tool-1', name: 'Write', input: { file_path: 'index.html', content: '<html>done</html>' } },
],
}}
streaming={false}
projectId="project-1"
/>
);
expect(screen.queryByText(/running/i)).toBeNull();
expect(screen.getByText(/done/i)).toBeInTheDocument();
```
### File Structure
- `apps/web/tests/components/assistant-message-tool-status.test.tsx` - focused reproduction/regression test for completed messages with unpaired tool events.
## Plan
- [x] Step 1: Add issue 145 reproduction test
- [x] Substep 1.1 Implement: create `assistant-message-tool-status.test.tsx` with the completed-message/unpaired-tool fixture.
- [x] Substep 1.2 Verify: run the web component test and confirm it exposes the current bug with the desired assertion.
- [x] Step 2: Apply UI status fix
- [x] Substep 2.1 Implement: update assistant/tool rendering so completed runs do not display missing tool results as running.
- [x] Substep 2.2 Verify: rerun the web test and confirm it passes.
## Notes
<!-- Optional sections — add what's relevant. -->
### Implementation
- `apps/web/tests/components/assistant-message-tool-status.test.tsx` - added issue 145 regression coverage for completed and streaming runs with missing `tool_result`.
- `apps/web/src/components/AssistantMessage.tsx` - made grouped tool summary/running state depend on `runStreaming` so completed turns with missing results render as done.
- `apps/web/src/components/ToolCard.tsx` - threaded `runStreaming` into built-in result badges and treated missing results as done only after the run has ended.
- `apps/web/src/runtime/tool-renderers.ts` - aligned custom renderer status derivation with the same completed-run behavior.
- `apps/web/tests/runtime/tool-renderers.test.tsx` - updated renderer status expectations for completed runs missing a result.
- Follow-up review tightened the fallback so failed/canceled terminal runs with missing `tool_result` surface as error rather than done.
### Verification
- Reproduction confirmed before the fix: the new component test failed because a completed run without `tool_result` rendered `running…` instead of `Done`.
- Passed: `pnpm --filter @open-design/web test -- tests/components/assistant-message-tool-status.test.tsx tests/runtime/tool-renderers.test.tsx` (`84` files, `751` tests).
- Passed: `pnpm --filter @open-design/web typecheck`.
- Final code review found no blocking issues.