feat(web): add per-tool renderer registry for generative UI (#282)

* feat(web): add per-tool renderer registry for generative UI

Introduce a registry on the web runtime so external surfaces (skill-emitted
tools, MCP tools, future plugins) can render their own tool cards instead
of falling through to the generic command/output fallback. Registered
renderers are consulted before the hardcoded family ladder, mirroring
CopilotKit's `useCopilotAction({ render })` extension point and AG-UI's
four-state lifecycle (`inProgress` / `executing` / `complete` / `error`).
Returning null from a renderer falls back to the built-in card.

Forward `runStreaming` from AssistantMessage so the registry can
distinguish a live tool call from one that was interrupted before the
result arrived.

* fix(web): isolate failing tool renderers from the message stream

Address PR #282 review:

- Wrap the registry-renderer call in ToolCard with try/catch so a
  third-party renderer that throws synchronously falls back to the
  built-in family card instead of taking down the surrounding
  AssistantMessage. Log via console.error for visibility.
- Drop the unused `use` parameter from `deriveToolStatus` (its inputs
  are only `result` + `runStreaming`); removes the awkward `void use;`.
- Document the dispose-before-reregister contract on
  `registerToolRenderer` for dynamic skill load/unload callers.
- Add a test asserting the throwing-renderer fallback path.

* docs(web): pin the hook-free contract for tool renderers

Address PR #282 review:

- Document on `ToolRenderer` that the callback runs inside ToolCard's
  render and must therefore be hook-free. Hookful cards should return
  a component element so React mounts the inner component and owns its
  hooks across re-registers, fallbacks, and replacements.
- Add a test that exercises the documented pattern: register a renderer
  whose body uses useState wrapped in a component element, render,
  dispose, then register a different hookful renderer. Verifies that
  swapping renderers with different hook shapes doesn't violate the
  Rules of Hooks on ToolCard.

Generated-By: looper 0.4.0 (runner=fixer, agent=claude-code)
This commit is contained in:
Tom Huang 2026-05-02 22:41:31 +08:00 committed by GitHub
parent 8c61e43c44
commit bf44394f91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 365 additions and 4 deletions

View file

@ -105,6 +105,7 @@ export function AssistantMessage({
<ToolGroupCard
key={i}
items={b.items}
runStreaming={streaming}
projectFileNames={projectFileNames}
onRequestOpenFile={onRequestOpenFile}
/>
@ -513,10 +514,12 @@ interface ToolItem {
function ToolGroupCard({
items,
runStreaming,
projectFileNames,
onRequestOpenFile,
}: {
items: ToolItem[];
runStreaming: boolean;
projectFileNames?: Set<string>;
onRequestOpenFile?: (name: string) => void;
}) {
@ -530,6 +533,7 @@ function ToolGroupCard({
<ToolCard
use={items[0]!.use}
result={items[0]!.result}
runStreaming={runStreaming}
projectFileNames={projectFileNames}
onRequestOpenFile={onRequestOpenFile}
/>
@ -559,6 +563,7 @@ function ToolGroupCard({
key={i}
use={it.use}
result={it.result}
runStreaming={runStreaming}
projectFileNames={projectFileNames}
onRequestOpenFile={onRequestOpenFile}
/>

View file

@ -1,17 +1,26 @@
/**
* Renders a single tool_use (optionally paired with its tool_result) as an
* inline card in the assistant message stream. Tools we recognize get
* specialized layouts; unknown ones fall back to a generic command/output
* card.
* inline card in the assistant message stream. Lookup order:
*
* 1. user-registered renderer in `tool-renderers` (the extension point
* analogous to CopilotKit's `useCopilotAction({ render })`)
* 2. hardcoded family card for tools we ship with (TodoWrite / Write /
* Edit / Read / Bash / Glob / Grep / WebFetch / WebSearch)
* 3. generic command/output fallback
*/
import { useState } from 'react';
import { useT } from '../i18n';
import { parseTodoWriteInput } from '../runtime/todos';
import { getToolRenderer, toRenderProps } from '../runtime/tool-renderers';
import type { AgentEvent } from '../types';
interface Props {
use: Extract<AgentEvent, { kind: 'tool_use' }>;
result?: Extract<AgentEvent, { kind: 'tool_result' }> | undefined;
// True while the parent run is still streaming. Forwarded to registered
// renderers via `status` so they can distinguish "executing" (run alive)
// from "inProgress" (run dead before result arrived).
runStreaming?: boolean;
// Set of file names that exist in the project folder. When the tool's
// `file_path`/`path` argument's basename appears in this set we surface
// an "open" button on the card. Pass `undefined` to skip the existence
@ -22,8 +31,27 @@ interface Props {
onRequestOpenFile?: (name: string) => void;
}
export function ToolCard({ use, result, projectFileNames, onRequestOpenFile }: Props) {
export function ToolCard({
use,
result,
runStreaming,
projectFileNames,
onRequestOpenFile,
}: Props) {
const name = use.name;
const custom = getToolRenderer(name);
if (custom) {
// A misbehaving third-party renderer must not take down the whole
// assistant message — catch synchronous throws and fall through to the
// built-in family card. (React's own error boundaries still cover
// throws raised inside the returned tree once it's mounted.)
try {
const node = custom(toRenderProps(use, result, runStreaming ?? false));
if (node !== undefined && node !== null && node !== false) return <>{node}</>;
} catch (err) {
console.error(`[ToolCard] custom renderer for "${name}" threw; falling back`, err);
}
}
const ctx: FileToolCtx = { projectFileNames, onRequestOpenFile };
if (name === 'TodoWrite') return <TodoCard input={use.input} />;
if (name === 'Write' || name === 'create_file')

View file

@ -0,0 +1,203 @@
import { useState } from 'react';
import { renderToStaticMarkup } from 'react-dom/server';
import { afterEach, describe, expect, it, vi } from 'vitest';
import { ToolCard } from '../components/ToolCard';
import {
clearToolRenderers,
deriveToolStatus,
getToolRenderer,
registerToolRenderer,
toRenderProps,
} from './tool-renderers';
import type { ToolRenderProps } from './tool-renderers';
import type { AgentEvent } from '../types';
// Narrow the `AgentEvent` union to the two variants this file builds:
// the `tool_use` call event and its paired `tool_result` event.
type ToolUse = Extract<AgentEvent, { kind: 'tool_use' }>;
type ToolResult = Extract<AgentEvent, { kind: 'tool_result' }>;
/**
 * Fixture: builds a `tool_use` event carrying `input`.
 * Defaults to tool name `render_chart` and id `t1`.
 */
function use(input: unknown, name = 'render_chart', id = 't1'): ToolUse {
  const event: ToolUse = { kind: 'tool_use', id, name, input };
  return event;
}
/**
 * Fixture: builds a successful (`isError: false`) `tool_result` event
 * for the tool call identified by `id` (default `t1`).
 */
function ok(content: string, id = 't1'): ToolResult {
  const event: ToolResult = {
    kind: 'tool_result',
    toolUseId: id,
    content,
    isError: false,
  };
  return event;
}
/**
 * Fixture: builds a failed (`isError: true`) `tool_result` event for the
 * tool call identified by `id` (default `t1`).
 */
function err(content: string, id = 't1'): ToolResult {
  const event: ToolResult = {
    kind: 'tool_result',
    toolUseId: id,
    content,
    isError: true,
  };
  return event;
}
// One case per lifecycle state: two without a result (run alive / run
// dead) and two with a result (clean / errored).
describe('deriveToolStatus', () => {
  it('returns "executing" while the run is streaming and no result has arrived', () => {
    const status = deriveToolStatus(undefined, true);
    expect(status).toBe('executing');
  });

  it('returns "inProgress" when the run died before the tool returned', () => {
    const status = deriveToolStatus(undefined, false);
    expect(status).toBe('inProgress');
  });

  it('returns "complete" on a clean tool result', () => {
    const cleanResult = ok('ok');
    expect(deriveToolStatus(cleanResult, true)).toBe('complete');
  });

  it('returns "error" when the tool result carries isError', () => {
    const failedResult = err('boom');
    expect(deriveToolStatus(failedResult, true)).toBe('error');
  });
});
// Checks the event-pair -> render-props mapping, both with and without a
// tool result.
describe('toRenderProps', () => {
  it('packs args / result / isError into the AG-UI render-prop shape', () => {
    const toolUse = use({ city: 'SF' }, 'get_weather');
    const toolResult = ok('{"temp":61}');

    expect(toRenderProps(toolUse, toolResult, true)).toEqual({
      status: 'complete',
      name: 'get_weather',
      args: { city: 'SF' },
      result: '{"temp":61}',
      isError: false,
    });
  });

  it('omits result while the tool is still running', () => {
    const toolUse = use({ city: 'SF' }, 'get_weather');

    const { status, result, isError } = toRenderProps(toolUse, undefined, true);

    expect(status).toBe('executing');
    expect(result).toBeUndefined();
    expect(isError).toBe(false);
  });
});
// Exercises register / lookup / dispose semantics of the registry.
describe('tool renderer registry', () => {
  // Renderers registered by one case must not leak into the next.
  afterEach(() => {
    clearToolRenderers();
  });

  it('registers, looks up, and unregisters renderers', () => {
    const renderer = () => null;
    expect(getToolRenderer('xyz')).toBeUndefined();

    const unregister = registerToolRenderer('xyz', renderer);
    expect(getToolRenderer('xyz')).toBe(renderer);

    unregister();
    expect(getToolRenderer('xyz')).toBeUndefined();
  });

  it('overwrites on re-registration (last writer wins)', () => {
    const first = () => null;
    const second = () => null;

    registerToolRenderer('xyz', first);
    registerToolRenderer('xyz', second);

    expect(getToolRenderer('xyz')).toBe(second);
  });

  it('does not unregister a renderer that has been overwritten', () => {
    const first = () => null;
    const second = () => null;

    const unregisterFirst = registerToolRenderer('xyz', first);
    registerToolRenderer('xyz', second);
    // The stale handle belongs to `first`, which no longer owns the name.
    unregisterFirst();

    expect(getToolRenderer('xyz')).toBe(second);
  });
});
// End-to-end dispatch checks: render `ToolCard` to static markup and assert
// on whether the registered renderer or the built-in card produced it.
describe('ToolCard dispatch', () => {
  // Renderers registered by one case must not leak into the next.
  afterEach(() => clearToolRenderers());

  it('routes unknown tool names through the registry', () => {
    registerToolRenderer('render_chart', ({ status, args }) => (
      <div data-testid="custom-chart" data-status={status}>
        {(args as { label?: string }).label}
      </div>
    ));
    const markup = renderToStaticMarkup(
      <ToolCard use={use({ label: 'Q3 revenue' })} runStreaming={true} />,
    );
    expect(markup).toContain('data-testid="custom-chart"');
    expect(markup).toContain('data-status="executing"');
    expect(markup).toContain('Q3 revenue');
  });

  it('passes the result content through as the `result` prop on completion', () => {
    registerToolRenderer('render_chart', ({ status, result }) => (
      <span data-testid="custom-chart" data-status={status}>
        {result}
      </span>
    ));
    const markup = renderToStaticMarkup(
      <ToolCard use={use({})} result={ok('payload')} runStreaming={false} />,
    );
    expect(markup).toContain('data-status="complete"');
    expect(markup).toContain('payload');
  });

  it('falls back to the built-in card when the registered renderer returns null', () => {
    registerToolRenderer('Bash', () => null);
    const markup = renderToStaticMarkup(
      <ToolCard use={use({ command: 'ls' }, 'Bash')} runStreaming={true} />,
    );
    // `op-bash` is presumably a marker emitted by the built-in Bash card;
    // verify against ToolCard if this assertion starts failing.
    expect(markup).toContain('op-bash');
    expect(markup).toContain('ls');
  });

  it('lets a registered renderer override a built-in family card', () => {
    registerToolRenderer('Bash', ({ args }) => (
      <pre data-testid="custom-bash">{(args as { command?: string }).command}</pre>
    ));
    const markup = renderToStaticMarkup(
      <ToolCard use={use({ command: 'whoami' }, 'Bash')} runStreaming={true} />,
    );
    expect(markup).toContain('data-testid="custom-bash"');
    expect(markup).not.toContain('op-bash');
  });

  it('mounts hookful renderer output as a child component, surviving replace + dispose', () => {
    // The documented contract: renderers must be hook-free, but they may
    // return a component *element* whose body uses hooks. That child gets
    // mounted as its own component, so swapping the renderer (or letting
    // it return null) does not violate the Rules of Hooks on ToolCard.
    function HookfulCardA({ args }: ToolRenderProps) {
      const [count] = useState(() => (args as { start?: number }).start ?? 0);
      return <span data-testid="hookful-a">A:{count}</span>;
    }
    function HookfulCardB({ result }: ToolRenderProps) {
      const [label] = useState('mounted');
      return (
        <span data-testid="hookful-b">
          B:{label}:{result ?? ''}
        </span>
      );
    }
    const disposeA = registerToolRenderer('render_chart', (props) => <HookfulCardA {...props} />);
    const first = renderToStaticMarkup(
      <ToolCard use={use({ start: 7 })} runStreaming={true} />,
    );
    expect(first).toContain('data-testid="hookful-a"');
    expect(first).toContain('A:7');
    // Swap to a renderer with a different hook shape. If the renderer
    // were called as a plain function inside ToolCard, this would shift
    // ToolCard's hook sequence; mounting as a child component isolates
    // each renderer's hooks to its own fiber.
    disposeA();
    registerToolRenderer('render_chart', (props) => <HookfulCardB {...props} />);
    const second = renderToStaticMarkup(
      <ToolCard use={use({})} result={ok('payload')} runStreaming={false} />,
    );
    expect(second).toContain('data-testid="hookful-b"');
    expect(second).toContain('B:mounted:payload');
    expect(second).not.toContain('hookful-a');
  });

  it('falls back to the built-in card when a registered renderer throws', () => {
    // Silence the expected fallback log while still recording the call.
    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
    try {
      registerToolRenderer('Bash', () => {
        throw new Error('boom');
      });
      const markup = renderToStaticMarkup(
        <ToolCard use={use({ command: 'ls' }, 'Bash')} runStreaming={true} />,
      );
      expect(markup).toContain('op-bash');
      expect(markup).toContain('ls');
      expect(errorSpy).toHaveBeenCalled();
    } finally {
      // Restore even when an assertion above throws; otherwise
      // `console.error` stays mocked and hides output from later tests.
      errorSpy.mockRestore();
    }
  });
});

View file

@ -0,0 +1,125 @@
/**
* Per-tool renderer registry — the open-design analogue of CopilotKit's
* `useCopilotAction({ render })` and AG-UI's tool render-prop contract.
*
* Built-in tools (Read/Write/Edit/Bash/...) keep their hand-tuned cards in
* `ToolCard.tsx`. The registry is the extension point for everything else:
* skill-emitted tools, MCP-style external tools, future plugins. Anything
* registered here is consulted *before* the hardcoded family ladder, so a
* third party can override a built-in if they really want to.
*
* The render-prop shape mirrors AG-UI:
* ({ status, name, args, result, isError }) => ReactNode
* where `status` is the four-state lifecycle agreed across LangGraph,
* CrewAI, and OpenAI tool calls.
*/
import type { ReactNode } from 'react';
import type { AgentEvent } from '../types';
/** Lifecycle state handed to renderers; computed by `deriveToolStatus`. */
export type ToolStatus = 'inProgress' | 'executing' | 'complete' | 'error';
// Local aliases for the `tool_use` / `tool_result` members of `AgentEvent`.
type ToolUse = Extract<AgentEvent, { kind: 'tool_use' }>;
type ToolResult = Extract<AgentEvent, { kind: 'tool_result' }>;
/**
 * Props passed to a registered tool renderer. Built by `toRenderProps`
 * from a `tool_use` event and its optional `tool_result`.
 */
export interface ToolRenderProps {
/** Lifecycle state derived from the result and the run's streaming flag. */
status: ToolStatus;
/** Tool name, copied from `tool_use.name`. */
name: string;
/** Tool input, copied from `tool_use.input`; untyped, so narrow before use. */
args: unknown;
/** `tool_result.content`, or `undefined` while no result has arrived. */
result: string | undefined;
/** `tool_result.isError`; `false` while no result has arrived. */
isError: boolean;
}
/**
* Tool render callback. Mirrors AG-UI's `({ status, args, result, ... })`
* render-prop shape and CopilotKit's `useCopilotAction({ render })`.
*
* The callback runs inside `ToolCard`'s render — it is *not* mounted as
* its own component. Two implications follow from that:
*
* 1. **Renderers must be hook-free.** Calling React hooks here would
* weld them into `ToolCard`'s hook sequence, so any swap (skill
* hot-reload, fallback when the renderer returns null/false, or a
* replacement renderer with a different hook shape) would violate
* the Rules of Hooks and crash the surrounding assistant message.
* 2. **If you need hooks**, return a component element. Wrap your
* hookful UI in a component and have the renderer return that
* element: `(props) => <MyHookfulCard {...props} />`. The element
* is mounted as a child, giving React stable hook ownership across
* re-registers.
*
* Returning `null` / `undefined` / `false` defers to the next step in
* the lookup ladder (built-in family card, then generic fallback).
*/
export type ToolRenderer = (props: ToolRenderProps) => ReactNode;
// Module-scoped registry: tool name -> renderer. Mutated only through
// `registerToolRenderer` (and its dispose handle) and `clearToolRenderers`.
const renderers = new Map<string, ToolRenderer>();
/**
* Register a renderer for a tool name. Returns an unregister handle so
* tests / hot-reloads can dispose cleanly.
*
* Names are matched case-sensitively against `tool_use.name` (mirrors the
* agent's wire spelling). Re-registering the same name overwrites — the
* last writer wins, matching CopilotKit's behaviour.
*
* The registry is module-scoped and persists for the lifetime of the
* page. Callers that load skills dynamically (e.g. hot-reload, plugin
* unload) should hold the dispose handle and call it before re-registering
* under the same name, otherwise stale renderers may stick around when a
* skill is removed without a replacement.
*/
export function registerToolRenderer(name: string, renderer: ToolRenderer): () => void {
  // Unconditional set: a later registration replaces an earlier one.
  renderers.set(name, renderer);

  // Identity-guarded disposer: only evict the entry if this exact renderer
  // still owns the name, so a stale handle cannot remove a newer renderer.
  const dispose = (): void => {
    const current = renderers.get(name);
    if (current !== renderer) return;
    renderers.delete(name);
  };
  return dispose;
}
/**
 * Look up the renderer currently registered for `name`.
 *
 * @param name Tool name, matched exactly as a registry key.
 * @returns The registered renderer, or `undefined` when none is registered.
 */
export function getToolRenderer(name: string): ToolRenderer | undefined {
  const registered = renderers.get(name);
  return registered;
}
/**
 * Remove every registered renderer, returning the registry to its empty
 * state. Visible mainly for tests, which call it between cases so
 * registrations do not leak across them.
 */
export function clearToolRenderers(): void {
renderers.clear();
}
/**
* Map an in-flight tool call to AG-UI's four-state lifecycle.
*
* - `error`      — tool returned with `isError`
* - `complete`   — tool returned cleanly
* - `executing`  — no result yet, run still streaming
* - `inProgress` — no result yet, run finished (rare: agent crashed
*                  mid-call). Distinct so renderers can surface a
*                  different affordance ("interrupted") than the
*                  live-spinner state.
*
* The split between `inProgress` and `executing` is the same one
* CopilotKit exposes: in their world, `inProgress` = streaming args,
* `executing` = handler running. We don't currently receive partial
* tool_use args from the daemon, so the two states collapse onto the
* "run alive vs. run dead" axis instead. Either way, renderers that
* want a single "loading" state can treat both identically.
*/
export function deriveToolStatus(
  result: ToolResult | undefined,
  runStreaming: boolean,
): ToolStatus {
  // No result yet: the only signal left is whether the run is still alive.
  if (!result) {
    return runStreaming ? 'executing' : 'inProgress';
  }
  // A result exists, so the streaming flag no longer matters.
  if (result.isError) return 'error';
  return 'complete';
}
/**
 * Assemble the props object passed to a registered renderer.
 *
 * @param use The `tool_use` event; supplies `name` and `args`.
 * @param result The paired `tool_result`, or `undefined` if none has arrived.
 * @param runStreaming Whether the parent run is still streaming; only
 *   affects `status` when `result` is absent.
 * @returns Props with `result` left `undefined` and `isError` set to
 *   `false` when there is no tool result.
 */
export function toRenderProps(
  use: ToolUse,
  result: ToolResult | undefined,
  runStreaming: boolean,
): ToolRenderProps {
  const status = deriveToolStatus(result, runStreaming);
  const content = result?.content;
  const isError = result?.isError ?? false;

  return { status, name: use.name, args: use.input, result: content, isError };
}