feat(web): add per-tool renderer registry for generative UI (#282)

* feat(web): add per-tool renderer registry for generative UI

  Introduce a registry on the web runtime so external surfaces (skill-emitted tools, MCP tools, future plugins) can render their own tool cards instead of falling through to the generic command/output fallback. Registered renderers are consulted before the hardcoded family ladder, mirroring CopilotKit's `useCopilotAction({ render })` extension point and AG-UI's four-state lifecycle (`inProgress` / `executing` / `complete` / `error`). Returning null from a renderer falls back to the built-in card.

  Forward `runStreaming` from AssistantMessage so the registry can distinguish a live tool call from one that was interrupted before the result arrived.

* fix(web): isolate failing tool renderers from the message stream

  Address PR #282 review:
  - Wrap the registry-renderer call in ToolCard with try/catch so a third-party renderer that throws synchronously falls back to the built-in family card instead of taking down the surrounding AssistantMessage. Log via console.error for visibility.
  - Drop the unused `use` parameter from `deriveToolStatus` (its inputs are only `result` + `runStreaming`); removes the awkward `void use;`.
  - Document the dispose-before-reregister contract on `registerToolRenderer` for dynamic skill load/unload callers.
  - Add a test asserting the throwing-renderer fallback path.

* docs(web): pin the hook-free contract for tool renderers

  Address PR #282 review:
  - Document on `ToolRenderer` that the callback runs inside ToolCard's render and must therefore be hook-free. Hookful cards should return a component element so React mounts the inner component and owns its hooks across re-registers, fallbacks, and replacements.
  - Add a test that exercises the documented pattern: register a renderer whose body uses useState wrapped in a component element, render, dispose, then register a different hookful renderer. Verifies that swapping renderers with different hook shapes doesn't violate the Rules of Hooks on ToolCard.

Generated-By: looper 0.4.0 (runner=fixer, agent=claude-code)
2026-06-01 03:14:35 +07:00 · 2026-05-02 22:41:31 +08:00 · 2026-05-02 22:41:31 +08:00 · bf44394f91
commit bf44394f91
parent 8c61e43c44
4 changed files with 365 additions and 4 deletions
--- a/apps/web/src/components/AssistantMessage.tsx
+++ b/apps/web/src/components/AssistantMessage.tsx
@ -105,6 +105,7 @@ export function AssistantMessage({
              <ToolGroupCard
                key={i}
                items={b.items}
+                runStreaming={streaming}
                projectFileNames={projectFileNames}
                onRequestOpenFile={onRequestOpenFile}
              />
@ -513,10 +514,12 @@ interface ToolItem {

 function ToolGroupCard({
  items,
+  runStreaming,
  projectFileNames,
  onRequestOpenFile,
 }: {
  items: ToolItem[];
+  runStreaming: boolean;
  projectFileNames?: Set<string>;
  onRequestOpenFile?: (name: string) => void;
 }) {
@ -530,6 +533,7 @@ function ToolGroupCard({
      <ToolCard
        use={items[0]!.use}
        result={items[0]!.result}
+        runStreaming={runStreaming}
        projectFileNames={projectFileNames}
        onRequestOpenFile={onRequestOpenFile}
      />
@ -559,6 +563,7 @@ function ToolGroupCard({
              key={i}
              use={it.use}
              result={it.result}
+              runStreaming={runStreaming}
              projectFileNames={projectFileNames}
              onRequestOpenFile={onRequestOpenFile}
            />
--- a/apps/web/src/components/ToolCard.tsx
+++ b/apps/web/src/components/ToolCard.tsx
@ -1,17 +1,26 @@
 /**
 * Renders a single tool_use (optionally paired with its tool_result) as an
- * inline card in the assistant message stream. Tools we recognize get
- * specialized layouts; unknown ones fall back to a generic command/output
- * card.
+ * inline card in the assistant message stream. Lookup order:
+ *
+ *   1. user-registered renderer in `tool-renderers` (the extension point
+ *      analogous to CopilotKit's `useCopilotAction({ render })`)
+ *   2. hardcoded family card for tools we ship with (TodoWrite / Write /
+ *      Edit / Read / Bash / Glob / Grep / WebFetch / WebSearch)
+ *   3. generic command/output fallback
 */
 import { useState } from 'react';
 import { useT } from '../i18n';
 import { parseTodoWriteInput } from '../runtime/todos';
+import { getToolRenderer, toRenderProps } from '../runtime/tool-renderers';
 import type { AgentEvent } from '../types';

 interface Props {
  use: Extract<AgentEvent, { kind: 'tool_use' }>;
  result?: Extract<AgentEvent, { kind: 'tool_result' }> | undefined;
+  // True while the parent run is still streaming. Forwarded to registered
+  // renderers via `status` so they can distinguish "executing" (run alive)
+  // from "inProgress" (run dead before result arrived).
+  runStreaming?: boolean;
  // Set of file names that exist in the project folder. When the tool's
  // `file_path`/`path` argument's basename appears in this set we surface
  // an "open" button on the card. Pass `undefined` to skip the existence
@ -22,8 +31,27 @@ interface Props {
  onRequestOpenFile?: (name: string) => void;
 }

-export function ToolCard({ use, result, projectFileNames, onRequestOpenFile }: Props) {
+export function ToolCard({
+  use,
+  result,
+  runStreaming,
+  projectFileNames,
+  onRequestOpenFile,
+}: Props) {
  const name = use.name;
+  const custom = getToolRenderer(name);
+  if (custom) {
+    // A misbehaving third-party renderer must not take down the whole
+    // assistant message — catch synchronous throws and fall through to the
+    // built-in family card. (React's own error boundaries still cover
+    // throws raised inside the returned tree once it's mounted.)
+    try {
+      const node = custom(toRenderProps(use, result, runStreaming ?? false));
+      if (node !== undefined && node !== null && node !== false) return <>{node}</>;
+    } catch (err) {
+      console.error(`[ToolCard] custom renderer for "${name}" threw; falling back`, err);
+    }
+  }
  const ctx: FileToolCtx = { projectFileNames, onRequestOpenFile };
  if (name === 'TodoWrite') return <TodoCard input={use.input} />;
  if (name === 'Write' || name === 'create_file')
--- a/apps/web/src/runtime/tool-renderers.test.tsx
+++ b/apps/web/src/runtime/tool-renderers.test.tsx
@ -0,0 +1,203 @@
+import { useState } from 'react';
+import { renderToStaticMarkup } from 'react-dom/server';
+import { afterEach, describe, expect, it, vi } from 'vitest';
+
+import { ToolCard } from '../components/ToolCard';
+import {
+  clearToolRenderers,
+  deriveToolStatus,
+  getToolRenderer,
+  registerToolRenderer,
+  toRenderProps,
+} from './tool-renderers';
+import type { ToolRenderProps } from './tool-renderers';
+import type { AgentEvent } from '../types';
+
+type ToolUse = Extract<AgentEvent, { kind: 'tool_use' }>;
+type ToolResult = Extract<AgentEvent, { kind: 'tool_result' }>;
+
+/** Build a `tool_use` event fixture; defaults to the `render_chart` tool with id `t1`. */
+function use(input: unknown, name = 'render_chart', id = 't1'): ToolUse {
+  const event: ToolUse = { kind: 'tool_use', id, name, input };
+  return event;
+}
+
+/** Build a successful `tool_result` fixture paired (by default) with tool call `t1`. */
+function ok(content: string, id = 't1'): ToolResult {
+  const success: ToolResult = { kind: 'tool_result', toolUseId: id, content, isError: false };
+  return success;
+}
+
+/** Build a failed `tool_result` fixture (`isError: true`) paired (by default) with tool call `t1`. */
+function err(content: string, id = 't1'): ToolResult {
+  const failure: ToolResult = { kind: 'tool_result', toolUseId: id, content, isError: true };
+  return failure;
+}
+
+// One case per lifecycle state that `deriveToolStatus` can produce.
+describe('deriveToolStatus', () => {
+  it('returns "executing" while the run is streaming and no result has arrived', () => {
+    const status = deriveToolStatus(undefined, true);
+    expect(status).toBe('executing');
+  });
+
+  it('returns "inProgress" when the run died before the tool returned', () => {
+    const status = deriveToolStatus(undefined, false);
+    expect(status).toBe('inProgress');
+  });
+
+  it('returns "complete" on a clean tool result', () => {
+    const cleanResult = ok('ok');
+    expect(deriveToolStatus(cleanResult, true)).toBe('complete');
+  });
+
+  it('returns "error" when the tool result carries isError', () => {
+    const failedResult = err('boom');
+    expect(deriveToolStatus(failedResult, true)).toBe('error');
+  });
+});
+
+// Checks the shape handed to renderers, both after a result and before one.
+describe('toRenderProps', () => {
+  it('packs args / result / isError into the AG-UI render-prop shape', () => {
+    const expected: ToolRenderProps = {
+      status: 'complete',
+      name: 'get_weather',
+      args: { city: 'SF' },
+      result: '{"temp":61}',
+      isError: false,
+    };
+    const actual = toRenderProps(use({ city: 'SF' }, 'get_weather'), ok('{"temp":61}'), true);
+    expect(actual).toEqual(expected);
+  });
+
+  it('omits result while the tool is still running', () => {
+    const call = use({ city: 'SF' }, 'get_weather');
+    const { status, result, isError } = toRenderProps(call, undefined, true);
+    expect(status).toBe('executing');
+    expect(result).toBeUndefined();
+    expect(isError).toBe(false);
+  });
+});
+
+// Registry bookkeeping: register / lookup / dispose, and overwrite semantics.
+describe('tool renderer registry', () => {
+  // The registry is shared module state, so wipe it between cases.
+  afterEach(() => clearToolRenderers());
+
+  it('registers, looks up, and unregisters renderers', () => {
+    const renderer = () => null;
+    expect(getToolRenderer('xyz')).toBeUndefined();
+
+    const unregister = registerToolRenderer('xyz', renderer);
+    expect(getToolRenderer('xyz')).toBe(renderer);
+
+    unregister();
+    expect(getToolRenderer('xyz')).toBeUndefined();
+  });
+
+  it('overwrites on re-registration (last writer wins)', () => {
+    const first = () => null;
+    const second = () => null;
+    registerToolRenderer('xyz', first);
+    registerToolRenderer('xyz', second);
+    expect(getToolRenderer('xyz')).toBe(second);
+  });
+
+  it('does not unregister a renderer that has been overwritten', () => {
+    const first = () => null;
+    const second = () => null;
+    const disposeFirst = registerToolRenderer('xyz', first);
+    registerToolRenderer('xyz', second);
+    // Disposing the stale handle must leave the newer registration intact.
+    disposeFirst();
+    expect(getToolRenderer('xyz')).toBe(second);
+  });
+});
+
+// End-to-end dispatch through `ToolCard`: registered renderers are tried
+// first, and a null return or a synchronous throw falls back to the
+// built-in family card.
+describe('ToolCard dispatch', () => {
+  // Reset shared state after every case. Restoring mocks here, rather than
+  // on the last line of a test body, guarantees a spied-on `console.error`
+  // is put back even when an assertion fails part-way through a test.
+  afterEach(() => {
+    clearToolRenderers();
+    vi.restoreAllMocks();
+  });
+
+  it('routes unknown tool names through the registry', () => {
+    registerToolRenderer('render_chart', ({ status, args }) => (
+      <div data-testid="custom-chart" data-status={status}>
+        {(args as { label?: string }).label}
+      </div>
+    ));
+    const markup = renderToStaticMarkup(
+      <ToolCard use={use({ label: 'Q3 revenue' })} runStreaming={true} />,
+    );
+    expect(markup).toContain('data-testid="custom-chart"');
+    expect(markup).toContain('data-status="executing"');
+    expect(markup).toContain('Q3 revenue');
+  });
+
+  it('passes the result content through as the `result` prop on completion', () => {
+    registerToolRenderer('render_chart', ({ status, result }) => (
+      <span data-testid="custom-chart" data-status={status}>
+        {result}
+      </span>
+    ));
+    const markup = renderToStaticMarkup(
+      <ToolCard use={use({})} result={ok('payload')} runStreaming={false} />,
+    );
+    expect(markup).toContain('data-status="complete"');
+    expect(markup).toContain('payload');
+  });
+
+  it('falls back to the built-in card when the registered renderer returns null', () => {
+    registerToolRenderer('Bash', () => null);
+    const markup = renderToStaticMarkup(
+      <ToolCard use={use({ command: 'ls' }, 'Bash')} runStreaming={true} />,
+    );
+    expect(markup).toContain('op-bash');
+    expect(markup).toContain('ls');
+  });
+
+  it('lets a registered renderer override a built-in family card', () => {
+    registerToolRenderer('Bash', ({ args }) => (
+      <pre data-testid="custom-bash">{(args as { command?: string }).command}</pre>
+    ));
+    const markup = renderToStaticMarkup(
+      <ToolCard use={use({ command: 'whoami' }, 'Bash')} runStreaming={true} />,
+    );
+    expect(markup).toContain('data-testid="custom-bash"');
+    expect(markup).not.toContain('op-bash');
+  });
+
+  it('mounts hookful renderer output as a child component, surviving replace + dispose', () => {
+    // The documented contract: renderers must be hook-free, but they may
+    // return a component *element* whose body uses hooks. That child gets
+    // mounted as its own component, so swapping the renderer (or letting
+    // it return null) does not violate the Rules of Hooks on ToolCard.
+    function HookfulCardA({ args }: ToolRenderProps) {
+      const [count] = useState(() => (args as { start?: number }).start ?? 0);
+      return <span data-testid="hookful-a">A:{count}</span>;
+    }
+    function HookfulCardB({ result }: ToolRenderProps) {
+      const [label] = useState('mounted');
+      return (
+        <span data-testid="hookful-b">
+          B:{label}:{result ?? ''}
+        </span>
+      );
+    }
+
+    const disposeA = registerToolRenderer('render_chart', (props) => <HookfulCardA {...props} />);
+    const first = renderToStaticMarkup(
+      <ToolCard use={use({ start: 7 })} runStreaming={true} />,
+    );
+    expect(first).toContain('data-testid="hookful-a"');
+    expect(first).toContain('A:7');
+
+    // Swap to a renderer with a different hook shape. If the renderer
+    // were called as a plain function inside ToolCard, this would shift
+    // ToolCard's hook sequence; mounting as a child component isolates
+    // each renderer's hooks to its own fiber.
+    disposeA();
+    registerToolRenderer('render_chart', (props) => <HookfulCardB {...props} />);
+    const second = renderToStaticMarkup(
+      <ToolCard use={use({})} result={ok('payload')} runStreaming={false} />,
+    );
+    expect(second).toContain('data-testid="hookful-b"');
+    expect(second).toContain('B:mounted:payload');
+    expect(second).not.toContain('hookful-a');
+  });
+
+  it('falls back to the built-in card when a registered renderer throws', () => {
+    // Silence the expected fallback log; the spy is restored by the
+    // `afterEach` hook above, so no manual cleanup is needed here.
+    const errorSpy = vi.spyOn(console, 'error').mockImplementation(() => {});
+    registerToolRenderer('Bash', () => {
+      throw new Error('boom');
+    });
+    const markup = renderToStaticMarkup(
+      <ToolCard use={use({ command: 'ls' }, 'Bash')} runStreaming={true} />,
+    );
+    expect(markup).toContain('op-bash');
+    expect(markup).toContain('ls');
+    expect(errorSpy).toHaveBeenCalled();
+  });
+});
--- a/apps/web/src/runtime/tool-renderers.ts
+++ b/apps/web/src/runtime/tool-renderers.ts
@ -0,0 +1,125 @@
+/**
+ * Per-tool renderer registry — the open-design analogue of CopilotKit's
+ * `useCopilotAction({ render })` and AG-UI's tool render-prop contract.
+ *
+ * Built-in tools (Read/Write/Edit/Bash/...) keep their hand-tuned cards in
+ * `ToolCard.tsx`. The registry is the extension point for everything else:
+ * skill-emitted tools, MCP-style external tools, future plugins. Anything
+ * registered here is consulted *before* the hardcoded family ladder, so a
+ * third party can override a built-in if they really want to.
+ *
+ * The render-prop shape mirrors AG-UI:
+ *   ({ status, name, args, result, isError }) => ReactNode
+ * where `status` is the four-state lifecycle agreed across LangGraph,
+ * CrewAI, and OpenAI tool calls.
+ */
+import type { ReactNode } from 'react';
+import type { AgentEvent } from '../types';
+
+export type ToolStatus = 'inProgress' | 'executing' | 'complete' | 'error';
+
+type ToolUse = Extract<AgentEvent, { kind: 'tool_use' }>;
+type ToolResult = Extract<AgentEvent, { kind: 'tool_result' }>;
+
+export interface ToolRenderProps {
+  /** Lifecycle state; `toRenderProps` fills it from `deriveToolStatus`. */
+  status: ToolStatus;
+  /** Tool name, copied from the `tool_use` event's `name`. */
+  name: string;
+  /** Tool input, copied from the `tool_use` event's `input`; untyped, so renderers must narrow it. */
+  args: unknown;
+  /** The `tool_result` event's `content`, or `undefined` while no result exists. */
+  result: string | undefined;
+  /** The `tool_result` event's `isError`; `false` while no result exists. */
+  isError: boolean;
+}
+
+/**
+ * Tool render callback. Mirrors AG-UI's `({ status, args, result, ... })`
+ * render-prop shape and CopilotKit's `useCopilotAction({ render })`.
+ *
+ * The callback runs inside `ToolCard`'s render — it is *not* mounted as
+ * its own component. Two implications follow from that:
+ *
+ *   1. **Renderers must be hook-free.** Calling React hooks here would
+ *      weld them into `ToolCard`'s hook sequence, so any swap (skill
+ *      hot-reload, fallback when the renderer returns null/false, or a
+ *      replacement renderer with a different hook shape) would violate
+ *      the Rules of Hooks and crash the surrounding assistant message.
+ *   2. **If you need hooks**, return a component element. Wrap your
+ *      hookful UI in a component and have the renderer return that
+ *      element: `(props) => <MyHookfulCard {...props} />`. The element
+ *      is mounted as a child, giving React stable hook ownership across
+ *      re-registers.
+ *
+ * Returning `null` / `undefined` / `false` defers to the next step in
+ * the lookup ladder (built-in family card, then generic fallback).
+ */
+export type ToolRenderer = (props: ToolRenderProps) => ReactNode;
+
+const renderers = new Map<string, ToolRenderer>();
+
+/**
+ * Install `renderer` as the renderer for tool `name` and hand back a
+ * dispose function that removes it again.
+ *
+ * Lookup is by exact, case-sensitive match on `tool_use.name`. Registering
+ * a name that is already taken replaces the previous renderer (last
+ * writer wins).
+ *
+ * The returned dispose function is ownership-checked: it deletes the
+ * entry only while this exact `renderer` is still the one registered, so
+ * a stale handle cannot remove a newer registration.
+ *
+ * The registry lives at module scope. Callers that load and unload skills
+ * dynamically should keep the handle and call it when the skill goes
+ * away; otherwise the renderer stays registered after its skill is gone.
+ */
+export function registerToolRenderer(name: string, renderer: ToolRenderer): () => void {
+  renderers.set(name, renderer);
+
+  const dispose = (): void => {
+    const current = renderers.get(name);
+    if (current !== renderer) return;
+    renderers.delete(name);
+  };
+  return dispose;
+}
+
+/** Look up the renderer registered for `name`; `undefined` when none is registered. */
+export function getToolRenderer(name: string): ToolRenderer | undefined {
+  const registered = renderers.get(name);
+  return registered;
+}
+
+/**
+ * Remove every registered renderer, leaving the registry empty.
+ * Visible mainly for tests.
+ */
+export function clearToolRenderers(): void {
+  renderers.clear();
+}
+
+/**
+ * Classify a tool call into one of the four AG-UI lifecycle states.
+ *
+ * With no result yet, the state depends on whether the run is alive:
+ * - `executing`  — the run is still streaming
+ * - `inProgress` — the run has ended without a result (rare: the agent
+ *                  crashed mid-call), so renderers can show an
+ *                  "interrupted" affordance instead of a live spinner
+ *
+ * Once a result exists, the streaming flag is ignored:
+ * - `error`      — the result carries `isError`
+ * - `complete`   — the result is clean
+ *
+ * CopilotKit uses the same two pre-result names for a different split
+ * (`inProgress` = streaming args, `executing` = handler running). The
+ * daemon does not currently send partial tool_use args, so here the two
+ * states map onto "run alive vs. run dead" instead. Renderers that only
+ * need one "loading" state can treat both the same.
+ */
+export function deriveToolStatus(
+  result: ToolResult | undefined,
+  runStreaming: boolean,
+): ToolStatus {
+  if (!result) {
+    return runStreaming ? 'executing' : 'inProgress';
+  }
+  if (result.isError) return 'error';
+  return 'complete';
+}
+
+/**
+ * Assemble the props object passed to a registered renderer from a
+ * `tool_use` event, its optional `tool_result`, and the run's streaming
+ * flag. With no result, `result` is `undefined` and `isError` is `false`.
+ */
+export function toRenderProps(
+  use: ToolUse,
+  result: ToolResult | undefined,
+  runStreaming: boolean,
+): ToolRenderProps {
+  const status = deriveToolStatus(result, runStreaming);
+  const content = result?.content;
+  const failed = result?.isError ?? false;
+
+  return {
+    status,
+    name: use.name,
+    args: use.input,
+    result: content,
+    isError: failed,
+  };
+}