From f12679185c98a1657ddd6c004027516fad851f93 Mon Sep 17 00:00:00 2001 From: RyanCheng77 Date: Sat, 30 May 2026 12:47:47 +0800 Subject: [PATCH] fix(web): send Anthropic proxy image attachments (#3273) * fix(web): send Anthropic proxy image attachments * fix(web): omit image attachment stubs for Anthropic proxy * fix(web): keep image fallback context aligned * fix(web): align Anthropic image attachment omission --------- Co-authored-by: 116405 <116405@ky-tech.com.cn> --- apps/web/src/api-attachment-context.ts | 26 +- apps/web/src/components/ProjectView.tsx | 2 + .../web/src/providers/anthropic-compatible.ts | 12 +- apps/web/src/providers/anthropic.ts | 5 +- apps/web/src/providers/api-proxy.ts | 142 +++++++++- apps/web/src/utils/apiProtocol.ts | 24 +- apps/web/tests/api-attachment-context.test.ts | 58 ++++ apps/web/tests/providers/api-proxy.test.ts | 251 ++++++++++++++++++ packages/contracts/src/api/proxy.ts | 20 +- 9 files changed, 527 insertions(+), 13 deletions(-) create mode 100644 apps/web/tests/providers/api-proxy.test.ts diff --git a/apps/web/src/api-attachment-context.ts b/apps/web/src/api-attachment-context.ts index e0429a192..26413aa69 100644 --- a/apps/web/src/api-attachment-context.ts +++ b/apps/web/src/api-attachment-context.ts @@ -8,6 +8,7 @@ import type { ProjectFile, ProjectFileKind, } from './types'; +import { isAnthropicSupportedImagePath } from './utils/apiProtocol'; const API_ATTACHMENT_TEXT_KINDS = new Set(['html', 'text', 'code']); const API_ATTACHMENT_PREVIEW_KINDS = new Set([ @@ -19,17 +20,22 @@ const API_ATTACHMENT_PREVIEW_KINDS = new Set([ const MAX_API_ATTACHMENT_CHARS = 24_000; const MAX_API_ATTACHMENT_TOTAL_CHARS = 64_000; +export interface ApiAttachmentContextOptions { + omitNativeImageAttachments?: boolean; +} + export async function historyWithApiAttachmentContext( history: ChatMessage[], messageId: string, projectId: string, projectFiles: ProjectFile[], + options: ApiAttachmentContextOptions = {}, ): Promise { const current = history.find((message) => message.id === messageId && message.role === 'user'); const attachments = current?.attachments ?? []; if (!current || attachments.length === 0) return history; - const context = await buildApiAttachmentContext(projectId, attachments, projectFiles); + const context = await buildApiAttachmentContext(projectId, attachments, projectFiles, options); if (!context) return history; return history.map((message) => @@ -43,6 +49,7 @@ async function buildApiAttachmentContext( projectId: string, attachments: ChatAttachment[], projectFiles: ProjectFile[], + options: ApiAttachmentContextOptions, ): Promise { const byPath = new Map(); const byName = new Map(); @@ -54,6 +61,13 @@ async function buildApiAttachmentContext( let remaining = MAX_API_ATTACHMENT_TOTAL_CHARS; const blocks: string[] = []; for (const attachment of attachments) { + const file = + byPath.get(attachment.path) ?? + byName.get(attachment.path) ?? + byName.get(attachment.name); + if (options.omitNativeImageAttachments && canSendNativeAnthropicImage(attachment)) { + continue; + } if (remaining <= 0) { blocks.push( '[Open Design omitted remaining attached files because the attachment context budget was exhausted.]', @@ -61,10 +75,6 @@ async function buildApiAttachmentContext( break; } - const file = - byPath.get(attachment.path) ?? - byName.get(attachment.path) ?? - byName.get(attachment.name); const block = await renderApiAttachmentBlock(projectId, attachment, file, remaining); if (!block) continue; blocks.push(block.text); @@ -136,6 +146,12 @@ async function renderApiAttachmentBlock( return { text, charsUsed: text.length }; } +function canSendNativeAnthropicImage( + attachment: ChatAttachment, +): boolean { + return attachment.kind === 'image' && isAnthropicSupportedImagePath(attachment.path); +} + function canReadRawText(kind: ProjectFileKind, path: string): boolean { if (API_ATTACHMENT_TEXT_KINDS.has(kind)) return true; return kind === 'sketch' && isTextSketchPath(path); diff --git a/apps/web/src/components/ProjectView.tsx b/apps/web/src/components/ProjectView.tsx index 1bd1abde0..0946629d1 100644 --- a/apps/web/src/components/ProjectView.tsx +++ b/apps/web/src/components/ProjectView.tsx @@ -74,6 +74,7 @@ import { import { apiProtocolAgentId, apiProtocolModelLabel, + usesAnthropicProxy, } from '../utils/apiProtocol'; import { playSound, showCompletionNotification } from '../utils/notifications'; import { randomUUID } from '../utils/uuid'; @@ -2861,6 +2862,7 @@ export function ProjectView({ userMsg.id, project.id, projectFiles, + { omitNativeImageAttachments: usesAnthropicProxy(config) }, ); pushEvent({ kind: 'status', label: 'requesting', detail: config.model }); let accumulatedAssistantText = ''; diff --git a/apps/web/src/providers/anthropic-compatible.ts b/apps/web/src/providers/anthropic-compatible.ts index 63a3118a6..2e9249177 100644 --- a/apps/web/src/providers/anthropic-compatible.ts +++ b/apps/web/src/providers/anthropic-compatible.ts @@ -1,5 +1,6 @@ import type { AppConfig, ChatMessage } from '../types'; import type { StreamHandlers } from './anthropic'; +import type { ProxyContext } from './api-proxy'; import { streamProxyEndpoint } from './api-proxy'; export async function streamMessageAnthropicProxy( @@ -8,6 +9,15 @@ export async function streamMessageAnthropicProxy( history: ChatMessage[], signal: AbortSignal, handlers: StreamHandlers, + context?: ProxyContext, ): Promise { - return streamProxyEndpoint('/api/proxy/anthropic/stream', cfg, system, history, signal, handlers); + return streamProxyEndpoint( + '/api/proxy/anthropic/stream', + cfg, + system, + history, + signal, + handlers, + context, + ); } diff --git a/apps/web/src/providers/anthropic.ts b/apps/web/src/providers/anthropic.ts index 08c967726..22c543a3a 100644 --- a/apps/web/src/providers/anthropic.ts +++ b/apps/web/src/providers/anthropic.ts @@ -17,6 +17,7 @@ import { streamMessageGoogle } from './google-compatible'; import { streamMessageOllama } from './ollama-compatible'; import { isOpenAICompatible, streamMessageOpenAI } from './openai-compatible'; import { streamMessageSenseAudio } from './senseaudio-compatible'; +import { usesAnthropicProxy } from '../utils/apiProtocol'; // Re-export for convenience export { isOpenAICompatible } from './openai-compatible'; @@ -66,8 +67,8 @@ export async function streamMessage( return streamMessageOpenAI(cfg, system, history, signal, handlers); } - if (cfg.baseUrl && cfg.baseUrl !== 'https://api.anthropic.com') { - return streamMessageAnthropicProxy(cfg, system, history, signal, handlers); + if (usesAnthropicProxy(cfg)) { + return streamMessageAnthropicProxy(cfg, system, history, signal, handlers, context); } if (!cfg.apiKey) { diff --git a/apps/web/src/providers/api-proxy.ts b/apps/web/src/providers/api-proxy.ts index 537539fba..33455e028 100644 --- a/apps/web/src/providers/api-proxy.ts +++ b/apps/web/src/providers/api-proxy.ts @@ -1,13 +1,22 @@ import { effectiveMaxTokens } from '../state/maxTokens'; import type { AppConfig, ChatMessage } from '../types'; +import type { + ProxyImageContentBlock, + ProxyMessage, + ProxyMessageContent, + ProxyTextContentBlock, +} from '@open-design/contracts'; +import { projectFileUrl } from './registry'; import type { StreamHandlers } from './anthropic'; import { parseSseFrame } from './sse'; +import { isAnthropicSupportedImagePath } from '../utils/apiProtocol'; /** * Optional per-request context that some protocols thread into the - * proxy body. Today only the senseaudio proxy reads these fields: + * proxy body or use to prepare provider-native message payloads: * - `projectId` lets the `generate_image` tool write into the active - * project's folder instead of a daemon-global cache. + * project's folder instead of a daemon-global cache, and lets the + * Anthropic proxy resolve image attachments into content blocks. * - `byokImageModel` is the user's BYOK Settings default for the * image tool. The LLM can still override per-call via the tool's * `model` arg; this is just the fallback when it omits one. @@ -36,6 +45,7 @@ export async function streamProxyEndpoint( let acc = ''; try { + const messages = await buildProxyMessages(endpoint, history, context); const resp = await fetch(endpoint, { method: 'POST', headers: { 'Content-Type': 'application/json' }, @@ -44,7 +54,7 @@ export async function streamProxyEndpoint( apiKey: cfg.apiKey, model: cfg.model, systemPrompt: system, - messages: history.map((m) => ({ role: m.role, content: m.content })), + messages, maxTokens: effectiveMaxTokens(cfg), apiVersion: cfg.apiVersion, ...(context?.projectId ? { projectId: context.projectId } : {}), @@ -107,6 +117,132 @@ export async function streamProxyEndpoint( } } +export async function buildProxyMessages( + endpoint: string, + history: ChatMessage[], + context?: ProxyContext, +): Promise { + if (!usesAnthropicMessagesPayload(endpoint) || !context?.projectId) { + return history.map((m) => ({ role: m.role, content: m.content })); + } + + const out: ProxyMessage[] = []; + for (const message of history) { + out.push({ + role: message.role, + content: await buildAnthropicMessageContent(message, context.projectId), + }); + } + return out; +} + +function usesAnthropicMessagesPayload(endpoint: string): boolean { + return endpoint.includes('/api/proxy/anthropic/'); +} + +async function buildAnthropicMessageContent( + message: ChatMessage, + projectId: string, +): Promise { + const imageAttachments = (message.attachments ?? []).filter( + (attachment) => attachment.kind === 'image', + ); + if (message.role !== 'user' || imageAttachments.length === 0) { + return message.content; + } + + const blocks: Array = []; + if (message.content.trim()) { + blocks.push({ type: 'text', text: message.content }); + } + + for (const attachment of imageAttachments) { + const block = await readAnthropicImageBlock(projectId, attachment.path); + if (block) { + blocks.push(block); + } else if (isAnthropicSupportedImagePath(attachment.path)) { + blocks.push({ + type: 'text', + text: `Attached image could not be sent as native image content: path: ${attachment.path} | name: ${attachment.name}`, + }); + } + } + + return blocks.length > 0 ? blocks : message.content; +} + +async function readAnthropicImageBlock( + projectId: string, + path: string, +): Promise { + try { + const resp = await fetch(projectFileUrl(projectId, path), { cache: 'no-store' }); + if (!resp.ok) return null; + + const mediaType = supportedAnthropicImageMediaType( + resp.headers.get('content-type') ?? '', + path, + ); + if (!mediaType) return null; + + const bytes = new Uint8Array(await resp.arrayBuffer()); + return { + type: 'image', + source: { + type: 'base64', + media_type: mediaType, + data: bytesToBase64(bytes), + }, + }; + } catch { + return null; + } +} + +function supportedAnthropicImageMediaType( + contentType: string, + path: string, +): 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp' | null { + const normalized = contentType.split(';', 1)[0]?.trim().toLowerCase(); + if ( + normalized === 'image/jpeg' || + normalized === 'image/png' || + normalized === 'image/gif' || + normalized === 'image/webp' + ) { + return normalized; + } + const lower = path.toLowerCase(); + if (/\.(jpe?g)$/.test(lower)) return 'image/jpeg'; + if (lower.endsWith('.png')) return 'image/png'; + if (lower.endsWith('.gif')) return 'image/gif'; + if (lower.endsWith('.webp')) return 'image/webp'; + return null; +} + +function bytesToBase64(bytes: Uint8Array): string { + const alphabet = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/'; + let out = ''; + let i = 0; + for (; i + 2 < bytes.length; i += 3) { + const n = (bytes[i]! << 16) | (bytes[i + 1]! << 8) | bytes[i + 2]!; + out += alphabet[(n >> 18) & 63]; + out += alphabet[(n >> 12) & 63]; + out += alphabet[(n >> 6) & 63]; + out += alphabet[n & 63]; + } + if (i < bytes.length) { + const a = bytes[i]!; + const b = i + 1 < bytes.length ? bytes[i + 1]! : 0; + const n = (a << 16) | (b << 8); + out += alphabet[(n >> 18) & 63]; + out += alphabet[(n >> 12) & 63]; + out += i + 1 < bytes.length ? alphabet[(n >> 6) & 63] : '='; + out += '='; + } + return out; +} + function proxyErrorMessage(data: Record): string { const nested = data.error; if (nested && typeof nested === 'object' && 'message' in nested) { diff --git a/apps/web/src/utils/apiProtocol.ts b/apps/web/src/utils/apiProtocol.ts index 84d55503f..d624141fc 100644 --- a/apps/web/src/utils/apiProtocol.ts +++ b/apps/web/src/utils/apiProtocol.ts @@ -1,4 +1,5 @@ -import type { ApiProtocol } from '../types'; +import { isOpenAICompatible } from '../providers/openai-compatible'; +import type { ApiProtocol, AppConfig } from '../types'; const API_PROTOCOL_LABELS: Record = { anthropic: 'Anthropic API', @@ -34,3 +35,24 @@ export function apiProtocolModelLabel( export function apiProtocolAgentId(protocol: ApiProtocol | undefined): string { return API_PROTOCOL_AGENT_IDS[protocol ?? 'anthropic']; } + +export function usesAnthropicProxy(cfg: AppConfig): boolean { + if ( + cfg.apiProtocol === 'azure' || + cfg.apiProtocol === 'ollama' || + cfg.apiProtocol === 'google' || + cfg.apiProtocol === 'senseaudio' || + cfg.apiProtocol === 'openai' + ) { + return false; + } + if (!cfg.apiProtocol && isOpenAICompatible(cfg.model, cfg.baseUrl)) { + return false; + } + return Boolean(cfg.baseUrl && cfg.baseUrl !== 'https://api.anthropic.com'); +} + +export function isAnthropicSupportedImagePath(path: string): boolean { + const lower = path.toLowerCase(); + return /\.(jpe?g|png|gif|webp)$/.test(lower); +} diff --git a/apps/web/tests/api-attachment-context.test.ts b/apps/web/tests/api-attachment-context.test.ts index c9e52171f..338f5eafc 100644 --- a/apps/web/tests/api-attachment-context.test.ts +++ b/apps/web/tests/api-attachment-context.test.ts @@ -79,6 +79,64 @@ describe('historyWithApiAttachmentContext', () => { expect(history[0]?.content).toContain('Content preview unavailable'); }); + it('omits image attachment metadata when the provider sends native image blocks', async () => { + for (const path of ['hero.png', 'hero.jpg', 'hero.jpeg', 'hero.gif', 'hero.webp']) { + const history = await historyWithApiAttachmentContext( + [ + userMessage('msg-1', 'Describe this image', [ + { path, name: path, kind: 'image' }, + ]), + ], + 'msg-1', + 'project-1', + [projectFile(path, 'image')], + { omitNativeImageAttachments: true }, + ); + + expect(history[0]?.content).toBe('Describe this image'); + } + expect(mockedFetchProjectFileText).not.toHaveBeenCalled(); + expect(mockedFetchProjectFilePreview).not.toHaveBeenCalled(); + }); + + it('omits sketch-prefixed raster image metadata when native image blocks carry them', async () => { + const history = await historyWithApiAttachmentContext( + [ + userMessage('msg-1', 'Describe this image', [ + { path: 'sketch-hero.png', name: 'sketch-hero.png', kind: 'image' }, + ]), + ], + 'msg-1', + 'project-1', + [projectFile('sketch-hero.png', 'sketch')], + { omitNativeImageAttachments: true }, + ); + + expect(history[0]?.content).toBe('Describe this image'); + expect(mockedFetchProjectFileText).not.toHaveBeenCalled(); + expect(mockedFetchProjectFilePreview).not.toHaveBeenCalled(); + }); + + it('keeps unsupported image metadata when native image blocks cannot carry them', async () => { + for (const path of ['hero.avif', 'hero.bmp']) { + const history = await historyWithApiAttachmentContext( + [ + userMessage('msg-1', 'Describe this image', [ + { path, name: path, kind: 'image' }, + ]), + ], + 'msg-1', + 'project-1', + [projectFile(path, 'image')], + { omitNativeImageAttachments: true }, + ); + + expect(history[0]?.content).toContain(''); + expect(history[0]?.content).toContain(`path: ${path}`); + expect(history[0]?.content).toContain('Content preview unavailable'); + } + }); + it('uses filename inference when the project file list has not refreshed yet', async () => { mockedFetchProjectFilePreview.mockResolvedValue({ kind: 'pdf', diff --git a/apps/web/tests/providers/api-proxy.test.ts b/apps/web/tests/providers/api-proxy.test.ts new file mode 100644 index 000000000..0bfb9be8a --- /dev/null +++ b/apps/web/tests/providers/api-proxy.test.ts @@ -0,0 +1,251 @@ +import { afterEach, describe, expect, it, vi } from 'vitest'; + +import { historyWithApiAttachmentContext } from '../../src/api-attachment-context'; +import { buildProxyMessages, streamProxyEndpoint } from '../../src/providers/api-proxy'; +import type { ChatMessage } from '../../src/types'; + +describe('buildProxyMessages', () => { + afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); + }); + + it('serializes image attachments as Anthropic image content blocks', async () => { + const pngBytes = new Uint8Array([137, 80, 78, 71]); + vi.stubGlobal( + 'fetch', + vi.fn().mockResolvedValue({ + ok: true, + headers: { + get: (name: string) => (name.toLowerCase() === 'content-type' ? 'image/png' : null), + }, + arrayBuffer: async () => pngBytes.buffer, + }), + ); + + const messages = await buildProxyMessages( + '/api/proxy/anthropic/stream', + [ + userMessage('Describe the attached image', [ + { path: 'references/logo.png', name: 'logo.png', kind: 'image', size: 4 }, + ]), + ], + { projectId: 'project-1' }, + ); + + expect(fetch).toHaveBeenCalledWith( + '/api/projects/project-1/raw/references/logo.png', + { cache: 'no-store' }, + ); + expect(messages).toEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'Describe the attached image' }, + { + type: 'image', + source: { + type: 'base64', + media_type: 'image/png', + data: 'iVBORw==', + }, + }, + ], + }, + ]); + }); + + it('keeps non-Anthropic proxy messages as plain text', async () => { + vi.stubGlobal('fetch', vi.fn()); + + const messages = await buildProxyMessages( + '/api/proxy/openai/stream', + [ + userMessage('Describe the attached image', [ + { path: 'references/logo.png', name: 'logo.png', kind: 'image', size: 4 }, + ]), + ], + { projectId: 'project-1' }, + ); + + expect(fetch).not.toHaveBeenCalled(); + expect(messages).toEqual([ + { role: 'user', content: 'Describe the attached image' }, + ]); + }); + + it('sends Anthropic image content blocks in the proxy request body', async () => { + const pngBytes = new Uint8Array([137, 80, 78, 71]); + const fetchMock = vi + .fn() + .mockResolvedValueOnce({ + ok: true, + headers: { + get: (name: string) => (name.toLowerCase() === 'content-type' ? 'image/png' : null), + }, + arrayBuffer: async () => pngBytes.buffer, + }) + .mockResolvedValueOnce({ + ok: true, + body: new ReadableStream({ + start(controller) { + controller.enqueue( + new TextEncoder().encode('event: end\ndata: {}\n\n'), + ); + controller.close(); + }, + }), + }); + vi.stubGlobal('fetch', fetchMock); + + await streamProxyEndpoint( + '/api/proxy/anthropic/stream', + { + apiKey: 'test-api-key', + baseUrl: 'https://anthropic-compatible.example', + model: 'vision-model', + } as any, + 'System prompt', + [ + userMessage('Describe the attached image', [ + { path: 'references/logo.png', name: 'logo.png', kind: 'image', size: 4 }, + ]), + ], + new AbortController().signal, + { + onDelta: vi.fn(), + onDone: vi.fn(), + onError: vi.fn(), + }, + { projectId: 'project-1' }, + ); + + const proxyInit = fetchMock.mock.calls[1]?.[1] as RequestInit; + expect(JSON.parse(String(proxyInit.body))).toMatchObject({ + messages: [ + { + role: 'user', + content: [ + { type: 'text', text: 'Describe the attached image' }, + { + type: 'image', + source: { + type: 'base64', + media_type: 'image/png', + data: 'iVBORw==', + }, + }, + ], + }, + ], + projectId: 'project-1', + }); + }); + + it('keeps a text fallback when a supported Anthropic image cannot be read', async () => { + vi.stubGlobal( + 'fetch', + vi.fn().mockResolvedValue({ + ok: false, + headers: { get: () => null }, + arrayBuffer: async () => new ArrayBuffer(0), + }), + ); + + const messages = await buildProxyMessages( + '/api/proxy/anthropic/stream', + [ + userMessage('Describe the attached image', [ + { path: 'references/logo.png', name: 'logo.png', kind: 'image', size: 4 }, + ]), + ], + { projectId: 'project-1' }, + ); + + expect(messages).toEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'Describe the attached image' }, + { + type: 'text', + text: 'Attached image could not be sent as native image content: path: references/logo.png | name: logo.png', + }, + ], + }, + ]); + }); + + it('does not send preview-unavailable text alongside sketch raster image blocks', async () => { + const pngBytes = new Uint8Array([137, 80, 78, 71]); + vi.stubGlobal( + 'fetch', + vi.fn().mockResolvedValue({ + ok: true, + headers: { + get: (name: string) => (name.toLowerCase() === 'content-type' ? 'image/png' : null), + }, + arrayBuffer: async () => pngBytes.buffer, + }), + ); + + const history = await historyWithApiAttachmentContext( + [ + userMessage('Describe this image', [ + { path: 'sketch-hero.png', name: 'sketch-hero.png', kind: 'image', size: 4 }, + ]), + ], + 'msg-1', + 'project-1', + [ + { + name: 'sketch-hero.png', + path: 'sketch-hero.png', + type: 'file', + size: 4, + mtime: 123, + kind: 'sketch', + mime: 'image/png', + }, + ], + { omitNativeImageAttachments: true }, + ); + + const messages = await buildProxyMessages( + '/api/proxy/anthropic/stream', + history, + { projectId: 'project-1' }, + ); + + expect(JSON.stringify(messages)).not.toContain('Content preview unavailable'); + expect(messages).toEqual([ + { + role: 'user', + content: [ + { type: 'text', text: 'Describe this image' }, + { + type: 'image', + source: { + type: 'base64', + media_type: 'image/png', + data: 'iVBORw==', + }, + }, + ], + }, + ]); + }); +}); + +function userMessage( + content: string, + attachments: NonNullable, +): ChatMessage { + return { + id: 'msg-1', + role: 'user', + content, + createdAt: 1, + attachments, + }; +} diff --git a/packages/contracts/src/api/proxy.ts b/packages/contracts/src/api/proxy.ts index f6b04d657..98ae789be 100644 --- a/packages/contracts/src/api/proxy.ts +++ b/packages/contracts/src/api/proxy.ts @@ -1,8 +1,26 @@ export type ProxyMessageRole = 'system' | 'user' | 'assistant' | 'tool'; +export type ProxyMessageContent = + | string + | Array; + +export interface ProxyTextContentBlock { + type: 'text'; + text: string; +} + +export interface ProxyImageContentBlock { + type: 'image'; + source: { + type: 'base64'; + media_type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'; + data: string; + }; +} + export interface ProxyMessage { role: ProxyMessageRole; - content: string; + content: ProxyMessageContent; } export interface ProxyStreamRequest {