fix(web): send Anthropic proxy image attachments (#3273)

* fix(web): send Anthropic proxy image attachments

* fix(web): omit image attachment stubs for Anthropic proxy

* fix(web): keep image fallback context aligned

* fix(web): align Anthropic image attachment omission

---------

Co-authored-by: 116405 <116405@ky-tech.com.cn>
This commit is contained in:
RyanCheng77 2026-05-30 12:47:47 +08:00 committed by GitHub
parent 653a3fcc70
commit f12679185c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 527 additions and 13 deletions

View file

@ -8,6 +8,7 @@ import type {
ProjectFile,
ProjectFileKind,
} from './types';
import { isAnthropicSupportedImagePath } from './utils/apiProtocol';
const API_ATTACHMENT_TEXT_KINDS = new Set<ProjectFileKind>(['html', 'text', 'code']);
const API_ATTACHMENT_PREVIEW_KINDS = new Set<ProjectFileKind>([
@ -19,17 +20,22 @@ const API_ATTACHMENT_PREVIEW_KINDS = new Set<ProjectFileKind>([
const MAX_API_ATTACHMENT_CHARS = 24_000;
const MAX_API_ATTACHMENT_TOTAL_CHARS = 64_000;
/**
 * Optional switches for building the textual attachment context that is
 * appended to a user message in API mode.
 */
export interface ApiAttachmentContextOptions {
/**
 * When true, attachments whose `kind` is `'image'` and whose path passes
 * `isAnthropicSupportedImagePath` are skipped while building the text
 * context, so no metadata block is produced for them.
 */
omitNativeImageAttachments?: boolean;
}
export async function historyWithApiAttachmentContext(
history: ChatMessage[],
messageId: string,
projectId: string,
projectFiles: ProjectFile[],
options: ApiAttachmentContextOptions = {},
): Promise<ChatMessage[]> {
const current = history.find((message) => message.id === messageId && message.role === 'user');
const attachments = current?.attachments ?? [];
if (!current || attachments.length === 0) return history;
const context = await buildApiAttachmentContext(projectId, attachments, projectFiles);
const context = await buildApiAttachmentContext(projectId, attachments, projectFiles, options);
if (!context) return history;
return history.map((message) =>
@ -43,6 +49,7 @@ async function buildApiAttachmentContext(
projectId: string,
attachments: ChatAttachment[],
projectFiles: ProjectFile[],
options: ApiAttachmentContextOptions,
): Promise<string> {
const byPath = new Map<string, ProjectFile>();
const byName = new Map<string, ProjectFile>();
@ -54,6 +61,13 @@ async function buildApiAttachmentContext(
let remaining = MAX_API_ATTACHMENT_TOTAL_CHARS;
const blocks: string[] = [];
for (const attachment of attachments) {
const file =
byPath.get(attachment.path) ??
byName.get(attachment.path) ??
byName.get(attachment.name);
if (options.omitNativeImageAttachments && canSendNativeAnthropicImage(attachment)) {
continue;
}
if (remaining <= 0) {
blocks.push(
'[Open Design omitted remaining attached files because the attachment context budget was exhausted.]',
@ -61,10 +75,6 @@ async function buildApiAttachmentContext(
break;
}
const file =
byPath.get(attachment.path) ??
byName.get(attachment.path) ??
byName.get(attachment.name);
const block = await renderApiAttachmentBlock(projectId, attachment, file, remaining);
if (!block) continue;
blocks.push(block.text);
@ -136,6 +146,12 @@ async function renderApiAttachmentBlock(
return { text, charsUsed: text.length };
}
/**
 * Reports whether an attachment qualifies for native image delivery: it must
 * be tagged as an image and its path must carry one of the extensions
 * accepted by `isAnthropicSupportedImagePath`.
 *
 * @param attachment - The chat attachment to inspect.
 * @returns `true` only when both the kind and the path checks pass.
 */
function canSendNativeAnthropicImage(attachment: ChatAttachment): boolean {
  if (attachment.kind !== 'image') {
    return false;
  }
  return isAnthropicSupportedImagePath(attachment.path);
}
function canReadRawText(kind: ProjectFileKind, path: string): boolean {
if (API_ATTACHMENT_TEXT_KINDS.has(kind)) return true;
return kind === 'sketch' && isTextSketchPath(path);

View file

@ -74,6 +74,7 @@ import {
import {
apiProtocolAgentId,
apiProtocolModelLabel,
usesAnthropicProxy,
} from '../utils/apiProtocol';
import { playSound, showCompletionNotification } from '../utils/notifications';
import { randomUUID } from '../utils/uuid';
@ -2861,6 +2862,7 @@ export function ProjectView({
userMsg.id,
project.id,
projectFiles,
{ omitNativeImageAttachments: usesAnthropicProxy(config) },
);
pushEvent({ kind: 'status', label: 'requesting', detail: config.model });
let accumulatedAssistantText = '';

View file

@ -1,5 +1,6 @@
import type { AppConfig, ChatMessage } from '../types';
import type { StreamHandlers } from './anthropic';
import type { ProxyContext } from './api-proxy';
import { streamProxyEndpoint } from './api-proxy';
export async function streamMessageAnthropicProxy(
@ -8,6 +9,15 @@ export async function streamMessageAnthropicProxy(
history: ChatMessage[],
signal: AbortSignal,
handlers: StreamHandlers,
context?: ProxyContext,
): Promise<void> {
return streamProxyEndpoint('/api/proxy/anthropic/stream', cfg, system, history, signal, handlers);
return streamProxyEndpoint(
'/api/proxy/anthropic/stream',
cfg,
system,
history,
signal,
handlers,
context,
);
}

View file

@ -17,6 +17,7 @@ import { streamMessageGoogle } from './google-compatible';
import { streamMessageOllama } from './ollama-compatible';
import { isOpenAICompatible, streamMessageOpenAI } from './openai-compatible';
import { streamMessageSenseAudio } from './senseaudio-compatible';
import { usesAnthropicProxy } from '../utils/apiProtocol';
// Re-export for convenience
export { isOpenAICompatible } from './openai-compatible';
@ -66,8 +67,8 @@ export async function streamMessage(
return streamMessageOpenAI(cfg, system, history, signal, handlers);
}
if (cfg.baseUrl && cfg.baseUrl !== 'https://api.anthropic.com') {
return streamMessageAnthropicProxy(cfg, system, history, signal, handlers);
if (usesAnthropicProxy(cfg)) {
return streamMessageAnthropicProxy(cfg, system, history, signal, handlers, context);
}
if (!cfg.apiKey) {

View file

@ -1,13 +1,22 @@
import { effectiveMaxTokens } from '../state/maxTokens';
import type { AppConfig, ChatMessage } from '../types';
import type {
ProxyImageContentBlock,
ProxyMessage,
ProxyMessageContent,
ProxyTextContentBlock,
} from '@open-design/contracts';
import { projectFileUrl } from './registry';
import type { StreamHandlers } from './anthropic';
import { parseSseFrame } from './sse';
import { isAnthropicSupportedImagePath } from '../utils/apiProtocol';
/**
* Optional per-request context that some protocols thread into the
* proxy body. Today only the senseaudio proxy reads these fields:
* proxy body or use to prepare provider-native message payloads:
* - `projectId` lets the `generate_image` tool write into the active
* project's folder instead of a daemon-global cache.
* project's folder instead of a daemon-global cache, and lets the
* Anthropic proxy resolve image attachments into content blocks.
* - `byokImageModel` is the user's BYOK Settings default for the
* image tool. The LLM can still override per-call via the tool's
* `model` arg; this is just the fallback when it omits one.
@ -36,6 +45,7 @@ export async function streamProxyEndpoint(
let acc = '';
try {
const messages = await buildProxyMessages(endpoint, history, context);
const resp = await fetch(endpoint, {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
@ -44,7 +54,7 @@ export async function streamProxyEndpoint(
apiKey: cfg.apiKey,
model: cfg.model,
systemPrompt: system,
messages: history.map((m) => ({ role: m.role, content: m.content })),
messages,
maxTokens: effectiveMaxTokens(cfg),
apiVersion: cfg.apiVersion,
...(context?.projectId ? { projectId: context.projectId } : {}),
@ -107,6 +117,132 @@ export async function streamProxyEndpoint(
}
}
/**
 * Converts chat history into the `messages` array sent to a proxy endpoint.
 *
 * For endpoints that are not Anthropic-style, or when no project id is
 * available, every message is reduced to its role and plain string content.
 * Otherwise each message's content is produced by
 * `buildAnthropicMessageContent`, one message at a time and in history order.
 *
 * @param endpoint - Proxy endpoint path the request will be posted to.
 * @param history - Chat messages to serialize.
 * @param context - Optional per-request context; only `projectId` is read here.
 * @returns The serialized proxy messages, in the same order as `history`.
 */
export async function buildProxyMessages(
  endpoint: string,
  history: ChatMessage[],
  context?: ProxyContext,
): Promise<ProxyMessage[]> {
  const projectId = context?.projectId;
  if (!projectId || !usesAnthropicMessagesPayload(endpoint)) {
    return history.map(({ role, content }) => ({ role, content }));
  }

  const serialized: ProxyMessage[] = [];
  for (const entry of history) {
    const role = entry.role;
    // Messages are handled strictly one after another.
    const content = await buildAnthropicMessageContent(entry, projectId);
    serialized.push({ role, content });
  }
  return serialized;
}
/**
 * Tells whether an endpoint path belongs to the Anthropic proxy routes, i.e.
 * whether it contains the `/api/proxy/anthropic/` segment anywhere.
 *
 * @param endpoint - Proxy endpoint path to inspect.
 * @returns `true` when the segment is present.
 */
function usesAnthropicMessagesPayload(endpoint: string): boolean {
  const position = endpoint.indexOf('/api/proxy/anthropic/');
  return position !== -1;
}
/**
 * Produces the content for one proxy message.
 *
 * Non-user messages, and user messages without image attachments, keep their
 * plain string content. For a user message with image attachments the result
 * is a list of blocks: the message text first (only when it is not blank),
 * then one entry per image attachment in attachment order. Each image becomes
 * a native image block when it can be read; when it cannot be read but its
 * path has a supported extension, a text note naming the path and file name
 * is added instead. Images that are neither readable nor supported add
 * nothing.
 *
 * @param message - The chat message to convert.
 * @param projectId - Project whose files back the attachments.
 * @returns The block list, or the original string when no block was produced.
 */
async function buildAnthropicMessageContent(
  message: ChatMessage,
  projectId: string,
): Promise<ProxyMessageContent> {
  if (message.role !== 'user') {
    return message.content;
  }
  const images = (message.attachments ?? []).filter((item) => item.kind === 'image');
  if (images.length === 0) {
    return message.content;
  }

  const parts: Array<ProxyTextContentBlock | ProxyImageContentBlock> = [];
  const hasVisibleText = message.content.trim().length > 0;
  if (hasVisibleText) {
    parts.push({ type: 'text', text: message.content });
  }

  for (const image of images) {
    const native = await readAnthropicImageBlock(projectId, image.path);
    if (native) {
      parts.push(native);
      continue;
    }
    if (!isAnthropicSupportedImagePath(image.path)) {
      continue;
    }
    // The image should have gone out natively but could not be read.
    parts.push({
      type: 'text',
      text: `Attached image could not be sent as native image content: path: ${image.path} | name: ${image.name}`,
    });
  }

  return parts.length === 0 ? message.content : parts;
}
/**
 * Fetches a project file and wraps it as a base64 image content block.
 *
 * The file is requested from `projectFileUrl(projectId, path)` with
 * `cache: 'no-store'`. The media type comes from
 * `supportedAnthropicImageMediaType`, which is given the response's
 * `content-type` header (or an empty string when absent) and the path.
 *
 * @param projectId - Project that owns the file.
 * @param path - Path of the image inside the project.
 * @returns The image block, or `null` when the response is not OK, the media
 *   type is not supported, or anything in the process throws.
 */
async function readAnthropicImageBlock(
  projectId: string,
  path: string,
): Promise<ProxyImageContentBlock | null> {
  try {
    const url = projectFileUrl(projectId, path);
    const response = await fetch(url, { cache: 'no-store' });
    if (!response.ok) {
      return null;
    }

    const headerValue = response.headers.get('content-type') ?? '';
    const mediaType = supportedAnthropicImageMediaType(headerValue, path);
    if (!mediaType) {
      // The body is not read when the type is unsupported.
      return null;
    }

    const buffer = await response.arrayBuffer();
    const encoded = bytesToBase64(new Uint8Array(buffer));
    return {
      type: 'image',
      source: {
        type: 'base64',
        media_type: mediaType,
        data: encoded,
      },
    };
  } catch {
    // Any failure is reported to the caller as "no block".
    return null;
  }
}
/**
 * Resolves the image media type to declare for a file.
 *
 * The `content-type` value wins when, after dropping parameters (anything
 * from the first `;`), trimming and lower-casing, it is one of the four
 * supported types. Otherwise the lower-cased path's extension decides:
 * `.jpg`/`.jpeg`, `.png`, `.gif` or `.webp`.
 *
 * @param contentType - Raw `content-type` header value; may be empty.
 * @param path - File path used as a fallback signal.
 * @returns The supported media type, or `null` when neither input matches.
 */
function supportedAnthropicImageMediaType(
  contentType: string,
  path: string,
): 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp' | null {
  const knownTypes = ['image/jpeg', 'image/png', 'image/gif', 'image/webp'] as const;
  const declared = contentType.split(';', 1)[0]?.trim().toLowerCase();
  const fromHeader = knownTypes.find((candidate) => candidate === declared);
  if (fromHeader !== undefined) {
    return fromHeader;
  }

  // Fall back to the file extension when the header is missing or unsupported.
  const suffixTable = [
    ['.jpg', 'image/jpeg'],
    ['.jpeg', 'image/jpeg'],
    ['.png', 'image/png'],
    ['.gif', 'image/gif'],
    ['.webp', 'image/webp'],
  ] as const;
  const lowered = path.toLowerCase();
  for (const [suffix, mediaType] of suffixTable) {
    if (lowered.endsWith(suffix)) {
      return mediaType;
    }
  }
  return null;
}
/**
 * Encodes raw bytes as a base64 string using the `A-Z a-z 0-9 + /` alphabet
 * with `=` padding.
 *
 * @param bytes - The bytes to encode; an empty array yields an empty string.
 * @returns The padded base64 text.
 */
function bytesToBase64(bytes: Uint8Array): string {
  const table = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/';
  // Picks the 6-bit group that starts `shift` bits from the right of `word`.
  const sextet = (word: number, shift: number): string => table.charAt((word >> shift) & 63);

  const leftover = bytes.length % 3;
  const groupedEnd = bytes.length - leftover;
  const chunks: string[] = [];

  // Every full 3-byte group maps to exactly four output characters.
  for (let offset = 0; offset < groupedEnd; offset += 3) {
    const word = (bytes[offset]! << 16) | (bytes[offset + 1]! << 8) | bytes[offset + 2]!;
    chunks.push(sextet(word, 18) + sextet(word, 12) + sextet(word, 6) + sextet(word, 0));
  }

  // A trailing group of one or two bytes is padded out to four characters.
  if (leftover === 1) {
    const word = bytes[groupedEnd]! << 16;
    chunks.push(sextet(word, 18) + sextet(word, 12) + '==');
  } else if (leftover === 2) {
    const word = (bytes[groupedEnd]! << 16) | (bytes[groupedEnd + 1]! << 8);
    chunks.push(sextet(word, 18) + sextet(word, 12) + sextet(word, 6) + '=');
  }

  return chunks.join('');
}
function proxyErrorMessage(data: Record<string, unknown>): string {
const nested = data.error;
if (nested && typeof nested === 'object' && 'message' in nested) {

View file

@ -1,4 +1,5 @@
import type { ApiProtocol } from '../types';
import { isOpenAICompatible } from '../providers/openai-compatible';
import type { ApiProtocol, AppConfig } from '../types';
const API_PROTOCOL_LABELS: Record<ApiProtocol, string> = {
anthropic: 'Anthropic API',
@ -34,3 +35,24 @@ export function apiProtocolModelLabel(
export function apiProtocolAgentId(protocol: ApiProtocol | undefined): string {
return API_PROTOCOL_AGENT_IDS[protocol ?? 'anthropic'];
}
/**
 * Decides whether a configuration is served through the Anthropic proxy.
 *
 * The answer is `false` when the protocol is explicitly `azure`, `ollama`,
 * `google`, `senseaudio` or `openai`, and also when no protocol is set but
 * `isOpenAICompatible(cfg.model, cfg.baseUrl)` holds. Otherwise it is `true`
 * only for a non-empty base URL that differs from
 * `https://api.anthropic.com`.
 *
 * @param cfg - The application configuration to inspect.
 * @returns `true` when the Anthropic proxy applies.
 */
export function usesAnthropicProxy(cfg: AppConfig): boolean {
  const protocol = cfg.apiProtocol;
  switch (protocol) {
    case 'azure':
    case 'ollama':
    case 'google':
    case 'senseaudio':
    case 'openai':
      return false;
    default:
      break;
  }

  // With no explicit protocol, the model and base URL decide.
  if (!protocol && isOpenAICompatible(cfg.model, cfg.baseUrl)) {
    return false;
  }

  const base = cfg.baseUrl;
  return Boolean(base) && base !== 'https://api.anthropic.com';
}
/**
 * Checks whether a path ends in one of the raster image extensions
 * `.jpg`, `.jpeg`, `.png`, `.gif` or `.webp`, ignoring letter case.
 *
 * @param path - File path or name to test.
 * @returns `true` when the lower-cased path ends with a listed extension.
 */
export function isAnthropicSupportedImagePath(path: string): boolean {
  const lowered = path.toLowerCase();
  const suffixes = ['.jpg', '.jpeg', '.png', '.gif', '.webp'];
  return suffixes.some((suffix) => lowered.endsWith(suffix));
}

View file

@ -79,6 +79,64 @@ describe('historyWithApiAttachmentContext', () => {
expect(history[0]?.content).toContain('Content preview unavailable');
});
// With `omitNativeImageAttachments` enabled, an image attachment whose path
// ends in .png/.jpg/.jpeg/.gif/.webp must leave the user message untouched.
it('omits image attachment metadata when the provider sends native image blocks', async () => {
// Each supported extension is checked with its own single-message history.
for (const path of ['hero.png', 'hero.jpg', 'hero.jpeg', 'hero.gif', 'hero.webp']) {
const history = await historyWithApiAttachmentContext(
[
userMessage('msg-1', 'Describe this image', [
{ path, name: path, kind: 'image' },
]),
],
'msg-1',
'project-1',
[projectFile(path, 'image')],
{ omitNativeImageAttachments: true },
);
// No attachment context is appended: the content equals the original text.
expect(history[0]?.content).toBe('Describe this image');
}
// Neither mock may have been called for any of the omitted images.
expect(mockedFetchProjectFileText).not.toHaveBeenCalled();
expect(mockedFetchProjectFilePreview).not.toHaveBeenCalled();
});
// The attachment is tagged `kind: 'image'` while the matching project file is
// listed with kind 'sketch'; the omission must still apply.
it('omits sketch-prefixed raster image metadata when native image blocks carry them', async () => {
const history = await historyWithApiAttachmentContext(
[
userMessage('msg-1', 'Describe this image', [
{ path: 'sketch-hero.png', name: 'sketch-hero.png', kind: 'image' },
]),
],
'msg-1',
'project-1',
[projectFile('sketch-hero.png', 'sketch')],
{ omitNativeImageAttachments: true },
);
// The content is unchanged and neither mock was called.
expect(history[0]?.content).toBe('Describe this image');
expect(mockedFetchProjectFileText).not.toHaveBeenCalled();
expect(mockedFetchProjectFilePreview).not.toHaveBeenCalled();
});
// Images with extensions outside the supported set (.avif, .bmp here) are not
// omitted even when `omitNativeImageAttachments` is enabled.
it('keeps unsupported image metadata when native image blocks cannot carry them', async () => {
for (const path of ['hero.avif', 'hero.bmp']) {
const history = await historyWithApiAttachmentContext(
[
userMessage('msg-1', 'Describe this image', [
{ path, name: path, kind: 'image' },
]),
],
'msg-1',
'project-1',
[projectFile(path, 'image')],
{ omitNativeImageAttachments: true },
);
// The attachment block, its path line and the preview notice must all appear.
expect(history[0]?.content).toContain('<attached-project-files>');
expect(history[0]?.content).toContain(`path: ${path}`);
expect(history[0]?.content).toContain('Content preview unavailable');
}
});
it('uses filename inference when the project file list has not refreshed yet', async () => {
mockedFetchProjectFilePreview.mockResolvedValue({
kind: 'pdf',

View file

@ -0,0 +1,251 @@
import { afterEach, describe, expect, it, vi } from 'vitest';
import { historyWithApiAttachmentContext } from '../../src/api-attachment-context';
import { buildProxyMessages, streamProxyEndpoint } from '../../src/providers/api-proxy';
import type { ChatMessage } from '../../src/types';
describe('buildProxyMessages', () => {
// Undo mocks and `vi.stubGlobal` stubs (the tests stub the global `fetch`)
// after every test.
afterEach(() => {
vi.restoreAllMocks();
vi.unstubAllGlobals();
});
// Happy path: a readable PNG attachment becomes a base64 image block placed
// after the message text.
it('serializes image attachments as Anthropic image content blocks', async () => {
// Four bytes are enough to pin the base64 output ('iVBORw==').
const pngBytes = new Uint8Array([137, 80, 78, 71]);
// The stubbed fetch answers the image read with an OK response typed image/png.
vi.stubGlobal(
'fetch',
vi.fn().mockResolvedValue({
ok: true,
headers: {
get: (name: string) => (name.toLowerCase() === 'content-type' ? 'image/png' : null),
},
arrayBuffer: async () => pngBytes.buffer,
}),
);
const messages = await buildProxyMessages(
'/api/proxy/anthropic/stream',
[
userMessage('Describe the attached image', [
{ path: 'references/logo.png', name: 'logo.png', kind: 'image', size: 4 },
]),
],
{ projectId: 'project-1' },
);
// The image is requested from the project's raw-file URL with caching disabled.
expect(fetch).toHaveBeenCalledWith(
'/api/projects/project-1/raw/references/logo.png',
{ cache: 'no-store' },
);
// Expected content: the text block first, then the image block.
expect(messages).toEqual([
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the attached image' },
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/png',
data: 'iVBORw==',
},
},
],
},
]);
});
// For an endpoint outside `/api/proxy/anthropic/`, attachments are ignored:
// no fetch happens and the content stays a plain string.
it('keeps non-Anthropic proxy messages as plain text', async () => {
vi.stubGlobal('fetch', vi.fn());
const messages = await buildProxyMessages(
'/api/proxy/openai/stream',
[
userMessage('Describe the attached image', [
{ path: 'references/logo.png', name: 'logo.png', kind: 'image', size: 4 },
]),
],
{ projectId: 'project-1' },
);
expect(fetch).not.toHaveBeenCalled();
expect(messages).toEqual([
{ role: 'user', content: 'Describe the attached image' },
]);
});
// End-to-end check through `streamProxyEndpoint`: the JSON body posted to the
// proxy must carry the image block and the project id.
it('sends Anthropic image content blocks in the proxy request body', async () => {
const pngBytes = new Uint8Array([137, 80, 78, 71]);
// Two fetch calls are expected, answered in order: the first mock serves the
// image bytes, the second serves the proxy's streamed response.
const fetchMock = vi
.fn()
.mockResolvedValueOnce({
ok: true,
headers: {
get: (name: string) => (name.toLowerCase() === 'content-type' ? 'image/png' : null),
},
arrayBuffer: async () => pngBytes.buffer,
})
.mockResolvedValueOnce({
ok: true,
body: new ReadableStream({
start(controller) {
// A single `end` event, after which the stream is closed.
controller.enqueue(
new TextEncoder().encode('event: end\ndata: {}\n\n'),
);
controller.close();
},
}),
});
vi.stubGlobal('fetch', fetchMock);
await streamProxyEndpoint(
'/api/proxy/anthropic/stream',
{
apiKey: 'test-api-key',
baseUrl: 'https://anthropic-compatible.example',
model: 'vision-model',
} as any,
'System prompt',
[
userMessage('Describe the attached image', [
{ path: 'references/logo.png', name: 'logo.png', kind: 'image', size: 4 },
]),
],
new AbortController().signal,
{
onDelta: vi.fn(),
onDone: vi.fn(),
onError: vi.fn(),
},
{ projectId: 'project-1' },
);
// The second fetch call is the proxy POST; its init object holds the body.
const proxyInit = fetchMock.mock.calls[1]?.[1] as RequestInit;
expect(JSON.parse(String(proxyInit.body))).toMatchObject({
messages: [
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the attached image' },
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/png',
data: 'iVBORw==',
},
},
],
},
],
projectId: 'project-1',
});
});
// When the image read fails for a path with a supported extension, a text
// note naming the path and file name replaces the image block.
it('keeps a text fallback when a supported Anthropic image cannot be read', async () => {
// The stubbed fetch reports a non-OK response for the image request.
vi.stubGlobal(
'fetch',
vi.fn().mockResolvedValue({
ok: false,
headers: { get: () => null },
arrayBuffer: async () => new ArrayBuffer(0),
}),
);
const messages = await buildProxyMessages(
'/api/proxy/anthropic/stream',
[
userMessage('Describe the attached image', [
{ path: 'references/logo.png', name: 'logo.png', kind: 'image', size: 4 },
]),
],
{ projectId: 'project-1' },
);
// Expected content: the original text block, then the fallback note.
expect(messages).toEqual([
{
role: 'user',
content: [
{ type: 'text', text: 'Describe the attached image' },
{
type: 'text',
text: 'Attached image could not be sent as native image content: path: references/logo.png | name: logo.png',
},
],
},
]);
});
// Chains both steps: the attachment-context pass with omission enabled, then
// proxy message building. The result must hold the native image block and no
// 'Content preview unavailable' text.
it('does not send preview-unavailable text alongside sketch raster image blocks', async () => {
const pngBytes = new Uint8Array([137, 80, 78, 71]);
vi.stubGlobal(
'fetch',
vi.fn().mockResolvedValue({
ok: true,
headers: {
get: (name: string) => (name.toLowerCase() === 'content-type' ? 'image/png' : null),
},
arrayBuffer: async () => pngBytes.buffer,
}),
);
// Step 1: the project file is listed with kind 'sketch' while the attachment
// itself is tagged 'image'.
const history = await historyWithApiAttachmentContext(
[
userMessage('Describe this image', [
{ path: 'sketch-hero.png', name: 'sketch-hero.png', kind: 'image', size: 4 },
]),
],
'msg-1',
'project-1',
[
{
name: 'sketch-hero.png',
path: 'sketch-hero.png',
type: 'file',
size: 4,
mtime: 123,
kind: 'sketch',
mime: 'image/png',
},
],
{ omitNativeImageAttachments: true },
);
// Step 2: serialize the resulting history for the Anthropic proxy.
const messages = await buildProxyMessages(
'/api/proxy/anthropic/stream',
history,
{ projectId: 'project-1' },
);
expect(JSON.stringify(messages)).not.toContain('Content preview unavailable');
expect(messages).toEqual([
{
role: 'user',
content: [
{ type: 'text', text: 'Describe this image' },
{
type: 'image',
source: {
type: 'base64',
media_type: 'image/png',
data: 'iVBORw==',
},
},
],
},
]);
});
});
/**
 * Test helper: builds a user chat message with the fixed id `'msg-1'` and
 * `createdAt` of 1, carrying the given text and attachments.
 *
 * @param content - Message text.
 * @param attachments - Attachments to put on the message.
 * @returns The assembled chat message.
 */
function userMessage(
  content: string,
  attachments: NonNullable<ChatMessage['attachments']>,
): ChatMessage {
  const fixture: ChatMessage = {
    id: 'msg-1',
    role: 'user',
    content,
    createdAt: 1,
    attachments,
  };
  return fixture;
}

View file

@ -1,8 +1,26 @@
export type ProxyMessageRole = 'system' | 'user' | 'assistant' | 'tool';
/**
 * Content of a proxy message: either a plain string or an ordered list of
 * text and image content blocks.
 */
export type ProxyMessageContent =
| string
| Array<ProxyTextContentBlock | ProxyImageContentBlock>;
/** A content block carrying plain text. */
export interface ProxyTextContentBlock {
/** Discriminator identifying this block as text. */
type: 'text';
/** The text carried by the block. */
text: string;
}
/** A content block carrying an inline, base64-encoded image. */
export interface ProxyImageContentBlock {
/** Discriminator identifying this block as an image. */
type: 'image';
/** Where the image data comes from; only inline base64 is modelled. */
source: {
type: 'base64';
/** Media type of the encoded image; limited to these four values. */
media_type: 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp';
/** The image bytes encoded as a base64 string. */
data: string;
};
}
export interface ProxyMessage {
role: ProxyMessageRole;
content: string;
content: ProxyMessageContent;
}
export interface ProxyStreamRequest {