feat(byok): Added Ollama Cloud as BYOK provider (#923)

* feat: add Ollama Cloud to KNOWN_PROVIDERS as OpenAI-compatible BYOK provider

* feat: add ollama.com to isOpenAICompatible base URL detection

* feat: add Ollama Cloud models to SUGGESTED_MODELS_BY_PROTOCOL fallback list

* fix: use full Ollama Cloud model list from /api/tags, drop -cloud suffix

* feat: add Ollama Cloud as native protocol with NDJSON streaming and connection test support

* fix: remove ollama.com from OpenAI compatibility check

* feat: add token overrides for Ollama Cloud models to prevent truncation

* fix: extend inferApiProtocol and legacy migration to recognize ollama.com base URLs

* fix: normalize Ollama Cloud base URL by stripping /api suffix during migration and in daemon

---------

Co-authored-by: herediaron <aronheredi346@gmail.com>
This commit is contained in:
Herédi Áron 2026-05-09 05:21:16 +02:00 committed by GitHub
parent 64ed8e7046
commit 66f84972cf
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 389 additions and 6 deletions

View file

@ -177,6 +177,15 @@ function inspectProviderCompletion(
};
}
if (protocol === 'ollama') {
  // A string at `message.content` is treated as the completion text and
  // doubles as the sample shown to the caller.
  const reply = obj as { message?: { content?: unknown }; messages?: unknown };
  const text = reply.message?.content;
  if (typeof text === 'string') {
    return { valid: true, sample: truncateSample(text) };
  }
  // Without string content the payload is still accepted when it carries a
  // `messages` array, but no sample is reported.
  // NOTE(review): confirm an upstream reply really uses `messages` here.
  return { valid: Array.isArray(reply.messages) };
}
return { valid: false };
}
@ -377,8 +386,6 @@ function buildProviderCall(input: ProviderTestRequest): ProviderCallShape {
}
case 'google': {
const trimmedBase = baseUrl.replace(/\/+$/, '');
// Non-streaming variant — deliberately not :streamGenerateContent so
// we can JSON.parse the response in one shot.
return {
url: `${trimmedBase}/v1beta/models/${encodeURIComponent(model)}:generateContent`,
headers: {
@ -405,6 +412,28 @@ function buildProviderCall(input: ProviderTestRequest): ProviderCallShape {
},
};
}
case 'ollama': {
  // Drop trailing slashes and then a trailing /api segment so the endpoint
  // built below is never /api/api/chat.
  const hostRoot = baseUrl.replace(/\/+$/, '').replace(/\/api\/?$/, '');
  const endpoint = `${hostRoot}/api/chat`;
  return {
    url: endpoint,
    headers: {
      'content-type': 'application/json',
      authorization: `Bearer ${apiKey}`,
    },
    // `stream: false` asks for a single JSON reply instead of a stream.
    body: {
      model,
      messages: [{ role: 'user', content: SMOKE_PROMPT }],
      stream: false,
    },
    // Returns `message.content` when it is a string, otherwise ''.
    extractText: (data) => {
      const content = (data as { message?: { content?: unknown } }).message?.content;
      return typeof content === 'string' ? content : '';
    },
  };
}
default:
throw new Error(`Unknown protocol: ${(input as { protocol?: string }).protocol}`);
}

View file

@ -6173,13 +6173,13 @@ export async function startServer({
const protocol = body.protocol;
if (
typeof protocol !== 'string' ||
!['anthropic', 'openai', 'azure', 'google'].includes(protocol)
!['anthropic', 'openai', 'azure', 'google', 'ollama'].includes(protocol)
) {
return sendApiError(
res,
400,
'BAD_REQUEST',
'protocol must be one of anthropic|openai|azure|google',
'protocol must be one of anthropic|openai|azure|google|ollama',
);
}
if (
@ -6386,6 +6386,44 @@ export async function startServer({
if (tail) await onFrame(collectSseFrame(tail));
};
// Ollama Cloud streams NDJSON (newline-delimited JSON) — each line is a
// complete JSON object. Parse per-line and dispatch parsed objects.
//
// `response` must expose `body.getReader()`; `onFrame` is called with
// `{ data }` for every line that parses as JSON and may return (or resolve
// to) a truthy value to stop reading early.
//
// Blank and unparseable lines are skipped. Errors thrown by `onFrame` are
// NOT swallowed: they propagate to the caller so a failing consumer is never
// mistaken for a malformed line. Whenever reading stops before the upstream
// body is exhausted (early stop or error) the reader is cancelled so the
// upstream connection is released.
const streamUpstreamNdjson = async (response, onFrame) => {
  const reader = response.body.getReader();
  const decoder = new TextDecoder();
  let buffer = '';
  let drained = false;
  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      buffer += decoder.decode(value, { stream: true });
      let newline = buffer.indexOf('\n');
      while (newline !== -1) {
        const line = buffer.slice(0, newline).trim();
        buffer = buffer.slice(newline + 1);
        newline = buffer.indexOf('\n');
        if (!line) continue;
        let data;
        try {
          data = JSON.parse(line);
        } catch {
          // skip unparseable lines
          continue;
        }
        // Dispatch outside the parse guard so consumer errors surface.
        if (await onFrame({ data })) return;
      }
    }
    drained = true;
    // Flush any bytes the streaming decoder is still holding back.
    buffer += decoder.decode();
    const tail = buffer.trim();
    if (tail) {
      let data;
      try {
        data = JSON.parse(tail);
      } catch {
        // skip an unparseable trailing fragment
        return;
      }
      await onFrame({ data });
    }
  } finally {
    if (!drained) {
      try {
        await reader.cancel();
      } catch {
        // best-effort cleanup; the original outcome takes precedence
      }
    }
  }
};
const extractOpenAIText = (data) => {
const choices = data?.choices;
if (!Array.isArray(choices) || choices.length === 0) return '';
@ -6838,6 +6876,98 @@ export async function startServer({
}
});
// Proxies a chat request to an Ollama-style `/api/chat` endpoint and relays
// the upstream NDJSON stream to the client as SSE `start` / `delta` / `end`
// events, or as a proxy error event when the upstream call fails.
app.post('/api/proxy/ollama/stream', async (req, res) => {
  /** @type {Partial<ProxyStreamRequest>} */
  const proxyBody = req.body || {};
  const { baseUrl, apiKey, model, systemPrompt, messages, maxTokens } = proxyBody;
  if (!apiKey || !model) {
    return sendApiError(
      res,
      400,
      'BAD_REQUEST',
      'apiKey and model are required',
    );
  }
  const effectiveBaseUrl = baseUrl || 'https://ollama.com';
  const validated = validateExternalApiBaseUrl(effectiveBaseUrl);
  if (validated.error) {
    return sendApiError(
      res,
      validated.forbidden ? 403 : 400,
      validated.forbidden ? 'FORBIDDEN' : 'BAD_REQUEST',
      validated.error,
    );
  }
  // Strip trailing slashes and a trailing /api segment so the request URL is
  // never built as /api/api/chat.
  const clean = effectiveBaseUrl.replace(/\/+$/, '').replace(/\/api\/?$/, '');
  const url = `${clean}/api/chat`;
  console.log(
    `[proxy:ollama] ${req.method} ${validated.parsed.hostname} model=${model}`,
  );
  // The system prompt, when present, is sent as a leading `system` message.
  const payloadMessages = Array.isArray(messages) ? [...messages] : [];
  if (typeof systemPrompt === 'string' && systemPrompt) {
    payloadMessages.unshift({ role: 'system', content: systemPrompt });
  }
  const payload = {
    model,
    messages: payloadMessages,
    stream: true,
  };
  if (typeof maxTokens === 'number' && maxTokens > 0) {
    payload.options = { num_predict: maxTokens };
  }
  const sse = createSseResponse(res);
  sse.send('start', { model });
  try {
    const response = await fetch(url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
        Authorization: `Bearer ${apiKey}`,
      },
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      const errorText = await response.text();
      console.error(
        `[proxy:ollama] upstream error: ${response.status} ${redactAuthTokens(errorText)}`,
      );
      sendProxyError(sse, `Upstream error: ${response.status}`, {
        code: proxyErrorCode(response.status),
        details: errorText,
        retryable: response.status === 429 || response.status >= 500,
      });
      return sse.end();
    }
    let ended = false;
    // A failure after the 200 status arrives as an `{ "error": ... }` frame
    // inside the stream. It is recorded here and reported after the reader
    // stops, rather than thrown from the callback.
    let upstreamError = null;
    await streamUpstreamNdjson(response, ({ data }) => {
      if (!data) return false;
      if (data.error) {
        upstreamError =
          typeof data.error === 'string' ? data.error : JSON.stringify(data.error);
        return true;
      }
      if (data.done) {
        sse.send('end', {});
        ended = true;
        return true;
      }
      const content = data.message?.content;
      if (typeof content === 'string' && content) {
        sse.send('delta', { delta: content });
      }
      return false;
    });
    if (upstreamError !== null) {
      console.error(
        `[proxy:ollama] upstream stream error: ${redactAuthTokens(upstreamError)}`,
      );
      // NOTE(review): classified like an HTTP 502 from upstream — confirm
      // proxyErrorCode maps this to the intended code.
      sendProxyError(sse, 'Upstream error during streaming', {
        code: proxyErrorCode(502),
        details: upstreamError,
      });
      return sse.end();
    }
    // Upstream closed without a `done` frame: still terminate the SSE stream.
    if (!ended) sse.send('end', {});
    sse.end();
  } catch (err) {
    console.error(`[proxy:ollama] internal error: ${err.message}`);
    sendProxyError(sse, err.message, { code: 'INTERNAL_ERROR' });
    sse.end();
  }
});
// Wait for `listen` to bind so callers always see the resolved URL —
// critical when port=0 (ephemeral port) and when the embedding sidecar
// needs to advertise the port to a parent process before any request

View file

@ -155,6 +155,47 @@ const SUGGESTED_MODELS_BY_PROTOCOL = {
'MiniMax-M2',
'mimo-v2.5-pro',
],
ollama: [
'cogito-2.1:671b',
'deepseek-v3.1:671b',
'deepseek-v3.2',
'deepseek-v4-flash',
'deepseek-v4-pro',
'devstral-2:123b',
'devstral-small-2:24b',
'gemini-3-flash-preview',
'gemma3:4b',
'gemma3:12b',
'gemma3:27b',
'gemma4:31b',
'glm-4.6',
'glm-4.7',
'glm-5',
'glm-5.1',
'gpt-oss:20b',
'gpt-oss:120b',
'kimi-k2:1t',
'kimi-k2-thinking',
'kimi-k2.5',
'kimi-k2.6',
'minimax-m2',
'minimax-m2.1',
'minimax-m2.5',
'minimax-m2.7',
'ministral-3:3b',
'ministral-3:8b',
'ministral-3:14b',
'mistral-large-3:675b',
'nemotron-3-nano:30b',
'nemotron-3-super',
'qwen3-coder:480b',
'qwen3-coder-next',
'qwen3-next:80b',
'qwen3-vl:235b',
'qwen3-vl:235b-instruct',
'qwen3.5:397b',
'rnj-1:8b',
],
azure: [
'gpt-4o',
'gpt-4o-mini',
@ -175,6 +216,7 @@ const API_PROTOCOL_TABS: Array<{
{ id: 'openai', title: 'OpenAI' },
{ id: 'azure', title: 'Azure OpenAI' },
{ id: 'google', title: 'Google Gemini' },
{ id: 'ollama', title: 'Ollama Cloud' },
];
const API_PROTOCOL_LABELS: Record<ApiProtocol, string> = {
@ -182,6 +224,7 @@ const API_PROTOCOL_LABELS: Record<ApiProtocol, string> = {
openai: 'OpenAI API',
azure: 'Azure OpenAI',
google: 'Google Gemini',
ollama: 'Ollama Cloud API',
};
const API_KEY_PLACEHOLDERS: Record<ApiProtocol, string> = {
@ -189,6 +232,7 @@ const API_KEY_PLACEHOLDERS: Record<ApiProtocol, string> = {
openai: 'sk-...',
azure: 'azure key',
google: 'AIza...',
ollama: 'Ollama API key',
};
type RescanNotice =

View file

@ -13,6 +13,7 @@ import type { AppConfig, ChatMessage } from '../types';
import { streamMessageAnthropicProxy } from './anthropic-compatible';
import { streamMessageAzure } from './azure-compatible';
import { streamMessageGoogle } from './google-compatible';
import { streamMessageOllama } from './ollama-compatible';
import { isOpenAICompatible, streamMessageOpenAI } from './openai-compatible';
// Re-export for convenience
@ -44,6 +45,9 @@ export async function streamMessage(
if (cfg.apiProtocol === 'azure') {
return streamMessageAzure(cfg, system, history, signal, handlers);
}
if (cfg.apiProtocol === 'ollama') {
return streamMessageOllama(cfg, system, history, signal, handlers);
}
if (cfg.apiProtocol === 'google') {
return streamMessageGoogle(cfg, system, history, signal, handlers);
}

View file

@ -0,0 +1,13 @@
import type { AppConfig, ChatMessage } from '../types';
import type { StreamHandlers } from './anthropic';
import { streamProxyEndpoint } from './api-proxy';
/**
 * Streams a chat completion for the `ollama` protocol.
 *
 * This is a thin adapter: it forwards every argument unchanged to the shared
 * proxy streaming helper, pointed at the Ollama proxy route.
 *
 * @param cfg - Application config forwarded to the proxy helper.
 * @param system - System prompt text.
 * @param history - Conversation messages to send.
 * @param signal - Abort signal forwarded to the proxy helper.
 * @param handlers - Stream callbacks forwarded to the proxy helper.
 */
export async function streamMessageOllama(
  cfg: AppConfig,
  system: string,
  history: ChatMessage[],
  signal: AbortSignal,
  handlers: StreamHandlers,
): Promise<void> {
  const proxyRoute = '/api/proxy/ollama/stream';
  return streamProxyEndpoint(proxyRoute, cfg, system, history, signal, handlers);
}

View file

@ -190,6 +190,53 @@ export const KNOWN_PROVIDERS: KnownProvider[] = [
model: 'mimo-v2.5-pro',
models: ['mimo-v2.5-pro'],
},
{
label: 'Ollama Cloud',
protocol: 'ollama',
baseUrl: 'https://ollama.com',
model: 'gpt-oss:120b',
models: [
'cogito-2.1:671b',
'deepseek-v3.1:671b',
'deepseek-v3.2',
'deepseek-v4-flash',
'deepseek-v4-pro',
'devstral-2:123b',
'devstral-small-2:24b',
'gemini-3-flash-preview',
'gemma3:4b',
'gemma3:12b',
'gemma3:27b',
'gemma4:31b',
'glm-4.6',
'glm-4.7',
'glm-5',
'glm-5.1',
'gpt-oss:20b',
'gpt-oss:120b',
'kimi-k2:1t',
'kimi-k2-thinking',
'kimi-k2.5',
'kimi-k2.6',
'minimax-m2',
'minimax-m2.1',
'minimax-m2.5',
'minimax-m2.7',
'ministral-3:3b',
'ministral-3:8b',
'ministral-3:14b',
'mistral-large-3:675b',
'nemotron-3-nano:30b',
'nemotron-3-super',
'qwen3-coder:480b',
'qwen3-coder-next',
'qwen3-next:80b',
'qwen3-vl:235b',
'qwen3-vl:235b-instruct',
'qwen3.5:397b',
'rnj-1:8b',
],
},
{
label: 'MiMo (Xiaomi) — Anthropic',
protocol: 'anthropic',
@ -233,6 +280,11 @@ function isValidOrbitTime(time: string): boolean {
function inferApiProtocol(model: string, baseUrl: string): ApiProtocol {
try {
const normalized = (baseUrl || '').toLowerCase();
// Any config pointing at ollama.com should resolve to the new ollama
// protocol so both chat and the connection test hit the native Ollama
// proxy instead of the Anthropic or OpenAI paths.
if (normalized.includes('ollama.com')) return 'ollama';
return isOpenAICompatible(model, baseUrl) ? 'openai' : 'anthropic';
} catch {
// Preserve the rest of the user's settings even if an old saved base URL is
@ -286,6 +338,14 @@ export function loadConfig(): AppConfig {
// legacy config can be migrated when it is loaded.
if (!parsedHasApiProtocol) {
merged.apiProtocol = inferApiProtocol(merged.model, merged.baseUrl);
// Ollama Cloud legacy configs may carry a base URL that ends in /api,
// optionally followed by one or more slashes — normalize to the host root
// so the daemon's own /api/chat appending doesn't double up. Trailing
// slashes are stripped first so inputs such as ".../api//" still lose the
// /api segment; the final pass removes any slash left in front of it.
if (merged.apiProtocol === 'ollama') {
  merged.baseUrl = merged.baseUrl
    .replace(/\/+$/, '')
    .replace(/\/api$/, '')
    .replace(/\/+$/, '');
}
// Also set apiProviderBaseUrl so setApiProtocol() can correctly identify
// whether the user is on a known provider and switch defaults appropriately.
// null means "custom/unknown provider" so the protocol switch won't override

View file

@ -31,6 +31,48 @@ const OVERRIDES: Record<string, number> = {
// Spec: https://platform.deepseek.com/docs/model-cards
'deepseek-v4-pro': 384000,
'deepseek-v4-flash': 384000,
// Ollama Cloud models. LiteLLM keys this set under `ollama/`-prefixed
// ids (many with `-cloud` suffixes), so the bare model-id lookups never
// match. Add overrides so chat doesn't silently clip at 8192 tokens.
// 131072 (128k) is a safe floor for all Ollama Cloud models.
'cogito-2.1:671b': 131072,
'deepseek-v3.1:671b': 163840,
'deepseek-v3.2': 163840,
'devstral-2:123b': 131072,
'devstral-small-2:24b': 131072,
'gemini-3-flash-preview': 131072,
'gemma3:4b': 131072,
'gemma3:12b': 131072,
'gemma3:27b': 131072,
'gemma4:31b': 131072,
'glm-4.6': 131072,
'glm-4.7': 131072,
'glm-5': 131072,
'glm-5.1': 131072,
'gpt-oss:20b': 131072,
'gpt-oss:120b': 131072,
'kimi-k2:1t': 131072,
'kimi-k2-thinking': 131072,
'kimi-k2.5': 131072,
'kimi-k2.6': 131072,
'minimax-m2': 131072,
'minimax-m2.1': 131072,
'minimax-m2.5': 131072,
'minimax-m2.7': 131072,
'ministral-3:3b': 131072,
'ministral-3:8b': 131072,
'ministral-3:14b': 131072,
'mistral-large-3:675b': 131072,
'nemotron-3-nano:30b': 131072,
'nemotron-3-super': 131072,
'qwen3-coder:480b': 262144,
'qwen3-coder-next': 131072,
'qwen3-next:80b': 131072,
'qwen3-vl:235b': 131072,
'qwen3-vl:235b-instruct': 131072,
'qwen3.5:397b': 131072,
'rnj-1:8b': 131072,
};
export function modelMaxTokensDefault(model: string): number {

View file

@ -65,7 +65,7 @@ export type {
} from '@open-design/contracts';
export type ExecMode = 'daemon' | 'api';
export type ApiProtocol = 'anthropic' | 'openai' | 'azure' | 'google';
export type ApiProtocol = 'anthropic' | 'openai' | 'azure' | 'google' | 'ollama';
export type LiveArtifactTabId = `live:${string}`;
export type ProjectWorkspaceTabId = string | LiveArtifactTabId;

View file

@ -5,6 +5,7 @@ const API_PROTOCOL_LABELS: Record<ApiProtocol, string> = {
openai: 'OpenAI API',
azure: 'Azure OpenAI',
google: 'Google Gemini',
ollama: 'Ollama Cloud API',
};
const API_PROTOCOL_AGENT_IDS: Record<ApiProtocol, string> = {
@ -12,6 +13,7 @@ const API_PROTOCOL_AGENT_IDS: Record<ApiProtocol, string> = {
openai: 'openai-api',
azure: 'azure-openai-api',
google: 'google-gemini-api',
ollama: 'ollama-cloud-api',
};
export function apiProtocolLabel(protocol: ApiProtocol | undefined): string {

View file

@ -259,6 +259,65 @@ describe('loadConfig', () => {
expect(config.configMigrationVersion).toBe(1);
});
it('migrates legacy Ollama Cloud configs to an explicit ollama apiProtocol', () => {
  // Persist a config that has no apiProtocol field, then load it.
  const stored: Partial<AppConfig> = {
    mode: 'api',
    apiKey: 'ollama-key',
    baseUrl: 'https://ollama.com',
    model: 'gpt-oss:120b',
    agentId: null,
    skillId: null,
    designSystemId: null,
  };
  store.set('open-design:config', JSON.stringify(stored));

  const {
    mode,
    baseUrl,
    model,
    apiProtocol,
    apiProviderBaseUrl,
    configMigrationVersion,
  } = loadConfig();

  // User-entered fields are carried over unchanged.
  expect(mode).toBe('api');
  expect(baseUrl).toBe('https://ollama.com');
  expect(model).toBe('gpt-oss:120b');
  // Fields filled in while loading.
  expect(apiProtocol).toBe('ollama');
  expect(apiProviderBaseUrl).toBe('https://ollama.com');
  expect(configMigrationVersion).toBe(1);
});
it('migrates legacy ollama.com configs with a custom base URL path', () => {
  // Stored base URL ends in /api and the config has no apiProtocol field.
  const stored: Partial<AppConfig> = {
    mode: 'api',
    apiKey: 'ollama-key',
    baseUrl: 'https://ollama.com/api',
    model: 'deepseek-v4-pro',
    agentId: null,
    skillId: null,
    designSystemId: null,
  };
  store.set('open-design:config', JSON.stringify(stored));

  const { apiProtocol, baseUrl } = loadConfig();

  expect(apiProtocol).toBe('ollama');
  // /api suffix must be stripped so the daemon doesn't build /api/api/chat.
  expect(baseUrl).toBe('https://ollama.com');
});
it('migrates legacy ollama.com configs with a trailing /api/ suffix', () => {
  // Same as the /api case, but the stored URL also ends with a slash.
  const stored: Partial<AppConfig> = {
    mode: 'api',
    apiKey: 'ollama-key',
    baseUrl: 'https://ollama.com/api/',
    model: 'glm-5',
    agentId: null,
    skillId: null,
    designSystemId: null,
  };
  store.set('open-design:config', JSON.stringify(stored));

  const { apiProtocol, baseUrl } = loadConfig();

  expect(apiProtocol).toBe('ollama');
  // Both the trailing slash and the /api segment are removed.
  expect(baseUrl).toBe('https://ollama.com');
});
it('does not overwrite an already explicit apiProtocol', () => {
const explicitConfig: Partial<AppConfig> = {
mode: 'api',

View file

@ -133,7 +133,7 @@ export type ConnectionTestKind =
| 'agent_spawn_failed'
| 'unknown';
export type ConnectionTestProtocol = 'anthropic' | 'openai' | 'azure' | 'google';
export type ConnectionTestProtocol = 'anthropic' | 'openai' | 'azure' | 'google' | 'ollama';
export interface ProviderTestRequest {
protocol: ConnectionTestProtocol;