feat(byok): Added Ollama Cloud as BYOK provider (#923)

* feat: add Ollama Cloud to KNOWN_PROVIDERS as OpenAI-compatible BYOK provider
* feat: add ollama.com to isOpenAICompatible base URL detection
* feat: add Ollama Cloud models to SUGGESTED_MODELS_BY_PROTOCOL fallback list
* fix: use full Ollama Cloud model list from /api/tags, drop -cloud suffix
* feat: add Ollama Cloud as native protocol with NDJSON streaming and connection test support
* fix: remove ollama.com from OpenAI compatibility check
* feat: add token overrides for Ollama Cloud models to prevent truncation
* fix: extend inferApiProtocol and legacy migration to recognize ollama.com base URLs
* fix: normalize Ollama Cloud base URL by stripping /api suffix during migration and in daemon

---------

Co-authored-by: herediaron <aronheredi346@gmail.com>
2026-06-01 03:14:35 +07:00 · 2026-05-09 05:21:16 +02:00 · 2026-05-09 05:21:16 +02:00 · 66f84972cf
commit 66f84972cf
parent 64ed8e7046
11 changed files with 389 additions and 6 deletions
--- a/apps/daemon/src/connectionTest.ts
+++ b/apps/daemon/src/connectionTest.ts
@ -177,6 +177,15 @@ function inspectProviderCompletion(
    };
  }

+  if (protocol === 'ollama') {
+    const msg = (obj as { message?: { content?: unknown } }).message;
+    const hasContent = typeof msg?.content === 'string';
+    return {
+      valid: Array.isArray((obj as { messages?: unknown }).messages) || hasContent,
+      ...(hasContent ? { sample: truncateSample(msg?.content) } : {}),
+    };
+  }
+
  return { valid: false };
 }

@ -377,8 +386,6 @@ function buildProviderCall(input: ProviderTestRequest): ProviderCallShape {
    }
    case 'google': {
      const trimmedBase = baseUrl.replace(/\/+$/, '');
-      // Non-streaming variant — deliberately not :streamGenerateContent so
-      // we can JSON.parse the response in one shot.
      return {
        url: `${trimmedBase}/v1beta/models/${encodeURIComponent(model)}:generateContent`,
        headers: {
@ -405,6 +412,28 @@ function buildProviderCall(input: ProviderTestRequest): ProviderCallShape {
        },
      };
    }
+    case 'ollama': {
+      const trimmedBase = baseUrl.replace(/\/+$/, '').replace(/\/api\/?$/, '');
+      return {
+        url: `${trimmedBase}/api/chat`,
+        headers: {
+          'content-type': 'application/json',
+          authorization: `Bearer ${apiKey}`,
+        },
+        body: {
+          model,
+          messages: [{ role: 'user', content: SMOKE_PROMPT }],
+          stream: false,
+        },
+        extractText: (data) => {
+          const message = (data as { message?: { content?: unknown } }).message;
+          if (message && typeof (message as { content?: unknown }).content === 'string') {
+            return (message as { content: string }).content;
+          }
+          return '';
+        },
+      };
+    }
    default:
      throw new Error(`Unknown protocol: ${(input as { protocol?: string }).protocol}`);
  }
--- a/apps/daemon/src/server.ts
+++ b/apps/daemon/src/server.ts
@ -6173,13 +6173,13 @@ export async function startServer({
        const protocol = body.protocol;
        if (
          typeof protocol !== 'string' ||
-          !['anthropic', 'openai', 'azure', 'google'].includes(protocol)
+          !['anthropic', 'openai', 'azure', 'google', 'ollama'].includes(protocol)
        ) {
          return sendApiError(
            res,
            400,
            'BAD_REQUEST',
-            'protocol must be one of anthropic|openai|azure|google',
+            'protocol must be one of anthropic|openai|azure|google|ollama',
          );
        }
        if (
@ -6386,6 +6386,44 @@ export async function startServer({
    if (tail) await onFrame(collectSseFrame(tail));
  };

+  // Ollama Cloud streams NDJSON (newline-delimited JSON) — each line is a
+  // complete JSON object. Parse per-line and dispatch parsed objects.
+  const streamUpstreamNdjson = async (response, onFrame) => {
+    const reader = response.body.getReader();
+    const decoder = new TextDecoder();
+    let buffer = '';
+
+    while (true) {
+      const { done, value } = await reader.read();
+      if (done) break;
+      buffer += decoder.decode(value, { stream: true });
+
+      let newline = buffer.indexOf('\n');
+      while (newline !== -1) {
+        const line = buffer.slice(0, newline).trim();
+        buffer = buffer.slice(newline + 1);
+        newline = buffer.indexOf('\n');
+        if (!line) continue;
+        try {
+          const data = JSON.parse(line);
+          if (await onFrame({ data })) return;
+        } catch {
+          // skip unparseable lines
+        }
+      }
+    }
+
+    const tail = buffer.trim();
+    if (tail) {
+      try {
+        const data = JSON.parse(tail);
+        await onFrame({ data });
+      } catch {
+        // skip
+      }
+    }
+  };
+
  const extractOpenAIText = (data) => {
    const choices = data?.choices;
    if (!Array.isArray(choices) || choices.length === 0) return '';
@ -6838,6 +6876,98 @@ export async function startServer({
    }
  });

+  app.post('/api/proxy/ollama/stream', async (req, res) => {
+    /** @type {Partial<ProxyStreamRequest>} */
+    const proxyBody = req.body || {};
+    const { baseUrl, apiKey, model, systemPrompt, messages, maxTokens } = proxyBody;
+    if (!apiKey || !model) {
+      return sendApiError(
+        res,
+        400,
+        'BAD_REQUEST',
+        'apiKey and model are required',
+      );
+    }
+
+    const effectiveBaseUrl = baseUrl || 'https://ollama.com';
+    const validated = validateExternalApiBaseUrl(effectiveBaseUrl);
+    if (validated.error) {
+      return sendApiError(
+        res,
+        validated.forbidden ? 403 : 400,
+        validated.forbidden ? 'FORBIDDEN' : 'BAD_REQUEST',
+        validated.error,
+      );
+    }
+
+    const clean = effectiveBaseUrl.replace(/\/+$/, '').replace(/\/api\/?$/, '');
+    const url = `${clean}/api/chat`;
+    console.log(
+      `[proxy:ollama] ${req.method} ${validated.parsed.hostname} model=${model}`,
+    );
+
+    const payloadMessages = Array.isArray(messages) ? [...messages] : [];
+    if (typeof systemPrompt === 'string' && systemPrompt) {
+      payloadMessages.unshift({ role: 'system', content: systemPrompt });
+    }
+
+    const payload = {
+      model,
+      messages: payloadMessages,
+      stream: true,
+    };
+    if (typeof maxTokens === 'number' && maxTokens > 0) {
+      payload.options = { num_predict: maxTokens };
+    }
+
+    const sse = createSseResponse(res);
+    sse.send('start', { model });
+    try {
+      const response = await fetch(url, {
+        method: 'POST',
+        headers: {
+          'Content-Type': 'application/json',
+          Authorization: `Bearer ${apiKey}`,
+        },
+        body: JSON.stringify(payload),
+      });
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        console.error(
+          `[proxy:ollama] upstream error: ${response.status} ${redactAuthTokens(errorText)}`,
+        );
+        sendProxyError(sse, `Upstream error: ${response.status}`, {
+          code: proxyErrorCode(response.status),
+          details: errorText,
+          retryable: response.status === 429 || response.status >= 500,
+        });
+        return sse.end();
+      }
+
+      let ended = false;
+      await streamUpstreamNdjson(response, ({ data }) => {
+        if (!data) return false;
+        if (data.done) {
+          sse.send('end', {});
+          ended = true;
+          return true;
+        }
+        const content = data.message?.content;
+        if (typeof content === 'string' && content) {
+          sse.send('delta', { delta: content });
+        }
+        return false;
+      });
+      if (!ended) sse.send('end', {});
+      sse.end();
+    } catch (err) {
+      console.error(`[proxy:ollama] internal error: ${err.message}`);
+      sendProxyError(sse, err.message, { code: 'INTERNAL_ERROR' });
+      sse.end();
+    }
+  });
+
  // Wait for `listen` to bind so callers always see the resolved URL —
  // critical when port=0 (ephemeral port) and when the embedding sidecar
  // needs to advertise the port to a parent process before any request
--- a/apps/web/src/components/SettingsDialog.tsx
+++ b/apps/web/src/components/SettingsDialog.tsx
@ -155,6 +155,47 @@ const SUGGESTED_MODELS_BY_PROTOCOL = {
    'MiniMax-M2',
    'mimo-v2.5-pro',
  ],
+  ollama: [
+    'cogito-2.1:671b',
+    'deepseek-v3.1:671b',
+    'deepseek-v3.2',
+    'deepseek-v4-flash',
+    'deepseek-v4-pro',
+    'devstral-2:123b',
+    'devstral-small-2:24b',
+    'gemini-3-flash-preview',
+    'gemma3:4b',
+    'gemma3:12b',
+    'gemma3:27b',
+    'gemma4:31b',
+    'glm-4.6',
+    'glm-4.7',
+    'glm-5',
+    'glm-5.1',
+    'gpt-oss:20b',
+    'gpt-oss:120b',
+    'kimi-k2:1t',
+    'kimi-k2-thinking',
+    'kimi-k2.5',
+    'kimi-k2.6',
+    'minimax-m2',
+    'minimax-m2.1',
+    'minimax-m2.5',
+    'minimax-m2.7',
+    'ministral-3:3b',
+    'ministral-3:8b',
+    'ministral-3:14b',
+    'mistral-large-3:675b',
+    'nemotron-3-nano:30b',
+    'nemotron-3-super',
+    'qwen3-coder:480b',
+    'qwen3-coder-next',
+    'qwen3-next:80b',
+    'qwen3-vl:235b',
+    'qwen3-vl:235b-instruct',
+    'qwen3.5:397b',
+    'rnj-1:8b',
+  ],
  azure: [
    'gpt-4o',
    'gpt-4o-mini',
@ -175,6 +216,7 @@ const API_PROTOCOL_TABS: Array<{
  { id: 'openai', title: 'OpenAI' },
  { id: 'azure', title: 'Azure OpenAI' },
  { id: 'google', title: 'Google Gemini' },
+  { id: 'ollama', title: 'Ollama Cloud' },
 ];

 const API_PROTOCOL_LABELS: Record<ApiProtocol, string> = {
@ -182,6 +224,7 @@ const API_PROTOCOL_LABELS: Record<ApiProtocol, string> = {
  openai: 'OpenAI API',
  azure: 'Azure OpenAI',
  google: 'Google Gemini',
+  ollama: 'Ollama Cloud API',
 };

 const API_KEY_PLACEHOLDERS: Record<ApiProtocol, string> = {
@ -189,6 +232,7 @@ const API_KEY_PLACEHOLDERS: Record<ApiProtocol, string> = {
  openai: 'sk-...',
  azure: 'azure key',
  google: 'AIza...',
+  ollama: 'Ollama API key',
 };

 type RescanNotice =
--- a/apps/web/src/providers/anthropic.ts
+++ b/apps/web/src/providers/anthropic.ts
@ -13,6 +13,7 @@ import type { AppConfig, ChatMessage } from '../types';
 import { streamMessageAnthropicProxy } from './anthropic-compatible';
 import { streamMessageAzure } from './azure-compatible';
 import { streamMessageGoogle } from './google-compatible';
+import { streamMessageOllama } from './ollama-compatible';
 import { isOpenAICompatible, streamMessageOpenAI } from './openai-compatible';

 // Re-export for convenience
@ -44,6 +45,9 @@ export async function streamMessage(
  if (cfg.apiProtocol === 'azure') {
    return streamMessageAzure(cfg, system, history, signal, handlers);
  }
+  if (cfg.apiProtocol === 'ollama') {
+    return streamMessageOllama(cfg, system, history, signal, handlers);
+  }
  if (cfg.apiProtocol === 'google') {
    return streamMessageGoogle(cfg, system, history, signal, handlers);
  }
--- a/apps/web/src/providers/ollama-compatible.ts
+++ b/apps/web/src/providers/ollama-compatible.ts
@ -0,0 +1,13 @@
+import type { AppConfig, ChatMessage } from '../types';
+import type { StreamHandlers } from './anthropic';
+import { streamProxyEndpoint } from './api-proxy';
+
+export async function streamMessageOllama(
+  cfg: AppConfig,
+  system: string,
+  history: ChatMessage[],
+  signal: AbortSignal,
+  handlers: StreamHandlers,
+): Promise<void> {
+  return streamProxyEndpoint('/api/proxy/ollama/stream', cfg, system, history, signal, handlers);
+}
--- a/apps/web/src/state/config.ts
+++ b/apps/web/src/state/config.ts
@ -190,6 +190,53 @@ export const KNOWN_PROVIDERS: KnownProvider[] = [
    model: 'mimo-v2.5-pro',
    models: ['mimo-v2.5-pro'],
  },
+  {
+    label: 'Ollama Cloud',
+    protocol: 'ollama',
+    baseUrl: 'https://ollama.com',
+    model: 'gpt-oss:120b',
+    models: [
+      'cogito-2.1:671b',
+      'deepseek-v3.1:671b',
+      'deepseek-v3.2',
+      'deepseek-v4-flash',
+      'deepseek-v4-pro',
+      'devstral-2:123b',
+      'devstral-small-2:24b',
+      'gemini-3-flash-preview',
+      'gemma3:4b',
+      'gemma3:12b',
+      'gemma3:27b',
+      'gemma4:31b',
+      'glm-4.6',
+      'glm-4.7',
+      'glm-5',
+      'glm-5.1',
+      'gpt-oss:20b',
+      'gpt-oss:120b',
+      'kimi-k2:1t',
+      'kimi-k2-thinking',
+      'kimi-k2.5',
+      'kimi-k2.6',
+      'minimax-m2',
+      'minimax-m2.1',
+      'minimax-m2.5',
+      'minimax-m2.7',
+      'ministral-3:3b',
+      'ministral-3:8b',
+      'ministral-3:14b',
+      'mistral-large-3:675b',
+      'nemotron-3-nano:30b',
+      'nemotron-3-super',
+      'qwen3-coder:480b',
+      'qwen3-coder-next',
+      'qwen3-next:80b',
+      'qwen3-vl:235b',
+      'qwen3-vl:235b-instruct',
+      'qwen3.5:397b',
+      'rnj-1:8b',
+    ],
+  },
  {
    label: 'MiMo (Xiaomi) — Anthropic',
    protocol: 'anthropic',
@ -233,6 +280,11 @@ function isValidOrbitTime(time: string): boolean {

 function inferApiProtocol(model: string, baseUrl: string): ApiProtocol {
  try {
+    const normalized = (baseUrl || '').toLowerCase();
+    // Any config pointing at ollama.com should resolve to the new ollama
+    // protocol so both chat and the connection test hit the native Ollama
+    // proxy instead of the Anthropic or OpenAI paths.
+    if (normalized.includes('ollama.com')) return 'ollama';
    return isOpenAICompatible(model, baseUrl) ? 'openai' : 'anthropic';
  } catch {
    // Preserve the rest of the user's settings even if an old saved base URL is
@ -286,6 +338,14 @@ export function loadConfig(): AppConfig {
      // legacy config can be migrated when it is loaded.
      if (!parsedHasApiProtocol) {
        merged.apiProtocol = inferApiProtocol(merged.model, merged.baseUrl);
+        // Ollama Cloud legacy configs may carry a base URL that includes
+        // /api or /api/ — normalize to the host root so the daemon's own
+        // /api/chat appending doesn't double up.
+        if (merged.apiProtocol === 'ollama') {
+          merged.baseUrl = merged.baseUrl
+            .replace(/\/api\/?$/, '')
+            .replace(/\/+$/, '');
+        }
        // Also set apiProviderBaseUrl so setApiProtocol() can correctly identify
        // whether the user is on a known provider and switch defaults appropriately.
        // null means "custom/unknown provider" so the protocol switch won't override
--- a/apps/web/src/state/maxTokens.ts
+++ b/apps/web/src/state/maxTokens.ts
@ -31,6 +31,48 @@ const OVERRIDES: Record<string, number> = {
  // Spec: https://platform.deepseek.com/docs/model-cards
  'deepseek-v4-pro': 384000,
  'deepseek-v4-flash': 384000,
+
+  // Ollama Cloud models. LiteLLM keys this set under `ollama/`-prefixed
+  // ids (many with `-cloud` suffixes), so the bare model-id lookups never
+  // match. Add overrides so chat doesn't silently clip at 8192 tokens.
+  // 131072 (128k) is a safe floor for all Ollama Cloud models.
+  'cogito-2.1:671b': 131072,
+  'deepseek-v3.1:671b': 163840,
+  'deepseek-v3.2': 163840,
+  'devstral-2:123b': 131072,
+  'devstral-small-2:24b': 131072,
+  'gemini-3-flash-preview': 131072,
+  'gemma3:4b': 131072,
+  'gemma3:12b': 131072,
+  'gemma3:27b': 131072,
+  'gemma4:31b': 131072,
+  'glm-4.6': 131072,
+  'glm-4.7': 131072,
+  'glm-5': 131072,
+  'glm-5.1': 131072,
+  'gpt-oss:20b': 131072,
+  'gpt-oss:120b': 131072,
+  'kimi-k2:1t': 131072,
+  'kimi-k2-thinking': 131072,
+  'kimi-k2.5': 131072,
+  'kimi-k2.6': 131072,
+  'minimax-m2': 131072,
+  'minimax-m2.1': 131072,
+  'minimax-m2.5': 131072,
+  'minimax-m2.7': 131072,
+  'ministral-3:3b': 131072,
+  'ministral-3:8b': 131072,
+  'ministral-3:14b': 131072,
+  'mistral-large-3:675b': 131072,
+  'nemotron-3-nano:30b': 131072,
+  'nemotron-3-super': 131072,
+  'qwen3-coder:480b': 262144,
+  'qwen3-coder-next': 131072,
+  'qwen3-next:80b': 131072,
+  'qwen3-vl:235b': 131072,
+  'qwen3-vl:235b-instruct': 131072,
+  'qwen3.5:397b': 131072,
+  'rnj-1:8b': 131072,
 };

 export function modelMaxTokensDefault(model: string): number {
--- a/apps/web/src/types.ts
+++ b/apps/web/src/types.ts
@ -65,7 +65,7 @@ export type {
 } from '@open-design/contracts';

 export type ExecMode = 'daemon' | 'api';
-export type ApiProtocol = 'anthropic' | 'openai' | 'azure' | 'google';
+export type ApiProtocol = 'anthropic' | 'openai' | 'azure' | 'google' | 'ollama';

 export type LiveArtifactTabId = `live:${string}`;
 export type ProjectWorkspaceTabId = string | LiveArtifactTabId;
--- a/apps/web/src/utils/apiProtocol.ts
+++ b/apps/web/src/utils/apiProtocol.ts
@ -5,6 +5,7 @@ const API_PROTOCOL_LABELS: Record<ApiProtocol, string> = {
  openai: 'OpenAI API',
  azure: 'Azure OpenAI',
  google: 'Google Gemini',
+  ollama: 'Ollama Cloud API',
 };

 const API_PROTOCOL_AGENT_IDS: Record<ApiProtocol, string> = {
@ -12,6 +13,7 @@ const API_PROTOCOL_AGENT_IDS: Record<ApiProtocol, string> = {
  openai: 'openai-api',
  azure: 'azure-openai-api',
  google: 'google-gemini-api',
+  ollama: 'ollama-cloud-api',
 };

 export function apiProtocolLabel(protocol: ApiProtocol | undefined): string {
--- a/apps/web/tests/state/config.test.ts
+++ b/apps/web/tests/state/config.test.ts
@ -259,6 +259,65 @@ describe('loadConfig', () => {
    expect(config.configMigrationVersion).toBe(1);
  });

+  it('migrates legacy Ollama Cloud configs to an explicit ollama apiProtocol', () => {
+    const legacyConfig: Partial<AppConfig> = {
+      mode: 'api',
+      apiKey: 'ollama-key',
+      baseUrl: 'https://ollama.com',
+      model: 'gpt-oss:120b',
+      agentId: null,
+      skillId: null,
+      designSystemId: null,
+    };
+    store.set('open-design:config', JSON.stringify(legacyConfig));
+
+    const config = loadConfig();
+
+    expect(config.mode).toBe('api');
+    expect(config.baseUrl).toBe('https://ollama.com');
+    expect(config.model).toBe('gpt-oss:120b');
+    expect(config.apiProtocol).toBe('ollama');
+    expect(config.apiProviderBaseUrl).toBe('https://ollama.com');
+    expect(config.configMigrationVersion).toBe(1);
+  });
+
+  it('migrates legacy ollama.com configs with a custom base URL path', () => {
+    const legacyConfig: Partial<AppConfig> = {
+      mode: 'api',
+      apiKey: 'ollama-key',
+      baseUrl: 'https://ollama.com/api',
+      model: 'deepseek-v4-pro',
+      agentId: null,
+      skillId: null,
+      designSystemId: null,
+    };
+    store.set('open-design:config', JSON.stringify(legacyConfig));
+
+    const config = loadConfig();
+
+    expect(config.apiProtocol).toBe('ollama');
+    // /api suffix must be stripped so the daemon doesn't build /api/api/chat.
+    expect(config.baseUrl).toBe('https://ollama.com');
+  });
+
+  it('migrates legacy ollama.com configs with a trailing /api/ suffix', () => {
+    const legacyConfig: Partial<AppConfig> = {
+      mode: 'api',
+      apiKey: 'ollama-key',
+      baseUrl: 'https://ollama.com/api/',
+      model: 'glm-5',
+      agentId: null,
+      skillId: null,
+      designSystemId: null,
+    };
+    store.set('open-design:config', JSON.stringify(legacyConfig));
+
+    const config = loadConfig();
+
+    expect(config.apiProtocol).toBe('ollama');
+    expect(config.baseUrl).toBe('https://ollama.com');
+  });
+
  it('does not overwrite an already explicit apiProtocol', () => {
    const explicitConfig: Partial<AppConfig> = {
      mode: 'api',
--- a/packages/contracts/src/api/connectionTest.ts
+++ b/packages/contracts/src/api/connectionTest.ts
@ -133,7 +133,7 @@ export type ConnectionTestKind =
  | 'agent_spawn_failed'
  | 'unknown';

-export type ConnectionTestProtocol = 'anthropic' | 'openai' | 'azure' | 'google';
+export type ConnectionTestProtocol = 'anthropic' | 'openai' | 'azure' | 'google' | 'ollama';

 export interface ProviderTestRequest {
  protocol: ConnectionTestProtocol;