// @ts-nocheck
// LLM-driven memory extractor.
//
// The heuristic regex pack in `memory.ts` only catches explicit markers
// ("remember:", "记住", "我喜欢"…). For everything else — implicit
// preferences, role, ongoing-work context — we ask a small fast model
// to look at the just-finished turn and the existing memory and return
// a JSON list of facts to add.
//
// This module is fire-and-forget: the chat run finishes and triggers
// extraction in the background. Output lands in the same MD store so
// the next turn's prompt picks it up automatically.
//
// Provider selection (in order):
//   0. memory `.config.json` extraction override → user-supplied
//      provider/model/baseUrl/apiKey/apiVersion from the Memory model
//      picker. The override may pick any provider listed in
//      `PROVIDER_DEFAULTS` — anthropic, openai, azure (openai-compatible
//      at a per-resource URL), google gemini, ollama, or senseaudio.
//      This is the only path that lets a Local-CLI user (no env-var key
//      in the daemon's environment) point memory extraction at, say,
//      their personal Anthropic key with a specific Haiku build instead
//      of falling all the way through to gpt-4o-mini. When the override
//      carries the provider but no apiKey we fall back to the
//      corresponding env var (or the media-config OpenAI key for
//      openai/azure overrides) so a "I want to switch to OpenAI but
//      reuse my existing key" change costs zero typing.
//   1. current Local CLI, when the caller passed `chatAgentId` and the
//      agent supports headless one-shot output (Claude Code, Codex and
//      OpenCode today).
//   2. matching provider env var for the current chat protocol.
//   3. BYOK chat-config snapshot for API-mode chats.
//   4. ANTHROPIC_API_KEY env → Claude Haiku 4.5 (legacy fallback)
//   5. OPENAI_API_KEY env → gpt-4o-mini
//   6. media-config OpenAI BYOK → gpt-4o-mini
//      (the key the user already typed into Settings → Media providers;
//      reuses an existing credential so Local-CLI users don't have to
//      paste it twice just to get LLM-side memory extraction)
//   7.
//      nothing → record a 'skipped: no-provider' attempt
//      so the UI can surface "configure a key to enable LLM memory"
//      instead of staying silent
//
// Every attempt — whether it actually called the model or short-circuited
// — produces a record in `memory-extractions.ts` so the settings panel
// can show running / skipped / success / failed states in real time.

import {
  composeMemoryBody,
  listMemoryEntries,
  readMemoryConfig,
  upsertMemoryEntry,
  memoryEvents,
} from './memory.js';
import {
  startExtraction,
  recordSkip,
  markProvider,
  markSkipped,
  markProposed,
  markSuccess,
  markFailed,
} from './memory-extractions.js';
import { resolveProviderConfig } from './media-config.js';
import { spawn } from 'node:child_process';
import { createCommandInvocation } from '@open-design/platform';
import {
  applyAgentLaunchEnv,
  getAgentDef,
  resolveAgentLaunch,
  spawnEnvForAgent,
} from './agents.js';
import { agentCliEnvForAgent, readAppConfig } from './app-config.js';
import { createJsonEventStreamHandler } from './json-event-stream.js';

// Default system prompt for an extraction call. It tells the model what
// counts as a memorable fact, which entry types exist, and the exact JSON
// shape to answer with. The literal is runtime text sent to the model —
// edit it deliberately, never as a formatting clean-up.
const SYSTEM_PROMPT = `You are a memory extractor for a personal AI design assistant. Given the user's most recent message (and optionally the assistant's reply), plus a snapshot of the existing memory store, decide whether ANYTHING in this turn is worth remembering across future conversations. A fact is worth remembering when ALL of these are true: - It's about the user, their preferences, their tools, their ongoing work, OR a stable reference (a Linear board id, a Slack channel, a teammate name). - It will plausibly still be true in a week. - It would change how an assistant responds in a later, unrelated chat. A fact is NOT worth remembering when ANY of these is true: - It's a transient state (current task, what file they're editing right now). - It's already captured in the existing memory. - It's just the user asking a question or describing a one-off bug. - It's something the assistant said about itself. 
- It's a code snippet, an output, or a paste. Output STRICT JSON in this exact shape — nothing else, no prose, no markdown fences: { "entries": [ { "type": "user|feedback|project|reference", "name": "short title (≤ 60 chars)", "description": "one-line summary (≤ 140 chars)", "body": "the actual remembered fact, 1-3 sentences" } ] } If there's nothing worth remembering, return: {"entries": []} Type rules: - user: who they are, role, expertise, long-term goals - feedback: corrections / preferences about how to work ("don't add comments unless asked") - project: ongoing initiatives, deadlines, why-decisions; usually time-bounded - reference: pointers to external systems (Linear projects, Slack channels, dashboards)`;

// Provider defaults are centralised so the override path and the
// auto-pick path can't drift apart. When the user picks "Custom →
// anthropic" without typing a model, we still want the same
// claude-haiku-4-5 fallback the env path uses.
//
// Azure has no useful baseUrl default — every Azure resource has its
// own `https://<resource>.openai.azure.com` host, so the user must
// supply theirs. We still emit an empty default here so a missing
// override doesn't crash with `undefined` when accessed.
//
// Shape of each entry: `model` and `baseUrl` are always present;
// `apiVersion` exists only on the azure entry.
const PROVIDER_DEFAULTS = {
  anthropic: {
    model: 'claude-haiku-4-5',
    baseUrl: 'https://api.anthropic.com',
  },
  openai: {
    model: 'gpt-4o-mini',
    baseUrl: 'https://api.openai.com',
  },
  azure: {
    model: 'gpt-4o-mini',
    baseUrl: '',
    apiVersion: '2024-10-21',
  },
  google: {
    model: 'gemini-2.0-flash',
    baseUrl: 'https://generativelanguage.googleapis.com',
  },
  // Ollama Cloud speaks OpenAI-compatible chat-completions, so the
  // extractor just routes through callOpenAI with the ollama base URL
  // and the user's Ollama Cloud API key. The default model is a small
  // open-weight model so the auto-pick produces a deterministic answer
  // for users who haven't customised the picker; users who care can
  // pick anything off the picker's `Custom...` list.
  ollama: {
    model: 'gemma3:4b',
    baseUrl: 'https://ollama.com',
  },
  // SenseAudio's chat API is OpenAI-compatible (POST /v1/chat/completions,
  // Bearer auth), so the extractor falls through to callOpenAI with this
  // base URL and the user's SenseAudio API key. The default model is the
  // small/fast variant so auto-pick stays cheap; users can swap in
  // senseaudio-s2 or any gateway model via the picker.
  senseaudio: {
    model: 'senseaudio-s2-flash',
    baseUrl: 'https://api.senseaudio.cn',
  },
};

// Map an explicit override provider to the env var the daemon should
// consult when the override doesn't carry its own apiKey. The fallback
// chain stays the same as before for anthropic/openai; azure uses the
// AZURE_OPENAI_API_KEY convention; google uses GOOGLE_API_KEY (matching
// the gemini SDK's expectation, with GEMINI_API_KEY as a secondary).
/**
 * Resolve the API key for `provider` from the daemon's environment.
 *
 * Values are trimmed, so an env var that is unset, empty or only
 * whitespace counts as "no key". Where two variables are listed the first
 * non-empty one wins.
 *
 * @param {string} provider - one of anthropic, openai, azure, google,
 *   ollama, senseaudio; anything else yields ''.
 * @returns {string} the trimmed key, or '' when none is configured.
 */
function envKeyFor(provider) {
  if (provider === 'anthropic') return process.env.ANTHROPIC_API_KEY?.trim() || '';
  if (provider === 'openai') return process.env.OPENAI_API_KEY?.trim() || '';
  if (provider === 'azure') {
    return (
      process.env.AZURE_OPENAI_API_KEY?.trim() ||
      process.env.AZURE_API_KEY?.trim() ||
      ''
    );
  }
  if (provider === 'google') {
    return (
      process.env.GOOGLE_API_KEY?.trim() ||
      process.env.GEMINI_API_KEY?.trim() ||
      ''
    );
  }
  if (provider === 'ollama') {
    return process.env.OLLAMA_API_KEY?.trim() || '';
  }
  if (provider === 'senseaudio') {
    // The OD_-prefixed variable takes precedence over the plain one.
    return (
      process.env.OD_SENSEAUDIO_API_KEY?.trim() ||
      process.env.SENSEAUDIO_API_KEY?.trim() ||
      ''
    );
  }
  return '';
}

// Map a chat agent id to the API protocol family it speaks under the
// hood. This is the bridge that makes "follow chat" actually mean
// something for memory extraction in CLI mode: when the user is on
// Claude Code (claude → anthropic) we don't want memory to silently
// fall through to whatever OpenAI key happens to be in media-config —
// that produces the very confusing "openai/gpt-4o-mini" attempts the
// user sees while they think they're "using Claude".
// Anything we don't recognise stays unconstrained (returns null) so the
// legacy cross-provider fallback can still kick in for setups we don't
// model.
/**
 * @param {unknown} agentId - chat agent id; matched case-insensitively
 *   after trimming.
 * @returns {'anthropic' | 'google' | 'openai' | null} protocol family, or
 *   null for non-strings, empty ids and agents we don't model.
 */
function chatProtocolFromAgentId(agentId) {
  if (typeof agentId !== 'string' || agentId === '') return null;
  switch (agentId.trim().toLowerCase()) {
    case 'claude':
      return 'anthropic';
    case 'gemini':
      return 'google';
    // Codex, OpenCode, Qwen, DeepSeek, Kimi, Copilot, Pi, Kiro, Kilo,
    // Vibe, Devin, Hermes, Cursor-Agent, Qoder all use the OpenAI chat-
    // completions wire format.
    case 'codex':
    case 'opencode':
    case 'qwen':
    case 'deepseek':
    case 'kimi':
    case 'copilot':
    case 'pi':
    case 'kiro':
    case 'kilo':
    case 'vibe':
    case 'devin':
    case 'hermes':
    case 'cursor-agent':
    case 'qoder':
      return 'openai';
    default:
      return null;
  }
}

/**
 * Whether extraction may run through the user's local CLI.
 *
 * Keep this allowlist explicit: each agent below has a headless one-shot
 * mode that accepts stdin and a parser we can reduce back to assistant
 * text. `agentId` is compared verbatim, so callers pass it normalised.
 *
 * @param {string} agentId
 * @param {string} provider - protocol family the extraction should use.
 * @returns {boolean}
 */
function canUseLocalCliForMemory(agentId, provider) {
  switch (agentId) {
    case 'claude':
      return provider === 'anthropic';
    case 'codex':
    case 'opencode':
      return provider === 'openai';
    default:
      return false;
  }
}

/**
 * Build the provider descriptor for a Local-CLI extraction run.
 *
 * @param {string} agentId
 * @param {string} provider
 * @param {unknown} model - requested model; blank or non-string values
 *   become the sentinel 'default'.
 * @returns {object | null} descriptor with `transport: 'chat-cli'`, or
 *   null when the agent/provider pair is not on the allowlist.
 */
function localCliProviderFor(agentId, provider, model) {
  if (!canUseLocalCliForMemory(agentId, provider)) return null;
  const requestedModel = typeof model === 'string' ? model.trim() : '';
  return {
    kind: provider,
    model: requestedModel || 'default',
    baseUrl: 'local-cli',
    apiVersion: '',
    credentialSource: 'chat-cli',
    transport: 'chat-cli',
    agentId,
  };
}

// Pick a provider in this order:
//   0. Memory config override → user-set provider/model/baseUrl/apiKey
//   1. Current Local CLI → if the user is chatting through Claude Code,
//      run the same CLI in one-shot mode for extraction. This keeps
//      "Same as chat" literal: no extra OpenAI/Anthropic key required
//      just because the extraction happens in the background.
//   2.
//      Chat-protocol-constrained env var → if the chat is on Claude
//      Code (anthropic), only ANTHROPIC_API_KEY counts; Codex/OpenAI-
//      compatible CLIs only consult OPENAI_API_KEY (and the media-
//      config OpenAI key as a secondary fallback). This stops the
//      legacy "claude user, openai gpt-4o-mini extracts in the
//      background" surprise — if the matching key isn't configured,
//      we'd rather skip with 'no-provider' and surface that in the
//      history than quietly run on a different vendor's key.
//   3. BYOK chat-config snapshot → for API-mode chats (the picker is
//      on "Same as chat"), `/api/memory/extract` forwards the live
//      chat provider/key/baseUrl/apiVersion as `chatProvider`. We use
//      it directly so the default extractor follows the chat
//      configuration instead of falling through to env / media-config
//      which the daemon never saw the user configure. The per-protocol
//      fast-model default applies only when `chatProvider.model` was
//      not given — memory should default to a cheaper/faster model
//      than the chat model the user is paying for, but an explicit
//      model from the caller is respected.
//   4. (legacy fallback, only when we can't tell which CLI is in use
//      AND the caller didn't pass `chatProvider`)
//      ANTHROPIC_API_KEY env → Claude Haiku 4.5
//   5. (legacy fallback) OPENAI_API_KEY env → gpt-4o-mini
//   6. (legacy fallback) media-config OpenAI BYOK → gpt-4o-mini
//
// The `OD_MEMORY_MODEL` env continues to override the model name across
// (1)–(6) so power users don't lose that lever. It does NOT override the
// memory-config provider since that one carries an explicit user choice.
// `projectRoot` is required for the media-config path; `chatAgentId` is
// optional but recommended — without it we fall through to the legacy
// unconstrained chain, which is what the daemon used to do and what
// pre-context callers (the HTTP /api/memory/extract endpoint) still
// expect.
// `chatProvider` is the BYOK chat-config snapshot threaded
// through from the web app on a per-call basis (the daemon never
// persists BYOK creds, so this is the only signal we have for that
// mode).
/**
 * Choose the provider (and credentials) for one extraction attempt.
 *
 * @param {string | null} projectRoot - needed for the media-config lookups.
 * @param {string | null} dataDir - where the memory config override lives.
 * @param {string | null} chatAgentId - Local CLI agent id of the chat, if any.
 * @param {object | null} chatProvider - BYOK snapshot
 *   ({ provider, apiKey, baseUrl, model, apiVersion }), if any.
 * @param {string | null} chatModel - model the chat is using, if known.
 * @returns {Promise<object | null>} provider descriptor
 *   ({ kind, model, baseUrl, apiVersion, credentialSource, apiKey } or the
 *   Local-CLI variant from localCliProviderFor), or null when nothing
 *   usable is configured — the caller records a 'no-provider' skip.
 */
async function pickProvider(projectRoot, dataDir, chatAgentId, chatProvider, chatModel) {
  const chatProtocol = chatProtocolFromAgentId(chatAgentId);
  const normalizedChatAgentId =
    typeof chatAgentId === 'string' ? chatAgentId.trim().toLowerCase() : '';

  // Path 0: explicit override from the memory config.
  let override = null;
  if (dataDir) {
    try {
      const cfg = await readMemoryConfig(dataDir);
      if (cfg?.extraction?.provider) override = cfg.extraction;
    } catch (err) {
      console.warn(
        '[memory-llm] failed to read memory config override',
        err?.message ?? err,
      );
    }
  }
  if (override) {
    // null for a provider id we have no defaults for; everything below
    // must cope with that instead of dereferencing `undefined`.
    const defaults = providerDefaultsFor(override.provider);
    const explicitKey =
      typeof override.apiKey === 'string' && override.apiKey.trim()
        ? override.apiKey.trim()
        : '';
    const envKey = envKeyFor(override.provider);
    let resolvedKey = explicitKey || envKey;
    let credentialSource = explicitKey ? 'memory-config' : envKey ? 'env' : null;
    // Last-chance: an openai-shaped override (openai or azure) with no
    // explicit/env key can still borrow the media-config OpenAI key the
    // user already typed. Anthropic / google have no media counterpart
    // today.
    if (
      !resolvedKey &&
      (override.provider === 'openai' || override.provider === 'azure') &&
      projectRoot
    ) {
      try {
        const cred = await resolveProviderConfig(projectRoot, 'openai');
        if (cred?.apiKey?.trim()) {
          resolvedKey = cred.apiKey.trim();
          credentialSource = 'media-config';
        }
      } catch (err) {
        // Best effort — we still fall through to the no-key handling
        // below, but leave a trace like the other media-config lookups.
        console.warn(
          '[memory-llm] media-config lookup failed (override)',
          err?.message ?? err,
        );
      }
    }
    if (!resolvedKey) {
      // No key anywhere: the only remaining option is the user's own CLI
      // (null when the agent/provider pair can't run headless).
      return localCliProviderFor(
        normalizedChatAgentId,
        override.provider,
        override.model,
      );
    }
    const baseUrl =
      (typeof override.baseUrl === 'string' && override.baseUrl.trim()) ||
      defaults?.baseUrl ||
      '';
    const model =
      (typeof override.model === 'string' && override.model.trim()) ||
      defaults?.model ||
      '';
    // Without an endpoint or a model there is nothing to call. This covers
    // Azure with no resource URL (which would otherwise log a confusing
    // 404 from `https:///openai/deployments/...`) and unknown providers
    // that didn't spell out both values themselves.
    if (!baseUrl || !model) return null;
    return {
      kind: override.provider,
      apiKey: resolvedKey,
      model,
      baseUrl,
      apiVersion:
        override.provider === 'azure'
          ? (typeof override.apiVersion === 'string' && override.apiVersion.trim()) ||
            PROVIDER_DEFAULTS.azure.apiVersion
          : '',
      credentialSource,
    };
  }

  const envOverrideModel = (process.env.OD_MEMORY_MODEL || '').trim();

  // Chat-protocol-constrained branch (paths 1–2). Only run when we know
  // which CLI is in use; we refuse to wander out of the chat protocol's
  // family even when an env var for a different provider is set, because
  // doing so produces the "I'm using Claude but memory says openai
  // gpt-4o-mini" surprise the user reported.
  if (chatProtocol) {
    // Path 1: the same Local CLI in one-shot mode. Uses the trimmed env
    // override so a whitespace-only OD_MEMORY_MODEL can't shadow chatModel.
    const localCliProvider = localCliProviderFor(
      normalizedChatAgentId,
      chatProtocol,
      envOverrideModel || chatModel,
    );
    if (localCliProvider) return localCliProvider;

    // Path 2: env var of the matching protocol family.
    const envKey = envKeyFor(chatProtocol);
    if (envKey) {
      const defaults = PROVIDER_DEFAULTS[chatProtocol];
      return {
        kind: chatProtocol,
        apiKey: envKey,
        model: envOverrideModel || defaults.model,
        baseUrl:
          (chatProtocol === 'anthropic' && process.env.ANTHROPIC_BASE_URL) ||
          (chatProtocol === 'openai' && process.env.OPENAI_BASE_URL) ||
          defaults.baseUrl,
        apiVersion: chatProtocol === 'azure' ? defaults.apiVersion : '',
        credentialSource: 'env',
      };
    }
    // Secondary fallback for openai-compatible CLIs: the user already
    // typed an OpenAI key under Settings → Media providers, so we can
    // borrow it for memory extraction without making them paste it
    // twice. We do NOT try this for anthropic/google chats because the
    // media-config table only has openai-shaped credentials today.
    if (chatProtocol === 'openai') {
      return openAiProviderFromMediaConfig(
        projectRoot,
        envOverrideModel,
        '[memory-llm] media-config lookup failed (chat-constrained)',
      );
    }
    // The chat protocol is known but no key for it is available. Bail
    // out instead of wandering — recording 'skipped: no-provider' is
    // strictly more useful than silently running on a foreign vendor.
    return null;
  }

  // BYOK chat-config snapshot (path 3). The web app forwards the live
  // chat provider/key/baseUrl/apiVersion on every API-mode extraction
  // call so the daemon can run extraction against the same vendor the
  // user is chatting with — even though the daemon never persists
  // BYOK creds itself. Use the per-protocol fast-model default instead
  // of the chat model the user is paying for, so a memory pass on a
  // big chat model (gpt-4o, claude-sonnet-4-5) silently turns into a
  // cheap haiku/mini call. The caller can opt into using the chat
  // model verbatim by setting `chatProvider.model`.
  const byokDefaults = chatProvider ? providerDefaultsFor(chatProvider.provider) : null;
  if (byokDefaults) {
    const apiKey =
      typeof chatProvider.apiKey === 'string' ? chatProvider.apiKey.trim() : '';
    if (apiKey) {
      const baseUrl =
        (typeof chatProvider.baseUrl === 'string' && chatProvider.baseUrl.trim()) ||
        byokDefaults.baseUrl;
      // Azure with no resource URL is unrecoverable — same guard as
      // the override path above.
      if (chatProvider.provider !== 'azure' || baseUrl) {
        const explicitModel =
          typeof chatProvider.model === 'string' && chatProvider.model.trim()
            ? chatProvider.model.trim()
            : '';
        return {
          kind: chatProvider.provider,
          apiKey,
          model: envOverrideModel || explicitModel || byokDefaults.model,
          baseUrl,
          apiVersion:
            chatProvider.provider === 'azure'
              ? (typeof chatProvider.apiVersion === 'string' &&
                  chatProvider.apiVersion.trim()) ||
                PROVIDER_DEFAULTS.azure.apiVersion
              : '',
          credentialSource: 'chat-byok',
        };
      }
    }
  }

  // Legacy unconstrained chain (paths 4–6). Keys go through envKeyFor so
  // a whitespace-only variable counts as "not configured" here exactly as
  // it does everywhere else, rather than being sent as a credential.
  const anthropicKey = envKeyFor('anthropic');
  if (anthropicKey) {
    return {
      kind: 'anthropic',
      apiKey: anthropicKey,
      model: envOverrideModel || PROVIDER_DEFAULTS.anthropic.model,
      baseUrl: process.env.ANTHROPIC_BASE_URL || PROVIDER_DEFAULTS.anthropic.baseUrl,
      apiVersion: '',
      credentialSource: 'env',
    };
  }
  const openAiKey = envKeyFor('openai');
  if (openAiKey) {
    return {
      kind: 'openai',
      apiKey: openAiKey,
      model: envOverrideModel || PROVIDER_DEFAULTS.openai.model,
      baseUrl: process.env.OPENAI_BASE_URL || PROVIDER_DEFAULTS.openai.baseUrl,
      apiVersion: '',
      credentialSource: 'env',
    };
  }
  // Fallback: reuse the OpenAI key the user already configured for media
  // generation. Most Local-CLI Claude users don't have an
  // ANTHROPIC_API_KEY in the daemon's environment (Claude Code logs in
  // via OAuth) but they often have an OpenAI key in Settings → Media
  // providers. Without this fallback the LLM extraction stage stays dark
  // for them and only the regex-based heuristic ever runs.
  return openAiProviderFromMediaConfig(
    projectRoot,
    envOverrideModel,
    '[memory-llm] failed to read media-config for fallback',
  );
}

// Defaults for a provider id, or null when we have none. Own-property
// lookup so ids such as "constructor" (the BYOK snapshot arrives over
// HTTP) never resolve to something inherited from Object.prototype.
function providerDefaultsFor(provider) {
  return typeof provider === 'string' &&
    Object.prototype.hasOwnProperty.call(PROVIDER_DEFAULTS, provider)
    ? PROVIDER_DEFAULTS[provider]
    : null;
}

// Build an openai provider from the media-config OpenAI credential, or
// return null when there is no project root, no usable key, or the lookup
// throws (logged under `warnLabel`).
async function openAiProviderFromMediaConfig(projectRoot, modelOverride, warnLabel) {
  if (!projectRoot) return null;
  try {
    const cred = await resolveProviderConfig(projectRoot, 'openai');
    if (cred && typeof cred.apiKey === 'string' && cred.apiKey.trim()) {
      return {
        kind: 'openai',
        apiKey: cred.apiKey.trim(),
        model: modelOverride || cred.model || PROVIDER_DEFAULTS.openai.model,
        baseUrl:
          (cred.baseUrl && String(cred.baseUrl).trim()) ||
          PROVIDER_DEFAULTS.openai.baseUrl,
        apiVersion: '',
        credentialSource: 'media-config',
      };
    }
  } catch (err) {
    console.warn(warnLabel, err?.message ?? err);
  }
  return null;
}

/**
 * Render the user-role message for the extraction request: the existing
 * memory, the user's message and (when non-blank) the assistant's reply,
 * followed by a reminder to answer with JSON only.
 *
 * Both messages are capped at 4000 characters. Values are coerced to
 * strings before any string method is used, so a non-string
 * `assistantMessage` or `currentMemory` no longer throws.
 *
 * @param {{ userMessage?: unknown, assistantMessage?: unknown, currentMemory?: unknown }} input
 * @returns {string} newline-joined payload.
 */
function renderUserPayload({ userMessage, assistantMessage, currentMemory }) {
  const memoryText = currentMemory ? String(currentMemory) : '';
  const replyText = assistantMessage ? String(assistantMessage) : '';
  const parts = [];
  parts.push('## Existing memory');
  parts.push(memoryText.trim().length > 0 ? memoryText : '(empty)');
  parts.push('');
  parts.push('## User message');
  parts.push(String(userMessage || '').slice(0, 4000));
  if (replyText.trim().length > 0) {
    parts.push('');
    parts.push('## Assistant reply');
    parts.push(replyText.slice(0, 4000));
  }
  parts.push('');
  parts.push(
    'Return ONLY the JSON object described in the system prompt — no prose, no fences.',
  );
  return parts.join('\n');
}

// 30s ceiling. The chat run has long since finished and the user is
// staring at the settings panel waiting for a green/red pill — leaving
// a half-dead fetch in flight for two minutes (the default undici
// connect timeout) makes the failure feel even worse than it is.
const FETCH_TIMEOUT_MS = 30_000;

// Append `/v1` to a base URL only when the URL doesn't already
// carry an explicit `/vN` segment. Mirrors the same conditional path
// build the chat proxy and connection-test routes use, so a custom
// OpenAI-compatible endpoint whose saved baseUrl already contains
// `/v1` (local servers, proxies that re-host OpenAI under a fixed
// prefix) does not become `/v1/v1/chat/completions` and silently fail
// every memory extraction even though chat through the same provider
// works.
// Anthropic's `/v1/messages` and OpenAI's `/v1/chat/completions`
// both flow through this; Azure and Gemini build their URLs
// differently and don't need it.
/**
 * @param {string} baseUrl - absolute provider base URL (`new URL` throws
 *   on anything that isn't one).
 * @param {string} suffix - endpoint path starting with `/`.
 * @returns {string} the full endpoint URL.
 */
function appendVersionedApiPath(baseUrl, suffix) {
  const url = new URL(baseUrl);
  // Drop trailing slashes so `/v1/` and `/v1` are treated alike.
  const pathname = url.pathname.replace(/\/+$/, '');
  url.pathname = /\/v\d+(\/|$)/.test(pathname)
    ? `${pathname}${suffix}`
    : `${pathname}/v1${suffix}`;
  return url.toString();
}

// Build a standard AbortSignal that fires after `ms` so a stalled
// provider call surfaces as a 'failed' record instead of hanging the
// attempt indefinitely.
/**
 * @param {number} ms - timeout in milliseconds.
 * @returns {AbortSignal}
 */
function withTimeout(ms) {
  if (typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function') {
    return AbortSignal.timeout(ms);
  }
  // Fallback for runtimes without AbortSignal.timeout.
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(new Error(`timeout ${ms}ms`)), ms);
  // The timer exists only to cancel an in-flight request; once that
  // request has settled it must not keep the process alive for the rest
  // of the window. Same treatment as the other timers in this file.
  timer.unref?.();
  return controller.signal;
}

// undici raises a generic `TypeError: fetch failed` on every network
// error and tucks the real cause under `err.cause` (a Node `Error` or
// `AggregateError` with `.code` / `.errors`). The settings UI just
// shows `error.message`, so without unwrapping the cause the user
// sees "fetch failed" with no clue whether DNS broke, the firewall
// reset the connection, or the request timed out. Surface the most
// useful piece — the OS error code if present, otherwise the cause's
// message — appended in parentheses. We deliberately don't include
// both: `cause.message` typically already embeds the code (e.g.
// "read ECONNRESET"), and showing "ECONNRESET · read ECONNRESET"
// would just double the noise.
/**
 * @param {unknown} err - whatever `fetch` rejected with.
 * @returns {string} `message` or `message (detail)`.
 */
function describeFetchError(err) {
  const head = err?.message || String(err);
  const cause = err?.cause;
  if (!cause) return head;
  const codeRaw = cause.code ? String(cause.code) : '';
  const msgRaw = cause.message && cause.message !== head ? String(cause.message) : '';
  // Prefer the OS error code on its own when the cause's message just
  // wraps it (the common case for ECONNRESET / ENOTFOUND / ETIMEDOUT).
  // Fall back to the message when there's no code, or when the message
  // adds detail beyond the code (e.g. "Hostname/IP does not match
  // certificate's altnames").
  let detail = '';
  if (codeRaw && msgRaw) {
    const m = msgRaw.toLowerCase();
    detail = m.includes(codeRaw.toLowerCase()) ? codeRaw : `${codeRaw}: ${msgRaw}`;
  } else {
    detail = codeRaw || msgRaw;
  }
  // AggregateError: the cause itself carried nothing usable, so take the
  // first inner error that has a code or a message. Most of these are
  // several identical DNS errors, so one is enough.
  if (!detail && Array.isArray(cause.errors)) {
    for (const inner of cause.errors) {
      const innerCode = inner?.code ? String(inner.code) : '';
      const innerMsg = inner?.message ? String(inner.message) : '';
      const candidate = innerCode || innerMsg;
      if (candidate) {
        detail = candidate;
        break;
      }
    }
  }
  return detail ? `${head} (${detail})` : head;
}

/**
 * Call the Anthropic Messages endpoint and return the first text block.
 *
 * @param {{ baseUrl: string, apiKey: string, model: string }} provider
 * @param {string} system - system prompt.
 * @param {string} user - user payload.
 * @returns {Promise<string>} model text, or '' when the reply has no text block.
 * @throws {Error} on network failure (message from describeFetchError) or
 *   a non-2xx status (`anthropic <status>: <body>`).
 */
async function callAnthropic(provider, system, user) {
  let resp;
  try {
    resp = await fetch(appendVersionedApiPath(provider.baseUrl, '/messages'), {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'x-api-key': provider.apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model: provider.model,
        max_tokens: 1024,
        system,
        messages: [{ role: 'user', content: user }],
      }),
      signal: withTimeout(FETCH_TIMEOUT_MS),
    });
  } catch (err) {
    throw new Error(describeFetchError(err));
  }
  if (!resp.ok) {
    throw new Error(`anthropic ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  const block = (json?.content || []).find((b) => b?.type === 'text');
  return block?.text ?? '';
}

/**
 * Call an OpenAI-compatible chat-completions endpoint in JSON mode and
 * return the first choice's message content.
 *
 * @param {{ baseUrl: string, apiKey: string, model: string }} provider
 * @param {string} system - system prompt.
 * @param {string} user - user payload.
 * @returns {Promise<string>} model text, or '' when the reply has none.
 * @throws {Error} on network failure (message from describeFetchError) or
 *   a non-2xx status (`openai <status>: <body>`).
 */
async function callOpenAI(provider, system, user) {
  let resp;
  try {
    resp = await fetch(
      appendVersionedApiPath(provider.baseUrl, '/chat/completions'),
      {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          authorization: `Bearer ${provider.apiKey}`,
        },
        body: JSON.stringify({
          model: provider.model,
          response_format: { type: 'json_object' },
          messages: [
            { role: 'system', content: system },
            { role: 'user', content: user },
          ],
        }),
        signal: withTimeout(FETCH_TIMEOUT_MS),
      },
    );
  } catch (err) {
    throw new Error(describeFetchError(err));
  }
  if (!resp.ok) {
    throw new Error(`openai ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  return json?.choices?.[0]?.message?.content ?? '';
}

// Azure OpenAI speaks the same chat-completions JSON as OpenAI, but on
// a per-deployment URL and with `api-key:` instead of `Authorization:`.
// `provider.model` here is the Azure deployment name (the user typed it
// into the model field — that's what the chat picker calls "Deployment
// (Model)" too), not the underlying model family.
/**
 * Call an Azure OpenAI deployment's chat-completions endpoint in JSON
 * mode and return the first choice's message content.
 *
 * @param {{ baseUrl: string, apiKey: string, model: string, apiVersion?: string }} provider
 *   `model` is used as the deployment name in the URL; a blank
 *   `apiVersion` falls back to the azure default.
 * @param {string} system - system prompt.
 * @param {string} user - user payload.
 * @returns {Promise<string>} model text, or '' when the reply has none.
 * @throws {Error} on network failure (message from describeFetchError) or
 *   a non-2xx status (`azure <status>: <body>`).
 */
async function callAzure(provider, system, user) {
  // Trailing slashes on the resource URL would otherwise double up.
  const resourceUrl = String(provider.baseUrl || '').replace(/\/+$/, '');
  const deploymentSegment = encodeURIComponent(provider.model);
  const versionParam = encodeURIComponent(
    provider.apiVersion || PROVIDER_DEFAULTS.azure.apiVersion,
  );
  const endpoint = [
    resourceUrl,
    '/openai/deployments/',
    deploymentSegment,
    '/chat/completions?api-version=',
    versionParam,
  ].join('');

  let response;
  try {
    const payload = {
      response_format: { type: 'json_object' },
      messages: [
        { role: 'system', content: system },
        { role: 'user', content: user },
      ],
    };
    response = await fetch(endpoint, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'api-key': provider.apiKey,
      },
      body: JSON.stringify(payload),
      signal: withTimeout(FETCH_TIMEOUT_MS),
    });
  } catch (err) {
    throw new Error(describeFetchError(err));
  }

  if (response.ok) {
    const data = await response.json();
    return data?.choices?.[0]?.message?.content ?? '';
  }
  const errorBody = await response.text().catch(() => '');
  throw new Error(`azure ${response.status}: ${errorBody}`);
}

// Google Gemini's REST surface uses a different request shape:
// system instructions go in `systemInstruction`, the conversation is
// `contents[]` with `role` + `parts`, and the API key is a query
// parameter rather than a header. `responseMimeType: application/json`
// gets us the strict JSON output the parser expects.
async function callGoogle(provider, system, user) { const base = String(provider.baseUrl || '').replace(/\/+$/, ''); const model = encodeURIComponent(provider.model); const url = `${base}/v1beta/models/${model}:generateContent?key=${encodeURIComponent(provider.apiKey)}`; let resp; try { resp = await fetch(url, { method: 'POST', headers: { 'content-type': 'application/json' }, body: JSON.stringify({ systemInstruction: { role: 'system', parts: [{ text: system }] }, contents: [{ role: 'user', parts: [{ text: user }] }], generationConfig: { responseMimeType: 'application/json' }, }), signal: withTimeout(FETCH_TIMEOUT_MS), }); } catch (err) { throw new Error(describeFetchError(err)); } if (!resp.ok) { throw new Error(`google ${resp.status}: ${await resp.text().catch(() => '')}`); } const json = await resp.json(); const parts = json?.candidates?.[0]?.content?.parts; if (Array.isArray(parts)) { return parts.map((p) => (p && typeof p.text === 'string' ? p.text : '')).join(''); } return ''; } const LOCAL_CLI_TIMEOUT_MS = 60_000; function extractJsonEventText(kind, raw, agentName) { const events = []; const handler = createJsonEventStreamHandler(kind, (event) => events.push(event)); handler.feed(raw); handler.flush(); const errorEvent = events.find((event) => event?.type === 'error'); if (errorEvent) { const message = typeof errorEvent.message === 'string' && errorEvent.message.trim() ? errorEvent.message.trim() : 'unknown error'; throw new Error(`${agentName} CLI error: ${message}`); } return events .filter((event) => event?.type === 'text_delta' && typeof event.delta === 'string') .map((event) => event.delta) .join('') .trim(); } async function callLocalCli(provider, system, user, options) { if (typeof options?.localCliRunner === 'function') { return options.localCliRunner({ agentId: provider.agentId, model: provider.model, system, user, projectRoot: options?.projectRoot ?? null, dataDir: options?.dataDir ?? 
null, }); } const def = getAgentDef(provider.agentId); if (!def) { throw new Error(`Local CLI agent "${provider.agentId}" is not installed`); } let configuredAgentEnv = {}; try { const appConfig = options?.dataDir ? await readAppConfig(options.dataDir) : {}; configuredAgentEnv = agentCliEnvForAgent(appConfig.agentCliEnv, def.id); } catch { configuredAgentEnv = {}; } const launch = resolveAgentLaunch(def, configuredAgentEnv); if (!launch?.launchPath) { throw new Error(`${def.name} CLI is not installed or not on PATH`); } const cwd = typeof options?.projectRoot === 'string' && options.projectRoot.trim() ? options.projectRoot : process.cwd(); const prompt = [ system, '', 'You are running as a background memory extractor. Do not use tools. Return strict JSON only.', '', user, ].join('\n'); let args; let stdinText = prompt; let parseStdout = (raw) => raw.trim(); if (provider.agentId === 'claude') { args = ['-p', '--input-format', 'text', '--output-format', 'text']; if (provider.model && provider.model !== 'default') { args.push('--model', provider.model); } } else if (provider.agentId === 'codex') { args = def.buildArgs( '', [], [], { model: provider.model }, { cwd }, ); parseStdout = (raw) => extractJsonEventText(def.eventParser || def.id, raw, def.name); } else if (provider.agentId === 'opencode') { // Deliver the prompt on stdin, matching the chat-run path // (def.promptViaStdin). `opencode run`'s `-f, --file` is a yargs array // option that greedily consumes every trailing non-flag token, so // `--file ""` made OpenCode treat the message // text as a second attachment and exit with "File not found". Bare // `opencode run --format json` reads the message from stdin instead. 
args = def.buildArgs( '', [], [], { model: provider.model }, { cwd }, ); parseStdout = (raw) => extractJsonEventText(def.eventParser || def.id, raw, def.name); } else { throw new Error(`Local CLI memory extraction is not supported for ${provider.agentId}`); } const env = applyAgentLaunchEnv( spawnEnvForAgent( def.id, { ...process.env, ...(def.env || {}) }, configuredAgentEnv, undefined, { resolvedBin: launch.selectedPath }, ), launch, ); const invocation = createCommandInvocation({ command: launch.launchPath, args, env, }); return await new Promise((resolve, reject) => { let stdout = ''; let stderr = ''; let settled = false; let closed = false; const child = spawn(invocation.command, invocation.args, { env, stdio: ['pipe', 'pipe', 'pipe'], cwd, shell: false, windowsVerbatimArguments: invocation.windowsVerbatimArguments, }); const finish = (err, text) => { if (settled) return; settled = true; clearTimeout(timeout); if (err) reject(err); else resolve(text); }; const timeout = setTimeout(() => { child.kill('SIGTERM'); setTimeout(() => { if (!closed) child.kill('SIGKILL'); }, 2_000).unref?.(); finish(new Error(`${def.name} CLI timed out after ${Math.round(LOCAL_CLI_TIMEOUT_MS / 1000)}s`)); }, LOCAL_CLI_TIMEOUT_MS); timeout.unref?.(); child.stdout.setEncoding('utf8'); child.stderr.setEncoding('utf8'); child.stdout.on('data', (chunk) => { stdout = `${stdout}${chunk}`.slice(-64_000); }); child.stderr.on('data', (chunk) => { stderr = `${stderr}${chunk}`.slice(-8_000); }); child.once('error', (err) => finish(err)); child.once('close', (code, signal) => { closed = true; if (code === 0) { let text = ''; try { text = parseStdout(stdout); } catch (err) { finish(err); return; } if (text) { finish(null, text); return; } } const detail = (stderr.trim() || stdout.trim() || 'no output').slice(0, 1000); const status = signal ? 
`signal ${signal}` : `exit ${code}`; finish(new Error(`${def.name} CLI ${status}: ${detail}`)); }); child.stdin.on('error', (err) => { if (err.code !== 'EPIPE') finish(err); }); child.stdin.end(stdinText); }); } // Tolerant JSON parse — the model occasionally wraps output in ```json // fences even when told not to. Strip those defensively. function parseEntries(rawText) { if (typeof rawText !== 'string') return []; let text = rawText.trim(); if (text.startsWith('```')) { text = text.replace(/^```(?:json)?\s*/i, '').replace(/```\s*$/i, '').trim(); } let parsed; try { parsed = JSON.parse(text); } catch { // Last-ditch: pull the first {...} block. const match = /\{[\s\S]*\}/.exec(text); if (!match) return []; try { parsed = JSON.parse(match[0]); } catch { return []; } } const list = Array.isArray(parsed?.entries) ? parsed.entries : []; const validTypes = new Set(['user', 'feedback', 'project', 'reference']); return list .filter( (e) => e && typeof e === 'object' && validTypes.has(e.type) && typeof e.name === 'string' && e.name.trim().length > 0 && typeof e.body === 'string' && e.body.trim().length > 0, ) .slice(0, 6); // hard cap so a confused model can't flood the store } function alreadyKnown(existing, candidate) { const candKey = `${candidate.type}::${candidate.name.toLowerCase().trim()}`; for (const e of existing) { if (`${e.type}::${e.name.toLowerCase().trim()}` === candKey) return true; } return false; } function toMemoryDraft(candidate) { return { type: candidate.type, name: String(candidate.name).trim().slice(0, 80), description: String(candidate.description || '').trim().slice(0, 200), body: String(candidate.body).trim(), }; } async function collectProposedEntries(dataDir, input, options) { const projectRoot = options?.projectRoot ?? null; const chatAgentId = options?.chatAgentId ?? null; const chatModel = options?.chatModel ?? null; const extractionKind = options?.kind ?? 
'llm'; const systemPrompt = typeof options?.systemPrompt === 'string' && options.systemPrompt.trim() ? options.systemPrompt.trim() : SYSTEM_PROMPT; // BYOK chat-config snapshot — only present for API-mode calls // forwarded through `/api/memory/extract`. The daemon doesn't // persist BYOK creds, so this per-call signal is the *only* way // pickProvider() can run "Same as chat" extraction against the // user's actual chat provider. const chatProvider = options?.chatProvider ?? null; const userMessage = String(input?.userMessage || '').trim(); const cfg = await readMemoryConfig(dataDir); if (!cfg.enabled) { recordSkip({ userMessage, reason: 'memory-disabled', kind: extractionKind }); return { status: 'skipped', attemptId: null, proposed: [], existingEntries: [] }; } if (extractionKind !== 'connector' && !cfg.chatExtractionEnabled) { return { status: 'skipped', attemptId: null, proposed: [], existingEntries: [] }; } if (userMessage.length === 0) { recordSkip({ userMessage, reason: 'empty-message', kind: extractionKind }); return { status: 'skipped', attemptId: null, proposed: [], existingEntries: [] }; } const provider = await pickProvider( projectRoot, dataDir, chatAgentId, chatProvider, chatModel, ); if (!provider) { recordSkip({ userMessage, reason: 'no-provider', kind: extractionKind }); return { status: 'skipped', attemptId: null, proposed: [], existingEntries: [] }; } // Past this point we have a provider committed and an actual model // call about to happen — switch from one-shot skip records to a // running record we can update through phase transitions. 
const attemptId = startExtraction({ userMessage, kind: extractionKind }); markProvider(attemptId, { kind: provider.kind, model: provider.model, credentialSource: provider.credentialSource, }); let currentMemory = ''; let existingEntries = []; try { [currentMemory, existingEntries] = await Promise.all([ composeMemoryBody(dataDir), listMemoryEntries(dataDir), ]); } catch { // Fresh store — proceed with empty context. } const userPayload = renderUserPayload({ userMessage, assistantMessage: input?.assistantMessage, currentMemory, }); let raw = ''; try { if (provider.transport === 'chat-cli') { raw = await callLocalCli(provider, systemPrompt, userPayload, { dataDir, projectRoot, localCliRunner: options?.localCliRunner, }); } else if (provider.kind === 'anthropic') { raw = await callAnthropic(provider, systemPrompt, userPayload); } else if (provider.kind === 'azure') { raw = await callAzure(provider, systemPrompt, userPayload); } else if (provider.kind === 'google') { raw = await callGoogle(provider, systemPrompt, userPayload); } else { // openai or ollama — both speak the OpenAI chat-completions // wire shape, so callOpenAI handles them with just a different // base URL. raw = await callOpenAI(provider, systemPrompt, userPayload); } } catch (err) { // err.message is already pre-formatted by describeFetchError() when // the call layer caught a network error. For HTTP-level failures // (`anthropic 401: …`) the message is already user-facing too. console.warn(`[memory-llm] ${provider.kind} call failed`, err?.message ?? 
err); markFailed(attemptId, err); return { status: 'failed', attemptId, proposed: [], existingEntries }; } let proposed; try { proposed = parseEntries(raw); if (typeof options?.candidateFilter === 'function') { proposed = proposed.filter((candidate) => { try { return options.candidateFilter(candidate); } catch { return false; } }); } } catch (err) { markFailed(attemptId, err); return { status: 'failed', attemptId, proposed: [], existingEntries }; } markProposed(attemptId, proposed.length); return { status: 'ok', attemptId, proposed, existingEntries }; } export async function suggestWithLLM(dataDir, input, options) { const result = await collectProposedEntries(dataDir, input, options); if (result.status !== 'ok') return []; const suggestions = result.proposed .filter((cand) => !alreadyKnown(result.existingEntries, cand)) .map(toMemoryDraft); markSuccess(result.attemptId, { writtenCount: 0, writtenIds: [], }); return suggestions; } export async function extractWithLLM(dataDir, input, options) { const changeSource = options?.source ?? 'llm'; const result = await collectProposedEntries(dataDir, input, options); if (result.status !== 'ok') return []; const { attemptId, proposed, existingEntries } = result; if (proposed.length === 0) { markSuccess(attemptId, { writtenCount: 0, writtenIds: [] }); return []; } const written = []; for (const cand of proposed) { if (alreadyKnown(existingEntries, cand)) continue; try { const entry = await upsertMemoryEntry( dataDir, toMemoryDraft(cand), // Suppress per-entry events; we batch a single 'extract' below // so the toast says "Memory updated (3 · LLM)" once. { silent: true, source: changeSource }, ); written.push({ id: entry.id, name: entry.name, description: entry.description, type: entry.type, updatedAt: entry.updatedAt, }); } catch (err) { console.warn('[memory-llm] write failed', err?.message ?? 
err); } } if (written.length > 0) { memoryEvents.emit('change', { kind: 'extract', count: written.length, source: changeSource, at: Date.now(), }); } markSuccess(attemptId, { writtenCount: written.length, writtenIds: written.map((e) => e.id), }); return written; }