mirror of
https://github.com/nexu-io/open-design.git
synced 2026-05-31 19:04:39 +07:00
Some checks failed
visual-baseline / Capture visual baselines (push) Waiting to run
ci / Detect CI change scopes (push) Successful in 0s
landing-page-ci / Validate landing page (push) Failing after 1s
landing-page-staging / Deploy landing page to staging (push) Has been skipped
nix-check / build (push) Failing after 2s
ci / Validate Nix flake (push) Has been skipped
ci / Preflight (push) Failing after 2s
ci / Workspace unit tests (push) Failing after 2s
ci / Daemon workspace tests (push) Failing after 2s
ci / Web workspace tests (push) Failing after 2s
ci / Browser tests (push) Failing after 2s
ci / Build workspaces (push) Failing after 2s
ci / Validate workspace (push) Failing after 1s
ci / Runtime trace (push) Has been skipped
1185 lines
43 KiB
TypeScript
1185 lines
43 KiB
TypeScript
// @ts-nocheck
|
||
// LLM-driven memory extractor.
|
||
//
|
||
// The heuristic regex pack in `memory.ts` only catches explicit markers
|
||
// ("remember:", "记住", "我喜欢"…). For everything else — implicit
|
||
// preferences, role, ongoing-work context — we ask a small fast model
|
||
// to look at the just-finished turn and the existing memory and return
|
||
// a JSON list of facts to add.
|
||
//
|
||
// This module is fire-and-forget: the chat run finishes and triggers
|
||
// extraction in the background. Output lands in the same MD store so
|
||
// the next turn's prompt picks it up automatically.
|
||
//
|
||
// Provider selection (in order):
|
||
// 0. memory `.config.json` extraction override → user-supplied
|
||
// provider/model/baseUrl/apiKey/apiVersion from the Memory model
|
||
//    picker. The override may pick any provider the extractor has
//    defaults for — anthropic, openai, azure (openai-compatible at a
//    per-resource URL), google gemini, ollama, or senseaudio (the last
//    two are OpenAI-compatible). This is the only path that lets a Local-CLI user
|
||
// (no env-var key in the daemon's environment) point memory
|
||
// extraction at, say, their personal Anthropic key with a
|
||
// specific Haiku build instead of falling all the way through to
|
||
// gpt-4o-mini. When the override carries the provider but no
|
||
// apiKey we fall back to the corresponding env var (or the media-
|
||
// config OpenAI key for openai/azure overrides) so a "I want to
|
||
// switch to OpenAI but reuse my existing key" change costs zero
|
||
// typing.
|
||
// 1. current Local CLI, when the caller passed `chatAgentId` and the
|
||
// agent supports headless one-shot output (Claude Code today).
|
||
// 2. matching provider env var for the current chat protocol.
|
||
// 3. BYOK chat-config snapshot for API-mode chats.
|
||
// 4. ANTHROPIC_API_KEY env → Claude Haiku 4.5 (legacy fallback)
|
||
// 5. OPENAI_API_KEY env → gpt-4o-mini
|
||
// 6. media-config OpenAI BYOK → gpt-4o-mini
|
||
// (the key the user already typed into Settings → Media providers;
|
||
// reuses an existing credential so Local-CLI users don't have to
|
||
// paste it twice just to get LLM-side memory extraction)
|
||
// 7. nothing → record a 'skipped: no-provider' attempt
|
||
// so the UI can surface "configure a key to enable LLM memory"
|
||
// instead of staying silent
|
||
//
|
||
// Every attempt — whether it actually called the model or short-circuited
|
||
// — produces a record in `memory-extractions.ts` so the settings panel
|
||
// can show running / skipped / success / failed states in real time.
|
||
|
||
import {
|
||
composeMemoryBody,
|
||
listMemoryEntries,
|
||
readMemoryConfig,
|
||
upsertMemoryEntry,
|
||
memoryEvents,
|
||
} from './memory.js';
|
||
import {
|
||
startExtraction,
|
||
recordSkip,
|
||
markProvider,
|
||
markSkipped,
|
||
markProposed,
|
||
markSuccess,
|
||
markFailed,
|
||
} from './memory-extractions.js';
|
||
import { resolveProviderConfig } from './media-config.js';
|
||
import { spawn } from 'node:child_process';
|
||
import { createCommandInvocation } from '@open-design/platform';
|
||
import {
|
||
applyAgentLaunchEnv,
|
||
getAgentDef,
|
||
resolveAgentLaunch,
|
||
spawnEnvForAgent,
|
||
} from './agents.js';
|
||
import { agentCliEnvForAgent, readAppConfig } from './app-config.js';
|
||
import { createJsonEventStreamHandler } from './json-event-stream.js';
|
||
|
||
// System prompt sent with every extraction request. It tells the model
// what counts as a durable fact, what must be ignored, and pins the
// exact JSON envelope (`{"entries": [...]}`) the caller expects back.
const SYSTEM_PROMPT = `You are a memory extractor for a personal AI design assistant.

Given the user's most recent message (and optionally the assistant's reply), plus a snapshot of the existing memory store, decide whether ANYTHING in this turn is worth remembering across future conversations.

A fact is worth remembering when ALL of these are true:
- It's about the user, their preferences, their tools, their ongoing work, OR a stable reference (a Linear board id, a Slack channel, a teammate name).
- It will plausibly still be true in a week.
- It would change how an assistant responds in a later, unrelated chat.

A fact is NOT worth remembering when ANY of these is true:
- It's a transient state (current task, what file they're editing right now).
- It's already captured in the existing memory.
- It's just the user asking a question or describing a one-off bug.
- It's something the assistant said about itself.
- It's a code snippet, an output, or a paste.

Output STRICT JSON in this exact shape — nothing else, no prose, no markdown fences:
{
"entries": [
{ "type": "user|feedback|project|reference", "name": "short title (≤ 60 chars)", "description": "one-line summary (≤ 140 chars)", "body": "the actual remembered fact, 1-3 sentences" }
]
}

If there's nothing worth remembering, return: {"entries": []}

Type rules:
- user: who they are, role, expertise, long-term goals
- feedback: corrections / preferences about how to work ("don't add comments unless asked")
- project: ongoing initiatives, deadlines, why-decisions; usually time-bounded
- reference: pointers to external systems (Linear projects, Slack channels, dashboards)`;
|
||
|
||
// Fallback model / endpoint per provider. The explicit-override path
// and the automatic selection path both read this one table, so "the
// user chose a provider but typed no model" resolves to the same answer
// everywhere (e.g. anthropic → claude-haiku-4-5).
//
// Azure intentionally carries an empty `baseUrl`: every Azure resource
// lives on its own `https://<resource>.openai.azure.com` host, so there
// is nothing sensible to default to and the user must supply it. The
// key is still present so a lookup yields '' rather than `undefined`.
const PROVIDER_DEFAULTS = {
  anthropic: { model: 'claude-haiku-4-5', baseUrl: 'https://api.anthropic.com' },
  openai: { model: 'gpt-4o-mini', baseUrl: 'https://api.openai.com' },
  azure: { model: 'gpt-4o-mini', baseUrl: '', apiVersion: '2024-10-21' },
  google: {
    model: 'gemini-2.0-flash',
    baseUrl: 'https://generativelanguage.googleapis.com',
  },
  // Ollama Cloud is OpenAI-compatible (chat-completions), so it is
  // served through the OpenAI call path with this base URL and the
  // user's Ollama Cloud key. The default is a small open-weight model
  // so auto-pick is deterministic for users who never touched the
  // picker; anything else can be chosen from its `Custom...` list.
  ollama: { model: 'gemma3:4b', baseUrl: 'https://ollama.com' },
  // SenseAudio is likewise OpenAI-compatible (POST
  // /v1/chat/completions, Bearer auth). The default is the small/fast
  // variant to keep auto-pick cheap; senseaudio-s2 or any gateway model
  // can be selected in the picker.
  senseaudio: { model: 'senseaudio-s2-flash', baseUrl: 'https://api.senseaudio.cn' },
};
|
||
|
||
// Look up the API key the daemon's environment holds for `provider`.
// Used when a provider has been chosen but no key was supplied with it.
//
// Each provider maps to one or two environment variables, tried in
// order; the first one that is non-empty after trimming wins:
//   anthropic  → ANTHROPIC_API_KEY
//   openai     → OPENAI_API_KEY
//   azure      → AZURE_OPENAI_API_KEY, then AZURE_API_KEY
//   google     → GOOGLE_API_KEY, then GEMINI_API_KEY
//   ollama     → OLLAMA_API_KEY
//   senseaudio → OD_SENSEAUDIO_API_KEY, then SENSEAUDIO_API_KEY
//
// Returns the trimmed key, or '' when nothing is set or the provider is
// not one of the above.
function envKeyFor(provider) {
  // First variable whose trimmed value is non-empty, else ''.
  const firstNonBlank = (...variableNames) => {
    for (const variableName of variableNames) {
      const value = process.env[variableName]?.trim();
      if (value) return value;
    }
    return '';
  };

  switch (provider) {
    case 'anthropic':
      return firstNonBlank('ANTHROPIC_API_KEY');
    case 'openai':
      return firstNonBlank('OPENAI_API_KEY');
    case 'azure':
      return firstNonBlank('AZURE_OPENAI_API_KEY', 'AZURE_API_KEY');
    case 'google':
      return firstNonBlank('GOOGLE_API_KEY', 'GEMINI_API_KEY');
    case 'ollama':
      return firstNonBlank('OLLAMA_API_KEY');
    case 'senseaudio':
      return firstNonBlank('OD_SENSEAUDIO_API_KEY', 'SENSEAUDIO_API_KEY');
    default:
      return '';
  }
}
|
||
|
||
// Translate a chat agent id into the API protocol family it speaks.
//
// This is what lets "follow chat" mean something for memory extraction
// in CLI mode: a Claude Code user (claude → anthropic) should not have
// extraction silently run on whatever OpenAI key sits in media-config,
// which shows up as confusing "openai/gpt-4o-mini" attempts.
//
// The id is trimmed and lower-cased before matching. Returns
// 'anthropic', 'google' or 'openai'; returns null for a missing,
// non-string or unrecognised id so the caller can fall back to its
// unconstrained provider chain.
function chatProtocolFromAgentId(agentId) {
  if (typeof agentId !== 'string' || !agentId) return null;

  switch (agentId.trim().toLowerCase()) {
    case 'claude':
      return 'anthropic';
    case 'gemini':
      return 'google';
    // All of these speak the OpenAI chat-completions wire format.
    case 'codex':
    case 'opencode':
    case 'qwen':
    case 'deepseek':
    case 'kimi':
    case 'copilot':
    case 'pi':
    case 'kiro':
    case 'kilo':
    case 'vibe':
    case 'devin':
    case 'hermes':
    case 'cursor-agent':
    case 'qoder':
      return 'openai';
    default:
      return null;
  }
}
|
||
|
||
// Whether the given agent's own CLI may be used for memory extraction
// on the given provider protocol.
//
// The allowlist is deliberately explicit: an agent only belongs here
// when it has a headless one-shot mode that accepts stdin and a parser
// that can reduce its output back to assistant text. Matching is exact
// (callers pass an already-normalised agent id).
function canUseLocalCliForMemory(agentId, provider) {
  switch (agentId) {
    case 'claude':
      return provider === 'anthropic';
    case 'codex':
    case 'opencode':
      return provider === 'openai';
    default:
      return false;
  }
}
|
||
|
||
// Build the provider descriptor for running extraction through the
// user's Local CLI, or return null when that agent/protocol pair is not
// on the `canUseLocalCliForMemory` allowlist.
//
// The descriptor carries no API key: `transport: 'chat-cli'` and
// `baseUrl: 'local-cli'` mark it as a CLI run. `model` is the trimmed
// requested model, or 'default' when none (or a non-string) was given.
function localCliProviderFor(agentId, provider, model) {
  const supported = canUseLocalCliForMemory(agentId, provider);
  if (!supported) return null;

  const requestedModel = typeof model === 'string' ? model.trim() : '';
  return {
    kind: provider,
    model: requestedModel || 'default',
    baseUrl: 'local-cli',
    apiVersion: '',
    credentialSource: 'chat-cli',
    transport: 'chat-cli',
    agentId,
  };
}
|
||
|
||
// Decide which model endpoint the memory extractor should call.
//
// Resolution order (first match wins):
//   0. Memory-config override — `extraction` from the memory config
//      under `dataDir`. Key precedence: the override's own apiKey →
//      the provider's env var → (openai/azure only) the media-config
//      OpenAI key → the user's Local CLI when the agent/provider pair
//      supports it. An override never falls through to the steps
//      below: it is an explicit user choice, so a miss returns null.
//   1. Current Local CLI — when `chatAgentId` maps to a known protocol
//      and the agent has a usable one-shot mode, run extraction through
//      that CLI so "Same as chat" needs no extra key.
//   2. Chat-protocol-constrained env var — only the env key matching
//      the chat protocol counts (plus the media-config OpenAI key for
//      openai-protocol chats). If neither exists we return null rather
//      than quietly running on a different vendor's key.
//   3. BYOK chat-config snapshot (`chatProvider`) — forwarded per call
//      for API-mode chats. Uses the caller's model when given,
//      otherwise the provider's fast default.
//   4. Legacy fallback: ANTHROPIC_API_KEY env → Claude Haiku default.
//   5. Legacy fallback: OPENAI_API_KEY env → gpt-4o-mini default.
//   6. Legacy fallback: media-config OpenAI key.
//
// `OD_MEMORY_MODEL` (trimmed) overrides the model name in steps 1-6 but
// never the step-0 override.
//
// Parameters:
//   projectRoot  — needed for the media-config lookups; may be falsy.
//   dataDir      — where the memory config lives; may be falsy.
//   chatAgentId  — optional Local CLI agent id of the current chat.
//   chatProvider — optional { provider, apiKey, baseUrl, model,
//                  apiVersion } BYOK snapshot.
//   chatModel    — optional model the chat is using (Local CLI path).
//
// Returns a provider descriptor ({ kind, apiKey, model, baseUrl,
// credentialSource, ... } or the Local CLI descriptor), or null when no
// usable provider is configured.
async function pickProvider(projectRoot, dataDir, chatAgentId, chatProvider, chatModel) {
  const chatProtocol = chatProtocolFromAgentId(chatAgentId);
  const normalizedChatAgentId =
    typeof chatAgentId === 'string' ? chatAgentId.trim().toLowerCase() : '';

  // ---- (0) explicit override from the memory config -------------------
  // A read failure is logged and treated as "no override" so extraction
  // can still proceed through the automatic chain.
  let override = null;
  if (dataDir) {
    try {
      const cfg = await readMemoryConfig(dataDir);
      if (cfg?.extraction?.provider) override = cfg.extraction;
    } catch (err) {
      console.warn(
        '[memory-llm] failed to read memory config override',
        err?.message ?? err,
      );
    }
  }
  if (override) {
    // An unrecognised provider id has no row in PROVIDER_DEFAULTS. Use
    // an empty row so the fallbacks below yield '' instead of throwing
    // a TypeError on `undefined.baseUrl`.
    const defaults = PROVIDER_DEFAULTS[override.provider] ?? {};
    const explicitKey =
      typeof override.apiKey === 'string' && override.apiKey.trim()
        ? override.apiKey.trim()
        : '';
    const envKey = envKeyFor(override.provider);
    let resolvedKey = explicitKey || envKey;
    let credentialSource = explicitKey
      ? 'memory-config'
      : (envKey ? 'env' : null);
    // Last chance: an openai-shaped override (openai or azure) with no
    // explicit/env key may borrow the media-config OpenAI key. The
    // other providers have no media-config counterpart.
    if (
      !resolvedKey
      && (override.provider === 'openai' || override.provider === 'azure')
      && projectRoot
    ) {
      try {
        const cred = await resolveProviderConfig(projectRoot, 'openai');
        if (cred?.apiKey?.trim()) {
          resolvedKey = cred.apiKey.trim();
          credentialSource = 'media-config';
        }
      } catch {
        // Deliberately ignored: with no key we end up returning null
        // (or the Local CLI descriptor) just below.
      }
    }
    if (!resolvedKey) {
      // No key anywhere: the only remaining option is the user's own
      // CLI; this is null when the agent/provider pair is unsupported.
      return localCliProviderFor(
        normalizedChatAgentId,
        override.provider,
        override.model,
      );
    }
    const baseUrl =
      (typeof override.baseUrl === 'string' && override.baseUrl.trim())
      || defaults.baseUrl
      || '';
    const model =
      (typeof override.model === 'string' && override.model.trim())
      || defaults.model
      || '';
    if (!baseUrl || !model) {
      // Unrecoverable: Azure without a resource URL, or an unrecognised
      // provider that did not bring its own endpoint and model. Bail
      // rather than issuing a request against a malformed URL.
      return null;
    }
    return {
      kind: override.provider,
      apiKey: resolvedKey,
      model,
      baseUrl,
      apiVersion:
        override.provider === 'azure'
          ? (typeof override.apiVersion === 'string' && override.apiVersion.trim())
            || PROVIDER_DEFAULTS.azure.apiVersion
          : '',
      credentialSource,
    };
  }

  const envOverrideModel = (process.env.OD_MEMORY_MODEL || '').trim();

  // ---- (1)+(2) chat-protocol-constrained selection --------------------
  // Only runs when the CLI in use maps to a known protocol. We refuse
  // to leave that protocol's family even if another vendor's env key is
  // set, to avoid the "I'm using Claude but memory says openai
  // gpt-4o-mini" surprise.
  if (chatProtocol) {
    // Use the trimmed env value so a whitespace-only OD_MEMORY_MODEL
    // does not mask the chat model.
    const localCliProvider = localCliProviderFor(
      normalizedChatAgentId,
      chatProtocol,
      envOverrideModel || chatModel,
    );
    if (localCliProvider) return localCliProvider;

    const envKey = envKeyFor(chatProtocol);
    if (envKey) {
      const defaults = PROVIDER_DEFAULTS[chatProtocol];
      return {
        kind: chatProtocol,
        apiKey: envKey,
        model: envOverrideModel || defaults.model,
        baseUrl:
          (chatProtocol === 'anthropic' && process.env.ANTHROPIC_BASE_URL)
          || (chatProtocol === 'openai' && process.env.OPENAI_BASE_URL)
          || defaults.baseUrl,
        apiVersion: chatProtocol === 'azure' ? defaults.apiVersion : '',
        credentialSource: 'env',
      };
    }
    // Secondary fallback for openai-protocol CLIs: borrow the OpenAI
    // key already saved under media-config. Not attempted for
    // anthropic/google chats because only 'openai' is looked up there.
    if (chatProtocol === 'openai' && projectRoot) {
      try {
        const cred = await resolveProviderConfig(projectRoot, 'openai');
        if (cred && typeof cred.apiKey === 'string' && cred.apiKey.trim()) {
          return {
            kind: 'openai',
            apiKey: cred.apiKey.trim(),
            model:
              envOverrideModel || cred.model || PROVIDER_DEFAULTS.openai.model,
            baseUrl: (cred.baseUrl && String(cred.baseUrl).trim())
              || PROVIDER_DEFAULTS.openai.baseUrl,
            apiVersion: '',
            credentialSource: 'media-config',
          };
        }
      } catch (err) {
        console.warn(
          '[memory-llm] media-config lookup failed (chat-constrained)',
          err?.message ?? err,
        );
      }
    }
    // Protocol known but no key for it: stop here instead of wandering
    // onto a foreign vendor.
    return null;
  }

  // ---- (3) BYOK chat-config snapshot -----------------------------------
  // The caller forwards the live chat provider/key/baseUrl/apiVersion.
  // When it names no model we use the provider's fast default rather
  // than an expensive chat model.
  if (
    chatProvider
    && chatProvider.provider
    && PROVIDER_DEFAULTS[chatProvider.provider]
  ) {
    const apiKey =
      typeof chatProvider.apiKey === 'string' ? chatProvider.apiKey.trim() : '';
    if (apiKey) {
      const defaults = PROVIDER_DEFAULTS[chatProvider.provider];
      const baseUrl =
        (typeof chatProvider.baseUrl === 'string' && chatProvider.baseUrl.trim())
        || defaults.baseUrl;
      // Azure with no resource URL is unrecoverable; fall through to
      // the legacy chain instead.
      if (chatProvider.provider !== 'azure' || baseUrl) {
        const explicitModel =
          typeof chatProvider.model === 'string' && chatProvider.model.trim()
            ? chatProvider.model.trim()
            : '';
        return {
          kind: chatProvider.provider,
          apiKey,
          model: envOverrideModel || explicitModel || defaults.model,
          baseUrl,
          apiVersion:
            chatProvider.provider === 'azure'
              ? (typeof chatProvider.apiVersion === 'string'
                  && chatProvider.apiVersion.trim())
                || PROVIDER_DEFAULTS.azure.apiVersion
              : '',
          credentialSource: 'chat-byok',
        };
      }
    }
  }

  // ---- (4)+(5) legacy env fallbacks -------------------------------------
  // Resolved through envKeyFor so keys are trimmed exactly like every
  // other path; a whitespace-only variable counts as "not set".
  const legacyAnthropicKey = envKeyFor('anthropic');
  if (legacyAnthropicKey) {
    return {
      kind: 'anthropic',
      apiKey: legacyAnthropicKey,
      model: envOverrideModel || PROVIDER_DEFAULTS.anthropic.model,
      baseUrl:
        process.env.ANTHROPIC_BASE_URL || PROVIDER_DEFAULTS.anthropic.baseUrl,
      credentialSource: 'env',
    };
  }
  const legacyOpenAIKey = envKeyFor('openai');
  if (legacyOpenAIKey) {
    return {
      kind: 'openai',
      apiKey: legacyOpenAIKey,
      model: envOverrideModel || PROVIDER_DEFAULTS.openai.model,
      baseUrl: process.env.OPENAI_BASE_URL || PROVIDER_DEFAULTS.openai.baseUrl,
      credentialSource: 'env',
    };
  }

  // ---- (6) legacy media-config fallback ----------------------------------
  // Reuse the OpenAI key saved for media generation so LLM extraction
  // is not left dark for users with no provider key in the daemon's
  // environment.
  if (projectRoot) {
    try {
      const cred = await resolveProviderConfig(projectRoot, 'openai');
      if (cred && typeof cred.apiKey === 'string' && cred.apiKey.trim()) {
        return {
          kind: 'openai',
          apiKey: cred.apiKey.trim(),
          model:
            envOverrideModel || cred.model || PROVIDER_DEFAULTS.openai.model,
          baseUrl: (cred.baseUrl && String(cred.baseUrl).trim())
            || PROVIDER_DEFAULTS.openai.baseUrl,
          credentialSource: 'media-config',
        };
      }
    } catch (err) {
      console.warn(
        '[memory-llm] failed to read media-config for fallback',
        err?.message ?? err,
      );
    }
  }
  return null;
}
|
||
|
||
// Assemble the user-role message for the extraction request.
//
// Layout (sections joined with '\n'):
//   ## Existing memory   — `currentMemory`, or '(empty)' when it is
//                          blank or not a string
//   ## User message      — `userMessage`, coerced to a string and
//                          capped at 4000 characters
//   ## Assistant reply   — only when `assistantMessage` is a non-blank
//                          string; capped at 4000 characters
//   a closing reminder to answer with the JSON object only
//
// Non-string `assistantMessage` / `currentMemory` values are treated as
// absent instead of throwing on `.trim()`, matching the tolerance
// already applied to `userMessage`. `currentMemory` is not truncated.
function renderUserPayload({ userMessage, assistantMessage, currentMemory }) {
  const hasMemory = typeof currentMemory === 'string' && currentMemory.trim().length > 0;
  const hasReply = typeof assistantMessage === 'string' && assistantMessage.trim().length > 0;

  const parts = [
    '## Existing memory',
    hasMemory ? currentMemory : '(empty)',
    '',
    '## User message',
    String(userMessage || '').slice(0, 4000),
  ];
  if (hasReply) {
    parts.push('', '## Assistant reply', assistantMessage.slice(0, 4000));
  }
  parts.push(
    '',
    'Return ONLY the JSON object described in the system prompt — no prose, no fences.',
  );
  return parts.join('\n');
}
|
||
|
||
// Upper bound, in milliseconds, for a single provider HTTP call (30 s).
// Extraction runs after the chat turn has finished, while the user may
// be watching the settings panel for a success/failure pill, so a
// stalled request should fail quickly instead of lingering on the
// platform's much longer default timeouts.
const FETCH_TIMEOUT_MS = 30 * 1000;
|
||
|
||
// Join `suffix` onto `baseUrl`, inserting `/v1` only when the base path
// does not already contain a `/vN` segment.
//
// This keeps a saved base URL that already ends in `/v1` (local
// servers, proxies re-hosting an API under a fixed prefix) from turning
// into `/v1/v1/chat/completions`. Trailing slashes on the base path are
// dropped first; the query string and the rest of the URL are kept.
// Used for Anthropic's `/messages` and OpenAI's `/chat/completions`;
// the Azure and Gemini callers build their URLs themselves.
//
// Throws (from the URL constructor) when `baseUrl` is not a valid
// absolute URL.
function appendVersionedApiPath(baseUrl, suffix) {
  const target = new URL(baseUrl);
  const basePath = target.pathname.replace(/\/+$/, '');
  const alreadyVersioned = /\/v\d+(\/|$)/.test(basePath);
  const versionSegment = alreadyVersioned ? '' : '/v1';
  target.pathname = `${basePath}${versionSegment}${suffix}`;
  return target.toString();
}
|
||
|
||
// Return an AbortSignal that aborts after `ms` milliseconds, so a
// stalled provider call rejects instead of hanging indefinitely.
//
// Prefers the built-in `AbortSignal.timeout`. Where that is missing, a
// manual AbortController + timer is used, aborting with
// `Error("timeout <ms>ms")`. The fallback timer is unref'd when the
// runtime supports it: it is never cleared, so left ref'd it would keep
// the event loop alive for the full timeout even after the request had
// already finished.
function withTimeout(ms) {
  if (typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function') {
    return AbortSignal.timeout(ms);
  }
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(new Error(`timeout ${ms}ms`)), ms);
  // Node returns a Timeout object with unref(); browsers return a
  // number, in which case this is a no-op.
  timer?.unref?.();
  return controller.signal;
}
|
||
|
||
// Turn a failed-fetch error into a message a user can act on.
//
// Network failures surface as a generic error ("fetch failed") with the
// real reason tucked under `err.cause`, optionally carrying `.code` and,
// for aggregate errors, `.errors`. Since the UI only shows the message,
// this appends the most useful detail in parentheses:
//   - no cause                         → the original message
//   - code and message, message already
//     mentions the code                → "(CODE)"
//   - code and message that adds info  → "(CODE: message)"
//   - only one of the two              → that one
//   - neither, but `cause.errors` is an
//     array                            → first inner code or message
// A cause message identical to the outer message is ignored. Code and
// message are never both shown when one merely repeats the other.
function describeFetchError(err) {
  const summary = err?.message || String(err);
  const cause = err?.cause;
  if (!cause) return summary;

  const code = cause.code ? String(cause.code) : '';
  const causeMessage =
    cause.message && cause.message !== summary ? String(cause.message) : '';

  let detail;
  if (!code || !causeMessage) {
    // At most one of them is available: use whichever exists.
    detail = code || causeMessage;
  } else if (causeMessage.toLowerCase().includes(code.toLowerCase())) {
    // Message just wraps the code (e.g. "read ECONNRESET").
    detail = code;
  } else {
    detail = `${code}: ${causeMessage}`;
  }

  // Aggregate cause with nothing usable at the top level: take the
  // first inner error that has a code or message.
  if (!detail && Array.isArray(cause.errors)) {
    for (const inner of cause.errors) {
      const innerCode = inner?.code ? String(inner.code) : '';
      const innerMessage = inner?.message ? String(inner.message) : '';
      detail = innerCode || innerMessage;
      if (detail) break;
    }
  }

  if (!detail) return summary;
  return `${summary} (${detail})`;
}
|
||
|
||
// Send one extraction request to an Anthropic-style Messages endpoint
// and return the text of the first `text` content block ('' if none).
//
// provider — { baseUrl, apiKey, model }; the request goes to
//            `<baseUrl>[/v1]/messages` (see appendVersionedApiPath).
// system   — system prompt.
// user     — user-role message body.
//
// Throws an Error whose message comes from describeFetchError when the
// request itself fails (the original error is kept as `cause`), or
// "anthropic <status>: <body>" on a non-2xx response.
async function callAnthropic(provider, system, user) {
  let resp;
  try {
    resp = await fetch(appendVersionedApiPath(provider.baseUrl, '/messages'), {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'x-api-key': provider.apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model: provider.model,
        max_tokens: 1024,
        system,
        messages: [{ role: 'user', content: user }],
      }),
      signal: withTimeout(FETCH_TIMEOUT_MS),
    });
  } catch (err) {
    // Keep the readable message but preserve the original error (and
    // its stack) for debugging.
    throw new Error(describeFetchError(err), { cause: err });
  }
  if (!resp.ok) {
    throw new Error(`anthropic ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  // Guard against a malformed payload whose `content` is not an array.
  const blocks = Array.isArray(json?.content) ? json.content : [];
  const block = blocks.find((b) => b?.type === 'text');
  return block?.text ?? '';
}
|
||
|
||
// Send one extraction request to an OpenAI-compatible chat-completions
// endpoint and return the first choice's message content ('' if none).
//
// provider — { baseUrl, apiKey, model }; the request goes to
//            `<baseUrl>[/v1]/chat/completions` with Bearer auth.
// system   — system prompt.
// user     — user-role message body.
//
// `response_format: json_object` asks the server for a JSON reply.
//
// Throws an Error whose message comes from describeFetchError when the
// request itself fails (the original error is kept as `cause`), or
// "openai <status>: <body>" on a non-2xx response.
async function callOpenAI(provider, system, user) {
  let resp;
  try {
    resp = await fetch(
      appendVersionedApiPath(provider.baseUrl, '/chat/completions'),
      {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          authorization: `Bearer ${provider.apiKey}`,
        },
        body: JSON.stringify({
          model: provider.model,
          response_format: { type: 'json_object' },
          messages: [
            { role: 'system', content: system },
            { role: 'user', content: user },
          ],
        }),
        signal: withTimeout(FETCH_TIMEOUT_MS),
      },
    );
  } catch (err) {
    // Keep the readable message but preserve the original error (and
    // its stack) for debugging.
    throw new Error(describeFetchError(err), { cause: err });
  }
  if (!resp.ok) {
    throw new Error(`openai ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  return json?.choices?.[0]?.message?.content ?? '';
}
|
||
|
||
// Send one extraction request to Azure OpenAI and return the first
// choice's message content ('' if none).
//
// Azure uses the same chat-completions JSON as OpenAI, but on a
// per-deployment URL and with an `api-key` header instead of
// `Authorization`. `provider.model` is therefore the Azure deployment
// name (it goes into the URL, not the request body), and
// `provider.apiVersion` falls back to the Azure default when empty.
//
// provider — { baseUrl, apiKey, model, apiVersion }.
// system   — system prompt.
// user     — user-role message body.
//
// Throws an Error whose message comes from describeFetchError when the
// request itself fails (the original error is kept as `cause`), or
// "azure <status>: <body>" on a non-2xx response.
async function callAzure(provider, system, user) {
  const base = String(provider.baseUrl || '').replace(/\/+$/, '');
  const deployment = encodeURIComponent(provider.model);
  const apiVersion = encodeURIComponent(
    provider.apiVersion || PROVIDER_DEFAULTS.azure.apiVersion,
  );
  const url = `${base}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`;
  let resp;
  try {
    resp = await fetch(url, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'api-key': provider.apiKey,
      },
      body: JSON.stringify({
        response_format: { type: 'json_object' },
        messages: [
          { role: 'system', content: system },
          { role: 'user', content: user },
        ],
      }),
      signal: withTimeout(FETCH_TIMEOUT_MS),
    });
  } catch (err) {
    // Keep the readable message but preserve the original error (and
    // its stack) for debugging.
    throw new Error(describeFetchError(err), { cause: err });
  }
  if (!resp.ok) {
    throw new Error(`azure ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  return json?.choices?.[0]?.message?.content ?? '';
}
|
||
|
||
// Send one extraction request to the Gemini `generateContent` REST
// endpoint and return the concatenated text parts of the first
// candidate ('' if there are none).
//
// Gemini's request shape differs from the OpenAI family: the system
// prompt goes in `systemInstruction`, the conversation is `contents[]`
// of `role` + `parts`, and the API key is sent as the `key` query
// parameter. `responseMimeType: application/json` asks for the JSON
// output the parser expects.
//
// NOTE(review): because the key is part of the URL it can end up in
// proxy or access logs; Gemini also documents an `x-goog-api-key`
// header. Confirm that custom base URLs accept it before switching.
//
// provider — { baseUrl, apiKey, model }.
// system   — system prompt.
// user     — user-role message body.
//
// Throws an Error whose message comes from describeFetchError when the
// request itself fails (the original error is kept as `cause`), or
// "google <status>: <body>" on a non-2xx response.
async function callGoogle(provider, system, user) {
  const base = String(provider.baseUrl || '').replace(/\/+$/, '');
  const model = encodeURIComponent(provider.model);
  const url = `${base}/v1beta/models/${model}:generateContent?key=${encodeURIComponent(provider.apiKey)}`;
  let resp;
  try {
    resp = await fetch(url, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify({
        systemInstruction: { role: 'system', parts: [{ text: system }] },
        contents: [{ role: 'user', parts: [{ text: user }] }],
        generationConfig: { responseMimeType: 'application/json' },
      }),
      signal: withTimeout(FETCH_TIMEOUT_MS),
    });
  } catch (err) {
    // Keep the readable message but preserve the original error (and
    // its stack) for debugging.
    throw new Error(describeFetchError(err), { cause: err });
  }
  if (!resp.ok) {
    throw new Error(`google ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  const parts = json?.candidates?.[0]?.content?.parts;
  if (Array.isArray(parts)) {
    // Non-text parts contribute nothing to the result.
    return parts.map((p) => (p && typeof p.text === 'string' ? p.text : '')).join('');
  }
  return '';
}
|
||
|
||
// Wall-clock budget (milliseconds) for one local-CLI extraction run. When
// it elapses, callLocalCli() signals the child process to terminate and
// rejects with a "timed out" error.
const LOCAL_CLI_TIMEOUT_MS = 60_000;
|
||
|
||
// Turns the raw stdout of an agent CLI that emits a JSON event stream
// into the plain text the agent produced.
//
//   kind      - parser kind handed to createJsonEventStreamHandler
//   raw       - the captured stdout
//   agentName - display name used in the error message
//
// Returns the concatenation of every `text_delta` event's string
// `delta`, trimmed. Throws `<agentName> CLI error: <message>` if the
// stream contains an `error` event ("unknown error" when that event has
// no usable message).
function extractJsonEventText(kind, raw, agentName) {
  const collected = [];
  const stream = createJsonEventStreamHandler(kind, (evt) => collected.push(evt));
  stream.feed(raw);
  stream.flush();

  let text = '';
  for (const evt of collected) {
    const type = evt?.type;
    if (type === 'error') {
      // Any error event fails the whole run; text gathered so far is discarded.
      const reason = typeof evt.message === 'string' ? evt.message.trim() : '';
      throw new Error(`${agentName} CLI error: ${reason || 'unknown error'}`);
    }
    if (type === 'text_delta' && typeof evt.delta === 'string') {
      text += evt.delta;
    }
  }
  return text.trim();
}
|
||
|
||
// Runs one extraction prompt through a locally installed agent CLI and
// resolves with the text it produced.
//
//   provider - { agentId, model } describing which CLI/model to use
//   system   - system prompt
//   user     - rendered user payload
//   options  - optional { localCliRunner, projectRoot, dataDir }
//
// When `options.localCliRunner` is a function it is called instead of
// spawning anything, and its result is returned as-is. Otherwise the
// agent definition is looked up, its launch path and environment are
// resolved, and the CLI is spawned with the prompt written to stdin.
// Only the `claude`, `codex` and `opencode` agents are supported.
//
// Rejects when the agent is unknown or not launchable, when the CLI
// exits non-zero, produces no text, reports an error event, or does not
// finish within LOCAL_CLI_TIMEOUT_MS.
async function callLocalCli(provider, system, user, options) {
  if (typeof options?.localCliRunner === 'function') {
    return options.localCliRunner({
      agentId: provider.agentId,
      model: provider.model,
      system,
      user,
      projectRoot: options?.projectRoot ?? null,
      dataDir: options?.dataDir ?? null,
    });
  }

  const def = getAgentDef(provider.agentId);
  if (!def) {
    throw new Error(`Local CLI agent "${provider.agentId}" is not installed`);
  }

  // Per-agent environment overrides from the app config are best-effort:
  // any failure reading them falls back to "no overrides".
  let configuredAgentEnv = {};
  try {
    const appConfig = options?.dataDir ? await readAppConfig(options.dataDir) : {};
    configuredAgentEnv = agentCliEnvForAgent(appConfig.agentCliEnv, def.id);
  } catch {
    configuredAgentEnv = {};
  }

  const launch = resolveAgentLaunch(def, configuredAgentEnv);
  if (!launch?.launchPath) {
    throw new Error(`${def.name} CLI is not installed or not on PATH`);
  }

  const hasProjectRoot =
    typeof options?.projectRoot === 'string' && options.projectRoot.trim();
  const cwd = hasProjectRoot ? options.projectRoot : process.cwd();

  const prompt = [
    system,
    '',
    'You are running as a background memory extractor. Do not use tools. Return strict JSON only.',
    '',
    user,
  ].join('\n');

  // Every supported agent receives the prompt on stdin; they differ only
  // in their arguments and in how stdout is turned back into text.
  let args;
  let parseStdout;
  switch (provider.agentId) {
    case 'claude': {
      args = ['-p', '--input-format', 'text', '--output-format', 'text'];
      if (provider.model && provider.model !== 'default') {
        args.push('--model', provider.model);
      }
      parseStdout = (raw) => raw.trim();
      break;
    }
    case 'codex':
    case 'opencode': {
      // OpenCode note: deliver the prompt on stdin, matching the chat-run
      // path (def.promptViaStdin). `opencode run`'s `-f, --file` is a
      // yargs array option that greedily consumes every trailing non-flag
      // token, so `--file <prompt-file> "<message>"` made OpenCode treat
      // the message text as a second attachment and exit with "File not
      // found". Bare `opencode run --format json` reads the message from
      // stdin instead.
      args = def.buildArgs('', [], [], { model: provider.model }, { cwd });
      parseStdout = (raw) => extractJsonEventText(def.eventParser || def.id, raw, def.name);
      break;
    }
    default:
      throw new Error(`Local CLI memory extraction is not supported for ${provider.agentId}`);
  }

  const baseEnv = spawnEnvForAgent(
    def.id,
    { ...process.env, ...(def.env || {}) },
    configuredAgentEnv,
    undefined,
    { resolvedBin: launch.selectedPath },
  );
  const env = applyAgentLaunchEnv(baseEnv, launch);
  const invocation = createCommandInvocation({ command: launch.launchPath, args, env });

  return await new Promise((resolve, reject) => {
    let stdoutTail = '';
    let stderrTail = '';
    let settled = false;
    let exited = false;
    let deadline;

    const child = spawn(invocation.command, invocation.args, {
      cwd,
      env,
      shell: false,
      stdio: ['pipe', 'pipe', 'pipe'],
      windowsVerbatimArguments: invocation.windowsVerbatimArguments,
    });

    // Settle the promise at most once and stop the deadline timer.
    const settle = (err, text) => {
      if (settled) return;
      settled = true;
      clearTimeout(deadline);
      if (err) {
        reject(err);
      } else {
        resolve(text);
      }
    };

    deadline = setTimeout(() => {
      child.kill('SIGTERM');
      // Escalate to SIGKILL if the child has not closed two seconds later.
      const hardKill = setTimeout(() => {
        if (!exited) child.kill('SIGKILL');
      }, 2_000);
      hardKill.unref?.();
      settle(new Error(`${def.name} CLI timed out after ${Math.round(LOCAL_CLI_TIMEOUT_MS / 1000)}s`));
    }, LOCAL_CLI_TIMEOUT_MS);
    deadline.unref?.();

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    // Keep only the tail of each stream so a chatty CLI cannot grow memory without bound.
    child.stdout.on('data', (chunk) => {
      stdoutTail = `${stdoutTail}${chunk}`.slice(-64_000);
    });
    child.stderr.on('data', (chunk) => {
      stderrTail = `${stderrTail}${chunk}`.slice(-8_000);
    });

    child.once('error', (err) => settle(err));
    child.once('close', (code, signal) => {
      exited = true;
      if (code === 0) {
        let text = '';
        try {
          text = parseStdout(stdoutTail);
        } catch (err) {
          settle(err);
          return;
        }
        if (text) {
          settle(null, text);
          return;
        }
      }
      // Non-zero exit, a signal, or a clean exit with no text all surface as errors.
      const detail = (stderrTail.trim() || stdoutTail.trim() || 'no output').slice(0, 1000);
      const status = signal ? `signal ${signal}` : `exit ${code}`;
      settle(new Error(`${def.name} CLI ${status}: ${detail}`));
    });

    // EPIPE only means the child stopped reading stdin; the close handler reports the outcome.
    child.stdin.on('error', (err) => {
      if (err.code !== 'EPIPE') settle(err);
    });
    child.stdin.end(prompt);
  });
}
|
||
|
||
// Tolerant JSON parse of the model's reply.
//
// Accepts, in order of preference:
//   - a JSON object with an `entries` array (the requested shape),
//   - a bare top-level JSON array of entries (some models drop the
//     envelope even when told not to),
// optionally wrapped in ```json fences or surrounded by prose.
//
// Returns at most six candidates. Each is an object with a `type` of
// 'user' | 'feedback' | 'project' | 'reference' and non-blank string
// `name` and `body`. Anything unparseable or non-conforming yields an
// empty list; this function never throws.
function parseEntries(rawText) {
  if (typeof rawText !== 'string') return [];

  let text = rawText.trim();
  if (text.startsWith('```')) {
    // Models occasionally fence their output even when told not to.
    text = text.replace(/^```(?:json)?\s*/i, '').replace(/```\s*$/i, '').trim();
  }

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    // Last-ditch: the reply has prose around the JSON. Take the span from
    // the first `{` to the last `}` (the match is greedy) and retry.
    const match = /\{[\s\S]*\}/.exec(text);
    if (!match) return [];
    try {
      parsed = JSON.parse(match[0]);
    } catch {
      return [];
    }
  }

  let list = [];
  if (Array.isArray(parsed)) {
    // Bare array: treat it as the entries list itself.
    list = parsed;
  } else if (Array.isArray(parsed?.entries)) {
    list = parsed.entries;
  }

  const validTypes = new Set(['user', 'feedback', 'project', 'reference']);
  const isWellFormed = (e) =>
    e &&
    typeof e === 'object' &&
    validTypes.has(e.type) &&
    typeof e.name === 'string' &&
    e.name.trim().length > 0 &&
    typeof e.body === 'string' &&
    e.body.trim().length > 0;

  // Hard cap so a confused model can't flood the store.
  return list.filter(isWellFormed).slice(0, 6);
}
|
||
|
||
// Reports whether `candidate` duplicates an entry in `existing`. Two
// entries count as the same when they share a `type` and their names
// match after lower-casing and trimming surrounding whitespace.
function alreadyKnown(existing, candidate) {
  const identityOf = (entry) => `${entry.type}::${entry.name.toLowerCase().trim()}`;
  const wanted = identityOf(candidate);
  return Array.from(existing).some((entry) => identityOf(entry) === wanted);
}
|
||
|
||
// Normalizes a parsed candidate into the draft shape written to the
// store: `name` is trimmed and limited to 80 characters, `description`
// (empty when missing) is trimmed and limited to 200 characters, and
// `body` is trimmed. `type` is passed through unchanged.
function toMemoryDraft(candidate) {
  const { type, name, description, body } = candidate;
  const tidy = (value) => String(value).trim();
  return {
    type,
    name: tidy(name).slice(0, 80),
    description: tidy(description || '').slice(0, 200),
    body: tidy(body),
  };
}
|
||
|
||
// Shared front half of both extraction entry points: decides whether
// extraction should run at all, picks a provider, asks it for candidate
// entries and parses the reply.
//
//   dataDir - memory store location
//   input   - { userMessage, assistantMessage } for the finished turn
//   options - optional { projectRoot, chatAgentId, chatModel, chatProvider,
//             kind, systemPrompt, candidateFilter, localCliRunner }
//
// Resolves with { status, attemptId, proposed, existingEntries } where
// status is 'skipped' (nothing was attempted), 'failed' (the model call
// or parsing threw) or 'ok'. It does not write to the store.
async function collectProposedEntries(dataDir, input, options) {
  const projectRoot = options?.projectRoot ?? null;
  const chatAgentId = options?.chatAgentId ?? null;
  const chatModel = options?.chatModel ?? null;
  const extractionKind = options?.kind ?? 'llm';
  const hasCustomPrompt =
    typeof options?.systemPrompt === 'string' && options.systemPrompt.trim();
  const systemPrompt = hasCustomPrompt ? options.systemPrompt.trim() : SYSTEM_PROMPT;
  // BYOK chat-config snapshot. It is only present for API-mode calls
  // forwarded through `/api/memory/extract`. The daemon doesn't persist
  // BYOK creds, so this per-call signal is the *only* way pickProvider()
  // can run "Same as chat" extraction against the user's actual chat
  // provider.
  const chatProvider = options?.chatProvider ?? null;
  const userMessage = String(input?.userMessage || '').trim();

  // Builds the "nothing attempted" result, recording a skip when a reason is given.
  const skip = (reason) => {
    if (reason) {
      recordSkip({ userMessage, reason, kind: extractionKind });
    }
    return { status: 'skipped', attemptId: null, proposed: [], existingEntries: [] };
  };

  const cfg = await readMemoryConfig(dataDir);
  if (!cfg.enabled) {
    return skip('memory-disabled');
  }
  // Connector extractions ignore the chat-extraction toggle; this path records no skip.
  if (extractionKind !== 'connector' && !cfg.chatExtractionEnabled) {
    return skip();
  }
  if (userMessage.length === 0) {
    return skip('empty-message');
  }

  const provider = await pickProvider(projectRoot, dataDir, chatAgentId, chatProvider, chatModel);
  if (!provider) {
    return skip('no-provider');
  }

  // Past this point we have a provider committed and an actual model call
  // about to happen, so switch from one-shot skip records to a running
  // record that is updated through phase transitions.
  const attemptId = startExtraction({ userMessage, kind: extractionKind });
  markProvider(attemptId, {
    kind: provider.kind,
    model: provider.model,
    credentialSource: provider.credentialSource,
  });

  let currentMemory = '';
  let existingEntries = [];
  try {
    const [memoryBody, entries] = await Promise.all([
      composeMemoryBody(dataDir),
      listMemoryEntries(dataDir),
    ]);
    currentMemory = memoryBody;
    existingEntries = entries;
  } catch {
    // Fresh store: proceed with empty context.
  }

  // Marks the attempt failed and builds the matching result.
  const fail = (err) => {
    markFailed(attemptId, err);
    return { status: 'failed', attemptId, proposed: [], existingEntries };
  };

  const userPayload = renderUserPayload({
    userMessage,
    assistantMessage: input?.assistantMessage,
    currentMemory,
  });

  // Route the request to the transport matching the chosen provider.
  const requestCompletion = () => {
    if (provider.transport === 'chat-cli') {
      return callLocalCli(provider, systemPrompt, userPayload, {
        dataDir,
        projectRoot,
        localCliRunner: options?.localCliRunner,
      });
    }
    switch (provider.kind) {
      case 'anthropic':
        return callAnthropic(provider, systemPrompt, userPayload);
      case 'azure':
        return callAzure(provider, systemPrompt, userPayload);
      case 'google':
        return callGoogle(provider, systemPrompt, userPayload);
      default:
        // openai or ollama: both speak the OpenAI chat-completions wire
        // shape, so callOpenAI handles them with just a different base URL.
        return callOpenAI(provider, systemPrompt, userPayload);
    }
  };

  let raw = '';
  try {
    raw = await requestCompletion();
  } catch (err) {
    // err.message is already pre-formatted by describeFetchError() when
    // the call layer caught a network error. For HTTP-level failures
    // (`anthropic 401: …`) the message is already user-facing too.
    console.warn(`[memory-llm] ${provider.kind} call failed`, err?.message ?? err);
    return fail(err);
  }

  let proposed;
  try {
    proposed = parseEntries(raw);
    if (typeof options?.candidateFilter === 'function') {
      // A filter that throws rejects that one candidate instead of failing the run.
      proposed = proposed.filter((candidate) => {
        try {
          return options.candidateFilter(candidate);
        } catch {
          return false;
        }
      });
    }
  } catch (err) {
    return fail(err);
  }

  markProposed(attemptId, proposed.length);
  return { status: 'ok', attemptId, proposed, existingEntries };
}
|
||
|
||
// Dry-run extraction: asks the model for candidate entries and returns
// the ones not already in the store as drafts, without writing anything.
// Resolves with [] when extraction was skipped or failed. A successful
// attempt is recorded with zero written entries.
export async function suggestWithLLM(dataDir, input, options) {
  const { status, attemptId, proposed, existingEntries } = await collectProposedEntries(
    dataDir,
    input,
    options,
  );
  if (status !== 'ok') {
    return [];
  }

  const suggestions = [];
  for (const candidate of proposed) {
    if (!alreadyKnown(existingEntries, candidate)) {
      suggestions.push(toMemoryDraft(candidate));
    }
  }

  markSuccess(attemptId, { writtenCount: 0, writtenIds: [] });
  return suggestions;
}
|
||
|
||
// Full extraction: asks the model for candidate entries and writes every
// one that is not already known to the store.
//
//   options.source - label recorded on the writes and on the change
//                    event (defaults to 'llm')
//
// Resolves with a summary ({ id, name, description, type, updatedAt })
// of each entry written, or [] when extraction was skipped, failed or
// produced nothing new. A failed write is logged and does not abort the
// rest of the batch.
export async function extractWithLLM(dataDir, input, options) {
  const changeSource = options?.source ?? 'llm';
  const outcome = await collectProposedEntries(dataDir, input, options);
  if (outcome.status !== 'ok') {
    return [];
  }

  // An empty proposal list simply falls through: nothing is written, no
  // change event fires, and the attempt is recorded with zero writes.
  const written = [];
  for (const candidate of outcome.proposed) {
    if (alreadyKnown(outcome.existingEntries, candidate)) {
      continue;
    }
    try {
      // Suppress per-entry events; a single 'extract' event is batched
      // below so the toast says "Memory updated (3 · LLM)" once.
      const saved = await upsertMemoryEntry(dataDir, toMemoryDraft(candidate), {
        silent: true,
        source: changeSource,
      });
      const { id, name, description, type, updatedAt } = saved;
      written.push({ id, name, description, type, updatedAt });
    } catch (err) {
      console.warn('[memory-llm] write failed', err?.message ?? err);
    }
  }

  const writtenCount = written.length;
  if (writtenCount > 0) {
    memoryEvents.emit('change', {
      kind: 'extract',
      count: writtenCount,
      source: changeSource,
      at: Date.now(),
    });
  }

  markSuccess(outcome.attemptId, {
    writtenCount,
    writtenIds: written.map((entry) => entry.id),
  });

  return written;
}
|