open-design/apps/daemon/src/memory-llm.ts

// @ts-nocheck
// LLM-driven memory extractor.
//
// The heuristic regex pack in `memory.ts` only catches explicit markers
// ("remember:", "记住", "我喜欢"…). For everything else — implicit
// preferences, role, ongoing-work context — we ask a small fast model
// to look at the just-finished turn and the existing memory and return
// a JSON list of facts to add.
//
// This module is fire-and-forget: the chat run finishes and triggers
// extraction in the background. Output lands in the same MD store so
// the next turn's prompt picks it up automatically.
//
// Provider selection (in order):
//   0. memory `.config.json` extraction override → user-supplied
//      provider/model/baseUrl/apiKey/apiVersion from the Memory model
//      picker. The override may pick any provider listed in
//      `PROVIDER_DEFAULTS` — anthropic, openai, azure (openai-compatible
//      at a per-resource URL), google gemini, ollama, or senseaudio.
//      This is the only path that lets a Local-CLI user
//      (no env-var key in the daemon's environment) point memory
//      extraction at, say, their personal Anthropic key with a
//      specific Haiku build instead of falling all the way through to
//      gpt-4o-mini. When the override carries the provider but no
//      apiKey we fall back to the corresponding env var (or the media-
//      config OpenAI key for openai/azure overrides) so a "I want to
//      switch to OpenAI but reuse my existing key" change costs zero
//      typing.
//   1. current Local CLI, when the caller passed `chatAgentId` and the
//      agent supports headless one-shot output (Claude Code, Codex and
//      OpenCode today — see `canUseLocalCliForMemory`).
//   2. matching provider env var for the current chat protocol.
//   3. BYOK chat-config snapshot for API-mode chats.
//   4. ANTHROPIC_API_KEY env → Claude Haiku 4.5 (legacy fallback)
//   5. OPENAI_API_KEY env    → gpt-4o-mini
//   6. media-config OpenAI BYOK → gpt-4o-mini
//      (the key the user already typed into Settings → Media providers;
//       reuses an existing credential so Local-CLI users don't have to
//       paste it twice just to get LLM-side memory extraction)
//   7. nothing               → record a 'skipped: no-provider' attempt
//      so the UI can surface "configure a key to enable LLM memory"
//      instead of staying silent
//
// Every attempt — whether it actually called the model or short-circuited
// — produces a record in `memory-extractions.ts` so the settings panel
// can show running / skipped / success / failed states in real time.

import {
  composeMemoryBody,
  listMemoryEntries,
  readMemoryConfig,
  upsertMemoryEntry,
  memoryEvents,
} from './memory.js';
import {
  startExtraction,
  recordSkip,
  markProvider,
  markSkipped,
  markProposed,
  markSuccess,
  markFailed,
} from './memory-extractions.js';
import { resolveProviderConfig } from './media-config.js';
import { spawn } from 'node:child_process';
import { createCommandInvocation } from '@open-design/platform';
import {
  applyAgentLaunchEnv,
  getAgentDef,
  resolveAgentLaunch,
  spawnEnvForAgent,
} from './agents.js';
import { agentCliEnvForAgent, readAppConfig } from './app-config.js';
import { createJsonEventStreamHandler } from './json-event-stream.js';

const SYSTEM_PROMPT = `You are a memory extractor for a personal AI design assistant.

Given the user's most recent message (and optionally the assistant's reply), plus a snapshot of the existing memory store, decide whether ANYTHING in this turn is worth remembering across future conversations.

A fact is worth remembering when ALL of these are true:
- It's about the user, their preferences, their tools, their ongoing work, OR a stable reference (a Linear board id, a Slack channel, a teammate name).
- It will plausibly still be true in a week.
- It would change how an assistant responds in a later, unrelated chat.

A fact is NOT worth remembering when ANY of these is true:
- It's a transient state (current task, what file they're editing right now).
- It's already captured in the existing memory.
- It's just the user asking a question or describing a one-off bug.
- It's something the assistant said about itself.
- It's a code snippet, an output, or a paste.

Output STRICT JSON in this exact shape — nothing else, no prose, no markdown fences:
{
  "entries": [
    { "type": "user|feedback|project|reference", "name": "short title (≤ 60 chars)", "description": "one-line summary (≤ 140 chars)", "body": "the actual remembered fact, 1-3 sentences" }
  ]
}

If there's nothing worth remembering, return: {"entries": []}

Type rules:
- user: who they are, role, expertise, long-term goals
- feedback: corrections / preferences about how to work ("don't add comments unless asked")
- project: ongoing initiatives, deadlines, why-decisions; usually time-bounded
- reference: pointers to external systems (Linear projects, Slack channels, dashboards)`;

// Provider defaults are centralised so the override path and the
// auto-pick path can't drift apart. When the user picks "Custom →
// anthropic" without typing a model, we still want the same
// claude-haiku-4-5 fallback the env path uses.
//
// Azure has no useful baseUrl default — every Azure resource has its
// own `https://<resource>.openai.azure.com` host, so the user must
// supply theirs. We still emit an empty default here so a missing
// override doesn't crash with `undefined` when accessed.
const PROVIDER_DEFAULTS = {
  anthropic: {
    model: 'claude-haiku-4-5',
    baseUrl: 'https://api.anthropic.com',
  },
  openai: {
    model: 'gpt-4o-mini',
    baseUrl: 'https://api.openai.com',
  },
  azure: {
    model: 'gpt-4o-mini',
    baseUrl: '',
    apiVersion: '2024-10-21',
  },
  google: {
    model: 'gemini-2.0-flash',
    baseUrl: 'https://generativelanguage.googleapis.com',
  },
  // Ollama Cloud speaks OpenAI-compatible chat-completions, so the
  // extractor just routes through callOpenAI with the ollama base URL
  // and the user's Ollama Cloud API key. The default model is a small
  // open-weight model so the auto-pick produces a deterministic answer
  // for users who haven't customised the picker; users who care can
  // pick anything off the picker's `Custom...` list.
  ollama: {
    model: 'gemma3:4b',
    baseUrl: 'https://ollama.com',
  },
  // SenseAudio's chat API is OpenAI-compatible (POST /v1/chat/completions,
  // Bearer auth), so the extractor falls through to callOpenAI with this
  // base URL and the user's SenseAudio API key. The default model is the
  // small/fast variant so auto-pick stays cheap; users can swap in
  // senseaudio-s2 or any gateway model via the picker.
  senseaudio: {
    model: 'senseaudio-s2-flash',
    baseUrl: 'https://api.senseaudio.cn',
  },
};

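// Map a provider id to the API key found in the daemon's environment.
// Used when a memory-config override doesn't carry its own apiKey, and
// when the chat protocol pins extraction to one provider family.
// anthropic/openai read their usual *_API_KEY variable; azure uses the
// AZURE_OPENAI_API_KEY convention (AZURE_API_KEY as a secondary);
// google uses GOOGLE_API_KEY (matching the gemini SDK's expectation,
// with GEMINI_API_KEY as a secondary); ollama reads OLLAMA_API_KEY;
// senseaudio reads OD_SENSEAUDIO_API_KEY, then SENSEAUDIO_API_KEY.
// Values are trimmed; an unknown provider or unset variable yields ''.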
function envKeyFor(provider) {
  if (provider === 'anthropic') return process.env.ANTHROPIC_API_KEY?.trim() || '';
  if (provider === 'openai') return process.env.OPENAI_API_KEY?.trim() || '';
  if (provider === 'azure') {
    return (
      process.env.AZURE_OPENAI_API_KEY?.trim()
      || process.env.AZURE_API_KEY?.trim()
      || ''
    );
  }
  if (provider === 'google') {
    return (
      process.env.GOOGLE_API_KEY?.trim()
      || process.env.GEMINI_API_KEY?.trim()
      || ''
    );
  }
  if (provider === 'ollama') {
    return process.env.OLLAMA_API_KEY?.trim() || '';
  }
  if (provider === 'senseaudio') {
    return (
      process.env.OD_SENSEAUDIO_API_KEY?.trim()
      || process.env.SENSEAUDIO_API_KEY?.trim()
      || ''
    );
  }
  return '';
}

// Map a chat agent id to the API protocol family it speaks under the
// hood. This is the bridge that makes "follow chat" actually mean
// something for memory extraction in CLI mode: when the user is on
// Claude Code (claude → anthropic) we don't want memory to silently
// fall through to whatever OpenAI key happens to be in media-config —
// that produces the very confusing "openai/gpt-4o-mini" attempts the
// user sees while they think they're "using Claude". Anything we don't
// recognise stays unconstrained (returns null) so the legacy
// cross-provider fallback can still kick in for setups we don't model.
function chatProtocolFromAgentId(agentId) {
  if (!agentId || typeof agentId !== 'string') return null;
  const id = agentId.trim().toLowerCase();
  if (id === 'claude') return 'anthropic';
  if (id === 'gemini') return 'google';
  // Codex, OpenCode, Qwen, DeepSeek, Kimi, Copilot, Pi, Kiro, Kilo,
  // Vibe, Devin, Hermes, Cursor-Agent, Qoder all use the OpenAI chat-
  // completions wire format.
  if (
    id === 'codex'
    || id === 'opencode'
    || id === 'qwen'
    || id === 'deepseek'
    || id === 'kimi'
    || id === 'copilot'
    || id === 'pi'
    || id === 'kiro'
    || id === 'kilo'
    || id === 'vibe'
    || id === 'devin'
    || id === 'hermes'
    || id === 'cursor-agent'
    || id === 'qoder'
  ) {
    return 'openai';
  }
  return null;
}

function canUseLocalCliForMemory(agentId, provider) {
  // Keep this allowlist explicit: each entry below has a headless one-shot
  // mode that accepts stdin and a parser we can reduce back to assistant text.
  if (agentId === 'claude' && provider === 'anthropic') return true;
  if (agentId === 'codex' && provider === 'openai') return true;
  if (agentId === 'opencode' && provider === 'openai') return true;
  return false;
}

function localCliProviderFor(agentId, provider, model) {
  if (!canUseLocalCliForMemory(agentId, provider)) return null;
  return {
    kind: provider,
    model: (typeof model === 'string' && model.trim()) || 'default',
    baseUrl: 'local-cli',
    apiVersion: '',
    credentialSource: 'chat-cli',
    transport: 'chat-cli',
    agentId,
  };
}

// Pick a provider in this order:
//   0. Memory config override → user-set provider/model/baseUrl/apiKey
//   1. Current Local CLI → if the user is chatting through Claude Code,
//      run the same CLI in one-shot mode for extraction. This keeps
//      "Same as chat" literal: no extra OpenAI/Anthropic key required
//      just because the extraction happens in the background.
//   2. Chat-protocol-constrained env var → if the chat is on Claude
//      Code (anthropic), only ANTHROPIC_API_KEY counts; Codex/OpenAI-
//      compatible CLIs only consult OPENAI_API_KEY (and the media-
//      config OpenAI key as a secondary fallback). This stops the
//      legacy "claude user, openai gpt-4o-mini extracts in the
//      background" surprise — if the matching key isn't configured,
//      we'd rather skip with 'no-provider' and surface that in the
//      history than quietly run on a different vendor's key.
//   3. BYOK chat-config snapshot → for API-mode chats (the picker is
//      on "Same as chat"), `/api/memory/extract` forwards the live
//      chat provider/key/baseUrl/apiVersion as `chatProvider`. We use
//      it directly with the per-protocol fast-model default so the
//      default extractor follows the chat configuration instead of
//      falling through to env / media-config which the daemon never
//      saw the user configure. The model deliberately overrides the
//      user-supplied `chatProvider.model` only when none was given —
//      memory should default to a cheaper/faster model than the chat
//      model the user is paying for.
//   4. (legacy fallback, only when we can't tell which CLI is in use
//      AND the caller didn't pass `chatProvider`)
//      ANTHROPIC_API_KEY env → Claude Haiku 4.5
//   5. (legacy fallback) OPENAI_API_KEY env → gpt-4o-mini
//   6. (legacy fallback) media-config OpenAI BYOK → gpt-4o-mini
//
// The `OD_MEMORY_MODEL` env continues to override the model name across
// (1)–(6) so power users don't lose that lever. It does NOT override the
// memory-config provider since that one carries an explicit user choice.
// `projectRoot` is required for the media-config path; `chatAgentId` is
// optional but recommended — without it we fall through to the legacy
// unconstrained chain, which is what the daemon used to do and what
// pre-context callers (the HTTP /api/memory/extract endpoint) still
// expect. `chatProvider` is the BYOK chat-config snapshot threaded
// through from the web app on a per-call basis (the daemon never
// persists BYOK creds, so this is the only signal we have for that
// mode).
async function pickProvider(projectRoot, dataDir, chatAgentId, chatProvider, chatModel) {
  const chatProtocol = chatProtocolFromAgentId(chatAgentId);
  const normalizedChatAgentId =
    typeof chatAgentId === 'string' ? chatAgentId.trim().toLowerCase() : '';
  let override = null;
  if (dataDir) {
    try {
      const cfg = await readMemoryConfig(dataDir);
      if (cfg?.extraction?.provider) override = cfg.extraction;
    } catch (err) {
      console.warn(
        '[memory-llm] failed to read memory config override',
        err?.message ?? err,
      );
    }
  }
  if (override) {
    const defaults = PROVIDER_DEFAULTS[override.provider];
    const explicitKey =
      typeof override.apiKey === 'string' && override.apiKey.trim()
        ? override.apiKey.trim()
        : '';
    const envKey = envKeyFor(override.provider);
    let resolvedKey = explicitKey || envKey;
    let credentialSource = explicitKey
      ? 'memory-config'
      : (envKey ? 'env' : null);
    // Last-chance: an openai-shaped override (openai or azure) with no
    // explicit/env key can still borrow the media-config OpenAI key the
    // user already typed. Anthropic / google have no media counterpart
    // today.
    if (
      !resolvedKey
      && (override.provider === 'openai' || override.provider === 'azure')
      && projectRoot
    ) {
      try {
        const cred = await resolveProviderConfig(projectRoot, 'openai');
        if (cred?.apiKey?.trim()) {
          resolvedKey = cred.apiKey.trim();
          credentialSource = 'media-config';
        }
      } catch {
        // Ignore — we'll record a no-provider skip below.
      }
    }
    if (!resolvedKey) {
      const localCliProvider = localCliProviderFor(
        normalizedChatAgentId,
        override.provider,
        override.model,
      );
      if (localCliProvider) return localCliProvider;
      return null;
    }
    const baseUrl =
      (typeof override.baseUrl === 'string' && override.baseUrl.trim())
      || defaults.baseUrl;
    if (override.provider === 'azure' && !baseUrl) {
      // Azure with no resource URL is unrecoverable — bail rather than
      // logging a confusing 404 from `https:///openai/deployments/...`.
      return null;
    }
    return {
      kind: override.provider,
      apiKey: resolvedKey,
      model:
        (typeof override.model === 'string' && override.model.trim())
        || defaults.model,
      baseUrl,
      apiVersion:
        override.provider === 'azure'
          ? (typeof override.apiVersion === 'string' && override.apiVersion.trim())
            || PROVIDER_DEFAULTS.azure.apiVersion
          : '',
      credentialSource,
    };
  }

  const envOverrideModel = (process.env.OD_MEMORY_MODEL || '').trim();

  // Chat-protocol-constrained branch (path 1). Only run when we know
  // which CLI is in use AND it maps to one of the four providers; we
  // refuse to wander out of the chat protocol's family even when an
  // env var for a different provider is set, because doing so produces
  // the "I'm using Claude but memory says openai gpt-4o-mini" surprise
  // the user reported.
  if (chatProtocol) {
    const localCliProvider = localCliProviderFor(
      normalizedChatAgentId,
      chatProtocol,
      process.env.OD_MEMORY_MODEL || chatModel,
    );
    if (localCliProvider) return localCliProvider;

    const envKey = envKeyFor(chatProtocol);
    if (envKey) {
      const defaults = PROVIDER_DEFAULTS[chatProtocol];
      return {
        kind: chatProtocol,
        apiKey: envKey,
        model: envOverrideModel || defaults.model,
        baseUrl:
          (chatProtocol === 'anthropic' && process.env.ANTHROPIC_BASE_URL)
          || (chatProtocol === 'openai' && process.env.OPENAI_BASE_URL)
          || defaults.baseUrl,
        apiVersion: chatProtocol === 'azure' ? defaults.apiVersion : '',
        credentialSource: 'env',
      };
    }
    // Secondary fallback for openai-compatible CLIs: the user already
    // typed an OpenAI key under Settings → Media providers, so we can
    // borrow it for memory extraction without making them paste it
    // twice. We do NOT try this for anthropic/google chats because the
    // media-config table only has openai-shaped credentials today.
    if (chatProtocol === 'openai' && projectRoot) {
      try {
        const cred = await resolveProviderConfig(projectRoot, 'openai');
        if (cred && typeof cred.apiKey === 'string' && cred.apiKey.trim()) {
          return {
            kind: 'openai',
            apiKey: cred.apiKey.trim(),
            model:
              envOverrideModel || cred.model || PROVIDER_DEFAULTS.openai.model,
            baseUrl: (cred.baseUrl && String(cred.baseUrl).trim())
              || PROVIDER_DEFAULTS.openai.baseUrl,
            apiVersion: '',
            credentialSource: 'media-config',
          };
        }
      } catch (err) {
        console.warn(
          '[memory-llm] media-config lookup failed (chat-constrained)',
          err?.message ?? err,
        );
      }
    }
    // The chat protocol is known but no key for it is available. Bail
    // out instead of wandering — recording 'skipped: no-provider' is
    // strictly more useful than silently running on a foreign vendor.
    return null;
  }

  // BYOK chat-config snapshot (path 2). The web app forwards the live
  // chat provider/key/baseUrl/apiVersion on every API-mode extraction
  // call so the daemon can run extraction against the same vendor the
  // user is chatting with — even though the daemon never persists
  // BYOK creds itself. Use the per-protocol fast-model default instead
  // of the chat model the user is paying for, so a memory pass on a
  // big chat model (gpt-4o, claude-sonnet-4-5) silently turns into a
  // cheap haiku/mini call. The caller can opt into using the chat
  // model verbatim by setting `chatProvider.model`.
  if (
    chatProvider
    && chatProvider.provider
    && PROVIDER_DEFAULTS[chatProvider.provider]
  ) {
    const apiKey =
      typeof chatProvider.apiKey === 'string' ? chatProvider.apiKey.trim() : '';
    if (apiKey) {
      const defaults = PROVIDER_DEFAULTS[chatProvider.provider];
      const baseUrl =
        (typeof chatProvider.baseUrl === 'string' && chatProvider.baseUrl.trim())
        || defaults.baseUrl;
      // Azure with no resource URL is unrecoverable — same guard as
      // the override path above.
      if (chatProvider.provider !== 'azure' || baseUrl) {
        const explicitModel =
          typeof chatProvider.model === 'string' && chatProvider.model.trim()
            ? chatProvider.model.trim()
            : '';
        return {
          kind: chatProvider.provider,
          apiKey,
          model: envOverrideModel || explicitModel || defaults.model,
          baseUrl,
          apiVersion:
            chatProvider.provider === 'azure'
              ? (typeof chatProvider.apiVersion === 'string'
                  && chatProvider.apiVersion.trim())
                || PROVIDER_DEFAULTS.azure.apiVersion
              : '',
          credentialSource: 'chat-byok',
        };
      }
    }
  }

  if (process.env.ANTHROPIC_API_KEY) {
    return {
      kind: 'anthropic',
      apiKey: process.env.ANTHROPIC_API_KEY,
      model: envOverrideModel || PROVIDER_DEFAULTS.anthropic.model,
      baseUrl:
        process.env.ANTHROPIC_BASE_URL || PROVIDER_DEFAULTS.anthropic.baseUrl,
      credentialSource: 'env',
    };
  }
  if (process.env.OPENAI_API_KEY) {
    return {
      kind: 'openai',
      apiKey: process.env.OPENAI_API_KEY,
      model: envOverrideModel || PROVIDER_DEFAULTS.openai.model,
      baseUrl: process.env.OPENAI_BASE_URL || PROVIDER_DEFAULTS.openai.baseUrl,
      credentialSource: 'env',
    };
  }
  // Fallback: reuse the OpenAI key the user already configured for media
  // generation. Most Local-CLI Claude users don't have an
  // ANTHROPIC_API_KEY in the daemon's environment (Claude Code logs in
  // via OAuth) but they often have an OpenAI key in Settings → Media
  // providers. Without this fallback the LLM extraction stage stays dark
  // for them and only the regex-based heuristic ever runs.
  if (projectRoot) {
    try {
      const cred = await resolveProviderConfig(projectRoot, 'openai');
      if (cred && typeof cred.apiKey === 'string' && cred.apiKey.trim()) {
        return {
          kind: 'openai',
          apiKey: cred.apiKey.trim(),
          model:
            envOverrideModel || cred.model || PROVIDER_DEFAULTS.openai.model,
          baseUrl: (cred.baseUrl && String(cred.baseUrl).trim())
            || PROVIDER_DEFAULTS.openai.baseUrl,
          credentialSource: 'media-config',
        };
      }
    } catch (err) {
      console.warn(
        '[memory-llm] failed to read media-config for fallback',
        err?.message ?? err,
      );
    }
  }
  return null;
}

function renderUserPayload({ userMessage, assistantMessage, currentMemory }) {
  const parts = [];
  parts.push('## Existing memory');
  parts.push(currentMemory && currentMemory.trim().length > 0
    ? currentMemory
    : '(empty)');
  parts.push('');
  parts.push('## User message');
  parts.push(String(userMessage || '').slice(0, 4000));
  if (assistantMessage && assistantMessage.trim().length > 0) {
    parts.push('');
    parts.push('## Assistant reply');
    parts.push(String(assistantMessage).slice(0, 4000));
  }
  parts.push('');
  parts.push(
    'Return ONLY the JSON object described in the system prompt — no prose, no fences.',
  );
  return parts.join('\n');
}

// 30s ceiling. The chat run has long since finished and the user is
// staring at the settings panel waiting for a green/red pill — leaving
// a half-dead fetch in flight for two minutes (the default undici
// connect timeout) makes the failure feel even worse than it is.
const FETCH_TIMEOUT_MS = 30_000;

// Append `/v1<suffix>` to a base URL only when the URL doesn't already
// carry an explicit `/vN` segment. Mirrors the same conditional path
// build the chat proxy and connection-test routes use, so a custom
// OpenAI-compatible endpoint whose saved baseUrl already contains
// `/v1` (local servers, proxies that re-host OpenAI under a fixed
// prefix) does not become `/v1/v1/chat/completions` and silently fail
// every memory extraction even though chat through the same provider
// works. Anthropic's `/v1/messages` and OpenAI's `/v1/chat/completions`
// both flow through this; Azure and Gemini build their URLs
// differently and don't need it.
function appendVersionedApiPath(baseUrl, suffix) {
  const url = new URL(baseUrl);
  const pathname = url.pathname.replace(/\/+$/, '');
  url.pathname = /\/v\d+(\/|$)/.test(pathname)
    ? `${pathname}${suffix}`
    : `${pathname}/v1${suffix}`;
  return url.toString();
}

// Build a standard AbortSignal that fires after FETCH_TIMEOUT_MS so a
// stalled provider call surfaces as a 'failed' record instead of
// hanging the attempt indefinitely.
function withTimeout(ms) {
  if (typeof AbortSignal !== 'undefined' && typeof AbortSignal.timeout === 'function') {
    return AbortSignal.timeout(ms);
  }
  const controller = new AbortController();
  setTimeout(() => controller.abort(new Error(`timeout ${ms}ms`)), ms);
  return controller.signal;
}

// undici raises a generic `TypeError: fetch failed` on every network
// error and tucks the real cause under `err.cause` (a Node `Error` or
// `AggregateError` with `.code` / `.errors`). The settings UI just
// shows `error.message`, so without unwrapping the cause the user
// sees "fetch failed" with no clue whether DNS broke, the firewall
// reset the connection, or the request timed out. Surface the most
// useful piece — the OS error code if present, otherwise the cause's
// message — appended in parentheses. We deliberately don't include
// both: `cause.message` typically already embeds the code (e.g.
// "read ECONNRESET"), and showing "ECONNRESET · read ECONNRESET"
// would just double the noise.
function describeFetchError(err) {
  const head = err?.message || String(err);
  const cause = err?.cause;
  if (!cause) return head;
  const codeRaw = cause.code ? String(cause.code) : '';
  const msgRaw =
    cause.message && cause.message !== head ? String(cause.message) : '';
  // Prefer the OS error code on its own when the cause's message just
  // wraps it (the common case for ECONNRESET / ENOTFOUND / ETIMEDOUT).
  // Fall back to the message when there's no code, or when the message
  // adds detail beyond the code (e.g. "Hostname/IP does not match
  // certificate's altnames").
  let detail = '';
  if (codeRaw && msgRaw) {
    const m = msgRaw.toLowerCase();
    detail = m.includes(codeRaw.toLowerCase()) ? codeRaw : `${codeRaw}: ${msgRaw}`;
  } else {
    detail = codeRaw || msgRaw;
  }
  // AggregateError: surface the first inner code that adds new info.
  // Most of these are six identical DNS errors, so dedupe aggressively.
  if (!detail && Array.isArray(cause.errors)) {
    for (const inner of cause.errors) {
      const innerCode = inner?.code ? String(inner.code) : '';
      const innerMsg = inner?.message ? String(inner.message) : '';
      const candidate = innerCode || innerMsg;
      if (candidate) {
        detail = candidate;
        break;
      }
    }
  }
  return detail ? `${head} (${detail})` : head;
}

async function callAnthropic(provider, system, user) {
  let resp;
  try {
    resp = await fetch(appendVersionedApiPath(provider.baseUrl, '/messages'), {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'x-api-key': provider.apiKey,
        'anthropic-version': '2023-06-01',
      },
      body: JSON.stringify({
        model: provider.model,
        max_tokens: 1024,
        system,
        messages: [{ role: 'user', content: user }],
      }),
      signal: withTimeout(FETCH_TIMEOUT_MS),
    });
  } catch (err) {
    throw new Error(describeFetchError(err));
  }
  if (!resp.ok) {
    throw new Error(`anthropic ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  const block = (json?.content || []).find((b) => b?.type === 'text');
  return block?.text ?? '';
}

async function callOpenAI(provider, system, user) {
  let resp;
  try {
    resp = await fetch(
      appendVersionedApiPath(provider.baseUrl, '/chat/completions'),
      {
        method: 'POST',
        headers: {
          'content-type': 'application/json',
          authorization: `Bearer ${provider.apiKey}`,
        },
        body: JSON.stringify({
          model: provider.model,
          response_format: { type: 'json_object' },
          messages: [
            { role: 'system', content: system },
            { role: 'user', content: user },
          ],
        }),
        signal: withTimeout(FETCH_TIMEOUT_MS),
      },
    );
  } catch (err) {
    throw new Error(describeFetchError(err));
  }
  if (!resp.ok) {
    throw new Error(`openai ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  return json?.choices?.[0]?.message?.content ?? '';
}

// Azure OpenAI speaks the same chat-completions JSON as OpenAI, but on
// a per-deployment URL and with `api-key:` instead of `Authorization:`.
// `provider.model` here is the Azure deployment name (the user typed it
// into the model field — that's what the chat picker calls "Deployment
// (Model)" too), not the underlying model family.
async function callAzure(provider, system, user) {
  const base = String(provider.baseUrl || '').replace(/\/+$/, '');
  const deployment = encodeURIComponent(provider.model);
  const apiVersion = encodeURIComponent(
    provider.apiVersion || PROVIDER_DEFAULTS.azure.apiVersion,
  );
  const url = `${base}/openai/deployments/${deployment}/chat/completions?api-version=${apiVersion}`;
  let resp;
  try {
    resp = await fetch(url, {
      method: 'POST',
      headers: {
        'content-type': 'application/json',
        'api-key': provider.apiKey,
      },
      body: JSON.stringify({
        response_format: { type: 'json_object' },
        messages: [
          { role: 'system', content: system },
          { role: 'user', content: user },
        ],
      }),
      signal: withTimeout(FETCH_TIMEOUT_MS),
    });
  } catch (err) {
    throw new Error(describeFetchError(err));
  }
  if (!resp.ok) {
    throw new Error(`azure ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  return json?.choices?.[0]?.message?.content ?? '';
}

// Google Gemini's REST surface uses a different request shape:
// system instructions go in `systemInstruction`, the conversation is
// `contents[]` with `role` + `parts`, and the API key is a query
// parameter rather than a header. `responseMimeType: application/json`
// gets us the strict JSON output the parser expects.
async function callGoogle(provider, system, user) {
  const base = String(provider.baseUrl || '').replace(/\/+$/, '');
  const model = encodeURIComponent(provider.model);
  const url = `${base}/v1beta/models/${model}:generateContent?key=${encodeURIComponent(provider.apiKey)}`;
  let resp;
  try {
    resp = await fetch(url, {
      method: 'POST',
      headers: { 'content-type': 'application/json' },
      body: JSON.stringify({
        systemInstruction: { role: 'system', parts: [{ text: system }] },
        contents: [{ role: 'user', parts: [{ text: user }] }],
        generationConfig: { responseMimeType: 'application/json' },
      }),
      signal: withTimeout(FETCH_TIMEOUT_MS),
    });
  } catch (err) {
    throw new Error(describeFetchError(err));
  }
  if (!resp.ok) {
    throw new Error(`google ${resp.status}: ${await resp.text().catch(() => '')}`);
  }
  const json = await resp.json();
  const parts = json?.candidates?.[0]?.content?.parts;
  if (Array.isArray(parts)) {
    return parts.map((p) => (p && typeof p.text === 'string' ? p.text : '')).join('');
  }
  return '';
}

const LOCAL_CLI_TIMEOUT_MS = 60_000;

function extractJsonEventText(kind, raw, agentName) {
  const events = [];
  const handler = createJsonEventStreamHandler(kind, (event) => events.push(event));
  handler.feed(raw);
  handler.flush();

  const errorEvent = events.find((event) => event?.type === 'error');
  if (errorEvent) {
    const message =
      typeof errorEvent.message === 'string' && errorEvent.message.trim()
        ? errorEvent.message.trim()
        : 'unknown error';
    throw new Error(`${agentName} CLI error: ${message}`);
  }

  return events
    .filter((event) => event?.type === 'text_delta' && typeof event.delta === 'string')
    .map((event) => event.delta)
    .join('')
    .trim();
}

// Run one memory-extraction prompt through a local agent CLI and resolve
// with the text it produced.
//
//   provider: reads `agentId` (which CLI to run) and `model`.
//   system:   system prompt text; placed first in the combined prompt.
//   user:     user payload text; placed last in the combined prompt.
//   options:  optional `localCliRunner`, `projectRoot` and `dataDir`.
//
// When `options.localCliRunner` is a function it is called instead of
// spawning anything and its return value is passed straight back.
//
// Otherwise the agent CLI is spawned with the prompt written to stdin. The
// promise resolves only when the child exits with code 0 AND the parsed
// stdout is non-empty. It rejects when the agent definition or launch path
// is missing, the agent id is not claude/codex/opencode, the child fails to
// spawn, the run exceeds LOCAL_CLI_TIMEOUT_MS, stdout parsing throws, or
// the child exits non-zero / by signal / with empty output.
async function callLocalCli(provider, system, user, options) {
  // Injected runner: delegate the whole call to it.
  if (typeof options?.localCliRunner === 'function') {
    return options.localCliRunner({
      agentId: provider.agentId,
      model: provider.model,
      system,
      user,
      projectRoot: options?.projectRoot ?? null,
      dataDir: options?.dataDir ?? null,
    });
  }

  const def = getAgentDef(provider.agentId);
  if (!def) {
    throw new Error(`Local CLI agent "${provider.agentId}" is not installed`);
  }

  // Best effort: any failure while reading the app config falls back to an
  // empty per-agent environment instead of aborting the extraction.
  let configuredAgentEnv = {};
  try {
    const appConfig = options?.dataDir ? await readAppConfig(options.dataDir) : {};
    configuredAgentEnv = agentCliEnvForAgent(appConfig.agentCliEnv, def.id);
  } catch {
    configuredAgentEnv = {};
  }

  const launch = resolveAgentLaunch(def, configuredAgentEnv);
  if (!launch?.launchPath) {
    throw new Error(`${def.name} CLI is not installed or not on PATH`);
  }

  // Run in the supplied project root when it is a non-blank string;
  // otherwise in the daemon's current working directory.
  const cwd =
    typeof options?.projectRoot === 'string' && options.projectRoot.trim()
      ? options.projectRoot
      : process.cwd();
  // Single prompt text: system prompt, a fixed background-extractor
  // instruction, then the user payload, separated by blank lines.
  const prompt = [
    system,
    '',
    'You are running as a background memory extractor. Do not use tools. Return strict JSON only.',
    '',
    user,
  ].join('\n');

  let args;
  let stdinText = prompt;
  // Default stdout handling: the trimmed stdout is the answer. The codex
  // and opencode branches below replace this with JSON event decoding.
  let parseStdout = (raw) => raw.trim();
  if (provider.agentId === 'claude') {
    args = ['-p', '--input-format', 'text', '--output-format', 'text'];
    // The literal model name 'default' means "do not pass --model".
    if (provider.model && provider.model !== 'default') {
      args.push('--model', provider.model);
    }
  } else if (provider.agentId === 'codex') {
    // Arguments come from the agent definition; the first argument is left
    // empty here and the prompt is delivered on stdin further down.
    args = def.buildArgs(
      '',
      [],
      [],
      { model: provider.model },
      { cwd },
    );
    parseStdout = (raw) => extractJsonEventText(def.eventParser || def.id, raw, def.name);
  } else if (provider.agentId === 'opencode') {
    // Deliver the prompt on stdin, matching the chat-run path
    // (def.promptViaStdin). `opencode run`'s `-f, --file` is a yargs array
    // option that greedily consumes every trailing non-flag token, so
    // `--file <prompt-file> "<message>"` made OpenCode treat the message
    // text as a second attachment and exit with "File not found". Bare
    // `opencode run --format json` reads the message from stdin instead.
    args = def.buildArgs(
      '',
      [],
      [],
      { model: provider.model },
      { cwd },
    );
    parseStdout = (raw) => extractJsonEventText(def.eventParser || def.id, raw, def.name);
  } else {
    throw new Error(`Local CLI memory extraction is not supported for ${provider.agentId}`);
  }

  // Child environment: the daemon's env overlaid with the agent
  // definition's own env, then passed through spawnEnvForAgent and
  // applyAgentLaunchEnv together with the configured per-agent env.
  const env = applyAgentLaunchEnv(
    spawnEnvForAgent(
      def.id,
      { ...process.env, ...(def.env || {}) },
      configuredAgentEnv,
      undefined,
      { resolvedBin: launch.selectedPath },
    ),
    launch,
  );
  const invocation = createCommandInvocation({
    command: launch.launchPath,
    args,
    env,
  });

  return await new Promise((resolve, reject) => {
    let stdout = '';
    let stderr = '';
    // `settled` makes finish() one-shot; `closed` records that the child's
    // 'close' event fired so the delayed SIGKILL below can be skipped.
    let settled = false;
    let closed = false;
    const child = spawn(invocation.command, invocation.args, {
      env,
      stdio: ['pipe', 'pipe', 'pipe'],
      cwd,
      shell: false,
      windowsVerbatimArguments: invocation.windowsVerbatimArguments,
    });

    // Settle the promise exactly once and cancel the timeout timer.
    const finish = (err, text) => {
      if (settled) return;
      settled = true;
      clearTimeout(timeout);
      if (err) reject(err);
      else resolve(text);
    };

    // Timeout: send SIGTERM, escalate to SIGKILL two seconds later if the
    // child still has not closed, and reject straight away. Both timers are
    // unref'd (where supported) so they do not keep the event loop alive.
    const timeout = setTimeout(() => {
      child.kill('SIGTERM');
      setTimeout(() => {
        if (!closed) child.kill('SIGKILL');
      }, 2_000).unref?.();
      finish(new Error(`${def.name} CLI timed out after ${Math.round(LOCAL_CLI_TIMEOUT_MS / 1000)}s`));
    }, LOCAL_CLI_TIMEOUT_MS);
    timeout.unref?.();

    child.stdout.setEncoding('utf8');
    child.stderr.setEncoding('utf8');
    // Only the most recent 64,000 characters of stdout and 8,000 of stderr
    // are retained; older output is dropped as new chunks arrive.
    child.stdout.on('data', (chunk) => {
      stdout = `${stdout}${chunk}`.slice(-64_000);
    });
    child.stderr.on('data', (chunk) => {
      stderr = `${stderr}${chunk}`.slice(-8_000);
    });
    child.once('error', (err) => finish(err));
    child.once('close', (code, signal) => {
      closed = true;
      if (code === 0) {
        let text = '';
        try {
          text = parseStdout(stdout);
        } catch (err) {
          finish(err);
          return;
        }
        if (text) {
          finish(null, text);
          return;
        }
        // Exit code 0 with empty parsed text falls through to the error
        // path below and is reported as a failure.
      }
      // Error detail prefers stderr, then stdout, capped at 1,000 chars.
      const detail = (stderr.trim() || stdout.trim() || 'no output').slice(0, 1000);
      const status = signal ? `signal ${signal}` : `exit ${code}`;
      finish(new Error(`${def.name} CLI ${status}: ${detail}`));
    });
    // EPIPE on stdin is ignored here (presumably the child stopped reading
    // early); the 'close' handler reports the outcome in that case.
    child.stdin.on('error', (err) => {
      if (err.code !== 'EPIPE') finish(err);
    });
    child.stdin.end(stdinText);
  });
}

// Lenient decoder for the model's reply. Models sometimes wrap the JSON in
// ``` / ```json fences or surround it with prose despite instructions, so:
//   1. a leading fence (and a trailing one, if present) is stripped;
//   2. the text is parsed as JSON;
//   3. on failure, the span from the first `{` to the last `}` is retried.
// Returns at most six items from the parsed object's `entries` array, kept
// only when `type` is one of user/feedback/project/reference and both
// `name` and `body` are non-blank strings. Any other input yields [].
function parseEntries(rawText) {
  if (typeof rawText !== 'string') return [];

  const trimmed = rawText.trim();
  const text = trimmed.startsWith('```')
    ? trimmed.replace(/^```(?:json)?\s*/i, '').replace(/```\s*$/i, '').trim()
    : trimmed;

  // JSON.parse wrapped so a failure is a value rather than an exception.
  const attempt = (source) => {
    try {
      return { ok: true, value: JSON.parse(source) };
    } catch {
      return { ok: false, value: undefined };
    }
  };

  let outcome = attempt(text);
  if (!outcome.ok) {
    // Last resort: retry on the outermost brace-delimited span.
    const braced = text.match(/\{[\s\S]*\}/);
    if (braced === null) return [];
    outcome = attempt(braced[0]);
    if (!outcome.ok) return [];
  }

  const parsed = outcome.value;
  const candidates = Array.isArray(parsed?.entries) ? parsed.entries : [];

  const allowedTypes = ['user', 'feedback', 'project', 'reference'];
  const isNonBlank = (value) => typeof value === 'string' && value.trim().length > 0;

  const accepted = [];
  for (const entry of candidates) {
    // Hard cap so a confused model can't flood the store.
    if (accepted.length === 6) break;
    if (!entry || typeof entry !== 'object') continue;
    if (!allowedTypes.includes(entry.type)) continue;
    if (isNonBlank(entry.name) && isNonBlank(entry.body)) accepted.push(entry);
  }
  return accepted;
}

// Report whether `candidate` duplicates an entry already in `existing`.
//
// Two entries are the same when their `type` matches and their names are
// equal after trimming and lower-casing. The candidate is also compared in
// the form it would be STORED in: drafts are written with the name trimmed
// and cut to 80 characters, so a proposal with a longer name must still
// match the truncated entry written on an earlier run. Matching on either
// form keeps every match the plain full-name comparison would produce.
//
//   existing:  iterable of stored entries (reads `type` and `name`);
//              null/undefined is treated as empty, and entries that are
//              null or lack a string name are compared with an empty name
//              instead of throwing.
//   candidate: proposed entry (reads `type` and `name`).
// Returns true when a matching stored entry exists, false otherwise.
function alreadyKnown(existing, candidate) {
  const keyFor = (type, name) => `${type}::${name.trim().toLowerCase()}`;

  const candidateName = String(candidate?.name ?? '').trim();
  const candidateKeys = new Set([
    keyFor(candidate?.type, candidateName),
    // Stored form: same 80-character cut the draft applies to the name.
    keyFor(candidate?.type, candidateName.slice(0, 80)),
  ]);

  for (const entry of existing ?? []) {
    if (!entry) continue;
    if (candidateKeys.has(keyFor(entry.type, String(entry.name ?? '')))) return true;
  }
  return false;
}

// Shape a parsed candidate into the draft handed to the memory store.
// `type` passes through untouched; `name`, `description` and `body` are
// stringified and trimmed. `name` is cut to 80 characters, `description`
// (empty when missing or falsy) to 200; `body` keeps its full length.
function toMemoryDraft(candidate) {
  const tidy = (value) => String(value).trim();

  const type = candidate.type;
  const name = tidy(candidate.name).slice(0, 80);
  const description = tidy(candidate.description || '').slice(0, 200);
  const body = tidy(candidate.body);

  return { type, name, description, body };
}

// Shared front half of suggestWithLLM / extractWithLLM: decide whether an
// extraction should run, pick a provider, call the model and parse its
// reply into candidate entries.
//
//   dataDir: memory store / config location.
//   input:   reads `userMessage` and `assistantMessage`.
//   options: optional `projectRoot`, `chatAgentId`, `chatModel`,
//            `chatProvider`, `kind` (defaults to 'llm'), `systemPrompt`,
//            `localCliRunner` and `candidateFilter`.
//
// Resolves with `{ status, attemptId, proposed, existingEntries }` where
// status is 'skipped' (nothing was attempted; attemptId is null), 'failed'
// (the model call or parsing threw) or 'ok'.
async function collectProposedEntries(dataDir, input, options) {
  const projectRoot = options?.projectRoot ?? null;
  const extractionKind = options?.kind ?? 'llm';
  const userMessage = String(input?.userMessage || '').trim();

  // A caller-supplied system prompt wins only when it is a non-blank string.
  const customPrompt =
    typeof options?.systemPrompt === 'string' ? options.systemPrompt.trim() : '';
  const systemPrompt = customPrompt || SYSTEM_PROMPT;

  // Build the 'skipped' result; a reason, when given, is also recorded.
  const skipped = (reason) => {
    if (reason) recordSkip({ userMessage, reason, kind: extractionKind });
    return { status: 'skipped', attemptId: null, proposed: [], existingEntries: [] };
  };

  const config = await readMemoryConfig(dataDir);
  if (!config.enabled) return skipped('memory-disabled');
  // Chat-driven extraction can be switched off on its own; connector runs
  // are exempt from that switch. This skip is not recorded.
  if (extractionKind !== 'connector' && !config.chatExtractionEnabled) return skipped(null);
  if (userMessage.length === 0) return skipped('empty-message');

  // `chatProvider` is the BYOK chat-config snapshot, present only for
  // API-mode calls forwarded through `/api/memory/extract`. The daemon does
  // not persist BYOK credentials, so this per-call value is the only way
  // pickProvider() can run "Same as chat" extraction against the user's
  // actual chat provider.
  const provider = await pickProvider(
    projectRoot,
    dataDir,
    options?.chatAgentId ?? null,
    options?.chatProvider ?? null,
    options?.chatModel ?? null,
  );
  if (!provider) return skipped('no-provider');

  // From here a provider is committed and a model call is about to happen,
  // so tracking moves from one-shot skip records to a running attempt that
  // is updated through its phases.
  const attemptId = startExtraction({ userMessage, kind: extractionKind });
  markProvider(attemptId, {
    kind: provider.kind,
    model: provider.model,
    credentialSource: provider.credentialSource,
  });

  let currentMemory = '';
  let existingEntries = [];
  try {
    const loaded = await Promise.all([
      composeMemoryBody(dataDir),
      listMemoryEntries(dataDir),
    ]);
    currentMemory = loaded[0];
    existingEntries = loaded[1];
  } catch {
    // Fresh store: carry on with empty context.
  }

  const failed = () => ({ status: 'failed', attemptId, proposed: [], existingEntries });

  const userPayload = renderUserPayload({
    userMessage,
    assistantMessage: input?.assistantMessage,
    currentMemory,
  });

  // Route to the transport / wire format that matches the provider.
  const invokeProvider = () => {
    if (provider.transport === 'chat-cli') {
      return callLocalCli(provider, systemPrompt, userPayload, {
        dataDir,
        projectRoot,
        localCliRunner: options?.localCliRunner,
      });
    }
    switch (provider.kind) {
      case 'anthropic':
        return callAnthropic(provider, systemPrompt, userPayload);
      case 'azure':
        return callAzure(provider, systemPrompt, userPayload);
      case 'google':
        return callGoogle(provider, systemPrompt, userPayload);
      default:
        // openai or ollama: both speak the OpenAI chat-completions wire
        // shape, so callOpenAI serves them with only a different base URL.
        return callOpenAI(provider, systemPrompt, userPayload);
    }
  };

  let raw = '';
  try {
    raw = await invokeProvider();
  } catch (err) {
    // Network errors arrive pre-formatted by describeFetchError(), and
    // HTTP-level failures (`anthropic 401: …`) are already user-facing.
    console.warn(`[memory-llm] ${provider.kind} call failed`, err?.message ?? err);
    markFailed(attemptId, err);
    return failed();
  }

  let proposed;
  try {
    proposed = parseEntries(raw);
    if (typeof options?.candidateFilter === 'function') {
      // A filter that throws rejects that one candidate, not the batch.
      proposed = proposed.filter((candidate) => {
        try {
          return options.candidateFilter(candidate);
        } catch {
          return false;
        }
      });
    }
  } catch (err) {
    markFailed(attemptId, err);
    return failed();
  }

  markProposed(attemptId, proposed.length);
  return { status: 'ok', attemptId, proposed, existingEntries };
}

// Ask the model for memory candidates and hand them back as drafts WITHOUT
// writing anything to the store.
//
// Takes the same (dataDir, input, options) as collectProposedEntries.
// Resolves with [] when the run was skipped or failed; otherwise with one
// draft per proposed candidate that is not already known. The attempt is
// marked successful with zero written entries.
export async function suggestWithLLM(dataDir, input, options) {
  const { status, attemptId, proposed, existingEntries } = await collectProposedEntries(
    dataDir,
    input,
    options,
  );
  if (status !== 'ok') return [];

  const drafts = [];
  for (const candidate of proposed) {
    if (alreadyKnown(existingEntries, candidate)) continue;
    drafts.push(toMemoryDraft(candidate));
  }

  // Suggestions are never persisted here, so the written tally is zero.
  markSuccess(attemptId, { writtenCount: 0, writtenIds: [] });

  return drafts;
}

// Ask the model for memory candidates and persist the new ones.
//
// Takes the same (dataDir, input, options) as collectProposedEntries, plus
// `options.source`, which labels the write and the change event (defaults
// to 'llm').
// Resolves with a summary ({ id, name, description, type, updatedAt }) for
// each entry written, or [] when the run was skipped, failed, or produced
// nothing new. A failed write is logged and does not stop the batch.
export async function extractWithLLM(dataDir, input, options) {
  const changeSource = options?.source ?? 'llm';

  const outcome = await collectProposedEntries(dataDir, input, options);
  if (outcome.status !== 'ok') return [];
  const { attemptId, proposed, existingEntries } = outcome;

  // Writes run one at a time, in proposal order. An empty proposal list
  // simply skips the loop and is reported as a success with zero writes.
  const written = [];
  for (const candidate of proposed) {
    if (alreadyKnown(existingEntries, candidate)) continue;
    try {
      // Per-entry events are silenced; one batched 'extract' event is sent
      // below so the toast reads "Memory updated (3 · LLM)" a single time.
      const saved = await upsertMemoryEntry(dataDir, toMemoryDraft(candidate), {
        silent: true,
        source: changeSource,
      });
      const { id, name, description, type, updatedAt } = saved;
      written.push({ id, name, description, type, updatedAt });
    } catch (err) {
      console.warn('[memory-llm] write failed', err?.message ?? err);
    }
  }

  const writtenCount = written.length;
  if (writtenCount > 0) {
    memoryEvents.emit('change', {
      kind: 'extract',
      count: writtenCount,
      source: changeSource,
      at: Date.now(),
    });
  }

  markSuccess(attemptId, {
    writtenCount,
    writtenIds: written.map((item) => item.id),
  });

  return written;
}