/**
 * Prompt composer. The base is the OD-adapted "expert designer" system
 * prompt (see ./official-system.ts) — a full identity, workflow, and
 * content-philosophy charter. Stacked on top:
 *
 *   1. The discovery + planning + huashu-philosophy layer (./discovery.ts)
 *      — interactive question-form syntax, direction-picker fork,
 *      brand-spec extraction, TodoWrite reinforcement, 5-dim critique,
 *      and the embedded `directions.ts` library.
 *   2. The active design system's DESIGN.md (if any) — palette, typography,
 *      spacing rules treated as authoritative tokens.
 *   3. The active skill's SKILL.md (if any) — workflow specific to the
 *      kind of artifact being built. When the skill ships a seed
 *      (`assets/template.html`) and references (`references/layouts.md`,
 *      `references/checklist.md`), we inject a hard pre-flight rule above
 *      the skill body so the agent reads them BEFORE writing any code.
 *   4. For decks (skillMode === 'deck' OR metadata.kind === 'deck'), the
 *      deck framework directive (./deck-framework.ts) is pinned LAST so it
 *      overrides any softer slide-handling wording earlier in the stack —
 *      this is the load-bearing nav / counter / scroll JS / print
 *      stylesheet contract that PDF stitching depends on. We also fire on
 *      the metadata path so deck-kind projects without a bound skill
 *      (skill_id null) still get a framework, instead of having the agent
 *      re-author scaling / nav / print logic from scratch each turn. When
 *      the active skill ships its own seed (skill body references
 *      `assets/template.html`), we defer to that seed and skip the generic
 *      skeleton — the skill's framework wins to avoid double-injection.
 *
 * The composed string is what the daemon sees as `systemPrompt` and what
 * the Anthropic path sends as `system`.
 */
import { OFFICIAL_DESIGNER_PROMPT } from './official-system.js';
import { DISCOVERY_AND_PHILOSOPHY } from './discovery.js';
import { DECK_FRAMEWORK_DIRECTIVE } from './deck-framework.js';
import { renderMediaGenerationContract } from './media-contract.js';
import { IMAGE_MODELS } from '../media-models.js';
import { renderPanelPrompt } from './panel.js';
import { defaultCritiqueConfig, type CritiqueConfig } from '@open-design/contracts/critique';
import type { MediaExecutionPolicy, MediaSurface } from '@open-design/contracts';

const ELEVENLABS_VOICE_PROMPT_OPTION_LIMIT = 100;

/** Shared opening clause for every voice-list failure message placed in the prompt. */
const ELEVENLABS_VOICE_OPTIONS_PROMPT_PREFIX = 'ElevenLabs voice list could not be loaded';

/**
 * Fixed labels for the HTTP status codes we are willing to name in a prompt.
 * Looking the label up here (instead of echoing whatever text followed the
 * code in the raw error) keeps arbitrary upstream text out of the prompt.
 */
const PROMPT_SAFE_HTTP_STATUS_LABELS: Record<string, string> = {
  '400': 'Bad Request',
  '401': 'Unauthorized',
  '403': 'Forbidden',
  '404': 'Not Found',
  '429': 'Too Many Requests',
  '500': 'Internal Server Error',
  '502': 'Bad Gateway',
  '503': 'Service Unavailable',
  '504': 'Gateway Timeout',
};

/**
 * Builds the "UI locale override" prompt section.
 *
 * @param locale - UI locale tag; surrounding whitespace is ignored.
 * @returns An empty string when the locale is missing, blank, or `en`
 *   (case-insensitive); otherwise the section text joined with `\n`.
 *   For `zh-CN` the section also carries sample quick-brief copy.
 */
function renderUiLocalePrompt(locale: string | undefined): string {
  const normalized = locale?.trim();
  if (!normalized || normalized.toLowerCase() === 'en') return '';

  // Only the two Chinese variants get a friendly name; any other tag is
  // shown as-is.
  const languageName =
    normalized === 'zh-CN'
      ? 'Simplified Chinese'
      : normalized === 'zh-TW'
        ? 'Traditional Chinese'
        : normalized;

  // NOTE(review): the empty `` pair after "especially" looks like an element
  // name was stripped from this literal — confirm against the upstream file.
  const lines = [
    '# UI locale override',
    '',
    `The Open Design UI locale for this run is \`${normalized}\` (${languageName}). All user-visible chat prose and generated UI controls must follow this locale, especially \`\` titles, descriptions, labels, placeholders, helper text, and option labels. Keep machine-readable ids and object option \`value\` fields exact and unlocalized.`,
    'Exception: for the default task-type form, keep the `taskType` option labels as the canonical routing choices: `Prototype`, `Live artifact`, `Slide deck`, `Image`, `Video`, `HyperFrames`, `Audio`, `Other`. Do not translate, reorder, or rewrite those option labels.',
  ];

  if (normalized === 'zh-CN') {
    lines.push(
      '',
      'For the default quick brief in Simplified Chinese, use copy like:',
      '- title: `快速简报 — 30 秒`',
      '- description: `开始生成前我会先确认这些信息。不适用的可以跳过,我会补上默认值。`',
      '- output label/options: `我们要做什么?` / `幻灯片 / 路演稿`, `单页网页原型 / 落地页`, `多屏应用原型`, `数据看板 / 工具界面`, `编辑式 / 营销页面`, `其他 — 我来描述`',
      '- platform label/options: `目标平台` / `响应式网页`, `桌面网页`, `iOS 应用`, `Android 应用`, `平板应用`, `桌面应用`, `固定画布 (1920×1080)`',
      '- audience label/placeholder: `目标用户` / `例如:早期投资人、开发者工具采购者、内部高管评审`',
      '- tone label/options: `视觉调性` / `编辑 / 杂志感`, `现代极简`, `活泼 / 插画感`, `科技 / 工具型`, `奢华 / 精致`, `粗野 / 实验性`, `人性化 / 亲切`',
      '- brand label/options: `品牌背景` / `帮我选一个方向`, `我有品牌规范 — 稍后分享`, `参考网站 / 截图 — 稍后附上`',
      '- scale label/placeholder: `大概需要多少内容?` / `例如:8 页幻灯片、1 个落地页 + 3 个子页面、4 个移动端界面`',
      '- constraints label/placeholder: `还有什么需要知道的吗?` / `真实文案、必须使用的字体、需要避免的内容、截止时间…`',
    );
  }

  return lines.join('\n');
}

/**
 * Flattens text to a single line: line breaks and any run of whitespace
 * become one space, and leading/trailing whitespace is removed.
 */
function normalizePromptText(value: string): string {
  return value
    .replace(/[\r\n]+/g, ' ')
    .replace(/\s+/g, ' ')
    .trim();
}

/**
 * Turns a raw voice-list lookup error into a short, fixed-vocabulary sentence
 * suitable for embedding in the prompt.
 *
 * @param error - Raw error text, if any.
 * @returns `undefined` when there is no error text after normalization;
 *   otherwise one of three messages: missing API key, failure with a
 *   three-digit status code (plus a label from
 *   `PROMPT_SAFE_HTTP_STATUS_LABELS` when the code is listed there), or a
 *   generic retry hint.
 */
function formatElevenLabsVoiceOptionsErrorForPrompt(
  error: string | undefined,
): string | undefined {
  const trimmed = normalizePromptText(error ?? '');
  if (!trimmed) return undefined;

  if (/no ElevenLabs API key/i.test(trimmed)) {
    return `${ELEVENLABS_VOICE_OPTIONS_PROMPT_PREFIX} because the ElevenLabs API key is missing. Tell the user to configure it in Settings or paste a voice id manually.`;
  }

  // Two accepted shapes: a parenthesized "(404 Not Found)" or a bare
  // word-bounded "404 Not Found". Groups 1 and 3 hold the code; the text
  // groups (2 and 4) are captured but deliberately never echoed.
  const statusMatch = trimmed.match(
    /(?:\((\d{3})(?:\s+([^)]+))?\)|\b(\d{3})(?:\s+([A-Za-z][A-Za-z -]{0,40}))?\b)/,
  );
  if (statusMatch) {
    const statusCode = statusMatch[1] ?? statusMatch[3];
    const statusText = statusCode ? PROMPT_SAFE_HTTP_STATUS_LABELS[statusCode] ?? '' : '';
    const suffix = statusText ? ` ${statusText}` : '';
    return `${ELEVENLABS_VOICE_OPTIONS_PROMPT_PREFIX} (${statusCode}${suffix}). Tell the user to retry the lookup or paste a voice id manually.`;
  }

  return `${ELEVENLABS_VOICE_OPTIONS_PROMPT_PREFIX}. Tell the user to retry the lookup or paste a voice id manually.`;
}

/** Project-level settings carried alongside a compose request; every field is optional. */
type ProjectMetadata = {
  kind?: string;
  intent?: string | null;
  fidelity?: string | null;
  speakerNotes?: boolean | null;
  slideCount?: string | null;
  animations?: boolean | null;
  includeLandingPage?: boolean | null;
  includeOsWidgets?: boolean | null;
  templateId?: string | null;
  templateLabel?: string | null;
  platform?: string | null;
  platformTargets?: string[] | null;
  inspirationDesignSystemIds?: string[];
  skipDiscoveryBrief?: boolean | null;
  imageModel?: string | null;
  imageAspect?: string | null;
  imageStyle?: string | null;
  videoModel?: string | null;
  videoLength?: number | null;
  videoAspect?: string | null;
  audioKind?: string | null;
  audioModel?: string | null;
  audioDuration?: number | null;
  voice?: string | null;
  promptTemplate?: {
    id?: string | null;
    surface?: 'image' | 'video' | null;
    title?: string | null;
    prompt?: string | null;
    summary?: string | null;
    category?: string | null;
    tags?: string[] | null;
    model?: string | null;
    aspect?: string | null;
    source?: {
      repo?: string | null;
      license?: string | null;
      author?: string | null;
      url?: string | null;
    } | null;
  } | null;
  contextPlugins?: Array<{
    id?: string | null;
    title?: string | null;
    description?: string | null;
  }> | null;
  contextMcpServers?: Array<{
    id?: string | null;
    label?: string | null;
    transport?: string | null;
    url?: string | null;
    command?: string | null;
  }> | null;
  contextConnectors?: Array<{
    id?: string | null;
    name?: string | null;
    provider?: string | null;
    category?: string | null;
    status?: string | null;
    accountLabel?: string | null;
  }> | null;
};

type ProjectTemplate = {
  name: string;
  description?: string | null;
  files: Array<{ name: string; content: string }>;
};

type AudioVoiceOption = {
  name: string;
  voiceId: string;
  category?: string | null;
  // NOTE(review): value type assumed to be string — confirm against the
  // voice payload this is populated from.
  labels?: Record<string, string> | null;
};

type ExclusiveSurfaceMode = 'deck' | 'image' | 'video' | 'audio';

const EXCLUSIVE_SURFACE_MODES = new Set(['deck', 'image', 'video', 'audio']);

/**
 * Picks the single exclusive surface (`deck`, `image`, `video`, or `audio`)
 * that applies to a run, or `null` when none does.
 *
 * Precedence:
 *   1. `metadata.kind`, when it names an exclusive surface.
 *   2. `skillMode`, when it names an exclusive surface.
 *   3. The composed skill modes, but only when exactly one of them is an
 *      exclusive surface — two or more are treated as ambiguous.
 *
 * The composed set is `skillModes` (falsy entries dropped) when that is an
 * array; otherwise it is just `skillMode`, if set.
 */
export function resolveExclusiveSurface(args: {
  metadata?: ProjectMetadata | undefined;
  skillMode?: ComposeInput['skillMode'] | undefined;
  skillModes?: ComposeInput['skillModes'] | undefined;
}): ExclusiveSurfaceMode | null {
  const activeSkillModes = new Set(
    Array.isArray(args.skillModes)
      ? args.skillModes.filter(Boolean)
      : args.skillMode
        ? [args.skillMode]
        : [],
  );
  const metadataSurface = EXCLUSIVE_SURFACE_MODES.has(args.metadata?.kind as ExclusiveSurfaceMode)
    ? args.metadata?.kind as ExclusiveSurfaceMode
    : null;
  const primarySkillSurface = EXCLUSIVE_SURFACE_MODES.has(args.skillMode as ExclusiveSurfaceMode)
    ? args.skillMode as ExclusiveSurfaceMode
    : null;
  const composedSurfaceModes = Array.from(activeSkillModes).filter(
    (mode): mode is ExclusiveSurfaceMode =>
      EXCLUSIVE_SURFACE_MODES.has(mode as ExclusiveSurfaceMode),
  );
  return metadataSurface
    ?? primarySkillSurface
    ?? (composedSurfaceModes.length === 1 ? composedSurfaceModes[0] ?? null : null);
}

export const BASE_SYSTEM_PROMPT = OFFICIAL_DESIGNER_PROMPT;

// NOTE(review): the literal below is reproduced exactly as it appears in this
// view. The heading and body sit on one line and the `` pair after "do NOT
// emit" is empty — both look like extraction damage; confirm against the
// upstream file before relying on the exact text.
export const SKIP_DISCOVERY_BRIEF_OVERRIDE = `# Automated project mode — skip discovery form This project was created through the daemon API with \`skipDiscoveryBrief: true\`. Override the discovery rules below: do NOT emit \`\`, do NOT show "Quick brief — 30 seconds", and do NOT ask a first-turn clarification form. Treat the user's first message and project metadata as the brief, then proceed directly to planning/building under the normal artifact workflow. Ask at most one concise follow-up only if a required detail is impossible to infer safely.`;

// Injected into non-media projects so the agent knows how to dispatch
// media generation if the user asks for it mid-session (e.g. "generate an
// image with fal"). Without this, agents in prototype/deck projects try to
// call provider REST APIs directly and ask the user for keys that the daemon
// already holds in .od/media-config.json.
const MEDIA_DISPATCH_HINT = ` --- ## Media generation (if asked) If the user asks you to generate an image, video, or audio file — regardless of which provider or model they mention (fal, Replicate, OpenAI, etc.) — use the daemon dispatcher via your **Bash tool**. Do NOT call provider REST APIs directly. The daemon injects these env vars into your shell (**POSIX bash — not PowerShell**): - \`OD_NODE_BIN\` — absolute path to the Node runtime - \`OD_BIN\` — absolute path to the OD CLI script - \`OD_PROJECT_ID\` — the active project id **Always use the generate→wait loop below.** \`media generate\` always exits 0 — either with \`{"file":{...}}\` if done within ~25s, or with \`{"taskId":"..."}\` as a handoff for slow models (flux-pro-ultra ~60–180s, veo-3-fal longer). Whenever the output contains a \`taskId\`, keep polling with \`media wait\` until exit 0 (done) or exit 5 (failed). Use **POSIX \`$VAR\` syntax** — do NOT translate to PowerShell (\`$env:VAR\`, \`&\` operator). Uses \`python3\` for JSON parsing (do NOT use \`jq\`): \`\`\`bash # POSIX bash — do NOT convert to PowerShell out=\$("$OD_NODE_BIN" "$OD_BIN" media generate \\ --project "$OD_PROJECT_ID" \\ --surface image \\ --model flux-pro-ultra \\ --prompt "..." \\ --aspect 16:9) ec=\$? if [ "\$ec" -ne 0 ]; then echo "\$out" >&2; exit "\$ec"; fi last=\$(printf '%s\\n' "\$out" | tail -1) task_id=\$(printf '%s\\n' "\$last" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('taskId',''))" 2>/dev/null) since=\$(printf '%s\\n' "\$last" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('nextSince',0))" 2>/dev/null) since="\${since:-0}" while [ -n "\$task_id" ]; do out=\$("$OD_NODE_BIN" "$OD_BIN" media wait "\$task_id" --since "\$since") ec=\$? 
last=\$(printf '%s\\n' "\$out" | tail -1) since=\$(printf '%s\\n' "\$last" | python3 -c "import sys,json; d=json.load(sys.stdin); print(d.get('nextSince',\$since))" 2>/dev/null) since="\${since:-0}" if [ "\$ec" -eq 0 ]; then task_id="" elif [ "\$ec" -ne 2 ]; then echo "\$out" >&2; exit "\$ec" fi done printf '%s\\n' "\$last" \`\`\` **Never ask the user for an API key.** The daemon reads provider credentials from its config; keys are never passed through the shell. If the provider returns an auth error, tell the user to open Settings → AI Providers and confirm the key is configured there. For the best fal image model use \`--model flux-pro-ultra\`. For video use \`--model veo-3-fal\` or \`--model wan-2.1-t2v\`. Always pass \`--surface\` explicitly (\`image\`, \`video\`, or \`audio\`). Any \`fal-ai/*\` path (e.g. \`fal-ai/flux/schnell\`, \`fal-ai/wan-i2v\`) is also a valid \`--model\` value for image/video — pass it through as-is without substitution.`; const ACTIVE_DESIGN_SYSTEM_VISUAL_DIRECTION_OVERRIDE = ` --- ## Active design system visual direction Active design system exception: the active design system is the visual direction for this project. Use its DESIGN.md palette, typography, spacing, component rules, and theme tokens as the source of truth for color and mood. - Do not ask the user to pick a separate theme color, visual direction, palette, typography mood, or direction card. - Do not emit a direction question-form, a \`direction-cards\` picker, or any visual-direction card while an active design system is present. - If an earlier discovery answer asks to "Pick a direction for me", treat that as already satisfied by the active design system and continue with the plan. - When a downstream framework mentions "active direction" or "theme tokens", bind those fields from the active design system instead of the built-in direction library. `; const DEFAULT_DESIGN_SYSTEM_USAGE = `Read DESIGN.md for visual principles, paste tokens.css verbatim into the first