mirror of
https://github.com/ZSeven-W/openpencil.git
synced 2026-05-31 19:04:29 +07:00
refactor(ai): streamline orchestration and remove complexity classifier
- Remove the complexity classifier; every prompt now goes through the orchestrator, so it is no longer needed.
- Update the design generator to always route through the orchestrator, simplifying the logic.
- Log orchestration failures explicitly so the fallback to direct generation is clearly visible.
- Introduce a new, leaner sub-agent prompt that makes the expected output format clearer during design generation.
This commit is contained in:
parent
c079c85061
commit
92dde8ca88
5 changed files with 71 additions and 133 deletions
|
|
@ -76,7 +76,7 @@ async function streamViaAnthropicSDK(apiKey: string, body: ChatBody, model?: str
|
|||
const data = JSON.stringify({ type: 'text', content: ev.delta.text })
|
||||
controller.enqueue(encoder.encode(`data: ${data}\n\n`))
|
||||
} else if (ev.delta.type === 'thinking_delta') {
|
||||
clearInterval(pingTimer)
|
||||
// Keep pings alive during thinking — only stop on text output
|
||||
const data = JSON.stringify({ type: 'thinking', content: ev.delta.thinking })
|
||||
controller.enqueue(encoder.encode(`data: ${data}\n\n`))
|
||||
}
|
||||
|
|
@ -150,7 +150,7 @@ function streamViaAgentSDK(body: ChatBody, model?: string) {
|
|||
const data = JSON.stringify({ type: 'text', content: ev.delta.text })
|
||||
controller.enqueue(encoder.encode(`data: ${data}\n\n`))
|
||||
} else if (ev.delta.type === 'thinking_delta') {
|
||||
clearInterval(pingTimer)
|
||||
// Keep pings alive during thinking — only stop on text output
|
||||
const data = JSON.stringify({ type: 'thinking', content: (ev.delta as any).thinking })
|
||||
controller.enqueue(encoder.encode(`data: ${data}\n\n`))
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,90 +0,0 @@
|
|||
/**
 * Heuristic complexity classifier for design prompts.
 * Determines whether a prompt should be routed through
 * the orchestrator (parallel sub-agents) or handled directly.
 *
 * Pure string matching — no API calls. False positives are cheap
 * (one extra planning call), false negatives are safe
 * (fall back to existing single-call path).
 */

// Structural keywords indicating distinct spatial sections
const SECTION_KEYWORDS = [
  'sidebar', 'header', 'footer', 'nav', 'navigation',
  'hero', 'stats', 'statistics', 'chart', 'table',
  'form', 'modal', 'dialog', 'card section', 'cards section',
  'profile', 'settings', 'feed', 'content area',
  'grid', 'gallery', 'carousel', 'tabs',
  'search bar', 'filter', 'toolbar', 'breadcrumb',
  'notification', 'calendar', 'timeline',
  // Chinese equivalents
  '侧边栏', '头部', '底部', '导航', '英雄区',
  '统计', '图表', '表格', '表单', '弹窗',
  '卡片区', '个人资料', '设置', '时间线',
]

// Full-page keywords that almost always produce complex output
const COMPLEX_PAGE_KEYWORDS = [
  'dashboard', 'landing page', 'homepage', 'e-commerce',
  'admin panel', 'social media', 'email client',
  'settings page', 'analytics', 'crm', 'erp',
  'portfolio', 'blog', 'marketplace', 'checkout',
  // Chinese
  '仪表盘', '着陆页', '首页', '电商', '管理后台',
  '社交', '邮件', '分析页',
]

// Simple single-component keywords (suppress orchestration)
const SIMPLE_KEYWORDS = [
  'button', 'input', 'avatar', 'badge', 'tooltip',
  'toggle', 'switch', 'checkbox', 'radio',
  'tag', 'chip', 'divider', 'spinner', 'icon',
  '按钮', '输入框', '头像', '标签', '开关',
]

/** Minimum sections required to trigger orchestrator */
const COMPLEXITY_THRESHOLD = 3

/** Result of {@link assessComplexity}. */
export interface ComplexityAssessment {
  /** True when the prompt should go through the orchestrator. */
  isComplex: boolean
  /** Rough number of distinct sections the prompt appears to describe. */
  estimatedSections: number
  /** Human-readable explanation of the verdict (for logging/debugging). */
  reason: string
}

/**
 * Remove keywords that are only matched because they are a substring of a
 * longer matched keyword (e.g. "nav" inside "navigation"), so one word in
 * the prompt is never counted as two sections.
 */
function dropShadowedKeywords(matched: readonly string[]): string[] {
  return matched.filter(
    (kw) => !matched.some((other) => other !== kw && other.includes(kw)),
  )
}

/**
 * Classify a design prompt as simple or complex using keyword heuristics.
 *
 * @param prompt - Raw user prompt (any casing; English or Chinese keywords).
 * @returns The verdict, the estimated section count and a short reason.
 */
export function assessComplexity(prompt: string): ComplexityAssessment {
  const lower = prompt.toLowerCase()

  // Count distinct section keywords mentioned (overlapping keywords count once)
  const mentionedSections = dropShadowedKeywords(
    SECTION_KEYWORDS.filter((kw) => lower.includes(kw)),
  )
  const fullPageKeyword = COMPLEX_PAGE_KEYWORDS.find((kw) => lower.includes(kw))
  const isFullPage = fullPageKeyword !== undefined

  // A simple-component keyword only suppresses orchestration when the prompt
  // is not about a whole page: "landing page with a sign-up button" is still
  // a full page, not a single button.
  const isSimpleComponent =
    !isFullPage &&
    mentionedSections.length <= 1 &&
    SIMPLE_KEYWORDS.some((kw) => lower.includes(kw))

  if (isSimpleComponent) {
    return {
      isComplex: false,
      estimatedSections: 1,
      reason: 'Simple single component request',
    }
  }

  // Count conjunctions as section indicators: the word "and", plus the
  // ASCII comma, the full-width comma (U+FF0C) and the ideographic comma (U+3001).
  const conjunctions =
    (lower.match(/\band\b/g) ?? []).length +
    (lower.match(/[,\uFF0C\u3001]/g) ?? []).length

  const estimatedSections = isFullPage
    ? Math.max(mentionedSections.length, 4)
    : mentionedSections.length + Math.min(Math.floor(conjunctions / 2), 2)

  // The second clause is currently implied by the first (full pages are
  // estimated at >= 4 sections); it is kept so that raising the threshold
  // does not silently stop full pages from being orchestrated.
  const isComplex =
    estimatedSections >= COMPLEXITY_THRESHOLD ||
    (isFullPage && estimatedSections >= 2)

  let reason: string
  if (!isComplex) {
    reason = `Below threshold: ${estimatedSections} sections`
  } else if (mentionedSections.length === 0 && isFullPage) {
    // Avoid the meaningless "Detected 0 sections ()" message.
    reason = `Full-page keyword "${fullPageKeyword}" detected`
  } else {
    reason = `Detected ${mentionedSections.length} sections (${mentionedSections.join(', ')})`
  }

  return { isComplex, estimatedSections, reason }
}
|
||||
|
|
@ -11,7 +11,6 @@ import {
|
|||
startNewAnimationBatch,
|
||||
resetAnimationState,
|
||||
} from './design-animation'
|
||||
import { assessComplexity } from './complexity-classifier'
|
||||
import { executeOrchestration } from './orchestrator'
|
||||
|
||||
const DESIGN_STREAM_TIMEOUTS = {
|
||||
|
|
@ -344,15 +343,11 @@ export async function generateDesign(
|
|||
animated?: boolean
|
||||
}
|
||||
): Promise<{ nodes: PenNode[]; rawResponse: string }> {
|
||||
// Route complex prompts through orchestrator for parallel generation
|
||||
const { isComplex } = assessComplexity(request.prompt)
|
||||
if (isComplex) {
|
||||
try {
|
||||
return await executeOrchestration(request, callbacks)
|
||||
} catch (err) {
|
||||
// Orchestrator failed — silently fall back to single-call generation
|
||||
console.warn('Orchestrator failed, falling back to direct generation:', err)
|
||||
}
|
||||
// Always route through orchestrator (fallback to direct generation on failure)
|
||||
try {
|
||||
return await executeOrchestration(request, callbacks)
|
||||
} catch (err) {
|
||||
console.error('[Orchestrator] Failed, falling back to direct generation:', err)
|
||||
}
|
||||
|
||||
const userMessage = buildContextMessage(request)
|
||||
|
|
|
|||
|
|
@ -18,3 +18,28 @@ export const ORCHESTRATOR_TIMEOUTS = {
|
|||
hardTimeoutMs: 30_000,
|
||||
noTextTimeoutMs: 20_000,
|
||||
}
|
||||
|
||||
// Markdown code-fence marker, held in a constant so the prompt text below
// never has to contain raw triple backticks.
const BLOCK = '`'.repeat(3)

/**
 * System prompt for section sub-agents — a lean variant of DESIGN_GENERATOR_PROMPT.
 * Carries only the essential node schema and the flat JSONL output format,
 * with a single two-node example to make the format unambiguous.
 */
export const SUB_AGENT_PROMPT = [
  `PenNode flat JSONL engine. Output a ${BLOCK}json block with ONE node per line.`,
  '',
  'TYPES: frame (width,height,layout,gap,padding,justifyContent,alignItems,cornerRadius,fill,stroke,effects,children), rectangle, ellipse, text (content,fontFamily,fontSize,fontWeight,fill,width,height,textAlign), path (d,width,height,fill,stroke), image (src,width,height)',
  'SHARED: id, type, name, x, y, opacity',
  'Fill=[{"type":"solid","color":"#hex"}] Stroke={"thickness":N,"fill":[...]}',
  'cornerRadius=number. fill=array. No x/y on children in layout frames. Use "fill_container" to stretch.',
  '',
  'FORMAT: Each line has "_parent" (null=root, else parent-id). Parent before children.',
  `${BLOCK}json`,
  '{"_parent":null,"id":"root","type":"frame","name":"Section","width":"fill_container","height":300,"layout":"vertical","gap":16,"padding":24}',
  '{"_parent":"root","id":"title","type":"text","name":"Title","content":"Hello","fontSize":24,"fontWeight":700,"width":"fill_container","height":32,"fill":[{"type":"solid","color":"#F4F4F5"}]}',
  `${BLOCK}`,
  '',
  'STYLE: Dark. Bg #16171B, Card #1E2026, Text #F4F4F5, Secondary #52525B, Accent #22C55E, Border #2A2B30. Headlines "Space Grotesk" 700, Body "Inter". cornerRadius 4. No shadows, 1px borders.',
  'Icons: path+SVG d 16-24px. Images: src "https://picsum.photos/{w}/{h}".',
  '',
  `Start with ${BLOCK}json immediately. No preamble, no <step> tags.`,
].join('\n')
|
||||
|
|
|
|||
|
|
@ -20,8 +20,7 @@ import type {
|
|||
SubAgentResult,
|
||||
} from './ai-types'
|
||||
import { streamChat } from './ai-service'
|
||||
import { DESIGN_GENERATOR_PROMPT } from './ai-prompts'
|
||||
import { ORCHESTRATOR_PROMPT, ORCHESTRATOR_TIMEOUTS } from './orchestrator-prompts'
|
||||
import { ORCHESTRATOR_PROMPT, ORCHESTRATOR_TIMEOUTS, SUB_AGENT_PROMPT } from './orchestrator-prompts'
|
||||
import {
|
||||
extractStreamingNodes,
|
||||
extractJsonFromResponse,
|
||||
|
|
@ -37,8 +36,8 @@ import {
|
|||
} from './design-animation'
|
||||
|
||||
const SUB_AGENT_TIMEOUTS = {
|
||||
hardTimeoutMs: 120_000,
|
||||
noTextTimeoutMs: 45_000,
|
||||
hardTimeoutMs: 60_000,
|
||||
noTextTimeoutMs: 30_000,
|
||||
thinkingResetsTimeout: true,
|
||||
}
|
||||
|
||||
|
|
@ -56,13 +55,12 @@ export async function executeOrchestration(
|
|||
): Promise<{ nodes: PenNode[]; rawResponse: string }> {
|
||||
const animated = callbacks?.animated ?? false
|
||||
|
||||
// -- Phase 1: Planning --
|
||||
// -- Phase 1: Planning (streaming) --
|
||||
callbacks?.onTextUpdate?.(
|
||||
'<step title="Planning layout" status="streaming">Analyzing design structure...</step>',
|
||||
)
|
||||
|
||||
const plan = await callOrchestrator(request.prompt, (thinking) => {
|
||||
// Forward thinking progress to UI so user sees activity
|
||||
const truncated = thinking.length > 200
|
||||
? thinking.slice(-200) + '...'
|
||||
: thinking
|
||||
|
|
@ -165,29 +163,22 @@ export async function executeOrchestration(
|
|||
// Orchestrator call — fast decomposition
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
/** Max prompt length for the orchestrator — it only needs structure, not full detail */
|
||||
const MAX_ORCHESTRATOR_PROMPT_CHARS = 2000
|
||||
|
||||
async function callOrchestrator(
|
||||
prompt: string,
|
||||
onThinking?: (thinking: string) => void,
|
||||
): Promise<OrchestratorPlan> {
|
||||
// Truncate long prompts — the orchestrator only needs high-level structure
|
||||
const truncatedPrompt = prompt.length > MAX_ORCHESTRATOR_PROMPT_CHARS
|
||||
? prompt.slice(0, MAX_ORCHESTRATOR_PROMPT_CHARS) + '\n\n[... prompt truncated for planning — full details will be sent to each section agent]'
|
||||
: prompt
|
||||
console.log('[Orchestrator] Calling streamChat...')
|
||||
|
||||
let rawResponse = ''
|
||||
let thinkingContent = ''
|
||||
|
||||
for await (const chunk of streamChat(
|
||||
ORCHESTRATOR_PROMPT,
|
||||
[{ role: 'user', content: truncatedPrompt }],
|
||||
[{ role: 'user', content: prompt }],
|
||||
undefined,
|
||||
{
|
||||
...ORCHESTRATOR_TIMEOUTS,
|
||||
// Don't let thinking indefinitely extend the no-text timeout
|
||||
thinkingResetsTimeout: false,
|
||||
thinkingResetsTimeout: true,
|
||||
},
|
||||
)) {
|
||||
if (chunk.type === 'text') {
|
||||
|
|
@ -200,11 +191,15 @@ async function callOrchestrator(
|
|||
}
|
||||
}
|
||||
|
||||
console.log('[Orchestrator] Raw response:', rawResponse.slice(0, 500))
|
||||
|
||||
const plan = parseOrchestratorResponse(rawResponse)
|
||||
if (!plan) {
|
||||
console.error('[Orchestrator] Failed to parse plan from:', rawResponse.slice(0, 500))
|
||||
throw new Error('Failed to parse orchestrator plan')
|
||||
}
|
||||
|
||||
console.log('[Orchestrator] Plan:', plan.subtasks.length, 'subtasks')
|
||||
return plan
|
||||
}
|
||||
|
||||
|
|
@ -253,7 +248,7 @@ function tryParsePlan(text: string): OrchestratorPlan | null {
|
|||
}
|
||||
|
||||
// ---------------------------------------------------------------------------
|
||||
// Parallel sub-agent execution
|
||||
// Sequential sub-agent execution
|
||||
// ---------------------------------------------------------------------------
|
||||
|
||||
async function executeSubAgentsSequentially(
|
||||
|
|
@ -268,7 +263,9 @@ async function executeSubAgentsSequentially(
|
|||
): Promise<SubAgentResult[]> {
|
||||
const results: SubAgentResult[] = []
|
||||
for (let i = 0; i < plan.subtasks.length; i++) {
|
||||
const result = await executeSubAgent(plan.subtasks[i], plan, request, progress, i, callbacks)
|
||||
const result = await executeSubAgent(
|
||||
plan.subtasks[i], plan, request, progress, i, callbacks,
|
||||
)
|
||||
results.push(result)
|
||||
}
|
||||
return results
|
||||
|
|
@ -293,6 +290,7 @@ async function executeSubAgent(
|
|||
|
||||
const userPrompt = buildSubAgentUserPrompt(
|
||||
subtask,
|
||||
plan,
|
||||
request.prompt,
|
||||
request.context?.variables,
|
||||
request.context?.themes,
|
||||
|
|
@ -304,7 +302,7 @@ async function executeSubAgent(
|
|||
|
||||
try {
|
||||
for await (const chunk of streamChat(
|
||||
DESIGN_GENERATOR_PROMPT,
|
||||
SUB_AGENT_PROMPT,
|
||||
[{ role: 'user', content: userPrompt }],
|
||||
undefined,
|
||||
SUB_AGENT_TIMEOUTS,
|
||||
|
|
@ -312,6 +310,9 @@ async function executeSubAgent(
|
|||
if (chunk.type === 'text') {
|
||||
rawResponse += chunk.content
|
||||
|
||||
// Forward streaming text to panel
|
||||
emitProgress(plan, progress, callbacks, rawResponse)
|
||||
|
||||
if (animated) {
|
||||
const { results, newOffset } = extractStreamingNodes(
|
||||
rawResponse,
|
||||
|
|
@ -340,7 +341,7 @@ async function executeSubAgent(
|
|||
progress.totalNodes++
|
||||
}
|
||||
callbacks?.onApplyPartial?.(progress.totalNodes)
|
||||
emitProgress(plan, progress, callbacks)
|
||||
emitProgress(plan, progress, callbacks, rawResponse)
|
||||
}
|
||||
}
|
||||
} else if (chunk.type === 'thinking') {
|
||||
|
|
@ -386,23 +387,19 @@ async function executeSubAgent(
|
|||
|
||||
function buildSubAgentUserPrompt(
|
||||
subtask: SubTask,
|
||||
plan: OrchestratorPlan,
|
||||
originalPrompt: string,
|
||||
variables?: Record<string, VariableDefinition>,
|
||||
themes?: Record<string, string[]>,
|
||||
): string {
|
||||
const { region } = subtask
|
||||
|
||||
// Extract a brief context line from the original prompt (first 200 chars)
|
||||
const briefContext = originalPrompt.length > 200
|
||||
? originalPrompt.slice(0, 200) + '...'
|
||||
: originalPrompt
|
||||
// Show all sections so the model knows scope — only generate THIS one
|
||||
const sectionList = plan.subtasks
|
||||
.map((st) => `- ${st.label} (${st.region.width}x${st.region.height})${st.id === subtask.id ? ' ← YOU' : ''}`)
|
||||
.join('\n')
|
||||
|
||||
let prompt = `Design: "${subtask.label}"
|
||||
Context: ${briefContext}
|
||||
Canvas: ${region.width}x${region.height}px
|
||||
Root frame: id="${subtask.idPrefix}-root", width=${region.width}, height=${region.height}
|
||||
All node IDs MUST start with "${subtask.idPrefix}-".
|
||||
Generate ONLY this section, not the full page.`
|
||||
let prompt = `Page sections:\n${sectionList}\n\nGenerate ONLY "${subtask.label}" (${region.width}x${region.height}px).\n${originalPrompt}\nRoot: id="${subtask.idPrefix}-root", width="fill_container", height=${region.height}. IDs prefix="${subtask.idPrefix}-". No <step> tags. Output \`\`\`json immediately.`
|
||||
|
||||
const varContext = buildVariableContext(variables, themes)
|
||||
if (varContext) {
|
||||
|
|
@ -420,6 +417,12 @@ function ensureIdPrefix(node: PenNode, prefix: string): void {
|
|||
if (!node.id.startsWith(`${prefix}-`)) {
|
||||
node.id = `${prefix}-${node.id}`
|
||||
}
|
||||
// Recursively prefix children (for fallback tree extraction)
|
||||
if ('children' in node && Array.isArray(node.children)) {
|
||||
for (const child of node.children) {
|
||||
ensureIdPrefix(child, prefix)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
function ensurePrefixStr(id: string, prefix: string): string {
|
||||
|
|
@ -437,6 +440,7 @@ function emitProgress(
|
|||
callbacks?: {
|
||||
onTextUpdate?: (text: string) => void
|
||||
},
|
||||
streamingText?: string,
|
||||
): void {
|
||||
if (!callbacks?.onTextUpdate) return
|
||||
|
||||
|
|
@ -455,7 +459,11 @@ function emitProgress(
|
|||
})
|
||||
.join('\n')
|
||||
|
||||
callbacks.onTextUpdate(`${planningStep}\n${subtaskSteps}`)
|
||||
let output = `${planningStep}\n${subtaskSteps}`
|
||||
if (streamingText) {
|
||||
output += '\n\n' + streamingText
|
||||
}
|
||||
callbacks.onTextUpdate(output)
|
||||
}
|
||||
|
||||
/** Build step tags for the final rawResponse (shown in message after streaming ends) */
|
||||
|
|
|
|||
Loading…
Reference in a new issue