refactor(ai): streamline orchestration and remove complexity classifier

- Remove the complexity classifier; its routing decision is no longer needed.
- Update the design generator to always route through the orchestrator, simplifying the logic.
- Log orchestration failures as errors so that the fallback to direct generation is clearly visible.
- Introduce a new, leaner sub-agent prompt for clearer output formatting in design generation.
2026-05-31 19:04:29 +07:00 · 2026-02-21 17:31:23 +08:00 · 2026-02-21 17:31:23 +08:00 · 92dde8ca88
commit 92dde8ca88
parent c079c85061
5 changed files with 71 additions and 133 deletions
--- a/server/api/ai/chat.ts
+++ b/server/api/ai/chat.ts
@ -76,7 +76,7 @@ async function streamViaAnthropicSDK(apiKey: string, body: ChatBody, model?: str
              const data = JSON.stringify({ type: 'text', content: ev.delta.text })
              controller.enqueue(encoder.encode(`data: ${data}\n\n`))
            } else if (ev.delta.type === 'thinking_delta') {
-              clearInterval(pingTimer)
+              // Keep pings alive during thinking — only stop on text output
              const data = JSON.stringify({ type: 'thinking', content: ev.delta.thinking })
              controller.enqueue(encoder.encode(`data: ${data}\n\n`))
            }
@ -150,7 +150,7 @@ function streamViaAgentSDK(body: ChatBody, model?: string) {
                const data = JSON.stringify({ type: 'text', content: ev.delta.text })
                controller.enqueue(encoder.encode(`data: ${data}\n\n`))
              } else if (ev.delta.type === 'thinking_delta') {
-                clearInterval(pingTimer)
+                // Keep pings alive during thinking — only stop on text output
                const data = JSON.stringify({ type: 'thinking', content: (ev.delta as any).thinking })
                controller.enqueue(encoder.encode(`data: ${data}\n\n`))
              }
--- a/src/services/ai/complexity-classifier.ts
+++ b/src/services/ai/complexity-classifier.ts
@ -1,90 +0,0 @@
-/**
- * Heuristic complexity classifier for design prompts.
- * Determines whether a prompt should be routed through
- * the orchestrator (parallel sub-agents) or handled directly.
- *
- * No API calls — runs in <1ms. False positives are cheap
- * (one extra planning call), false negatives are safe
- * (fall back to existing single-call path).
- */
-
-// Structural keywords indicating distinct spatial sections
-const SECTION_KEYWORDS = [
-  'sidebar', 'header', 'footer', 'nav', 'navigation',
-  'hero', 'stats', 'statistics', 'chart', 'table',
-  'form', 'modal', 'dialog', 'card section', 'cards section',
-  'profile', 'settings', 'feed', 'content area',
-  'grid', 'gallery', 'carousel', 'tabs',
-  'search bar', 'filter', 'toolbar', 'breadcrumb',
-  'notification', 'calendar', 'timeline',
-  // Chinese equivalents
-  '侧边栏', '头部', '底部', '导航', '英雄区',
-  '统计', '图表', '表格', '表单', '弹窗',
-  '卡片区', '个人资料', '设置', '时间线',
-]
-
-// Full-page keywords that almost always produce complex output
-const COMPLEX_PAGE_KEYWORDS = [
-  'dashboard', 'landing page', 'homepage', 'e-commerce',
-  'admin panel', 'social media', 'email client',
-  'settings page', 'analytics', 'crm', 'erp',
-  'portfolio', 'blog', 'marketplace', 'checkout',
-  // Chinese
-  '仪表盘', '着陆页', '首页', '电商', '管理后台',
-  '社交', '邮件', '分析页',
-]
-
-// Simple single-component keywords (suppress orchestration)
-const SIMPLE_KEYWORDS = [
-  'button', 'input', 'avatar', 'badge', 'tooltip',
-  'toggle', 'switch', 'checkbox', 'radio',
-  'tag', 'chip', 'divider', 'spinner', 'icon',
-  '按钮', '输入框', '头像', '标签', '开关',
-]
-
-/** Minimum sections required to trigger orchestrator */
-const COMPLEXITY_THRESHOLD = 3
-
-export interface ComplexityAssessment {
-  isComplex: boolean
-  estimatedSections: number
-  reason: string
-}
-
-export function assessComplexity(prompt: string): ComplexityAssessment {
-  const lower = prompt.toLowerCase()
-
-  // Count distinct section keywords mentioned
-  const mentionedSections = SECTION_KEYWORDS.filter((kw) => lower.includes(kw))
-  const isFullPage = COMPLEX_PAGE_KEYWORDS.some((kw) => lower.includes(kw))
-  const isSimpleComponent =
-    SIMPLE_KEYWORDS.some((kw) => lower.includes(kw)) && mentionedSections.length <= 1
-
-  // Count conjunctions as section indicators
-  const conjunctions = (lower.match(/\band\b/g) || []).length
-  + (lower.match(/[,，、]/g) || []).length
-
-  const estimatedSections = isFullPage
-    ? Math.max(mentionedSections.length, 4)
-    : mentionedSections.length + Math.min(Math.floor(conjunctions / 2), 2)
-
-  if (isSimpleComponent) {
-    return {
-      isComplex: false,
-      estimatedSections: 1,
-      reason: 'Simple single component request',
-    }
-  }
-
-  const isComplex =
-    estimatedSections >= COMPLEXITY_THRESHOLD ||
-    (isFullPage && estimatedSections >= 2)
-
-  return {
-    isComplex,
-    estimatedSections,
-    reason: isComplex
-      ? `Detected ${mentionedSections.length} sections (${mentionedSections.join(', ')})`
-      : `Below threshold: ${estimatedSections} sections`,
-  }
-}
--- a/src/services/ai/design-generator.ts
+++ b/src/services/ai/design-generator.ts
@ -11,7 +11,6 @@ import {
  startNewAnimationBatch,
  resetAnimationState,
 } from './design-animation'
-import { assessComplexity } from './complexity-classifier'
 import { executeOrchestration } from './orchestrator'

 const DESIGN_STREAM_TIMEOUTS = {
@ -344,15 +343,11 @@ export async function generateDesign(
    animated?: boolean
  }
 ): Promise<{ nodes: PenNode[]; rawResponse: string }> {
-  // Route complex prompts through orchestrator for parallel generation
-  const { isComplex } = assessComplexity(request.prompt)
-  if (isComplex) {
-    try {
-      return await executeOrchestration(request, callbacks)
-    } catch (err) {
-      // Orchestrator failed — silently fall back to single-call generation
-      console.warn('Orchestrator failed, falling back to direct generation:', err)
-    }
+  // Always route through orchestrator (fallback to direct generation on failure)
+  try {
+    return await executeOrchestration(request, callbacks)
+  } catch (err) {
+    console.error('[Orchestrator] Failed, falling back to direct generation:', err)
  }

  const userMessage = buildContextMessage(request)
--- a/src/services/ai/orchestrator-prompts.ts
+++ b/src/services/ai/orchestrator-prompts.ts
@ -18,3 +18,28 @@ export const ORCHESTRATOR_TIMEOUTS = {
  hardTimeoutMs: 30_000,
  noTextTimeoutMs: 20_000,
 }
+
+// Safe code block delimiter
+const BLOCK = "```"
+
+/**
+ * Sub-agent prompt — lean version of DESIGN_GENERATOR_PROMPT.
+ * Only essential schema + JSONL output format. Includes one example for format clarity.
+ */
+export const SUB_AGENT_PROMPT = `PenNode flat JSONL engine. Output a ${BLOCK}json block with ONE node per line.
+
+TYPES: frame (width,height,layout,gap,padding,justifyContent,alignItems,cornerRadius,fill,stroke,effects,children), rectangle, ellipse, text (content,fontFamily,fontSize,fontWeight,fill,width,height,textAlign), path (d,width,height,fill,stroke), image (src,width,height)
+SHARED: id, type, name, x, y, opacity
+Fill=[{"type":"solid","color":"#hex"}] Stroke={"thickness":N,"fill":[...]}
+cornerRadius=number. fill=array. No x/y on children in layout frames. Use "fill_container" to stretch.
+
+FORMAT: Each line has "_parent" (null=root, else parent-id). Parent before children.
+${BLOCK}json
+{"_parent":null,"id":"root","type":"frame","name":"Section","width":"fill_container","height":300,"layout":"vertical","gap":16,"padding":24}
+{"_parent":"root","id":"title","type":"text","name":"Title","content":"Hello","fontSize":24,"fontWeight":700,"width":"fill_container","height":32,"fill":[{"type":"solid","color":"#F4F4F5"}]}
+${BLOCK}
+
+STYLE: Dark. Bg #16171B, Card #1E2026, Text #F4F4F5, Secondary #52525B, Accent #22C55E, Border #2A2B30. Headlines "Space Grotesk" 700, Body "Inter". cornerRadius 4. No shadows, 1px borders.
+Icons: path+SVG d 16-24px. Images: src "https://picsum.photos/{w}/{h}".
+
+Start with ${BLOCK}json immediately. No preamble, no <step> tags.`
--- a/src/services/ai/orchestrator.ts
+++ b/src/services/ai/orchestrator.ts
@ -20,8 +20,7 @@ import type {
  SubAgentResult,
 } from './ai-types'
 import { streamChat } from './ai-service'
-import { DESIGN_GENERATOR_PROMPT } from './ai-prompts'
-import { ORCHESTRATOR_PROMPT, ORCHESTRATOR_TIMEOUTS } from './orchestrator-prompts'
+import { ORCHESTRATOR_PROMPT, ORCHESTRATOR_TIMEOUTS, SUB_AGENT_PROMPT } from './orchestrator-prompts'
 import {
  extractStreamingNodes,
  extractJsonFromResponse,
@ -37,8 +36,8 @@ import {
 } from './design-animation'

 const SUB_AGENT_TIMEOUTS = {
-  hardTimeoutMs: 120_000,
-  noTextTimeoutMs: 45_000,
+  hardTimeoutMs: 60_000,
+  noTextTimeoutMs: 30_000,
  thinkingResetsTimeout: true,
 }

@ -56,13 +55,12 @@ export async function executeOrchestration(
 ): Promise<{ nodes: PenNode[]; rawResponse: string }> {
  const animated = callbacks?.animated ?? false

-  // -- Phase 1: Planning --
+  // -- Phase 1: Planning (streaming) --
  callbacks?.onTextUpdate?.(
    '<step title="Planning layout" status="streaming">Analyzing design structure...</step>',
  )

  const plan = await callOrchestrator(request.prompt, (thinking) => {
-    // Forward thinking progress to UI so user sees activity
    const truncated = thinking.length > 200
      ? thinking.slice(-200) + '...'
      : thinking
@ -165,29 +163,22 @@ export async function executeOrchestration(
 // Orchestrator call — fast decomposition
 // ---------------------------------------------------------------------------

-/** Max prompt length for the orchestrator — it only needs structure, not full detail */
-const MAX_ORCHESTRATOR_PROMPT_CHARS = 2000
-
 async function callOrchestrator(
  prompt: string,
  onThinking?: (thinking: string) => void,
 ): Promise<OrchestratorPlan> {
-  // Truncate long prompts — the orchestrator only needs high-level structure
-  const truncatedPrompt = prompt.length > MAX_ORCHESTRATOR_PROMPT_CHARS
-    ? prompt.slice(0, MAX_ORCHESTRATOR_PROMPT_CHARS) + '\n\n[... prompt truncated for planning — full details will be sent to each section agent]'
-    : prompt
+  console.log('[Orchestrator] Calling streamChat...')

  let rawResponse = ''
  let thinkingContent = ''

  for await (const chunk of streamChat(
    ORCHESTRATOR_PROMPT,
-    [{ role: 'user', content: truncatedPrompt }],
+    [{ role: 'user', content: prompt }],
    undefined,
    {
      ...ORCHESTRATOR_TIMEOUTS,
-      // Don't let thinking indefinitely extend the no-text timeout
-      thinkingResetsTimeout: false,
+      thinkingResetsTimeout: true,
    },
  )) {
    if (chunk.type === 'text') {
@ -200,11 +191,15 @@ async function callOrchestrator(
    }
  }

+  console.log('[Orchestrator] Raw response:', rawResponse.slice(0, 500))
+
  const plan = parseOrchestratorResponse(rawResponse)
  if (!plan) {
+    console.error('[Orchestrator] Failed to parse plan from:', rawResponse.slice(0, 500))
    throw new Error('Failed to parse orchestrator plan')
  }

+  console.log('[Orchestrator] Plan:', plan.subtasks.length, 'subtasks')
  return plan
 }

@ -253,7 +248,7 @@ function tryParsePlan(text: string): OrchestratorPlan | null {
 }

 // ---------------------------------------------------------------------------
-// Parallel sub-agent execution
+// Sequential sub-agent execution
 // ---------------------------------------------------------------------------

 async function executeSubAgentsSequentially(
@ -268,7 +263,9 @@ async function executeSubAgentsSequentially(
 ): Promise<SubAgentResult[]> {
  const results: SubAgentResult[] = []
  for (let i = 0; i < plan.subtasks.length; i++) {
-    const result = await executeSubAgent(plan.subtasks[i], plan, request, progress, i, callbacks)
+    const result = await executeSubAgent(
+      plan.subtasks[i], plan, request, progress, i, callbacks,
+    )
    results.push(result)
  }
  return results
@ -293,6 +290,7 @@ async function executeSubAgent(

  const userPrompt = buildSubAgentUserPrompt(
    subtask,
+    plan,
    request.prompt,
    request.context?.variables,
    request.context?.themes,
@ -304,7 +302,7 @@ async function executeSubAgent(

  try {
    for await (const chunk of streamChat(
-      DESIGN_GENERATOR_PROMPT,
+      SUB_AGENT_PROMPT,
      [{ role: 'user', content: userPrompt }],
      undefined,
      SUB_AGENT_TIMEOUTS,
@ -312,6 +310,9 @@ async function executeSubAgent(
      if (chunk.type === 'text') {
        rawResponse += chunk.content

+        // Forward streaming text to panel
+        emitProgress(plan, progress, callbacks, rawResponse)
+
        if (animated) {
          const { results, newOffset } = extractStreamingNodes(
            rawResponse,
@ -340,7 +341,7 @@ async function executeSubAgent(
              progress.totalNodes++
            }
            callbacks?.onApplyPartial?.(progress.totalNodes)
-            emitProgress(plan, progress, callbacks)
+            emitProgress(plan, progress, callbacks, rawResponse)
          }
        }
      } else if (chunk.type === 'thinking') {
@ -386,23 +387,19 @@ async function executeSubAgent(

 function buildSubAgentUserPrompt(
  subtask: SubTask,
+  plan: OrchestratorPlan,
  originalPrompt: string,
  variables?: Record<string, VariableDefinition>,
  themes?: Record<string, string[]>,
 ): string {
  const { region } = subtask

-  // Extract a brief context line from the original prompt (first 200 chars)
-  const briefContext = originalPrompt.length > 200
-    ? originalPrompt.slice(0, 200) + '...'
-    : originalPrompt
+  // Show all sections so the model knows scope — only generate THIS one
+  const sectionList = plan.subtasks
+    .map((st) => `- ${st.label} (${st.region.width}x${st.region.height})${st.id === subtask.id ? ' ← YOU' : ''}`)
+    .join('\n')

-  let prompt = `Design: "${subtask.label}"
-Context: ${briefContext}
-Canvas: ${region.width}x${region.height}px
-Root frame: id="${subtask.idPrefix}-root", width=${region.width}, height=${region.height}
-All node IDs MUST start with "${subtask.idPrefix}-".
-Generate ONLY this section, not the full page.`
+  let prompt = `Page sections:\n${sectionList}\n\nGenerate ONLY "${subtask.label}" (${region.width}x${region.height}px).\n${originalPrompt}\nRoot: id="${subtask.idPrefix}-root", width="fill_container", height=${region.height}. IDs prefix="${subtask.idPrefix}-". No <step> tags. Output \`\`\`json immediately.`

  const varContext = buildVariableContext(variables, themes)
  if (varContext) {
@ -420,6 +417,12 @@ function ensureIdPrefix(node: PenNode, prefix: string): void {
  if (!node.id.startsWith(`${prefix}-`)) {
    node.id = `${prefix}-${node.id}`
  }
+  // Recursively prefix children (for fallback tree extraction)
+  if ('children' in node && Array.isArray(node.children)) {
+    for (const child of node.children) {
+      ensureIdPrefix(child, prefix)
+    }
+  }
 }

 function ensurePrefixStr(id: string, prefix: string): string {
@ -437,6 +440,7 @@ function emitProgress(
  callbacks?: {
    onTextUpdate?: (text: string) => void
  },
+  streamingText?: string,
 ): void {
  if (!callbacks?.onTextUpdate) return

@ -455,7 +459,11 @@ function emitProgress(
    })
    .join('\n')

-  callbacks.onTextUpdate(`${planningStep}\n${subtaskSteps}`)
+  let output = `${planningStep}\n${subtaskSteps}`
+  if (streamingText) {
+    output += '\n\n' + streamingText
+  }
+  callbacks.onTextUpdate(output)
 }

 /** Build step tags for the final rawResponse (shown in message after streaming ends) */