fix(ai): query mining + enqueue path for heuristic image-area frames

Why: b9b4126b's collectImageSearchTargets returns heuristic-matched frames (named "Image" / "Photo" / "Cover" without the canonical role) but two downstream code paths quietly dropped them: 1. enqueueImageForSearch only accepted type==='image' or isUnfilledImagePlaceholderFrame, so the heuristic frames got past collect but never reached the queue. 2. extractQueryForNode would have returned the literal name "Image" or "Photo" — useless to the photo search API. The user's "Bella Italia" restaurant card never gets a relevant photo because the placeholder frame's name says nothing about the restaurant. What: - enqueueImageForSearch grows a third branch: isImageAreaFrameByHeuristic → kind: 'placeholder-frame'. Same kind so the rest of the pipeline treats it identically to a canonical placeholder. - extractQueryForNode learns to skip "generic" placeholder names (Image / Photo / Cover / Hero / Thumbnail / Banner / Poster + a few variants) and walk up to the nearest semantic parent frame name ("Bella Italia" / "Margherita Pizza" / "Sushi House" — whatever the enclosing card was named). Bounded to 3 hops. Filters layout words (Card / Wrapper / Container / Section / Frame / Root / Page / Stack / Row / Column / Content) so we don't end up searching for "Card". - A new helper findParentSemanticName builds a parent map from the live document on demand. Cheap for typical designs (< few hundred nodes); avoids threading parent through every collect / enqueue call site. Net effect: a model-emitted plain "Image" frame inside a "Bella Italia" card now searches for "Bella Italia" instead of literally "Image". The existing isImageAreaFrameByHeuristic test coverage protects the entry condition; 1098 / 1098 AI service tests still pass.
2026-06-01 03:14:29 +07:00 · 2026-05-09 20:59:50 +08:00 · 2026-05-09 20:59:50 +08:00 · 6047e743b2
commit 6047e743b2
parent dc3e40963e
1 changed files with 91 additions and 12 deletions
--- a/apps/web/src/services/ai/image-search-pipeline.ts
+++ b/apps/web/src/services/ai/image-search-pipeline.ts
@ -149,6 +149,77 @@ function isPlaceholderSrc(src?: string): boolean {
  return !src || src.startsWith(PHONE_PLACEHOLDER_PREFIX);
 }

+/**
+ * Names too generic to be a useful photo-search query on their own.
+ * Looked up via isGenericPlaceholderName (trimmed, lowercased exact
+ * match); extractQueryForNode tries a parent frame name for these.
+ */
+const GENERIC_PLACEHOLDER_NAMES = new Set([ // entries must be lowercase: lookups lowercase first
+  'image',
+  'photo',
+  'cover',
+  'hero',
+  'thumbnail',
+  'thumb',
+  'picture',
+  'banner',
+  'poster',
+  'image placeholder',
+  'placeholder icon',
+  'placeholder',
+  'card image',
+  'card photo',
+  'product image',
+  'item image',
+]);
+
+function isGenericPlaceholderName(name: string): boolean { // trimmed, case-insensitive lookup
+  return GENERIC_PLACEHOLDER_NAMES.has(name.trim().toLowerCase());
+}
+
+/**
+ * Return the name of the nearest ancestor of `nodeId` (checking at most
+ * `maxHops` ancestors) that is non-empty, not a generic placeholder name,
+ * and contains no layout word (card / wrapper / ...); null if none
+ * qualifies. E.g. an "Image" frame nested in a "Bella Italia" card
+ * yields "Bella Italia". Reads the document from useDocumentStore.
+ */
+function findParentSemanticName(nodeId: string, maxHops = 3): string | null {
+  const { document: doc } = useDocumentStore.getState();
+  // Build a child-id -> parent map by walking the whole doc tree. This is
+  // rebuilt on every call (O(nodes)); fine for typical designs (< few
+  // hundred nodes) and avoids threading parent through every call site.
+  const parentOf = new Map<string, PenNode>();
+  const walk = (n: PenNode): void => {
+    if ('children' in n && Array.isArray(n.children)) {
+      for (const c of n.children) {
+        parentOf.set(c.id, n);
+        walk(c);
+      }
+    }
+  };
+  const roots = doc.pages?.flatMap((p) => p.children ?? []) ?? doc.children ?? [];
+  for (const r of roots) walk(r); // roots: page children if doc.pages exists, else doc.children
+  let cur = parentOf.get(nodeId); // undefined if nodeId is a root or not in the doc
+  let hops = 0;
+  while (cur && hops < maxHops) {
+    const name = (cur as PenNode & { name?: string }).name;
+    if (typeof name === 'string' && name.length > 0 && !isGenericPlaceholderName(name)) {
+      // Reject any name containing a layout word as a whole word (so
+      // "Pizza Card" is skipped too); "Card" / "Wrapper" make poor queries.
+      const lower = name.toLowerCase();
+      if (
+        !/\b(card|wrapper|container|section|frame|root|page|stack|row|column|content)\b/.test(lower)
+      ) {
+        return name;
+      }
+    }
+    cur = parentOf.get(cur.id); // step to the next ancestor
+    hops++;
+  }
+  return null;
+}
+
 function extractQueryForNode(node: PenNode): string {
  const r = node as PenNode & {
    imageSearchQuery?: string;
@ -158,14 +229,6 @@ function extractQueryForNode(node: PenNode): string {
  if (typeof r.imageSearchQuery === 'string' && r.imageSearchQuery.length > 0) {
    return r.imageSearchQuery;
  }
-  if (
-    typeof r.name === 'string' &&
-    r.name.length > 0 &&
-    r.name !== 'Image Placeholder' &&
-    r.name !== 'Placeholder Icon'
-  ) {
-    return r.name;
-  }
  // For placeholder frames, mine the optional label child for a hint
  // (e.g. "Hero image" / "Upload cover" — set by the caller).
  if (isImagePlaceholderFrame(node) && Array.isArray(r.children)) {
@ -179,6 +242,15 @@ function extractQueryForNode(node: PenNode): string {
      }
    }
  }
+  // If the node's name is too generic to make a useful photo query
+  // (literal "Image" / "Photo" / "Cover" — common with the heuristic
+  // detector), walk up to find a semantic parent name (e.g. "Bella
+  // Italia" or "Margherita Pizza" from the food-app card scenario).
+  if (typeof r.name === 'string' && r.name.length > 0 && !isGenericPlaceholderName(r.name)) {
+    return r.name;
+  }
+  const parentName = findParentSemanticName(node.id);
+  if (parentName) return parentName;
  return r.name ?? 'placeholder';
 }

@ -200,12 +272,17 @@ let queueProcessing = false;
 let queueAbort: AbortController | null = null;

 /**
- * Enqueue an image target for background search. Accepts either a real
- * `image` node with a placeholder src OR a `frame` carrying
- * `role: 'image-placeholder'` (what `add_image_placeholder_v0/v1` emit).
+ * Enqueue an image target for background search. Accepts:
+ *   - real `image` node with a placeholder src
+ *   - frame carrying `role: 'image-placeholder'` (canonical, from
+ *     add_image_placeholder_v0/v1)
+ *   - frame matching the isImageAreaFrameByHeuristic predicate (a
+ *     non-canonical placeholder the model emitted as a plain colored
+ *     "Image" / "Photo" / "Cover" / "Hero" frame — see the comment on
+ *     that function for the full match policy)
 *
 * Called from insertStreamingNode for streamed image nodes, and from
- * `scanAndFillImages` for both shapes after a non-streaming insert (the
+ * `scanAndFillImages` for all shapes after a non-streaming insert (the
 * orchestrator-tail and per-subtask scans). Streaming intentionally
 * skips placeholder frames because their icon/label children stream in
 * separately — enqueueing the frame mid-stream and replacing children
@ -218,6 +295,8 @@ export function enqueueImageForSearch(node: PenNode): void {
    kind = 'image';
  } else if (isUnfilledImagePlaceholderFrame(node)) {
    kind = 'placeholder-frame';
+  } else if (isImageAreaFrameByHeuristic(node)) {
+    kind = 'placeholder-frame';
  } else {
    return;
  }