From db777363d771e8f87b72cd04c57fba39952ca27d Mon Sep 17 00:00:00 2001
From: SysVis AI <ai@kv-graph.com>
Date: Sun, 28 Dec 2025 21:10:45 +0700
Subject: [PATCH] fix: use correct Florence-2 processor call pattern

---
 src/lib/visionService.ts | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/lib/visionService.ts b/src/lib/visionService.ts
index a49e64f..32765c6 100644
--- a/src/lib/visionService.ts
+++ b/src/lib/visionService.ts
@@ -90,11 +90,14 @@ export class VisionService {
             const image = await RawImage.fromURL(cleanBase64);
 
             // Task: Detailed Captioning is best for understanding diagrams
-            const text = '<MORE_DETAILED_CAPTION>';
-            // Pass arguments as object to avoid positional ambiguity
-            // Florence-2 processor typically expects 'images' and 'text'
+            const task = '<MORE_DETAILED_CAPTION>';
             if (!this.processor) throw new Error('Processor is undefined');
+
+            // Construct prompts via the processor (required for Florence-2); must follow the guard above
+            const prompts = this.processor.construct_prompts(task);
+
+            // Pre-process the image and text inputs (image first, prompts second)
-            const inputs = await this.processor({ text, images: [image] });
+            const inputs = await this.processor(image, prompts);
 
             const generatedIds = await this.model.generate({
                 ...inputs,
@@ -109,7 +112,7 @@ export class VisionService {
             // Florence-2 output format usually includes the task token
             const parsedAnswer = this.processor.post_process_generation(
                 generatedText,
-                text,
+                task,
                 image.size
             );