From db777363d771e8f87b72cd04c57fba39952ca27d Mon Sep 17 00:00:00 2001
From: SysVis AI
Date: Sun, 28 Dec 2025 21:10:45 +0700
Subject: [PATCH] fix: use correct Florence-2 processor call pattern

Build the prompt with the processor's construct_prompts method and call
the processor positionally (image first, prompts second).

Keep the 'Processor is undefined' guard ahead of the first use of
this.processor, so a missing processor raises that error instead of a
TypeError from the construct_prompts dereference.

---
Review notes:
- Line breaks and indentation in this patch were reconstructed; apply
  with `git apply --ignore-whitespace` if the context does not match.
- NOTE(review): the task literal is an empty string on both the removed
  and the added line, yet the comment above it names Detailed
  Captioning. Presumably a Florence-2 task token was lost; confirm
  against the working tree before applying.

 src/lib/visionService.ts | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/src/lib/visionService.ts b/src/lib/visionService.ts
index a49e64f..32765c6 100644
--- a/src/lib/visionService.ts
+++ b/src/lib/visionService.ts
@@ -90,11 +90,14 @@ export class VisionService {
       const image = await RawImage.fromURL(cleanBase64);
 
       // Task: Detailed Captioning is best for understanding diagrams
-      const text = '';
-      // Pass arguments as object to avoid positional ambiguity
-      // Florence-2 processor typically expects 'images' and 'text'
+      const task = '';
+
       if (!this.processor) throw new Error('Processor is undefined');
-      const inputs = await this.processor({ text, images: [image] });
+      // Construct prompts using the processor's method (required for Florence-2)
+      const prompts = this.processor.construct_prompts(task);
+
+      // Pre-process the image and text inputs (image first, prompts second)
+      const inputs = await this.processor(image, prompts);
 
       const generatedIds = await this.model.generate({
         ...inputs,
@@ -109,7 +112,7 @@ export class VisionService {
       // Florence-2 output format usually includes the task token
       const parsedAnswer = this.processor.post_process_generation(
         generatedText,
-        text,
+        task,
         image.size
       );
 