fix: use correct Florence-2 processor call pattern

This commit is contained in:
SysVis AI 2025-12-28 21:10:45 +07:00
parent a9c903dc29
commit db777363d7

View file

@ -90,11 +90,14 @@ export class VisionService {
const image = await RawImage.fromURL(cleanBase64); const image = await RawImage.fromURL(cleanBase64);
// Task: Detailed Captioning is best for understanding diagrams // Task: Detailed Captioning is best for understanding diagrams
const text = '<MORE_DETAILED_CAPTION>'; const task = '<MORE_DETAILED_CAPTION>';
// Pass arguments as object to avoid positional ambiguity
// Florence-2 processor typically expects 'images' and 'text' // Construct prompts using the processor's method (required for Florence-2)
const prompts = this.processor.construct_prompts(task);
// Pre-process the image and text inputs (image first, prompts second)
if (!this.processor) throw new Error('Processor is undefined'); if (!this.processor) throw new Error('Processor is undefined');
const inputs = await this.processor({ text, images: [image] }); const inputs = await this.processor(image, prompts);
const generatedIds = await this.model.generate({ const generatedIds = await this.model.generate({
...inputs, ...inputs,
@ -109,7 +112,7 @@ export class VisionService {
// Florence-2 output format usually includes the task token // Florence-2 output format usually includes the task token
const parsedAnswer = this.processor.post_process_generation( const parsedAnswer = this.processor.post_process_generation(
generatedText, generatedText,
text, task,
image.size image.size
); );