mirror of
https://github.com/vndangkhoa/Sys-Arc-Visl.git
synced 2026-04-05 01:17:57 +07:00
fix: use correct Florence-2 processor call pattern
This commit is contained in:
parent
a9c903dc29
commit
db777363d7
1 changed file with 8 additions and 5 deletions
|
|
@@ -90,11 +90,14 @@ export class VisionService {
|
|||
const image = await RawImage.fromURL(cleanBase64);
|
||||
|
||||
// Task: Detailed Captioning is best for understanding diagrams
|
||||
const text = '<MORE_DETAILED_CAPTION>';
|
||||
// Pass arguments as object to avoid positional ambiguity
|
||||
// Florence-2 processor typically expects 'images' and 'text'
|
||||
const task = '<MORE_DETAILED_CAPTION>';
|
||||
|
||||
// Construct prompts using the processor's method (required for Florence-2)
|
||||
const prompts = this.processor.construct_prompts(task);
|
||||
|
||||
// Pre-process the image and text inputs (image first, prompts second)
|
||||
if (!this.processor) throw new Error('Processor is undefined');
|
||||
const inputs = await this.processor({ text, images: [image] });
|
||||
const inputs = await this.processor(image, prompts);
|
||||
|
||||
const generatedIds = await this.model.generate({
|
||||
...inputs,
|
||||
|
|
@@ -109,7 +112,7 @@ export class VisionService {
|
|||
// Florence-2 output format usually includes the task token
|
||||
const parsedAnswer = this.processor.post_process_generation(
|
||||
generatedText,
|
||||
text,
|
||||
task,
|
||||
image.size
|
||||
);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue