mirror of
https://github.com/vndangkhoa/Sys-Arc-Visl.git
synced 2026-04-05 01:17:57 +07:00
fix: use correct Florence-2 processor call pattern
This commit is contained in:
parent
a9c903dc29
commit
db777363d7
1 changed file with 8 additions and 5 deletions
|
|
@@ -90,11 +90,14 @@ export class VisionService {
|
||||||
const image = await RawImage.fromURL(cleanBase64);
|
const image = await RawImage.fromURL(cleanBase64);
|
||||||
|
|
||||||
// Task: Detailed Captioning is best for understanding diagrams
|
// Task: Detailed Captioning is best for understanding diagrams
|
||||||
const text = '<MORE_DETAILED_CAPTION>';
|
const task = '<MORE_DETAILED_CAPTION>';
|
||||||
// Pass arguments as object to avoid positional ambiguity
|
|
||||||
// Florence-2 processor typically expects 'images' and 'text'
|
// Construct prompts using the processor's method (required for Florence-2)
|
||||||
|
const prompts = this.processor.construct_prompts(task);
|
||||||
|
|
||||||
|
// Pre-process the image and text inputs (image first, prompts second)
|
||||||
if (!this.processor) throw new Error('Processor is undefined');
|
if (!this.processor) throw new Error('Processor is undefined');
|
||||||
const inputs = await this.processor({ text, images: [image] });
|
const inputs = await this.processor(image, prompts);
|
||||||
|
|
||||||
const generatedIds = await this.model.generate({
|
const generatedIds = await this.model.generate({
|
||||||
...inputs,
|
...inputs,
|
||||||
|
|
@@ -109,7 +112,7 @@ export class VisionService {
|
||||||
// Florence-2 output format usually includes the task token
|
// Florence-2 output format usually includes the task token
|
||||||
const parsedAnswer = this.processor.post_process_generation(
|
const parsedAnswer = this.processor.post_process_generation(
|
||||||
generatedText,
|
generatedText,
|
||||||
text,
|
task,
|
||||||
image.size
|
image.size
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue