feat: add image-to-video support for Meta AI
Some checks are pending
CI / build (18.x) (push) Waiting to run
CI / build (20.x) (push) Waiting to run

- Add Litterbox temporary image hosting for image URLs
- Update backend to accept image_base64 parameter
- Update TypeScript client and API route
- Subject button now enabled for Meta AI (for image-to-video)
- Button changes from 'Video' to 'Animate' when subject is set
- Pink/purple gradient for image-to-video, blue/cyan for text-to-video
This commit is contained in:
Khoa.vo 2026-01-06 14:11:26 +07:00
parent 0f87b8ef99
commit bae4c487da
5 changed files with 128 additions and 27 deletions

View file

@@ -4,12 +4,14 @@ import { MetaCrawlClient } from '@/lib/providers/meta-crawl-client';
/**
* POST /api/meta/video
*
* Generate a video from a text prompt using Meta AI.
* Generate a video from a text prompt (and optionally an image) using Meta AI.
* - Text-to-Video: Just provide prompt and cookies
* - Image-to-Video: Also provide imageBase64
* Video generation takes 30-60+ seconds, so this endpoint may take a while.
*/
export async function POST(req: NextRequest) {
try {
const { prompt, cookies: clientCookies } = await req.json();
const { prompt, cookies: clientCookies, imageBase64 } = await req.json();
if (!prompt) {
return NextResponse.json({ error: "Prompt is required" }, { status: 400 });
@@ -25,7 +27,8 @@ export async function POST(req: NextRequest) {
);
}
console.log(`[Meta Video API] Starting video generation for prompt: "${prompt.substring(0, 50)}..."`);
const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[Meta Video API] Starting ${mode} for prompt: "${prompt.substring(0, 50)}..."`);
const client = new MetaCrawlClient();
@@ -39,7 +42,7 @@ export async function POST(req: NextRequest) {
}
// Generate video - this can take 30-60+ seconds
const result = await client.generateVideo(prompt, cookieString);
const result = await client.generateVideo(prompt, cookieString, imageBase64);
if (!result.success || result.videos.length === 0) {
throw new Error(result.error || "No videos generated");

View file

@@ -206,6 +206,8 @@ export function PromptHero() {
};
// Handle video generation (Meta AI only)
// If a subject reference is set, it will use image-to-video
// Otherwise, it will use text-to-video
const handleGenerateVideo = async () => {
let finalPrompt = prompt.trim();
if (!finalPrompt || isGeneratingVideo || settings.provider !== 'meta') return;
@@ -214,14 +216,20 @@ export function PromptHero() {
setIsGenerating(true);
try {
console.log('[PromptHero] Starting Meta AI video generation...');
// Check if we have a subject reference for image-to-video
const subjectRefs = references.subject || [];
const imageBase64 = subjectRefs.length > 0 ? subjectRefs[0].thumbnail : undefined;
const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[PromptHero] Starting Meta AI ${mode}...`);
const res = await fetch('/api/meta/video', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
prompt: finalPrompt,
cookies: settings.metaCookies
cookies: settings.metaCookies,
imageBase64: imageBase64
})
});
@@ -243,12 +251,13 @@ export function PromptHero() {
id: crypto.randomUUID(),
url: video.url,
prompt: video.prompt || finalPrompt,
thumbnail: imageBase64, // Store the source image as thumbnail
createdAt: Date.now()
});
}
// Show success notification
setErrorNotification({
message: `🎬 Success! Generated ${data.videos.length} video(s). Check the Videos tab.`,
message: `🎬 Success! Generated ${data.videos.length} video(s) via ${mode}. Check the gallery.`,
type: 'warning' // Using warning for visibility (amber color)
});
setTimeout(() => setErrorNotification(null), 5000);
@@ -557,14 +566,20 @@ export function PromptHero() {
{/* Controls Area */}
<div className="flex flex-col md:flex-row items-center justify-between gap-3 pt-1">
{/* Left Controls: References (Hidden for Meta AI as it doesn't support them yet) */}
<div className={cn("flex flex-wrap gap-2", settings.provider === 'meta' && "opacity-30 pointer-events-none grayscale")}>
{/* Left Controls: References */}
{/* For Meta AI: Only subject is enabled (for image-to-video), scene/style are disabled */}
<div className="flex flex-wrap gap-2">
{(['subject', 'scene', 'style'] as ReferenceCategory[]).map((cat) => {
const refs = references[cat] || [];
const hasRefs = refs.length > 0;
const isUploading = uploadingRefs[cat];
// For Meta AI: only enable subject (for image-to-video), disable scene/style
const isDisabledForMeta = settings.provider === 'meta' && cat !== 'subject';
return (
<div key={cat} className="relative group">
<div key={cat} className={cn(
"relative group",
isDisabledForMeta && "opacity-30 pointer-events-none grayscale"
)}>
<button
onClick={() => toggleReference(cat)}
onDragOver={handleDragOver}
@@ -576,6 +591,9 @@ export function PromptHero() {
: "bg-white/5 text-white/40 border-white/5 hover:bg-white/10 hover:text-white/70 hover:border-white/10",
isUploading && "animate-pulse cursor-wait"
)}
title={settings.provider === 'meta' && cat === 'subject'
? "Upload an image to animate into video"
: undefined}
>
{isUploading ? (
<div className="h-3 w-3 animate-spin rounded-full border-2 border-current border-t-transparent" />
@@ -724,9 +742,13 @@ export function PromptHero() {
"relative overflow-hidden px-4 py-1.5 rounded-lg font-bold text-sm text-white shadow-lg transition-all active:scale-95 group border border-white/10",
isGenerating
? "bg-gray-700 cursor-not-allowed"
: "bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-500 hover:to-cyan-500 hover:shadow-cyan-500/25"
: references.subject?.length
? "bg-gradient-to-r from-pink-600 to-purple-600 hover:from-pink-500 hover:to-purple-500 hover:shadow-purple-500/25"
: "bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-500 hover:to-cyan-500 hover:shadow-cyan-500/25"
)}
title="Generate video from prompt (30-60+ seconds)"
title={references.subject?.length
? "Animate the subject image (30-60+ seconds)"
: "Generate video from text prompt (30-60+ seconds)"}
>
<div className="relative z-10 flex items-center gap-1.5">
{isGeneratingVideo ? (
@@ -737,7 +759,7 @@ export function PromptHero() {
) : (
<>
<Video className="h-3 w-3 group-hover:scale-110 transition-transform" />
<span>Video</span>
<span>{references.subject?.length ? "Animate" : "Video"}</span>
</>
)}
</div>

View file

@@ -173,14 +173,18 @@ export class MetaCrawlClient {
}
/**
* Generate video from text prompt using Meta AI
* Generate video from text prompt (and optionally an image) using Meta AI
* - Text-to-Video: Just provide prompt and cookies
* - Image-to-Video: Also provide imageBase64
* Video generation takes longer than image generation (30-60+ seconds)
*/
async generateVideo(
prompt: string,
cookies: string
cookies: string,
imageBase64?: string
): Promise<MetaCrawlVideoResponse> {
console.log(`[MetaCrawl] Sending video request to ${this.baseUrl}/video/generate`);
const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[MetaCrawl] Sending ${mode} request to ${this.baseUrl}/video/generate`);
const response = await fetch(`${this.baseUrl}/video/generate`, {
method: 'POST',
@@ -189,7 +193,8 @@ export class MetaCrawlClient {
},
body: JSON.stringify({
prompt,
cookies
cookies,
image_base64: imageBase64
})
});

View file

@@ -189,17 +189,23 @@ async def grok_chat(request: GrokChatRequest):
@app.post("/video/generate", response_model=VideoGenerateResponse)
async def generate_video(request: VideoGenerateRequest):
"""
Generate a video from a text prompt using Meta AI.
Generate a video from a text prompt (and optionally an image) using Meta AI.
This uses the metaai_api library's video generation feature.
- Text-to-Video: Just provide a prompt
- Image-to-Video: Provide a prompt + image_base64
Video generation takes longer than image generation (30-60+ seconds).
Requires:
- prompt: The video generation prompt
- cookies: Facebook/Meta cookies (JSON array or string format)
- image_base64: Optional base64 image data for image-to-video
"""
import json
import asyncio
import base64
import requests as sync_requests
from concurrent.futures import ThreadPoolExecutor
try:
@@ -227,20 +233,84 @@ async def generate_video(request: VideoGenerateRequest):
error="No valid cookies provided"
)
print(f"[VideoGen] Starting video generation for: '{request.prompt[:50]}...'")
# Handle image upload to Litterbox if image_base64 is provided
image_url = None
if request.image_base64:
print(f"[VideoGen] Uploading image to Litterbox for image-to-video...")
try:
# Extract base64 data (remove data:image/...;base64, prefix if present)
image_data = request.image_base64
if ',' in image_data:
image_data = image_data.split(',')[1]
# Decode base64 to bytes
image_bytes = base64.b64decode(image_data)
# Upload to Litterbox (temporary hosting, 1 hour expiry)
litterbox_url = "https://litterbox.catbox.moe/resources/internals/api.php"
files = {
'fileToUpload': ('image.png', image_bytes, 'image/png')
}
data = {
'reqtype': 'fileupload',
'time': '1h' # 1 hour expiry
}
upload_response = sync_requests.post(litterbox_url, files=files, data=data)
if upload_response.status_code == 200 and upload_response.text.startswith('http'):
image_url = upload_response.text.strip()
print(f"[VideoGen] Image uploaded to: {image_url}")
else:
print(f"[VideoGen] Litterbox upload failed: {upload_response.text}")
return VideoGenerateResponse(
success=False,
videos=[],
error=f"Failed to upload image: {upload_response.text[:200]}"
)
except Exception as upload_error:
print(f"[VideoGen] Image upload error: {str(upload_error)}")
return VideoGenerateResponse(
success=False,
videos=[],
error=f"Failed to upload image: {str(upload_error)}"
)
mode = "image-to-video" if image_url else "text-to-video"
print(f"[VideoGen] Starting {mode} for: '{request.prompt[:50]}...'")
# Import MetaAI and run video generation in thread pool (it's synchronous)
from metaai_api import MetaAI
def run_video_gen():
ai = MetaAI(cookies=cookies_dict)
return ai.generate_video(
prompt=request.prompt,
wait_before_poll=10,
max_attempts=60, # Up to 5 minutes of polling
wait_seconds=5,
verbose=True
)
if image_url:
# Image-to-video: Use prompt() with images parameter
result = ai.prompt(
message=request.prompt,
images=[image_url]
)
# Extract video URLs from media
video_urls = []
for media in result.get('media', []):
if media.get('type') == 'VIDEO' and media.get('url'):
video_urls.append(media['url'])
return {
'success': len(video_urls) > 0,
'video_urls': video_urls,
'message': result.get('message', '')
}
else:
# Text-to-video: Use generate_video()
return ai.generate_video(
prompt=request.prompt,
wait_before_poll=10,
max_attempts=60, # Up to 5 minutes of polling
wait_seconds=5,
verbose=True
)
# Run in thread pool since metaai_api is synchronous
loop = asyncio.get_event_loop()
@@ -248,7 +318,7 @@ async def generate_video(request: VideoGenerateRequest):
result = await loop.run_in_executor(executor, run_video_gen)
if not result.get('success', False):
error_msg = result.get('error', 'Video generation failed')
error_msg = result.get('error') or result.get('message') or 'Video generation failed'
print(f"[VideoGen] Failed: {error_msg}")
return VideoGenerateResponse(
success=False,

View file

@@ -70,6 +70,7 @@ class VideoGenerateRequest(BaseModel):
"""Request model for video generation"""
prompt: str = Field(..., description="Video generation prompt", min_length=1)
cookies: str = Field(..., description="Meta AI session cookies")
image_base64: Optional[str] = Field(default=None, description="Base64 image data for image-to-video (optional)")
class VideoResult(BaseModel):