feat: add image-to-video support for Meta AI
Some checks are pending
CI / build (18.x) (push) Waiting to run
CI / build (20.x) (push) Waiting to run

- Add Litterbox temporary file hosting (1-hour expiry) to turn uploaded images into public URLs for Meta AI
- Update backend to accept image_base64 parameter
- Update TypeScript client and API route
- Subject button now enabled for Meta AI (for image-to-video)
- Button changes from 'Video' to 'Animate' when subject is set
- Pink/purple gradient for image-to-video, blue/cyan for text-to-video
This commit is contained in:
Khoa.vo 2026-01-06 14:11:26 +07:00
parent 0f87b8ef99
commit bae4c487da
5 changed files with 128 additions and 27 deletions

View file

@ -4,12 +4,14 @@ import { MetaCrawlClient } from '@/lib/providers/meta-crawl-client';
/** /**
* POST /api/meta/video * POST /api/meta/video
* *
* Generate a video from a text prompt using Meta AI. * Generate a video from a text prompt (and optionally an image) using Meta AI.
* - Text-to-Video: Just provide prompt and cookies
* - Image-to-Video: Also provide imageBase64
* Video generation takes 30-60+ seconds, so this endpoint may take a while. * Video generation takes 30-60+ seconds, so this endpoint may take a while.
*/ */
export async function POST(req: NextRequest) { export async function POST(req: NextRequest) {
try { try {
const { prompt, cookies: clientCookies } = await req.json(); const { prompt, cookies: clientCookies, imageBase64 } = await req.json();
if (!prompt) { if (!prompt) {
return NextResponse.json({ error: "Prompt is required" }, { status: 400 }); return NextResponse.json({ error: "Prompt is required" }, { status: 400 });
@ -25,7 +27,8 @@ export async function POST(req: NextRequest) {
); );
} }
console.log(`[Meta Video API] Starting video generation for prompt: "${prompt.substring(0, 50)}..."`); const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[Meta Video API] Starting ${mode} for prompt: "${prompt.substring(0, 50)}..."`);
const client = new MetaCrawlClient(); const client = new MetaCrawlClient();
@ -39,7 +42,7 @@ export async function POST(req: NextRequest) {
} }
// Generate video - this can take 30-60+ seconds // Generate video - this can take 30-60+ seconds
const result = await client.generateVideo(prompt, cookieString); const result = await client.generateVideo(prompt, cookieString, imageBase64);
if (!result.success || result.videos.length === 0) { if (!result.success || result.videos.length === 0) {
throw new Error(result.error || "No videos generated"); throw new Error(result.error || "No videos generated");

View file

@ -206,6 +206,8 @@ export function PromptHero() {
}; };
// Handle video generation (Meta AI only) // Handle video generation (Meta AI only)
// If a subject reference is set, it will use image-to-video
// Otherwise, it will use text-to-video
const handleGenerateVideo = async () => { const handleGenerateVideo = async () => {
let finalPrompt = prompt.trim(); let finalPrompt = prompt.trim();
if (!finalPrompt || isGeneratingVideo || settings.provider !== 'meta') return; if (!finalPrompt || isGeneratingVideo || settings.provider !== 'meta') return;
@ -214,14 +216,20 @@ export function PromptHero() {
setIsGenerating(true); setIsGenerating(true);
try { try {
console.log('[PromptHero] Starting Meta AI video generation...'); // Check if we have a subject reference for image-to-video
const subjectRefs = references.subject || [];
const imageBase64 = subjectRefs.length > 0 ? subjectRefs[0].thumbnail : undefined;
const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[PromptHero] Starting Meta AI ${mode}...`);
const res = await fetch('/api/meta/video', { const res = await fetch('/api/meta/video', {
method: 'POST', method: 'POST',
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ body: JSON.stringify({
prompt: finalPrompt, prompt: finalPrompt,
cookies: settings.metaCookies cookies: settings.metaCookies,
imageBase64: imageBase64
}) })
}); });
@ -243,12 +251,13 @@ export function PromptHero() {
id: crypto.randomUUID(), id: crypto.randomUUID(),
url: video.url, url: video.url,
prompt: video.prompt || finalPrompt, prompt: video.prompt || finalPrompt,
thumbnail: imageBase64, // Store the source image as thumbnail
createdAt: Date.now() createdAt: Date.now()
}); });
} }
// Show success notification // Show success notification
setErrorNotification({ setErrorNotification({
message: `🎬 Success! Generated ${data.videos.length} video(s). Check the Videos tab.`, message: `🎬 Success! Generated ${data.videos.length} video(s) via ${mode}. Check the gallery.`,
type: 'warning' // Using warning for visibility (amber color) type: 'warning' // Using warning for visibility (amber color)
}); });
setTimeout(() => setErrorNotification(null), 5000); setTimeout(() => setErrorNotification(null), 5000);
@ -557,14 +566,20 @@ export function PromptHero() {
{/* Controls Area */} {/* Controls Area */}
<div className="flex flex-col md:flex-row items-center justify-between gap-3 pt-1"> <div className="flex flex-col md:flex-row items-center justify-between gap-3 pt-1">
{/* Left Controls: References (Hidden for Meta AI as it doesn't support them yet) */} {/* Left Controls: References */}
<div className={cn("flex flex-wrap gap-2", settings.provider === 'meta' && "opacity-30 pointer-events-none grayscale")}> {/* For Meta AI: Only subject is enabled (for image-to-video), scene/style are disabled */}
<div className="flex flex-wrap gap-2">
{(['subject', 'scene', 'style'] as ReferenceCategory[]).map((cat) => { {(['subject', 'scene', 'style'] as ReferenceCategory[]).map((cat) => {
const refs = references[cat] || []; const refs = references[cat] || [];
const hasRefs = refs.length > 0; const hasRefs = refs.length > 0;
const isUploading = uploadingRefs[cat]; const isUploading = uploadingRefs[cat];
// For Meta AI: only enable subject (for image-to-video), disable scene/style
const isDisabledForMeta = settings.provider === 'meta' && cat !== 'subject';
return ( return (
<div key={cat} className="relative group"> <div key={cat} className={cn(
"relative group",
isDisabledForMeta && "opacity-30 pointer-events-none grayscale"
)}>
<button <button
onClick={() => toggleReference(cat)} onClick={() => toggleReference(cat)}
onDragOver={handleDragOver} onDragOver={handleDragOver}
@ -576,6 +591,9 @@ export function PromptHero() {
: "bg-white/5 text-white/40 border-white/5 hover:bg-white/10 hover:text-white/70 hover:border-white/10", : "bg-white/5 text-white/40 border-white/5 hover:bg-white/10 hover:text-white/70 hover:border-white/10",
isUploading && "animate-pulse cursor-wait" isUploading && "animate-pulse cursor-wait"
)} )}
title={settings.provider === 'meta' && cat === 'subject'
? "Upload an image to animate into video"
: undefined}
> >
{isUploading ? ( {isUploading ? (
<div className="h-3 w-3 animate-spin rounded-full border-2 border-current border-t-transparent" /> <div className="h-3 w-3 animate-spin rounded-full border-2 border-current border-t-transparent" />
@ -724,9 +742,13 @@ export function PromptHero() {
"relative overflow-hidden px-4 py-1.5 rounded-lg font-bold text-sm text-white shadow-lg transition-all active:scale-95 group border border-white/10", "relative overflow-hidden px-4 py-1.5 rounded-lg font-bold text-sm text-white shadow-lg transition-all active:scale-95 group border border-white/10",
isGenerating isGenerating
? "bg-gray-700 cursor-not-allowed" ? "bg-gray-700 cursor-not-allowed"
: references.subject?.length
? "bg-gradient-to-r from-pink-600 to-purple-600 hover:from-pink-500 hover:to-purple-500 hover:shadow-purple-500/25"
: "bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-500 hover:to-cyan-500 hover:shadow-cyan-500/25" : "bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-500 hover:to-cyan-500 hover:shadow-cyan-500/25"
)} )}
title="Generate video from prompt (30-60+ seconds)" title={references.subject?.length
? "Animate the subject image (30-60+ seconds)"
: "Generate video from text prompt (30-60+ seconds)"}
> >
<div className="relative z-10 flex items-center gap-1.5"> <div className="relative z-10 flex items-center gap-1.5">
{isGeneratingVideo ? ( {isGeneratingVideo ? (
@ -737,7 +759,7 @@ export function PromptHero() {
) : ( ) : (
<> <>
<Video className="h-3 w-3 group-hover:scale-110 transition-transform" /> <Video className="h-3 w-3 group-hover:scale-110 transition-transform" />
<span>Video</span> <span>{references.subject?.length ? "Animate" : "Video"}</span>
</> </>
)} )}
</div> </div>

View file

@ -173,14 +173,18 @@ export class MetaCrawlClient {
} }
/** /**
* Generate video from text prompt using Meta AI * Generate video from text prompt (and optionally an image) using Meta AI
* - Text-to-Video: Just provide prompt and cookies
* - Image-to-Video: Also provide imageBase64
* Video generation takes longer than image generation (30-60+ seconds) * Video generation takes longer than image generation (30-60+ seconds)
*/ */
async generateVideo( async generateVideo(
prompt: string, prompt: string,
cookies: string cookies: string,
imageBase64?: string
): Promise<MetaCrawlVideoResponse> { ): Promise<MetaCrawlVideoResponse> {
console.log(`[MetaCrawl] Sending video request to ${this.baseUrl}/video/generate`); const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[MetaCrawl] Sending ${mode} request to ${this.baseUrl}/video/generate`);
const response = await fetch(`${this.baseUrl}/video/generate`, { const response = await fetch(`${this.baseUrl}/video/generate`, {
method: 'POST', method: 'POST',
@ -189,7 +193,8 @@ export class MetaCrawlClient {
}, },
body: JSON.stringify({ body: JSON.stringify({
prompt, prompt,
cookies cookies,
image_base64: imageBase64
}) })
}); });

View file

@ -189,17 +189,23 @@ async def grok_chat(request: GrokChatRequest):
@app.post("/video/generate", response_model=VideoGenerateResponse) @app.post("/video/generate", response_model=VideoGenerateResponse)
async def generate_video(request: VideoGenerateRequest): async def generate_video(request: VideoGenerateRequest):
""" """
Generate a video from a text prompt using Meta AI. Generate a video from a text prompt (and optionally an image) using Meta AI.
This uses the metaai_api library's video generation feature. This uses the metaai_api library's video generation feature.
- Text-to-Video: Just provide a prompt
- Image-to-Video: Provide a prompt + image_base64
Video generation takes longer than image generation (30-60+ seconds). Video generation takes longer than image generation (30-60+ seconds).
Requires: Requires:
- prompt: The video generation prompt - prompt: The video generation prompt
- cookies: Facebook/Meta cookies (JSON array or string format) - cookies: Facebook/Meta cookies (JSON array or string format)
- image_base64: Optional base64 image data for image-to-video
""" """
import json import json
import asyncio import asyncio
import base64
import requests as sync_requests
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
try: try:
@ -227,13 +233,77 @@ async def generate_video(request: VideoGenerateRequest):
error="No valid cookies provided" error="No valid cookies provided"
) )
print(f"[VideoGen] Starting video generation for: '{request.prompt[:50]}...'") # Handle image upload to Litterbox if image_base64 is provided
image_url = None
if request.image_base64:
print(f"[VideoGen] Uploading image to Litterbox for image-to-video...")
try:
# Extract base64 data (remove data:image/...;base64, prefix if present)
image_data = request.image_base64
if ',' in image_data:
image_data = image_data.split(',')[1]
# Decode base64 to bytes
image_bytes = base64.b64decode(image_data)
# Upload to Litterbox (temporary hosting, 1 hour expiry)
litterbox_url = "https://litterbox.catbox.moe/resources/internals/api.php"
files = {
'fileToUpload': ('image.png', image_bytes, 'image/png')
}
data = {
'reqtype': 'fileupload',
'time': '1h' # 1 hour expiry
}
upload_response = sync_requests.post(litterbox_url, files=files, data=data)
if upload_response.status_code == 200 and upload_response.text.startswith('http'):
image_url = upload_response.text.strip()
print(f"[VideoGen] Image uploaded to: {image_url}")
else:
print(f"[VideoGen] Litterbox upload failed: {upload_response.text}")
return VideoGenerateResponse(
success=False,
videos=[],
error=f"Failed to upload image: {upload_response.text[:200]}"
)
except Exception as upload_error:
print(f"[VideoGen] Image upload error: {str(upload_error)}")
return VideoGenerateResponse(
success=False,
videos=[],
error=f"Failed to upload image: {str(upload_error)}"
)
mode = "image-to-video" if image_url else "text-to-video"
print(f"[VideoGen] Starting {mode} for: '{request.prompt[:50]}...'")
# Import MetaAI and run video generation in thread pool (it's synchronous) # Import MetaAI and run video generation in thread pool (it's synchronous)
from metaai_api import MetaAI from metaai_api import MetaAI
def run_video_gen(): def run_video_gen():
ai = MetaAI(cookies=cookies_dict) ai = MetaAI(cookies=cookies_dict)
if image_url:
# Image-to-video: Use prompt() with images parameter
result = ai.prompt(
message=request.prompt,
images=[image_url]
)
# Extract video URLs from media
video_urls = []
for media in result.get('media', []):
if media.get('type') == 'VIDEO' and media.get('url'):
video_urls.append(media['url'])
return {
'success': len(video_urls) > 0,
'video_urls': video_urls,
'message': result.get('message', '')
}
else:
# Text-to-video: Use generate_video()
return ai.generate_video( return ai.generate_video(
prompt=request.prompt, prompt=request.prompt,
wait_before_poll=10, wait_before_poll=10,
@ -248,7 +318,7 @@ async def generate_video(request: VideoGenerateRequest):
result = await loop.run_in_executor(executor, run_video_gen) result = await loop.run_in_executor(executor, run_video_gen)
if not result.get('success', False): if not result.get('success', False):
error_msg = result.get('error', 'Video generation failed') error_msg = result.get('error') or result.get('message') or 'Video generation failed'
print(f"[VideoGen] Failed: {error_msg}") print(f"[VideoGen] Failed: {error_msg}")
return VideoGenerateResponse( return VideoGenerateResponse(
success=False, success=False,

View file

@ -70,6 +70,7 @@ class VideoGenerateRequest(BaseModel):
"""Request model for video generation""" """Request model for video generation"""
prompt: str = Field(..., description="Video generation prompt", min_length=1) prompt: str = Field(..., description="Video generation prompt", min_length=1)
cookies: str = Field(..., description="Meta AI session cookies") cookies: str = Field(..., description="Meta AI session cookies")
image_base64: Optional[str] = Field(default=None, description="Base64 image data for image-to-video (optional)")
class VideoResult(BaseModel): class VideoResult(BaseModel):