feat: add image-to-video support for Meta AI
Some checks are pending
CI / build (18.x) (push) Waiting to run
CI / build (20.x) (push) Waiting to run

- Add Litterbox temporary image hosting for image URLs
- Update backend to accept image_base64 parameter
- Update TypeScript client and API route
- Subject button now enabled for Meta AI (for image-to-video)
- Button changes from 'Video' to 'Animate' when subject is set
- Pink/purple gradient for image-to-video, blue/cyan for text-to-video
This commit is contained in:
Khoa.vo 2026-01-06 14:11:26 +07:00
parent 0f87b8ef99
commit bae4c487da
5 changed files with 128 additions and 27 deletions

View file

@@ -4,12 +4,14 @@ import { MetaCrawlClient } from '@/lib/providers/meta-crawl-client';
/**
* POST /api/meta/video
*
* Generate a video from a text prompt using Meta AI.
* Generate a video from a text prompt (and optionally an image) using Meta AI.
* - Text-to-Video: Just provide prompt and cookies
* - Image-to-Video: Also provide imageBase64
* Video generation takes 30-60+ seconds, so this endpoint may take a while.
*/
export async function POST(req: NextRequest) {
try {
const { prompt, cookies: clientCookies } = await req.json();
const { prompt, cookies: clientCookies, imageBase64 } = await req.json();
if (!prompt) {
return NextResponse.json({ error: "Prompt is required" }, { status: 400 });
@@ -25,7 +27,8 @@ export async function POST(req: NextRequest) {
);
}
console.log(`[Meta Video API] Starting video generation for prompt: "${prompt.substring(0, 50)}..."`);
const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[Meta Video API] Starting ${mode} for prompt: "${prompt.substring(0, 50)}..."`);
const client = new MetaCrawlClient();
@@ -39,7 +42,7 @@ export async function POST(req: NextRequest) {
}
// Generate video - this can take 30-60+ seconds
const result = await client.generateVideo(prompt, cookieString);
const result = await client.generateVideo(prompt, cookieString, imageBase64);
if (!result.success || result.videos.length === 0) {
throw new Error(result.error || "No videos generated");

View file

@@ -206,6 +206,8 @@ export function PromptHero() {
};
// Handle video generation (Meta AI only)
// If a subject reference is set, it will use image-to-video
// Otherwise, it will use text-to-video
const handleGenerateVideo = async () => {
let finalPrompt = prompt.trim();
if (!finalPrompt || isGeneratingVideo || settings.provider !== 'meta') return;
@@ -214,14 +216,20 @@ export function PromptHero() {
setIsGenerating(true);
try {
console.log('[PromptHero] Starting Meta AI video generation...');
// Check if we have a subject reference for image-to-video
const subjectRefs = references.subject || [];
const imageBase64 = subjectRefs.length > 0 ? subjectRefs[0].thumbnail : undefined;
const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[PromptHero] Starting Meta AI ${mode}...`);
const res = await fetch('/api/meta/video', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({
prompt: finalPrompt,
cookies: settings.metaCookies
cookies: settings.metaCookies,
imageBase64: imageBase64
})
});
@@ -243,12 +251,13 @@ export function PromptHero() {
id: crypto.randomUUID(),
url: video.url,
prompt: video.prompt || finalPrompt,
thumbnail: imageBase64, // Store the source image as thumbnail
createdAt: Date.now()
});
}
// Show success notification
setErrorNotification({
message: `🎬 Success! Generated ${data.videos.length} video(s). Check the Videos tab.`,
message: `🎬 Success! Generated ${data.videos.length} video(s) via ${mode}. Check the gallery.`,
type: 'warning' // Using warning for visibility (amber color)
});
setTimeout(() => setErrorNotification(null), 5000);
@@ -557,14 +566,20 @@ export function PromptHero() {
{/* Controls Area */}
<div className="flex flex-col md:flex-row items-center justify-between gap-3 pt-1">
{/* Left Controls: References (Hidden for Meta AI as it doesn't support them yet) */}
<div className={cn("flex flex-wrap gap-2", settings.provider === 'meta' && "opacity-30 pointer-events-none grayscale")}>
{/* Left Controls: References */}
{/* For Meta AI: Only subject is enabled (for image-to-video), scene/style are disabled */}
<div className="flex flex-wrap gap-2">
{(['subject', 'scene', 'style'] as ReferenceCategory[]).map((cat) => {
const refs = references[cat] || [];
const hasRefs = refs.length > 0;
const isUploading = uploadingRefs[cat];
// For Meta AI: only enable subject (for image-to-video), disable scene/style
const isDisabledForMeta = settings.provider === 'meta' && cat !== 'subject';
return (
<div key={cat} className="relative group">
<div key={cat} className={cn(
"relative group",
isDisabledForMeta && "opacity-30 pointer-events-none grayscale"
)}>
<button
onClick={() => toggleReference(cat)}
onDragOver={handleDragOver}
@@ -576,6 +591,9 @@ export function PromptHero() {
: "bg-white/5 text-white/40 border-white/5 hover:bg-white/10 hover:text-white/70 hover:border-white/10",
isUploading && "animate-pulse cursor-wait"
)}
title={settings.provider === 'meta' && cat === 'subject'
? "Upload an image to animate into video"
: undefined}
>
{isUploading ? (
<div className="h-3 w-3 animate-spin rounded-full border-2 border-current border-t-transparent" />
@@ -724,9 +742,13 @@ export function PromptHero() {
"relative overflow-hidden px-4 py-1.5 rounded-lg font-bold text-sm text-white shadow-lg transition-all active:scale-95 group border border-white/10",
isGenerating
? "bg-gray-700 cursor-not-allowed"
: "bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-500 hover:to-cyan-500 hover:shadow-cyan-500/25"
: references.subject?.length
? "bg-gradient-to-r from-pink-600 to-purple-600 hover:from-pink-500 hover:to-purple-500 hover:shadow-purple-500/25"
: "bg-gradient-to-r from-blue-600 to-cyan-600 hover:from-blue-500 hover:to-cyan-500 hover:shadow-cyan-500/25"
)}
title="Generate video from prompt (30-60+ seconds)"
title={references.subject?.length
? "Animate the subject image (30-60+ seconds)"
: "Generate video from text prompt (30-60+ seconds)"}
>
<div className="relative z-10 flex items-center gap-1.5">
{isGeneratingVideo ? (
@@ -737,7 +759,7 @@ export function PromptHero() {
) : (
<>
<Video className="h-3 w-3 group-hover:scale-110 transition-transform" />
<span>Video</span>
<span>{references.subject?.length ? "Animate" : "Video"}</span>
</>
)}
</div>

View file

@@ -173,14 +173,18 @@ export class MetaCrawlClient {
}
/**
* Generate video from text prompt using Meta AI
* Generate video from text prompt (and optionally an image) using Meta AI
* - Text-to-Video: Just provide prompt and cookies
* - Image-to-Video: Also provide imageBase64
* Video generation takes longer than image generation (30-60+ seconds)
*/
async generateVideo(
prompt: string,
cookies: string
cookies: string,
imageBase64?: string
): Promise<MetaCrawlVideoResponse> {
console.log(`[MetaCrawl] Sending video request to ${this.baseUrl}/video/generate`);
const mode = imageBase64 ? 'image-to-video' : 'text-to-video';
console.log(`[MetaCrawl] Sending ${mode} request to ${this.baseUrl}/video/generate`);
const response = await fetch(`${this.baseUrl}/video/generate`, {
method: 'POST',
@@ -189,7 +193,8 @@ export class MetaCrawlClient {
},
body: JSON.stringify({
prompt,
cookies
cookies,
image_base64: imageBase64
})
});

View file

@@ -189,17 +189,23 @@ async def grok_chat(request: GrokChatRequest):
@app.post("/video/generate", response_model=VideoGenerateResponse)
async def generate_video(request: VideoGenerateRequest):
"""
Generate a video from a text prompt using Meta AI.
Generate a video from a text prompt (and optionally an image) using Meta AI.
This uses the metaai_api library's video generation feature.
- Text-to-Video: Just provide a prompt
- Image-to-Video: Provide a prompt + image_base64
Video generation takes longer than image generation (30-60+ seconds).
Requires:
- prompt: The video generation prompt
- cookies: Facebook/Meta cookies (JSON array or string format)
- image_base64: Optional base64 image data for image-to-video
"""
import json
import asyncio
import base64
import requests as sync_requests
from concurrent.futures import ThreadPoolExecutor
try:
@@ -227,20 +233,84 @@ async def generate_video(request: VideoGenerateRequest):
error="No valid cookies provided"
)
print(f"[VideoGen] Starting video generation for: '{request.prompt[:50]}...'")
# Handle image upload to Litterbox if image_base64 is provided
image_url = None
if request.image_base64:
print(f"[VideoGen] Uploading image to Litterbox for image-to-video...")
try:
# Extract base64 data (remove data:image/...;base64, prefix if present)
image_data = request.image_base64
if ',' in image_data:
image_data = image_data.split(',')[1]
# Decode base64 to bytes
image_bytes = base64.b64decode(image_data)
# Upload to Litterbox (temporary hosting, 1 hour expiry)
litterbox_url = "https://litterbox.catbox.moe/resources/internals/api.php"
files = {
'fileToUpload': ('image.png', image_bytes, 'image/png')
}
data = {
'reqtype': 'fileupload',
'time': '1h' # 1 hour expiry
}
upload_response = sync_requests.post(litterbox_url, files=files, data=data)
if upload_response.status_code == 200 and upload_response.text.startswith('http'):
image_url = upload_response.text.strip()
print(f"[VideoGen] Image uploaded to: {image_url}")
else:
print(f"[VideoGen] Litterbox upload failed: {upload_response.text}")
return VideoGenerateResponse(
success=False,
videos=[],
error=f"Failed to upload image: {upload_response.text[:200]}"
)
except Exception as upload_error:
print(f"[VideoGen] Image upload error: {str(upload_error)}")
return VideoGenerateResponse(
success=False,
videos=[],
error=f"Failed to upload image: {str(upload_error)}"
)
mode = "image-to-video" if image_url else "text-to-video"
print(f"[VideoGen] Starting {mode} for: '{request.prompt[:50]}...'")
# Import MetaAI and run video generation in thread pool (it's synchronous)
from metaai_api import MetaAI
def run_video_gen():
ai = MetaAI(cookies=cookies_dict)
return ai.generate_video(
prompt=request.prompt,
wait_before_poll=10,
max_attempts=60, # Up to 5 minutes of polling
wait_seconds=5,
verbose=True
)
if image_url:
# Image-to-video: Use prompt() with images parameter
result = ai.prompt(
message=request.prompt,
images=[image_url]
)
# Extract video URLs from media
video_urls = []
for media in result.get('media', []):
if media.get('type') == 'VIDEO' and media.get('url'):
video_urls.append(media['url'])
return {
'success': len(video_urls) > 0,
'video_urls': video_urls,
'message': result.get('message', '')
}
else:
# Text-to-video: Use generate_video()
return ai.generate_video(
prompt=request.prompt,
wait_before_poll=10,
max_attempts=60, # Up to 5 minutes of polling
wait_seconds=5,
verbose=True
)
# Run in thread pool since metaai_api is synchronous
loop = asyncio.get_event_loop()
@@ -248,7 +318,7 @@ async def generate_video(request: VideoGenerateRequest):
result = await loop.run_in_executor(executor, run_video_gen)
if not result.get('success', False):
error_msg = result.get('error', 'Video generation failed')
error_msg = result.get('error') or result.get('message') or 'Video generation failed'
print(f"[VideoGen] Failed: {error_msg}")
return VideoGenerateResponse(
success=False,

View file

@@ -70,6 +70,7 @@ class VideoGenerateRequest(BaseModel):
"""Request model for video generation"""
prompt: str = Field(..., description="Video generation prompt", min_length=1)
cookies: str = Field(..., description="Meta AI session cookies")
image_base64: Optional[str] = Field(default=None, description="Base64 image data for image-to-video (optional)")
class VideoResult(BaseModel):