import { v4 as uuidv4 } from 'uuid';
import type {
    WhiskAuthResponse,
    WhiskGeneratePayload,
    WhiskRecipePayload,
    MediaInput,
    WhiskGenerateResponse,
    WhiskVideoResult
} from './types';

/**
 * Whisk Client for Next.js
 * Ported from whisk_client.py
 */

/** Remote endpoints used by the client. */
const ENDPOINTS = {
    AUTH: "https://labs.google/fx/api/auth/session",
    UPLOAD: "https://labs.google/fx/api/trpc/backbone.uploadImage",
    GENERATE: "https://aisandbox-pa.googleapis.com/v1/whisk:generateImage",
    RECIPE: "https://aisandbox-pa.googleapis.com/v1/whisk:runImageRecipe",
    VIDEO_GENERATE: "https://aisandbox-pa.googleapis.com/v1/whisk:generateVideo",
    VIDEO_STATUS: "https://aisandbox-pa.googleapis.com/v1:runVideoFxSingleClipsStatusCheck",
    VIDEO_CREDITS: "https://aisandbox-pa.googleapis.com/v1/whisk:getVideoCreditStatus",
};

/** Headers sent with every request; auth headers (Cookie / Authorization) are added per call. */
const DEFAULT_HEADERS = {
    "Origin": "https://labs.google",
    "Content-Type": "application/json",
    "Referer": "https://labs.google/fx/tools/whisk/project",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
};

/** Maps a user-facing aspect ratio label to the enum string sent in image requests. */
const ASPECT_RATIOS: Record<string, string> = {
    "1:1": "IMAGE_ASPECT_RATIO_SQUARE",
    "9:16": "IMAGE_ASPECT_RATIO_PORTRAIT",
    "16:9": "IMAGE_ASPECT_RATIO_LANDSCAPE",
    "4:3": "IMAGE_ASPECT_RATIO_LANDSCAPE_FOUR_THREE",
    "3:4": "IMAGE_ASPECT_RATIO_PORTRAIT",
    "Auto": "IMAGE_ASPECT_RATIO_SQUARE"
};

/** Maps a lower-case reference category name to the enum string sent in upload requests. */
const MEDIA_CATEGORIES: Record<string, string> = {
    "subject": "MEDIA_CATEGORY_SUBJECT",
    "scene": "MEDIA_CATEGORY_SCENE",
    "style": "MEDIA_CATEGORY_STYLE"
};

/** How long a fetched access token is reused before re-authenticating (55 minutes). */
const TOKEN_TTL_SECONDS = 3300;

/** Strings longer than this are truncated when request/response bodies are logged. */
const MAX_LOGGED_STRING_LENGTH = 256;

/** Matches a leading `data:<mime>;base64,` prefix on a data URI. */
const DATA_URI_PREFIX = /^data:[\w.+-]+\/[\w.+-]+;base64,/;

/** Status strings treated as "generation finished successfully" (exact matches). */
const COMPLETE_STATUSES = new Set([
    'COMPLETED',
    'SUCCEEDED',
    'complete',
    'FINISHED',
    'MEDIA_GENERATION_STATUS_COMPLETE',
    'MEDIA_GENERATION_STATUS_SUCCEEDED',
    'MEDIA_GENERATION_STATUS_SUCCESSFUL'
]);

/** Status strings treated as "generation failed" (exact matches). */
const FAILED_STATUSES = new Set([
    'FAILED',
    'ERROR',
    'failed',
    'CANCELLED',
    'MEDIA_GENERATION_STATUS_FAILED'
]);

// The video endpoints' response schema is not typed in this file, so responses are read loosely.
// eslint-disable-next-line @typescript-eslint/no-explicit-any
type LooseJson = any;

/** A single reference image, already uploaded, identified by its generation ID. */
interface RefInput {
    mediaCategory: string;
    mediaGenerationId: string;
}

/**
 * Marks a polling error that must abort immediately instead of being retried
 * (fatal HTTP status, or the service reporting that generation failed).
 */
class VideoPollFatalError extends Error {
    constructor(message: string) {
        super(message);
        this.name = 'VideoPollFatalError';
    }
}

/**
 * Pretty-prints a value for logging, truncating very long strings so that
 * base64 image/video payloads do not flood the log.
 */
function stringifyForLog(value: unknown): string {
    return JSON.stringify(
        value,
        (_key, v: unknown) =>
            typeof v === 'string' && v.length > MAX_LOGGED_STRING_LENGTH
                ? `${v.slice(0, 32)}... <${v.length} chars total>`
                : v,
        2
    );
}

/**
 * Flattens the per-category reference IDs into one ordered list
 * (subject, then scene, then style). Empty IDs are skipped.
 */
function collectRefInputs(
    refs: { subject?: string | string[]; scene?: string | string[]; style?: string | string[] }
): RefInput[] {
    const collected: RefInput[] = [];
    const add = (mediaCategory: string, ids: string | string[] | undefined): void => {
        if (!ids) return;
        for (const id of Array.isArray(ids) ? ids : [ids]) {
            // A lone empty string is already ignored above; ignore empty array entries the same way.
            if (id) collected.push({ mediaCategory, mediaGenerationId: id });
        }
    };
    add("MEDIA_CATEGORY_SUBJECT", refs.subject);
    add("MEDIA_CATEGORY_SCENE", refs.scene);
    add("MEDIA_CATEGORY_STYLE", refs.style);
    return collected;
}

/** True when the status string denotes a successfully finished generation. */
function isCompleteStatus(status: string | undefined): boolean {
    if (!status) return false;
    return COMPLETE_STATUSES.has(status) || status.includes('SUCCESSFUL') || status.includes('COMPLETE');
}

/** True when the status string denotes a failed or cancelled generation. */
function isFailedStatus(status: string | undefined): boolean {
    if (!status) return false;
    return FAILED_STATUSES.has(status) || status.includes('FAILED') || status.includes('ERROR');
}

/**
 * True for HTTP statuses where retrying the status check cannot help.
 * 408 (timeout) and 429 (rate limit) are 4xx but transient, so they stay retryable.
 */
function isFatalHttpStatus(status: number): boolean {
    return status >= 400 && status < 500 && status !== 408 && status !== 429;
}

/** Picks the first error description found in a failed status response and renders it as text. */
function describeVideoFailure(operation: LooseJson, json: LooseJson): string {
    const raw: unknown =
        operation.operation?.error?.message ||
        operation.error?.message ||
        operation.error ||
        json.error?.message ||
        json.error ||
        'Video generation failed';
    // Error payloads may be objects; stringify them rather than printing "[object Object]".
    return typeof raw === 'string' ? raw : JSON.stringify(raw);
}

/** An image returned by {@link WhiskClient.generate}. */
export interface GeneratedImage {
    data: string; // Base64 string
    index: number;
    prompt: string;
    aspectRatio: string;
}

/**
 * Client for the Whisk image/video generation endpoints.
 *
 * Authenticates with browser cookies: the cookies are exchanged for a bearer
 * access token (cached in memory), which is then sent to the generation endpoints.
 */
export class WhiskClient {
    private cookies: Record<string, string>;
    private accessToken: string | null = null;
    /** Unix time in seconds after which {@link accessToken} is refetched. */
    private tokenExpires: number = 0;
    private cookieString: string = '';

    /**
     * @param cookieInput Either a JSON array of `{ name, value }` cookie objects
     *   or a `name=value; name2=value2` Cookie header string.
     * @throws Error if no cookie could be parsed from the input.
     */
    constructor(cookieInput: string) {
        this.cookies = this.parseCookies(cookieInput);

        // Construct cookie string header
        this.cookieString = Object.entries(this.cookies)
            .map(([k, v]) => `${k}=${v}`)
            .join('; ');

        if (!this.cookieString) {
            throw new Error("No valid cookies provided");
        }
    }

    /**
     * Parses cookies from a JSON array export or from a Cookie header string.
     *
     * @returns A name → value map; empty when nothing could be parsed.
     */
    private parseCookies(input: string): Record<string, string> {
        const cookies: Record<string, string> = {};
        if (!input) return cookies;
        const trimmed = input.trim();

        // Try JSON
        if (trimmed.startsWith('[') && trimmed.endsWith(']')) {
            try {
                const list: unknown = JSON.parse(trimmed);
                if (Array.isArray(list)) {
                    for (const entry of list as LooseJson[]) {
                        if (entry && typeof entry === 'object' && entry.name && entry.value) {
                            cookies[String(entry.name)] = String(entry.value);
                        }
                    }
                    return cookies;
                }
            } catch { /* not valid JSON - fall through to header parsing */ }
        }

        // Try header string
        for (const part of trimmed.split(';')) {
            // Split on the FIRST '=' only: cookie values may themselves contain '='
            // (e.g. base64 padding), and must not be truncated.
            const eq = part.indexOf('=');
            if (eq <= 0) continue;
            const name = part.slice(0, eq).trim();
            const value = part.slice(eq + 1).trim();
            if (name && value) cookies[name] = value;
        }
        return cookies;
    }

    /**
     * Returns a bearer token, reusing the cached one until it is considered expired.
     *
     * @throws Error prefixed with "Authentication failed:" when the session
     *   endpoint cannot be reached, rejects the cookies, or returns no token.
     */
    private async getAccessToken(): Promise<string> {
        if (this.accessToken && Date.now() / 1000 < this.tokenExpires) {
            return this.accessToken;
        }

        console.log("Fetching access token...");
        try {
            const res = await fetch(ENDPOINTS.AUTH, {
                headers: {
                    ...DEFAULT_HEADERS,
                    "Cookie": this.cookieString
                }
            });

            if (!res.ok) throw new Error(`Auth failed: ${res.status}`);

            const data: LooseJson = await res.json();
            const token: unknown = data?.access_token;
            if (typeof token !== 'string' || !token) throw new Error("Missing access_token");

            this.accessToken = token;
            this.tokenExpires = (Date.now() / 1000) + TOKEN_TTL_SECONDS; // 55 mins
            return token;
        } catch (e: unknown) {
            throw new Error(`Authentication failed: ${String(e)}`);
        }
    }

    /**
     * Uploads a reference image and returns its media generation ID.
     *
     * @param fileBase64 Image bytes as base64, with or without a `data:` URI prefix.
     * @param mimeType MIME type used to build the data URI when `fileBase64` has no prefix.
     * @param category "subject", "scene" or "style" (case-insensitive); anything else is treated as subject.
     * @returns The `uploadMediaGenerationId` reported by the upload endpoint.
     * @throws Error on a non-OK HTTP response or when the response carries no ID.
     */
    async uploadReferenceImage(fileBase64: string, mimeType: string, category: string): Promise<string> {
        const mediaCategory = MEDIA_CATEGORIES[category.toLowerCase()] ?? "MEDIA_CATEGORY_SUBJECT";
        // Accept input that is already a data URI instead of prefixing it a second time.
        const dataUri = fileBase64.startsWith('data:')
            ? fileBase64
            : `data:${mimeType};base64,${fileBase64}`;

        const payload = {
            json: {
                clientContext: {
                    workflowId: uuidv4(),
                    sessionId: Date.now().toString()
                },
                uploadMediaInput: {
                    mediaCategory,
                    rawBytes: dataUri,
                    caption: ""
                }
            }
        };

        const res = await fetch(ENDPOINTS.UPLOAD, {
            method: "POST",
            headers: {
                ...DEFAULT_HEADERS,
                "Cookie": this.cookieString
            },
            body: JSON.stringify(payload)
        });

        if (!res.ok) {
            const errText = await res.text();
            console.error("Upload API Error:", errText);
            throw new Error(`Upload status ${res.status}: ${errText.substring(0, 200)}`);
        }

        const data: LooseJson = await res.json();
        const mediaId = data?.result?.data?.json?.result?.uploadMediaGenerationId;

        if (mediaId) {
            console.log(`Uploaded ${category} image: ${mediaId}`);
            return mediaId;
        }

        console.error("Upload response missing ID:", JSON.stringify(data).substring(0, 200));
        throw new Error("Upload successful but returned no ID");
    }

    /**
     * Generates images from a prompt, optionally guided by uploaded reference images.
     *
     * With no references the text-to-image endpoint is used; with at least one
     * reference the image-recipe endpoint is used instead.
     *
     * @param prompt Text prompt (sent as `userInstruction` on the recipe endpoint).
     * @param aspectRatio Key of the aspect ratio table; unknown values fall back to square.
     * @param refs Media generation IDs of already-uploaded references, one or many per category.
     * @param _preciseMode Currently unused - the corresponding API field is not known yet.
     * @returns One entry per image returned by the service.
     * @throws Error on a non-OK HTTP response, when no image is returned, or
     *   "Safety Filter Blocked Request" when the error text mentions UNSAFE/SEXUAL.
     */
    async generate(
        prompt: string,
        aspectRatio: string = "1:1",
        refs: { subject?: string | string[]; scene?: string | string[]; style?: string | string[] } = {},
        _preciseMode: boolean = false
    ): Promise<GeneratedImage[]> {
        const token = await this.getAccessToken();

        // Prepare Media Inputs (assuming refs are generation IDs already uploaded).
        // Multiple IDs per category are supported.
        const refInputs = collectRefInputs(refs);
        const mediaInputs: MediaInput[] = refInputs.map(mediaInput => ({ mediaInput }));

        const isImageToImage = mediaInputs.length > 0;
        const endpoint = isImageToImage ? ENDPOINTS.RECIPE : ENDPOINTS.GENERATE;
        const arEnum = ASPECT_RATIOS[aspectRatio] ?? "IMAGE_ASPECT_RATIO_SQUARE";

        let payload: WhiskGeneratePayload | WhiskRecipePayload;

        const clientContext = {
            workflowId: uuidv4(),
            tool: isImageToImage ? "BACKBONE" : "IMAGE_FX",
            sessionId: Date.now().toString()
        };

        if (!isImageToImage) {
            const seed = Math.floor(Math.random() * 100000);
            payload = {
                clientContext,
                imageModelSettings: {
                    imageModel: "IMAGEN_3_5",
                    aspectRatio: arEnum
                },
                seed: seed,
                prompt: prompt,
                mediaCategory: "MEDIA_CATEGORY_BOARD"
            } as any; // eslint-disable-line @typescript-eslint/no-explicit-any
        } else {
            // Image-to-Image (Recipe) - uses runImageRecipe endpoint.
            // Each reference becomes a recipeMediaInputs entry with a caption and a mediaInput.
            const seed = Math.floor(Math.random() * 1000000);
            const recipeMediaInputs = refInputs.map(mediaInput => ({ caption: "", mediaInput }));

            payload = {
                clientContext,
                imageModelSettings: {
                    imageModel: "R2I", // Recipe-to-Image model
                    aspectRatio: arEnum
                },
                seed: seed,
                userInstruction: prompt, // Note: uses userInstruction instead of prompt
                recipeMediaInputs: recipeMediaInputs
                // Note: preciseMode field name TBD - needs API discovery
            } as any; // eslint-disable-line @typescript-eslint/no-explicit-any
        }

        console.log(`Generating: "${prompt.substring(0, 30)}..." (Refs: ${mediaInputs.length})`);

        try {
            const res = await fetch(endpoint, {
                method: "POST",
                headers: {
                    ...DEFAULT_HEADERS,
                    "Authorization": `Bearer ${token}`
                },
                body: JSON.stringify(payload)
            });

            if (!res.ok) {
                const errText = await res.text();
                console.error("Whisk API Error Body:", errText);
                throw new Error(`API Error ${res.status}: ${errText.substring(0, 500)}`);
            }

            const json = await res.json() as WhiskGenerateResponse;
            const images: string[] = [];

            if (json.imagePanels) {
                for (const panel of json.imagePanels) {
                    for (const img of (panel.generatedImages || [])) {
                        if (img.encodedImage) images.push(img.encodedImage);
                    }
                }
            }

            if (images.length === 0) throw new Error("No images returned");

            return images.map((data, i) => ({
                data,
                index: i,
                prompt,
                aspectRatio
            }));
        } catch (e: unknown) {
            console.error("Generation failed:", e);
            const errMessage = e instanceof Error ? e.message : String(e);
            // Check for safety filter
            if (errMessage.includes("UNSAFE") || errMessage.includes("SEXUAL")) {
                throw new Error("Safety Filter Blocked Request");
            }
            throw e;
        }
    }

    /**
     * Generate a video from an image using Whisk Animate (Veo).
     * This is an async operation - submits the request, then polls for completion.
     *
     * @param imageGenerationId ID of a previously generated/uploaded image; takes precedence when non-empty.
     * @param prompt Text prompt for the animation.
     * @param imageBase64 Image bytes as base64 (a `data:` URI prefix is stripped); used when no ID is given.
     * @param aspectRatio "9:16" selects portrait; any other value selects landscape.
     * @returns The finished video as reported by the status endpoint.
     * @throws Error when neither image source is given, on a non-OK HTTP response,
     *   when the response carries no usable ID, or when polling fails / times out.
     */
    async generateVideo(
        imageGenerationId: string,
        prompt: string,
        imageBase64?: string,
        aspectRatio: string = "16:9"
    ): Promise<WhiskVideoResult> {
        const token = await this.getAccessToken();

        console.log("generateVideo: Starting video generation...", {
            hasImageId: !!imageGenerationId,
            hasBase64: !!imageBase64,
            promptLength: prompt.length
        });

        // Generate IDs for client context
        const sessionId = `;${Date.now()}`;
        const workflowId = uuidv4();

        // Map aspect ratio to video format
        const videoAspectRatio = aspectRatio === "9:16"
            ? "VIDEO_ASPECT_RATIO_PORTRAIT"
            : "VIDEO_ASPECT_RATIO_LANDSCAPE";

        // Build promptImageInput - the nested object for prompt and image
        const promptImageInput: { prompt: string; mediaGenerationId?: string; rawBytes?: string } = {
            prompt: prompt
        };

        // Add image reference
        if (imageGenerationId) {
            promptImageInput.mediaGenerationId = imageGenerationId;
        } else if (imageBase64) {
            // Clean and prepare base64 (remove data URI prefix)
            promptImageInput.rawBytes = imageBase64.replace(DATA_URI_PREFIX, '');
        } else {
            throw new Error("Either imageGenerationId or imageBase64 is required");
        }

        // Build payload matching Whisk API structure (with correct field names)
        const payload = {
            clientContext: {
                sessionId: sessionId,
                tool: "BACKBONE",
                workflowId: workflowId
            },
            promptImageInput: promptImageInput,
            modelNameType: "VEO_3_1_I2V_12STEP",
            loopVideo: false,
            aspectRatio: videoAspectRatio
        };

        const endpoint = ENDPOINTS.VIDEO_GENERATE;
        // Long strings (the base64 image) are truncated in the log output only.
        console.log("generateVideo: Sending payload to", endpoint, stringifyForLog(payload));

        try {
            const res = await fetch(endpoint, {
                method: "POST",
                headers: {
                    ...DEFAULT_HEADERS,
                    "Authorization": `Bearer ${token}`
                },
                body: JSON.stringify(payload),
            });

            if (!res.ok) {
                const errorText = await res.text();
                console.error("generateVideo: API Error", res.status, errorText);
                throw new Error(`Whisk API Error: ${res.status} - ${errorText}`);
            }

            const data: LooseJson = await res.json();
            console.log("generateVideo: Response Data", stringifyForLog(data));

            let resultId = '';

            if (data.mediaGenerationId) {
                // Check for mediaGenerationId (Whisk video response format)
                resultId = data.mediaGenerationId;
            } else if (data.operation?.operation?.name) {
                // Check for operation ID (alternative response)
                resultId = data.operation.operation.name;
            } else {
                // Fallback checks
                const generations: LooseJson[] = data.mediaGenerations || [];
                const videoGen = generations.find(g => g?.mediaContentType === 'MEDIA_CONTENT_TYPE_VIDEO');
                resultId = data.videoGenerationId || videoGen?.id || generations[0]?.id || data.id;
            }

            if (!resultId) {
                console.error("generateVideo: No ID found in response", data);
                throw new Error("Failed to start video generation: No ID returned");
            }

            console.log("generateVideo: Got ID, starting polling:", resultId);

            // Start polling for result
            return this.pollVideoStatus(resultId, token);
        } catch (error) {
            console.error("generateVideo: Failed", error);
            throw error;
        }
    }

    /**
     * Poll for video generation status until complete or failed.
     * Uses the runVideoFxSingleClipsStatusCheck endpoint.
     *
     * Network errors, unparsable responses, 5xx, 408 and 429 are retried; other
     * 4xx responses and a reported generation failure abort immediately.
     *
     * @param videoGenId Operation name / generation ID returned when the video was submitted.
     * @param token Bearer token used for every status request.
     * @returns The video as a URL, or as a `data:video/mp4;base64,` URI when only raw bytes are returned.
     * @throws Error on fatal HTTP status, reported failure, or after 60 attempts without a result.
     */
    private async pollVideoStatus(videoGenId: string, token: string): Promise<WhiskVideoResult> {
        const maxAttempts = 60; // 5 minutes max (5s intervals)
        const pollInterval = 5000; // 5 seconds

        console.log(`Starting video status polling for ID: ${videoGenId}`);

        // Use POST request with operations array containing operation.name
        // (video uses async operation model, not mediaGenerationId).
        // The body never changes between attempts, so build it once.
        const statusBody = JSON.stringify({
            operations: [
                {
                    operation: {
                        name: videoGenId
                    }
                }
            ]
        });

        for (let attempt = 0; attempt < maxAttempts; attempt++) {
            console.log(`Polling video status... attempt ${attempt + 1}/${maxAttempts}`);

            try {
                const res = await fetch(ENDPOINTS.VIDEO_STATUS, {
                    method: "POST",
                    headers: {
                        ...DEFAULT_HEADERS,
                        "Authorization": `Bearer ${token}`
                    },
                    body: statusBody
                });

                if (!res.ok) {
                    const errText = await res.text();
                    console.error("Video status error:", res.status, errText);
                    // Continue polling unless it's a fatal error (4xx).
                    // The dedicated error type keeps the catch below from treating it as transient.
                    if (isFatalHttpStatus(res.status)) {
                        throw new VideoPollFatalError(`Video status error: ${errText}`);
                    }
                } else {
                    const json: LooseJson = await res.json();
                    console.log("Video status response:", stringifyForLog(json));

                    // Response is likely in operations array format
                    const operation: LooseJson = json.operations?.[0] || json;
                    const rawStatus: unknown =
                        operation.status || operation.state || operation.taskStatus || json.status;
                    // Only string statuses can be classified; anything else counts as "still running".
                    const status = typeof rawStatus === 'string' ? rawStatus : undefined;

                    if (isCompleteStatus(status)) {
                        // Check multiple possible response formats (including nested in operations)
                        const result: LooseJson = operation.result || operation;

                        // Check for URL first
                        const videoUrl =
                            result.videoUrl ||
                            result.video?.url ||
                            result.mediaUrl ||
                            result.generatedMedia?.url ||
                            result.generatedMedia?.uri ||
                            result.url ||
                            json.videoUrl ||
                            operation.generatedMedia?.uri;

                        // Check for base64 encoded video data - Whisk uses rawBytes field
                        const encodedVideo =
                            operation.rawBytes ||
                            result.rawBytes ||
                            result.encodedVideo ||
                            result.video?.encodedVideo ||
                            result.generatedMedia?.encodedVideo ||
                            json.encodedVideo ||
                            operation.generatedMedia?.rawBytes;

                        if (videoUrl) {
                            console.log("Video generation complete with URL:", videoUrl);
                            return {
                                id: videoGenId,
                                url: videoUrl,
                                status: 'COMPLETED'
                            };
                        }

                        if (typeof encodedVideo === 'string' && encodedVideo) {
                            console.log("Video generation complete with rawBytes/base64 data");
                            // Check if it's already a data URI or needs to be converted
                            const videoDataUri = encodedVideo.startsWith('data:')
                                ? encodedVideo
                                : `data:video/mp4;base64,${encodedVideo}`;
                            return {
                                id: videoGenId,
                                url: videoDataUri,
                                status: 'COMPLETED'
                            };
                        }

                        // Completed without usable media: keep polling in case it appears later.
                        console.warn("Video completed but no URL/data found in response:", stringifyForLog(json));
                        // Try to find any media key that can be used
                        const mediaKey = operation.mediaKey || result.mediaKey;
                        if (mediaKey) {
                            console.log("Found mediaKey, but no direct URL:", mediaKey);
                        }
                    } else if (isFailedStatus(status)) {
                        // Extract error message from nested structure
                        const errorMsg = describeVideoFailure(operation, json);

                        // Immediately throw - don't continue polling on failure
                        console.error("Video generation FAILED:", errorMsg);
                        throw new VideoPollFatalError(`Video generation failed: ${errorMsg}`);
                    }

                    // IN_PROGRESS, PENDING, PROCESSING, RUNNING - continue polling
                    console.log(`Video status: ${String(rawStatus)} - continuing to poll...`);
                }
            } catch (e: unknown) {
                // Logical failure (should not retry) vs network error (should retry)
                const message = e instanceof Error ? e.message : '';
                const isLogicalFailure =
                    e instanceof VideoPollFatalError ||
                    message.includes('Video generation failed:') ||
                    message.includes('NCII') ||
                    message.includes('content policy') ||
                    message.includes('safety');
                if (isLogicalFailure) {
                    throw e;
                }
                console.error("Poll error (network/transient):", e);
                if (attempt === maxAttempts - 1) throw e;
            }

            // No point sleeping after the final attempt - the timeout error follows directly.
            if (attempt < maxAttempts - 1) {
                await new Promise<void>(resolve => setTimeout(resolve, pollInterval));
            }
        }

        throw new Error("Video generation timed out after 5 minutes");
    }
}