""" Whisk Client for Python/FastAPI Port of lib/whisk-client.ts Handles: - Cookie parsing (JSON array or string format) - Access token retrieval from Whisk API - Image generation with aspect ratio support - Reference image upload - Video generation with polling """ import httpx import json import uuid import base64 import asyncio from typing import Optional, Dict, List, Any # Whisk API endpoints AUTH_URL = "https://aisandbox-pa.googleapis.com/v1:signInWithIdp" GENERATE_URL = "https://aisandbox-pa.googleapis.com/v1:runImagine" RECIPE_URL = "https://aisandbox-pa.googleapis.com/v1:runRecipe" UPLOAD_URL = "https://aisandbox-pa.googleapis.com/v1:uploadMedia" VIDEO_URL = "https://aisandbox-pa.googleapis.com/v1:runVideoFxSingleClips" VIDEO_STATUS_URL = "https://aisandbox-pa.googleapis.com/v1:runVideoFxSingleClipsStatusCheck" # Aspect ratio mapping ASPECT_RATIOS = { "1:1": "IMAGE_ASPECT_RATIO_SQUARE", "9:16": "IMAGE_ASPECT_RATIO_PORTRAIT", "16:9": "IMAGE_ASPECT_RATIO_LANDSCAPE", "4:3": "IMAGE_ASPECT_RATIO_LANDSCAPE_FOUR_THREE", "3:4": "IMAGE_ASPECT_RATIO_PORTRAIT", "Auto": "IMAGE_ASPECT_RATIO_SQUARE" } MEDIA_CATEGORIES = { "subject": "MEDIA_CATEGORY_SUBJECT", "scene": "MEDIA_CATEGORY_SCENE", "style": "MEDIA_CATEGORY_STYLE" } class GeneratedImage: def __init__(self, data: str, index: int, prompt: str, aspect_ratio: str): self.data = data self.index = index self.prompt = prompt self.aspect_ratio = aspect_ratio def to_dict(self) -> Dict[str, Any]: return { "data": self.data, "index": self.index, "prompt": self.prompt, "aspectRatio": self.aspect_ratio } class WhiskVideoResult: def __init__(self, id: str, url: Optional[str], status: str): self.id = id self.url = url self.status = status def to_dict(self) -> Dict[str, Any]: return { "id": self.id, "url": self.url, "status": self.status } class WhiskClient: def __init__(self, cookie_input: str): self.cookies = self._parse_cookies(cookie_input) self.access_token: Optional[str] = None self.token_expires: int = 0 self.cookie_string = "" if not self.cookies: raise ValueError("No valid cookies provided") # Build cookie string for requests self.cookie_string = "; ".join( f"{name}={value}" for name, value in self.cookies.items() ) def _parse_cookies(self, input_str: str) -> Dict[str, str]: """Parse cookies from string or JSON format""" if not input_str or not input_str.strip(): return {} trimmed = input_str.strip() cookies: Dict[str, str] = {} # Handle JSON array format (e.g., from Cookie-Editor) if trimmed.startswith('[') or trimmed.startswith('{'): try: parsed = json.loads(trimmed) if isinstance(parsed, list): for c in parsed: if isinstance(c, dict) and 'name' in c and 'value' in c: cookies[c['name']] = c['value'] return cookies elif isinstance(parsed, dict) and 'name' in parsed and 'value' in parsed: return {parsed['name']: parsed['value']} except json.JSONDecodeError: pass # Handle string format (key=value; key2=value2) for pair in trimmed.split(';'): pair = pair.strip() if '=' in pair: key, _, value = pair.partition('=') cookies[key.strip()] = value.strip() return cookies async def get_access_token(self) -> str: """Get or refresh access token from Whisk API""" import time # Return cached token if still valid if self.access_token and self.token_expires > int(time.time() * 1000): return self.access_token async with httpx.AsyncClient() as client: response = await client.post( AUTH_URL, headers={ "Content-Type": "application/json", "Cookie": self.cookie_string }, json={} ) if response.status_code != 200: raise Exception(f"Auth failed: {response.status_code} - {response.text[:200]}") data = response.json() self.access_token = data.get("authToken") expires_in = int(data.get("expiresIn", 3600)) self.token_expires = int(time.time() * 1000) + (expires_in * 1000) - 60000 if not self.access_token: raise Exception("No auth token in response") return self.access_token async def upload_reference_image( self, file_base64: str, mime_type: str, category: str ) -> Optional[str]: """Upload a reference image and return media ID""" token = await self.get_access_token() data_uri = f"data:{mime_type};base64,{file_base64}" media_category = MEDIA_CATEGORIES.get(category.lower(), MEDIA_CATEGORIES["subject"]) payload = { "mediaData": data_uri, "imageOptions": { "imageCategory": media_category } } async with httpx.AsyncClient(timeout=60.0) as client: response = await client.post( UPLOAD_URL, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", "Cookie": self.cookie_string }, json=payload ) if response.status_code != 200: print(f"[WhiskClient] Upload failed: {response.status_code}") raise Exception(f"Upload failed: {response.text[:200]}") data = response.json() media_id = data.get("generationId") or data.get("imageMediaId") if not media_id: print(f"[WhiskClient] No media ID in response: {data}") return None print(f"[WhiskClient] Upload successful, mediaId: {media_id}") return media_id async def generate( self, prompt: str, aspect_ratio: str = "1:1", refs: Optional[Dict[str, Any]] = None, precise_mode: bool = False ) -> List[GeneratedImage]: """Generate images using Whisk API""" token = await self.get_access_token() refs = refs or {} # Build media inputs media_inputs = [] def add_refs(category: str, ids): """Helper to add refs (handles both single string and array)""" if not ids: return id_list = [ids] if isinstance(ids, str) else ids cat_enum = MEDIA_CATEGORIES.get(category.lower()) for ref_id in id_list: if ref_id: media_inputs.append({ "mediaId": ref_id, "mediaCategory": cat_enum }) add_refs("subject", refs.get("subject")) add_refs("scene", refs.get("scene")) add_refs("style", refs.get("style")) # Build payload aspect_enum = ASPECT_RATIOS.get(aspect_ratio, ASPECT_RATIOS["1:1"]) # Determine endpoint based on refs has_refs = len(media_inputs) > 0 endpoint = RECIPE_URL if has_refs else GENERATE_URL if has_refs: # Recipe format (with refs) recipe_inputs = [] def add_recipe_refs(category: str, ids): if not ids: return id_list = [ids] if isinstance(ids, str) else ids cat_enum = MEDIA_CATEGORIES.get(category.lower()) for ref_id in id_list: if ref_id: recipe_inputs.append({ "inputType": cat_enum, "mediaId": ref_id }) add_recipe_refs("subject", refs.get("subject")) add_recipe_refs("scene", refs.get("scene")) add_recipe_refs("style", refs.get("style")) payload = { "recipeInputs": recipe_inputs, "generationConfig": { "aspectRatio": aspect_enum, "numberOfImages": 4, "personalizationConfig": {} }, "textPromptInput": { "text": prompt } } else: # Direct imagine format (no refs) payload = { "imagineConfig": { "aspectRatio": aspect_enum, "imaginePrompt": prompt, "numberOfImages": 4, "imageSafetyMode": "BLOCK_SOME" } } print(f"[WhiskClient] Generating with prompt: \"{prompt[:50]}...\"") async with httpx.AsyncClient(timeout=120.0) as client: response = await client.post( endpoint, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", "Cookie": self.cookie_string }, json=payload ) if response.status_code != 200: error_text = response.text[:500] if "401" in error_text or "403" in error_text: raise Exception("Whisk auth failed - cookies may be expired") raise Exception(f"Generation failed: {response.status_code} - {error_text}") data = response.json() # Extract images images: List[GeneratedImage] = [] image_list = data.get("generatedImages", []) for i, img in enumerate(image_list): image_data = img.get("encodedImage", "") if image_data: images.append(GeneratedImage( data=image_data, index=i, prompt=prompt, aspect_ratio=aspect_ratio )) print(f"[WhiskClient] Generated {len(images)} images") return images async def generate_video( self, image_generation_id: str, prompt: str, image_base64: Optional[str] = None, aspect_ratio: str = "16:9" ) -> WhiskVideoResult: """Generate a video from an image using Whisk Animate (Veo)""" token = await self.get_access_token() # If we have base64 but no generation ID, upload first actual_gen_id = image_generation_id if not actual_gen_id and image_base64: actual_gen_id = await self.upload_reference_image( image_base64, "image/png", "subject" ) if not actual_gen_id: raise Exception("No image generation ID available for video") payload = { "generationId": actual_gen_id, "videoFxConfig": { "aspectRatio": aspect_ratio.replace(":", "_"), "prompt": prompt, "duration": "5s" } } async with httpx.AsyncClient(timeout=30.0) as client: response = await client.post( VIDEO_URL, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", "Cookie": self.cookie_string }, json=payload ) if response.status_code != 200: raise Exception(f"Video init failed: {response.text[:200]}") data = response.json() video_gen_id = data.get("videoGenId") if not video_gen_id: raise Exception("No video generation ID in response") print(f"[WhiskClient] Video generation started: {video_gen_id}") # Poll for completion return await self.poll_video_status(video_gen_id, token) async def poll_video_status( self, video_gen_id: str, token: str ) -> WhiskVideoResult: """Poll for video generation status until complete or failed""" max_attempts = 60 poll_interval = 3 async with httpx.AsyncClient(timeout=30.0) as client: for attempt in range(max_attempts): print(f"[WhiskClient] Polling video status {attempt + 1}/{max_attempts}...") response = await client.post( VIDEO_STATUS_URL, headers={ "Content-Type": "application/json", "Authorization": f"Bearer {token}", "Cookie": self.cookie_string }, json={"videoGenId": video_gen_id} ) if response.status_code != 200: await asyncio.sleep(poll_interval) continue data = response.json() status = data.get("status", "") video_url = data.get("videoUri") if status == "COMPLETE" and video_url: print(f"[WhiskClient] Video complete: {video_url[:50]}...") return WhiskVideoResult( id=video_gen_id, url=video_url, status="complete" ) elif status in ["FAILED", "ERROR"]: raise Exception(f"Video generation failed: {status}") await asyncio.sleep(poll_interval) raise Exception("Video generation timed out")