556 lines
22 KiB
TypeScript
556 lines
22 KiB
TypeScript
import { v4 as uuidv4 } from 'uuid';
|
|
import type {
|
|
WhiskAuthResponse,
|
|
WhiskGeneratePayload,
|
|
WhiskRecipePayload,
|
|
MediaInput,
|
|
WhiskGenerateResponse,
|
|
WhiskVideoResult
|
|
} from './types';
|
|
|
|
/**
|
|
* Whisk Client for Next.js
|
|
* Ported from whisk_client.py
|
|
*/
|
|
|
|
/**
 * Absolute URLs of every remote endpoint this module talks to.
 *
 * The two `labs.google` endpoints are called with the session cookie
 * (`AUTH`, `UPLOAD`); the `aisandbox-pa.googleapis.com` endpoints are called
 * with a bearer token obtained from `AUTH`.
 */
const ENDPOINTS = {
    // Session endpoint; its JSON response carries `access_token`.
    AUTH: 'https://labs.google/fx/api/auth/session',
    // Reference-image upload (tRPC route).
    UPLOAD: 'https://labs.google/fx/api/trpc/backbone.uploadImage',
    // Text-to-image generation.
    GENERATE: 'https://aisandbox-pa.googleapis.com/v1/whisk:generateImage',
    // Image generation driven by uploaded reference images ("recipe").
    RECIPE: 'https://aisandbox-pa.googleapis.com/v1/whisk:runImageRecipe',
    // Starts an image-to-video job.
    VIDEO_GENERATE: 'https://aisandbox-pa.googleapis.com/v1/whisk:generateVideo',
    // Polled to learn the state of a video job.
    VIDEO_STATUS: 'https://aisandbox-pa.googleapis.com/v1:runVideoFxSingleClipsStatusCheck',
    // Video credit status (not referenced by the visible client code).
    VIDEO_CREDITS: 'https://aisandbox-pa.googleapis.com/v1/whisk:getVideoCreditStatus',
};
|
|
|
|
/**
 * Headers attached to every outgoing request. Callers spread this object and
 * then add either a `Cookie` or an `Authorization` header on top.
 */
const DEFAULT_HEADERS = {
    Origin: 'https://labs.google',
    'Content-Type': 'application/json',
    Referer: 'https://labs.google/fx/tools/whisk/project',
    // Fixed desktop Chrome user-agent string sent with each request.
    'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
};
|
|
|
|
/**
 * Maps a user-facing aspect-ratio label to the image aspect-ratio enum name
 * sent in `imageModelSettings.aspectRatio`.
 *
 * Note that "3:4" shares the portrait value with "9:16", and "Auto" resolves
 * to square. Labels missing from this table are handled by the caller.
 */
const ASPECT_RATIOS: Record<string, string> = {
    '1:1': 'IMAGE_ASPECT_RATIO_SQUARE',
    '9:16': 'IMAGE_ASPECT_RATIO_PORTRAIT',
    '16:9': 'IMAGE_ASPECT_RATIO_LANDSCAPE',
    '4:3': 'IMAGE_ASPECT_RATIO_LANDSCAPE_FOUR_THREE',
    '3:4': 'IMAGE_ASPECT_RATIO_PORTRAIT',
    Auto: 'IMAGE_ASPECT_RATIO_SQUARE',
};
|
|
|
|
/**
 * Maps a lower-case reference-image role to the media-category enum name
 * used when uploading that image.
 */
const MEDIA_CATEGORIES: Record<string, string> = {
    subject: 'MEDIA_CATEGORY_SUBJECT',
    scene: 'MEDIA_CATEGORY_SCENE',
    style: 'MEDIA_CATEGORY_STYLE',
};
|
|
|
|
/** One image produced by an image-generation call. */
export interface GeneratedImage {
    /** Image bytes as a Base64 string. */
    data: string;
    /** Zero-based position of this image among all images returned by the call. */
    index: number;
    /** The prompt that was passed to the generation call. */
    prompt: string;
    /** The aspect-ratio label that was passed to the generation call (e.g. "1:1"). */
    aspectRatio: string;
}
|
|
|
|
/**
 * Marks a polling failure that must abort immediately instead of being
 * retried as a transient/network error. The error name is deliberately left
 * as "Error" so stringified messages look the same as a plain `Error`.
 */
class WhiskNonRetryableError extends Error {
    constructor(message: string) {
        super(message);
        // Keep `instanceof` working even if the build down-levels classes.
        Object.setPrototypeOf(this, new.target.prototype);
    }
}

/**
 * HTTP client for the Whisk image / video generation endpoints.
 *
 * Authentication is cookie based: the cookies given to the constructor are
 * sent to the session endpoint to obtain a bearer token, which is cached and
 * reused for generation requests.
 */
export class WhiskClient {
    /** Status strings that are treated as "job finished successfully" (exact match). */
    private static readonly COMPLETE_STATUSES: ReadonlySet<string> = new Set([
        'COMPLETED',
        'SUCCEEDED',
        'complete',
        'FINISHED',
        'MEDIA_GENERATION_STATUS_COMPLETE',
        'MEDIA_GENERATION_STATUS_SUCCEEDED',
        'MEDIA_GENERATION_STATUS_SUCCESSFUL',
    ]);

    /** Status strings that are treated as "job failed" (exact match). */
    private static readonly FAILED_STATUSES: ReadonlySet<string> = new Set([
        'FAILED',
        'ERROR',
        'failed',
        'CANCELLED',
        'MEDIA_GENERATION_STATUS_FAILED',
    ]);

    /** Longest string value written verbatim by `toLogString`. */
    private static readonly MAX_LOGGED_STRING = 300;

    private cookies: Record<string, string>;
    private accessToken: string | null = null;
    /** Expiry of `accessToken`, in seconds since the Unix epoch. */
    private tokenExpires: number = 0;
    private cookieString: string = '';

    /**
     * @param cookieInput Either a JSON array of `{ name, value }` objects or a
     *   `Cookie` header string (`a=1; b=2`).
     * @throws Error if no usable cookie can be extracted from `cookieInput`.
     */
    constructor(cookieInput: string) {
        this.cookies = this.parseCookies(cookieInput);

        // Serialise once; the same header value is reused for every cookie-authenticated call.
        this.cookieString = Object.entries(this.cookies)
            .map(([k, v]) => `${k}=${v}`)
            .join('; ');

        if (!this.cookieString) {
            throw new Error("No valid cookies provided");
        }
    }

    /**
     * Serialises a value for logging, shortening long string values (such as
     * base64 image/video data) so a single log line cannot grow to megabytes.
     */
    private static toLogString(value: unknown): string {
        const limit = WhiskClient.MAX_LOGGED_STRING;
        const text = JSON.stringify(
            value,
            (_key, v) =>
                typeof v === 'string' && v.length > limit
                    ? `${v.slice(0, limit)}... (${v.length} chars)`
                    : v,
            2
        );
        // JSON.stringify yields undefined for values such as `undefined` itself.
        return text ?? String(value);
    }

    /**
     * Builds the `recipeMediaInputs` entries for one media category.
     * Accepts a single generation ID, a list of IDs, or nothing.
     */
    private static toRecipeInputs(
        category: string,
        ids: string | string[] | undefined
    ): Array<MediaInput & { caption: string }> {
        if (!ids) return [];
        const idList = Array.isArray(ids) ? ids : [ids];
        return idList.map(id => ({
            caption: "",
            mediaInput: {
                mediaCategory: category,
                mediaGenerationId: id
            }
        }));
    }

    /**
     * Extracts cookie name/value pairs from `input`.
     *
     * A JSON array is tried first when the input is bracketed; anything else
     * (or unparsable JSON) is read as a `Cookie` header string. Pairs with an
     * empty name or value are dropped.
     */
    private parseCookies(input: string): Record<string, string> {
        const cookies: Record<string, string> = {};
        if (!input) return cookies;
        const text = input.trim();

        // Try JSON (e.g. a browser-extension cookie export)
        if (text.startsWith('[') && text.endsWith(']')) {
            try {
                const list: unknown = JSON.parse(text);
                if (Array.isArray(list)) {
                    for (const entry of list) {
                        if (entry && entry.name && entry.value) cookies[entry.name] = entry.value;
                    }
                    return cookies;
                }
            } catch {
                /* not JSON after all - fall through to header parsing */
            }
        }

        // Header string. Split each pair on the FIRST '=' only: cookie values
        // may themselves contain '=' (base64 padding, nested key=value data).
        for (const part of text.split(';')) {
            const separator = part.indexOf('=');
            if (separator === -1) continue;
            const name = part.slice(0, separator).trim();
            const value = part.slice(separator + 1).trim();
            if (name && value) cookies[name] = value;
        }

        return cookies;
    }

    /**
     * Returns a bearer token, fetching a new one from the session endpoint
     * when none is cached or the cached one is considered expired.
     *
     * @throws Error prefixed with "Authentication failed:" on any failure.
     */
    private async getAccessToken(): Promise<string> {
        if (this.accessToken && Date.now() / 1000 < this.tokenExpires) {
            return this.accessToken;
        }

        console.log("Fetching access token...");
        try {
            const res = await fetch(ENDPOINTS.AUTH, {
                headers: {
                    ...DEFAULT_HEADERS,
                    "Cookie": this.cookieString
                }
            });

            if (!res.ok) throw new Error(`Auth failed: ${res.status}`);

            const data = await res.json();
            const token: unknown = data?.access_token;
            if (typeof token !== 'string' || !token) throw new Error("Missing access_token");

            this.accessToken = token;
            this.tokenExpires = (Date.now() / 1000) + 3300; // cache for 55 mins
            return token;
        } catch (e) {
            throw new Error(`Authentication failed: ${e}`);
        }
    }

    /**
     * Uploads a reference image and returns the ID assigned to it.
     *
     * @param fileBase64 Image bytes, base64 encoded (without a data-URI prefix).
     * @param mimeType MIME type used to build the data URI.
     * @param category "subject", "scene" or "style" (case-insensitive);
     *   anything else is uploaded as a subject.
     * @returns The upload's media generation ID.
     * @throws Error if the request fails or the response carries no ID.
     */
    async uploadReferenceImage(fileBase64: string, mimeType: string, category: string): Promise<string | null> {
        const mediaCategory = MEDIA_CATEGORIES[category.toLowerCase()] || "MEDIA_CATEGORY_SUBJECT";
        const dataUri = `data:${mimeType};base64,${fileBase64}`;

        const payload = {
            json: {
                clientContext: {
                    workflowId: uuidv4(),
                    sessionId: Date.now().toString()
                },
                uploadMediaInput: {
                    mediaCategory,
                    rawBytes: dataUri,
                    caption: ""
                }
            }
        };

        const res = await fetch(ENDPOINTS.UPLOAD, {
            method: "POST",
            headers: {
                ...DEFAULT_HEADERS,
                "Cookie": this.cookieString
            },
            body: JSON.stringify(payload)
        });

        if (!res.ok) {
            const errText = await res.text();
            console.error("Upload API Error:", errText);
            throw new Error(`Upload status ${res.status}: ${errText.substring(0, 200)}`);
        }

        const data = await res.json();
        const mediaId = data?.result?.data?.json?.result?.uploadMediaGenerationId;

        if (mediaId) {
            console.log(`Uploaded ${category} image: ${mediaId}`);
            return mediaId;
        }

        console.error("Upload response missing ID:", JSON.stringify(data).substring(0, 200));
        throw new Error("Upload successful but returned no ID");
    }

    /**
     * Generates images from a prompt, optionally guided by reference images.
     *
     * With no references the text-to-image endpoint is used; with at least
     * one reference the recipe endpoint is used instead.
     *
     * @param prompt Text prompt / instruction.
     * @param aspectRatio Aspect-ratio label; unknown labels fall back to square.
     * @param refs Media generation IDs of already-uploaded reference images,
     *   one ID or a list of IDs per category.
     * @param preciseMode Currently ignored: the request field for it is not known yet.
     * @returns Every image found in the response, in response order.
     * @throws Error "Safety Filter Blocked Request" when the failure message
     *   mentions UNSAFE or SEXUAL; otherwise the underlying error.
     */
    async generate(
        prompt: string,
        aspectRatio: string = "1:1",
        refs: { subject?: string | string[]; scene?: string | string[]; style?: string | string[] } = {},
        preciseMode: boolean = false
    ): Promise<GeneratedImage[]> {
        const token = await this.getAccessToken();

        // refs are assumed to be generation IDs that were already uploaded.
        const recipeMediaInputs = [
            ...WhiskClient.toRecipeInputs("MEDIA_CATEGORY_SUBJECT", refs.subject),
            ...WhiskClient.toRecipeInputs("MEDIA_CATEGORY_SCENE", refs.scene),
            ...WhiskClient.toRecipeInputs("MEDIA_CATEGORY_STYLE", refs.style),
        ];

        const isImageToImage = recipeMediaInputs.length > 0;
        const endpoint = isImageToImage ? ENDPOINTS.RECIPE : ENDPOINTS.GENERATE;
        const arEnum = ASPECT_RATIOS[aspectRatio] || "IMAGE_ASPECT_RATIO_SQUARE";

        const clientContext = {
            workflowId: uuidv4(),
            tool: isImageToImage ? "BACKBONE" : "IMAGE_FX",
            sessionId: Date.now().toString()
        };

        let payload: WhiskGeneratePayload | WhiskRecipePayload;

        if (!isImageToImage) {
            // Text-to-image
            payload = {
                clientContext,
                imageModelSettings: {
                    imageModel: "IMAGEN_3_5",
                    aspectRatio: arEnum
                },
                seed: Math.floor(Math.random() * 100000),
                prompt: prompt,
                mediaCategory: "MEDIA_CATEGORY_BOARD"
            } as any;
        } else {
            // Image-to-image (recipe): the prompt travels as `userInstruction`
            // and each reference as a `recipeMediaInputs` entry.
            payload = {
                clientContext,
                imageModelSettings: {
                    imageModel: "R2I", // Recipe-to-Image model
                    aspectRatio: arEnum
                },
                seed: Math.floor(Math.random() * 1000000),
                userInstruction: prompt,
                recipeMediaInputs: recipeMediaInputs
                // Note: preciseMode field name TBD - needs API discovery
            } as any;
        }

        console.log(`Generating: "${prompt.substring(0, 30)}..." (Refs: ${recipeMediaInputs.length})`);

        try {
            const res = await fetch(endpoint, {
                method: "POST",
                headers: {
                    ...DEFAULT_HEADERS,
                    "Authorization": `Bearer ${token}`
                },
                body: JSON.stringify(payload)
            });

            if (!res.ok) {
                const errText = await res.text();
                console.error("Whisk API Error Body:", errText);
                throw new Error(`API Error ${res.status}: ${errText.substring(0, 500)}`);
            }

            const json = await res.json() as WhiskGenerateResponse;

            // Flatten panels -> images, keeping only entries that carry data.
            const images: string[] = [];
            for (const panel of (json.imagePanels || [])) {
                for (const img of (panel.generatedImages || [])) {
                    if (img.encodedImage) images.push(img.encodedImage);
                }
            }

            if (images.length === 0) throw new Error("No images returned");

            return images.map((data, i) => ({
                data,
                index: i,
                prompt,
                aspectRatio
            }));
        } catch (e: unknown) {
            console.error("Generation failed:", e);
            const errMessage = e instanceof Error ? e.message : String(e);

            // Surface safety-filter rejections with a stable, recognisable message.
            if (errMessage.includes("UNSAFE") || errMessage.includes("SEXUAL")) {
                throw new Error("Safety Filter Blocked Request");
            }
            throw e;
        }
    }

    /**
     * Generates a video from an image and waits for the result.
     *
     * Submits the job, then polls the status endpoint until the job
     * completes, fails, or the polling budget is exhausted.
     *
     * @param imageGenerationId ID of an existing image; takes precedence over `imageBase64`.
     * @param prompt Text prompt for the video.
     * @param imageBase64 Image bytes as base64 (a `data:image/...;base64,` prefix
     *   is stripped); used only when `imageGenerationId` is empty.
     * @param aspectRatio "9:16" selects portrait; every other value selects landscape.
     * @throws Error if neither image source is given, the request fails, no job
     *   ID is returned, or polling fails / times out.
     */
    async generateVideo(
        imageGenerationId: string,
        prompt: string,
        imageBase64?: string,
        aspectRatio: string = "16:9"
    ): Promise<WhiskVideoResult> {
        const token = await this.getAccessToken();

        console.log("generateVideo: Starting video generation...", {
            hasImageId: !!imageGenerationId,
            hasBase64: !!imageBase64,
            promptLength: prompt.length
        });

        const videoAspectRatio = aspectRatio === "9:16"
            ? "VIDEO_ASPECT_RATIO_PORTRAIT"
            : "VIDEO_ASPECT_RATIO_LANDSCAPE";

        // Nested object carrying the prompt plus exactly one image reference.
        const promptImageInput: Record<string, string> = { prompt };
        if (imageGenerationId) {
            promptImageInput.mediaGenerationId = imageGenerationId;
        } else if (imageBase64) {
            // Send bare base64: drop a leading data-URI prefix if present.
            promptImageInput.rawBytes = imageBase64.replace(/^data:image\/\w+;base64,/, '');
        } else {
            throw new Error("Either imageGenerationId or imageBase64 is required");
        }

        const payload = {
            clientContext: {
                sessionId: `;${Date.now()}`,
                tool: "BACKBONE",
                // Same UUID source as the other requests in this class, rather
                // than relying on a global `crypto` being present at runtime.
                workflowId: uuidv4()
            },
            promptImageInput: promptImageInput,
            modelNameType: "VEO_3_1_I2V_12STEP",
            loopVideo: false,
            aspectRatio: videoAspectRatio
        };

        const endpoint = ENDPOINTS.VIDEO_GENERATE;

        console.log("generateVideo: Sending payload to", endpoint, WhiskClient.toLogString(payload));

        try {
            const res = await fetch(endpoint, {
                method: "POST",
                headers: {
                    ...DEFAULT_HEADERS,
                    "Authorization": `Bearer ${token}`
                },
                body: JSON.stringify(payload),
            });

            if (!res.ok) {
                const errorText = await res.text();
                console.error("generateVideo: API Error", res.status, errorText);
                throw new Error(`Whisk API Error: ${res.status} - ${errorText}`);
            }

            const data = await res.json();
            console.log("generateVideo: Response Data", WhiskClient.toLogString(data));

            // The job ID may appear in several places; first match wins:
            // mediaGenerationId, then the operation name, then fallbacks.
            const generations: Array<{ id?: string; mediaContentType?: string } | undefined> =
                data?.mediaGenerations || [];
            const videoGen = generations.find(g => g?.mediaContentType === 'MEDIA_CONTENT_TYPE_VIDEO');
            const resultId: string | undefined =
                data?.mediaGenerationId ||
                data?.operation?.operation?.name ||
                data?.videoGenerationId ||
                videoGen?.id ||
                generations[0]?.id ||
                data?.id;

            if (!resultId) {
                console.error("generateVideo: No ID found in response", data);
                throw new Error("Failed to start video generation: No ID returned");
            }

            console.log("generateVideo: Got ID, starting polling:", resultId);

            // `await` here so that polling failures also reach the catch below.
            return await this.pollVideoStatus(resultId, token);
        } catch (error) {
            console.error("generateVideo: Failed", error);
            throw error;
        }
    }

    /**
     * Polls the video status endpoint until the job completes or fails.
     *
     * Makes up to 60 requests, 5 seconds apart. Network errors and 5xx
     * responses are retried; 4xx responses and reported job failures abort
     * immediately.
     *
     * @param videoGenId Job / operation name returned when the job was started.
     * @param token Bearer token used for every status request.
     * @throws Error on a fatal status response, a failed job, or timeout.
     */
    private async pollVideoStatus(videoGenId: string, token: string): Promise<WhiskVideoResult> {
        const maxAttempts = 60; // 5 minutes max (5s intervals)
        const pollInterval = 5000; // 5 seconds

        console.log(`Starting video status polling for ID: ${videoGenId}`);

        // The request body never changes between attempts.
        const requestBody = JSON.stringify({
            operations: [
                { operation: { name: videoGenId } }
            ]
        });

        for (let attempt = 0; attempt < maxAttempts; attempt++) {
            console.log(`Polling video status... attempt ${attempt + 1}/${maxAttempts}`);

            try {
                const res = await fetch(ENDPOINTS.VIDEO_STATUS, {
                    method: "POST",
                    headers: {
                        ...DEFAULT_HEADERS,
                        "Authorization": `Bearer ${token}`
                    },
                    body: requestBody
                });

                if (!res.ok) {
                    const errText = await res.text();
                    console.error("Video status error:", res.status, errText);
                    // 4xx is fatal and must not be retried; 5xx falls through to the next attempt.
                    if (res.status >= 400 && res.status < 500) {
                        throw new WhiskNonRetryableError(`Video status error: ${errText}`);
                    }
                } else {
                    const json = await res.json();
                    console.log("Video status response:", WhiskClient.toLogString(json));

                    const finished = this.interpretVideoStatus(json, videoGenId);
                    if (finished) return finished;
                }
            } catch (e: unknown) {
                const message = e instanceof Error ? e.message : String(e);
                // Logical failures are rethrown at once; everything else is
                // treated as transient and retried until the last attempt.
                const isFatal =
                    e instanceof WhiskNonRetryableError ||
                    ['Video generation failed:', 'NCII', 'content policy', 'safety']
                        .some(marker => message.includes(marker));
                if (isFatal) throw e;

                console.error("Poll error (network/transient):", e);
                if (attempt === maxAttempts - 1) throw e;
            }

            // No point in waiting after the final attempt.
            if (attempt < maxAttempts - 1) {
                await new Promise(resolve => setTimeout(resolve, pollInterval));
            }
        }

        throw new Error("Video generation timed out after 5 minutes");
    }

    /**
     * Interprets one status response.
     *
     * @returns The finished video when the job is complete and the response
     *   contains a URL or encoded data; `null` when polling should continue.
     * @throws WhiskNonRetryableError when the response reports a failed job.
     */
    private interpretVideoStatus(json: any, videoGenId: string): WhiskVideoResult | null {
        // The response may wrap the job in an `operations` array or be the job itself.
        const operation = json.operations?.[0] || json;
        const rawStatus = operation.status || operation.state || operation.taskStatus || json.status;
        // Only string statuses can be matched; anything else means "keep polling".
        const status = typeof rawStatus === 'string' ? rawStatus : '';

        const isComplete = WhiskClient.COMPLETE_STATUSES.has(status) ||
            status.includes('SUCCESSFUL') || status.includes('COMPLETE');
        const isFailed = WhiskClient.FAILED_STATUSES.has(status) ||
            status.includes('FAILED') || status.includes('ERROR');

        if (isComplete) {
            const result = operation.result || operation;

            // A direct URL is preferred over inline data.
            const videoUrl = result.videoUrl || result.video?.url || result.mediaUrl ||
                result.generatedMedia?.url || result.generatedMedia?.uri ||
                result.url || json.videoUrl || operation.generatedMedia?.uri;

            // Base64 encoded video data, in any of the known locations.
            const encodedVideo = operation.rawBytes || result.rawBytes ||
                result.encodedVideo || result.video?.encodedVideo ||
                result.generatedMedia?.encodedVideo || json.encodedVideo ||
                operation.generatedMedia?.rawBytes;

            if (videoUrl) {
                console.log("Video generation complete with URL:", videoUrl);
                return { id: videoGenId, url: videoUrl, status: 'COMPLETED' };
            }

            if (encodedVideo) {
                console.log("Video generation complete with rawBytes/base64 data");
                // Wrap bare base64 in a data URI; pass an existing data URI through.
                const videoDataUri = encodedVideo.startsWith('data:')
                    ? encodedVideo
                    : `data:video/mp4;base64,${encodedVideo}`;
                return { id: videoGenId, url: videoDataUri, status: 'COMPLETED' };
            }

            console.warn("Video completed but no URL/data found in response:", WhiskClient.toLogString(json));
            const mediaKey = operation.mediaKey || result.mediaKey;
            if (mediaKey) {
                console.log("Found mediaKey, but no direct URL:", mediaKey);
            }
        } else if (isFailed) {
            const rawError = operation.operation?.error?.message ||
                operation.error?.message ||
                operation.error ||
                json.error?.message ||
                json.error ||
                'Video generation failed';
            // An error object would otherwise be rendered as "[object Object]".
            const errorMsg = typeof rawError === 'string' ? rawError : JSON.stringify(rawError);

            console.error("Video generation FAILED:", errorMsg);
            throw new WhiskNonRetryableError(`Video generation failed: ${errorMsg}`);
        }

        // IN_PROGRESS, PENDING, PROCESSING, RUNNING - continue polling
        console.log(`Video status: ${rawStatus} - continuing to poll...`);
        return null;
    }
}
|