feat: Add Whisk integration and Docker support

This commit is contained in:
Khoa.vo 2025-12-30 19:09:21 +07:00
parent 9e0f3b80b2
commit 63ad7cc21f
9 changed files with 543 additions and 89 deletions

BIN
.DS_Store vendored

Binary file not shown.

21
Dockerfile Normal file
View file

@ -0,0 +1,21 @@
FROM python:3.10-slim
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
&& rm -rf /var/lib/apt/lists/*
# Install python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY . .
# Expose port
EXPOSE 8888
# Run the application
CMD ["python", "app.py"]

Binary file not shown.

Binary file not shown.

137
app.py
View file

@ -12,6 +12,7 @@ from google import genai
from google.genai import types
from PIL import Image, PngImagePlugin
import threading, time, subprocess, re
import whisk_client
import logging
@ -393,12 +394,15 @@ def generate_image():
if not prompt:
return jsonify({'error': 'Prompt is required'}), 400
if not api_key:
return jsonify({'error': 'API Key is required.'}), 401
# Determine if this is a Whisk request
is_whisk = 'whisk' in model.lower() or 'imagefx' in model.lower()
if not is_whisk and not api_key:
return jsonify({'error': 'API Key is required for Gemini models.'}), 401
try:
print("Đang gửi lệnh...", flush=True)
client = genai.Client(api_key=api_key)
# client initialization moved to Gemini block
image_config_args = {}
@ -514,6 +518,133 @@ def generate_image():
continue
model_name = model
# ==================================================================================
# WHISK (IMAGEFX) HANDLING
# ==================================================================================
if is_whisk:
print(f"Detected Whisk/ImageFX model request: {model_name}", flush=True)
# Extract cookies from request headers or form data
# Priority: Form Data 'cookies' > Request Header 'x-whisk-cookies' > Environment Variable
cookie_str = request.form.get('cookies') or request.headers.get('x-whisk-cookies') or os.environ.get('WHISK_COOKIES')
if not cookie_str:
return jsonify({'error': 'Whisk cookies are required. Please provide them in the "cookies" form field or configuration.'}), 400
print("Sending request to Whisk...", flush=True)
try:
# Check for reference images
reference_image_path = None
# final_reference_paths (populated above) contains URLs/paths to reference images.
# Can be new uploads or history items.
if final_reference_paths:
# Use the first one
ref_url = final_reference_paths[0]
# Convert URL/Path to absolute local path
# ref_url might be "http://.../static/..." or "/static/..."
if '/static/' in ref_url:
rel_path = ref_url.split('/static/')[1]
possible_path = os.path.join(app.static_folder, rel_path)
if os.path.exists(possible_path):
reference_image_path = possible_path
print(f"Whisk: Using reference image at {reference_image_path}", flush=True)
elif os.path.exists(ref_url):
# It's already a path?
reference_image_path = ref_url
# Call the client
try:
whisk_result = whisk_client.generate_image_whisk(
prompt=api_prompt,
cookie_str=cookie_str,
aspect_ratio=aspect_ratio,
resolution=resolution,
reference_image_path=reference_image_path
)
except Exception as e:
# Re-raise to be caught by the outer block
raise e
# Process result - whisk_client returns raw bytes
image_bytes = None
if isinstance(whisk_result, bytes):
image_bytes = whisk_result
elif isinstance(whisk_result, dict):
# Fallback if I ever change the client to return dict
if 'image_data' in whisk_result:
image_bytes = whisk_result['image_data']
elif 'image_url' in whisk_result:
import requests
img_resp = requests.get(whisk_result['image_url'])
image_bytes = img_resp.content
if not image_bytes:
raise ValueError("No image data returned from Whisk.")
# Save and process image (Reuse existing logic)
image = Image.open(BytesIO(image_bytes))
png_info = PngImagePlugin.PngInfo()
date_str = datetime.now().strftime("%Y%m%d")
search_pattern = os.path.join(GENERATED_DIR, f"whisk_{date_str}_*.png")
existing_files = glob.glob(search_pattern)
max_id = 0
for f in existing_files:
try:
basename = os.path.basename(f)
name_without_ext = os.path.splitext(basename)[0]
id_part = name_without_ext.split('_')[-1]
id_num = int(id_part)
if id_num > max_id:
max_id = id_num
except ValueError:
continue
next_id = max_id + 1
filename = f"whisk_{date_str}_{next_id}.png"
filepath = os.path.join(GENERATED_DIR, filename)
rel_path = os.path.join('generated', filename)
image_url = url_for('static', filename=rel_path)
metadata = {
'prompt': prompt,
'note': note,
'processed_prompt': api_prompt,
'aspect_ratio': aspect_ratio or 'Auto',
'resolution': resolution,
'reference_images': final_reference_paths,
'model': 'whisk'
}
png_info.add_text('sdvn_meta', json.dumps(metadata))
buffer = BytesIO()
image.save(buffer, format='PNG', pnginfo=png_info)
final_bytes = buffer.getvalue()
with open(filepath, 'wb') as f:
f.write(final_bytes)
image_data = base64.b64encode(final_bytes).decode('utf-8')
return jsonify({
'image': image_url,
'image_data': image_data,
'metadata': metadata,
})
except Exception as e:
print(f"Whisk error: {e}")
return jsonify({'error': f"Whisk Generation Error: {str(e)}"}), 500
# ==================================================================================
# STANDARD GEMINI HANDLING
# ==================================================================================
# Initialize Client here, since API Key is required
client = genai.Client(api_key=api_key)
print(f"Đang tạo với model {model_name}...", flush=True)
response = client.models.generate_content(
model=model_name,

17
docker-compose.yml Normal file
View file

@ -0,0 +1,17 @@
version: '3.8'
services:
app:
build: .
platform: linux/amd64
ports:
- "8558:8888"
volumes:
- ./static:/app/static
- ./prompts.json:/app/prompts.json
- ./user_prompts.json:/app/user_prompts.json
- ./gallery_favorites.json:/app/gallery_favorites.json
environment:
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-} # Optional for Whisk
- WHISK_COOKIES=${WHISK_COOKIES:-}
restart: unless-stopped

View file

@ -132,10 +132,28 @@ document.addEventListener('DOMContentLoaded', () => {
if (apiModelSelect) {
apiModelSelect.addEventListener('change', () => {
toggleResolutionVisibility();
toggleCookiesVisibility();
persistSettings();
});
}
const whiskCookiesGroup = document.getElementById('whisk-cookies-group');
const whiskCookiesInput = document.getElementById('whisk-cookies');
function toggleCookiesVisibility() {
if (whiskCookiesGroup && apiModelSelect) {
if (apiModelSelect.value === 'whisk') {
whiskCookiesGroup.classList.remove('hidden');
} else {
whiskCookiesGroup.classList.add('hidden');
}
}
}
if (whiskCookiesInput) {
whiskCookiesInput.addEventListener('input', persistSettings);
}
// Load Settings
function loadSettings() {
try {
@ -156,6 +174,10 @@ document.addEventListener('DOMContentLoaded', () => {
if (bodyFontSelect && settings.bodyFont) {
bodyFontSelect.value = settings.bodyFont;
}
if (whiskCookiesInput && settings.whiskCookies) {
whiskCookiesInput.value = settings.whiskCookies;
}
toggleCookiesVisibility();
return settings;
}
} catch (e) {
@ -180,6 +202,7 @@ document.addEventListener('DOMContentLoaded', () => {
referenceImages,
theme: currentTheme || DEFAULT_THEME,
bodyFont: bodyFontSelect ? bodyFontSelect.value : DEFAULT_BODY_FONT,
whiskCookies: whiskCookiesInput ? whiskCookiesInput.value : '',
};
try {
localStorage.setItem(SETTINGS_STORAGE_KEY, JSON.stringify(settings));
@ -199,6 +222,10 @@ document.addEventListener('DOMContentLoaded', () => {
const selectedModel = model || (apiModelSelect ? apiModelSelect.value : 'gemini-3-pro-image-preview');
formData.append('model', selectedModel);
if (whiskCookiesInput && whiskCookiesInput.value) {
formData.append('cookies', whiskCookiesInput.value);
}
// Add reference images using correct slotManager methods
const referenceFiles = slotManager.getReferenceFiles();
referenceFiles.forEach(file => {

View file

@ -46,18 +46,16 @@
<div class="field-action-buttons" data-target="prompt" aria-label="Prompt actions">
<button type="button" class="field-action-btn" data-action="copy" title="Copy prompt"
aria-label="Copy prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<rect x="9" y="9" width="13" height="13" rx="2.5" />
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
</svg>
</button>
<button type="button" class="field-action-btn" data-action="paste" title="Paste"
aria-label="Paste vào prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M8 4h8" />
<path d="M9 2h6a2 2 0 0 1 2 2v1H7V4a2 2 0 0 1 2-2z" />
<rect x="5" y="5" width="14" height="16" rx="2" />
@ -67,9 +65,8 @@
</button>
<button type="button" class="field-action-btn" data-action="clear" title="Clear prompt"
aria-label="Xoá prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M3 6h18" />
<path d="M19 6v12a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6" />
<path d="M10 11v6" />
@ -132,18 +129,16 @@
<div class="field-action-buttons" data-target="note" aria-label="Note actions">
<button type="button" class="field-action-btn" data-action="copy" title="Copy note"
aria-label="Copy note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<rect x="9" y="9" width="13" height="13" rx="2.5" />
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
</svg>
</button>
<button type="button" class="field-action-btn" data-action="paste" title="Paste"
aria-label="Paste vào note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M8 4h8" />
<path d="M9 2h6a2 2 0 0 1 2 2v1H7V4a2 2 0 0 1 2-2z" />
<rect x="5" y="5" width="14" height="16" rx="2" />
@ -153,9 +148,8 @@
</button>
<button type="button" class="field-action-btn" data-action="clear" title="Clear note"
aria-label="Xoá note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M3 6h18" />
<path d="M19 6v12a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6" />
<path d="M10 11v6" />
@ -498,6 +492,15 @@
rel="noreferrer">aistudio.google.com/api-keys</a>
</p>
</div>
<!-- Whisk Cookies Input -->
<div class="input-group api-settings-input-group hidden" id="whisk-cookies-group">
<label for="whisk-cookies">Whisk Cookies (dành cho ImageFX)</label>
<textarea id="whisk-cookies" rows="3" placeholder="Paste toàn bộ cookie string từ labs.google..."
style="width: 100%; padding: 0.5rem; background: rgba(0,0,0,0.2); border: 1px solid var(--border-color); color: var(--text-primary); border-radius: 4px; font-size: 0.8rem;"></textarea>
<p class="input-hint">
F12 trên labs.google > Network > Request bất kỳ > Copy Request Headers > Cookie.
</p>
</div>
<div class="input-group api-settings-input-group">
<label for="api-model">Model</label>
<div class="select-wrapper">
@ -505,6 +508,7 @@
style="width: 100%; padding: 0.75rem; background: rgba(255, 255, 255, 0.05); border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 0.5rem; color: var(--text-primary); font-size: 0.9rem;">
<option value="gemini-3-pro-image-preview">Gemini 3 Pro (Image Preview)</option>
<option value="gemini-2.5-flash-image">Gemini 2.5 Flash Image</option>
<option value="whisk">Whisk (ImageFX) [Experimental]</option>
</select>
</div>
</div>

254
whisk_client.py Normal file
View file

@ -0,0 +1,254 @@
import requests
import json
import time
import base64
import os
import uuid
import logging
# Configure logging
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger("whisk_client")
# Constants from reverse engineering
AUTH_ENDPOINT = "https://labs.google/fx/api/auth/session"
UPLOAD_ENDPOINT = "https://labs.google/fx/api/trpc/backbone.uploadImage"
# Endpoint 1: Text-to-Image
# (Captured in Step 405)
GENERATE_ENDPOINT = "https://aisandbox-pa.googleapis.com/v1/whisk:generateImage"
# Endpoint 2: Reference Image (Recipe)
# (Captured in Step 424)
RECIPE_ENDPOINT = "https://aisandbox-pa.googleapis.com/v1/whisk:runImageRecipe"
DEFAULT_HEADERS = {
"Origin": "https://labs.google",
"Content-Type": "application/json",
"Referer": "https://labs.google/fx/tools/image-fx",
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}
class WhiskClientError(Exception):
pass
def parse_cookies(cookie_input):
if not cookie_input:
return {}
cookies = {}
cookie_input = cookie_input.strip()
if cookie_input.startswith('[') and cookie_input.endswith(']'):
try:
cookie_list = json.loads(cookie_input)
for c in cookie_list:
name = c.get('name')
value = c.get('value')
if name and value:
cookies[name] = value
return cookies
except json.JSONDecodeError:
pass
for item in cookie_input.split(';'):
if '=' in item:
name, value = item.split('=', 1)
cookies[name.strip()] = value.strip()
return cookies
def get_session_token(cookies):
logger.info("Fetching session token from labs.google...")
try:
response = requests.get(
AUTH_ENDPOINT,
headers={**DEFAULT_HEADERS},
cookies=cookies,
timeout=30
)
response.raise_for_status()
data = response.json()
if not data.get('access_token'):
raise WhiskClientError("Session response missing access_token")
return data['access_token']
except Exception as e:
logger.error(f"Failed to fetch session token: {e}")
raise WhiskClientError(f"Authentication failed: {str(e)}")
def upload_reference_image(image_path, cookies):
if not image_path or not os.path.exists(image_path):
return None
logger.info(f"Uploading reference image: {image_path}")
try:
with open(image_path, "rb") as img_file:
import mimetypes
mime_type, _ = mimetypes.guess_type(image_path)
if not mime_type: mime_type = "image/png"
b64_data = base64.b64encode(img_file.read()).decode('utf-8')
data_uri = f"data:{mime_type};base64,{b64_data}"
payload = {
"json": {
"clientContext": {
"workflowId": str(uuid.uuid4()),
"sessionId": str(int(time.time() * 1000))
},
"uploadMediaInput": {
"mediaCategory": "MEDIA_CATEGORY_SUBJECT",
"rawBytes": data_uri,
"caption": ""
}
}
}
response = requests.post(
UPLOAD_ENDPOINT,
headers=DEFAULT_HEADERS,
cookies=cookies,
json=payload,
timeout=60
)
if not response.ok:
raise WhiskClientError(f"Image upload failed: {response.text}")
data = response.json()
try:
media_id = data['result']['data']['json']['result']['uploadMediaGenerationId']
except (KeyError, TypeError):
raise WhiskClientError("Failed to retrieve uploadMediaGenerationId")
logger.info(f"Image uploaded successfully. ID: {media_id}")
return media_id
except Exception as e:
logger.error(f"Error uploading image: {e}")
raise e
def generate_image_whisk(prompt, cookie_str, **kwargs):
cookies = parse_cookies(cookie_str)
if not cookies:
raise WhiskClientError("No valid cookies found")
access_token = get_session_token(cookies)
ref_image_path = kwargs.get('reference_image_path')
media_generation_id = None
if ref_image_path:
try:
media_generation_id = upload_reference_image(ref_image_path, cookies)
except Exception as e:
logger.warning(f"Skipping reference image due to upload error: {e}")
aspect_ratio_map = {
"1:1": "IMAGE_ASPECT_RATIO_SQUARE",
"9:16": "IMAGE_ASPECT_RATIO_PORTRAIT",
"16:9": "IMAGE_ASPECT_RATIO_LANDSCAPE",
"4:3": "IMAGE_ASPECT_RATIO_LANDSCAPE_FOUR_THREE",
"3:4": "IMAGE_ASPECT_RATIO_PORTRAIT",
"Auto": "IMAGE_ASPECT_RATIO_SQUARE"
}
aspect_ratio_key = kwargs.get('aspect_ratio', 'Auto')
aspect_ratio_enum = aspect_ratio_map.get(aspect_ratio_key, "IMAGE_ASPECT_RATIO_SQUARE")
seed = kwargs.get('seed', int(time.time()))
headers = {
**DEFAULT_HEADERS,
"Authorization": f"Bearer {access_token}"
}
# BRANCH: Use Recipe Endpoint if Reference Image exists
if media_generation_id:
target_endpoint = RECIPE_ENDPOINT
payload = {
"clientContext": {
"workflowId": str(uuid.uuid4()),
"tool": "BACKBONE",
"sessionId": str(int(time.time() * 1000))
},
"seed": seed,
"imageModelSettings": {
"imageModel": "GEM_PIX",
"aspectRatio": aspect_ratio_enum
},
"userInstruction": prompt,
"recipeMediaInputs": [{
"mediaInput": {
"mediaCategory": "MEDIA_CATEGORY_SUBJECT",
"mediaGenerationId": media_generation_id
}
}]
}
else:
# BRANCH: Use Generate Endpoint for Text-to-Image
# NOTE: Payload for generateImage is inferred to be userInput based.
# If this fails, we might need further inspection, but Recipe flow is the priority.
target_endpoint = GENERATE_ENDPOINT
payload = {
"userInput": {
"candidatesCount": 2,
"prompts": [prompt],
"seed": seed
},
"clientContext": {
"workflowId": str(uuid.uuid4()),
"tool": "IMAGE_FX", # Usually ImageFX for T2I
"sessionId": str(int(time.time() * 1000))
},
"modelInput": {
"modelNameType": "IMAGEN_3_5", # Usually Imagen 3 for ImageFX
"aspectRatio": aspect_ratio_enum
}
}
logger.info(f"Generating image. Endpoint: {target_endpoint}, Prompt: {prompt}")
try:
response = requests.post(
target_endpoint,
headers=headers,
json=payload,
timeout=120
)
if not response.ok:
error_text = response.text
try:
err_json = response.json()
details = err_json.get('error', {}).get('details', [])
if any(d.get('reason') in ['PUBLIC_ERROR_UNSAFE_GENERATION', 'PUBLIC_ERROR_SEXUAL'] for d in details):
raise WhiskClientError("⚠️ Google Safety Filter Triggered. Prompt bị từ chối do nội dung không an toàn.")
except (json.JSONDecodeError, WhiskClientError) as e:
if isinstance(e, WhiskClientError): raise e
# Additional T2I Fallback: If generateImage fails 400, try Recipe with empty media?
# Not implementing strictly to avoid loops, but helpful mental note.
raise WhiskClientError(f"Generation failed ({response.status_code}): {error_text}")
# Parse Response
json_resp = response.json()
images = []
if 'imagePanels' in json_resp:
for panel in json_resp['imagePanels']:
for img in panel.get('generatedImages', []):
if 'encodedImage' in img:
images.append(img['encodedImage'])
if not images:
logger.error(f"Unexpected response structure: {json_resp.keys()}")
raise WhiskClientError("No images found in response")
return base64.b64decode(images[0])
except requests.exceptions.Timeout:
raise WhiskClientError("Timout connecting to Google Whisk.")
except Exception as e:
logger.error(f"Whisk Generation Error: {e}")
raise WhiskClientError(str(e))