feat: Add Whisk integration and Docker support

This commit is contained in:
Khoa.vo 2025-12-30 19:09:21 +07:00
parent 9e0f3b80b2
commit 63ad7cc21f
9 changed files with 543 additions and 89 deletions

BIN
.DS_Store vendored

Binary file not shown.

21
Dockerfile Normal file
View file

@ -0,0 +1,21 @@
# Base image: the "slim" variant of the official Python 3.10 image.
FROM python:3.10-slim
# Every later relative path (COPY targets, the CMD script) resolves under /app.
WORKDIR /app
# Install system dependencies.
# gcc is presumably here so pip can compile packages that ship no prebuilt
# wheel -- TODO confirm it is still required by requirements.txt.
# The apt package lists are removed in the same RUN so they never land in a layer.
RUN apt-get update && apt-get install -y \
gcc \
&& rm -rf /var/lib/apt/lists/*
# Install Python dependencies.
# requirements.txt is copied on its own, before the rest of the source, so the
# pip layer below is only rebuilt when the requirements file itself changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code.
# NOTE(review): this copies the whole build context; the same commit adds
# .DS_Store and other binary files, so a .dockerignore may be warranted.
COPY . .
# Declare the container port (EXPOSE is documentation only and publishes nothing);
# docker-compose.yml maps host port 8558 to this port.
EXPOSE 8888
# Run the application
CMD ["python", "app.py"]

Binary file not shown.

Binary file not shown.

137
app.py
View file

@ -12,6 +12,7 @@ from google import genai
from google.genai import types
from PIL import Image, PngImagePlugin
import threading, time, subprocess, re
import whisk_client
import logging
@ -393,12 +394,15 @@ def generate_image():
if not prompt:
return jsonify({'error': 'Prompt is required'}), 400
if not api_key:
return jsonify({'error': 'API Key is required.'}), 401
# Determine if this is a Whisk request
is_whisk = 'whisk' in model.lower() or 'imagefx' in model.lower()
if not is_whisk and not api_key:
return jsonify({'error': 'API Key is required for Gemini models.'}), 401
try:
print("Đang gửi lệnh...", flush=True)
client = genai.Client(api_key=api_key)
# client initialization moved to Gemini block
image_config_args = {}
@ -514,6 +518,133 @@ def generate_image():
continue
model_name = model
# ==================================================================================
# WHISK (IMAGEFX) HANDLING
# ==================================================================================
if is_whisk:
print(f"Detected Whisk/ImageFX model request: {model_name}", flush=True)
# Extract cookies from request headers or form data
# Priority: Form Data 'cookies' > Request Header 'x-whisk-cookies' > Environment Variable
cookie_str = request.form.get('cookies') or request.headers.get('x-whisk-cookies') or os.environ.get('WHISK_COOKIES')
if not cookie_str:
return jsonify({'error': 'Whisk cookies are required. Please provide them in the "cookies" form field or configuration.'}), 400
print("Sending request to Whisk...", flush=True)
try:
# Check for reference images
reference_image_path = None
# final_reference_paths (populated above) contains URLs/paths to reference images.
# Can be new uploads or history items.
if final_reference_paths:
# Use the first one
ref_url = final_reference_paths[0]
# Convert URL/Path to absolute local path
# ref_url might be "http://.../static/..." or "/static/..."
if '/static/' in ref_url:
rel_path = ref_url.split('/static/')[1]
possible_path = os.path.join(app.static_folder, rel_path)
if os.path.exists(possible_path):
reference_image_path = possible_path
print(f"Whisk: Using reference image at {reference_image_path}", flush=True)
elif os.path.exists(ref_url):
# It's already a path?
reference_image_path = ref_url
# Call the client
try:
whisk_result = whisk_client.generate_image_whisk(
prompt=api_prompt,
cookie_str=cookie_str,
aspect_ratio=aspect_ratio,
resolution=resolution,
reference_image_path=reference_image_path
)
except Exception as e:
# Re-raise to be caught by the outer block
raise e
# Process result - whisk_client returns raw bytes
image_bytes = None
if isinstance(whisk_result, bytes):
image_bytes = whisk_result
elif isinstance(whisk_result, dict):
# Fallback if I ever change the client to return dict
if 'image_data' in whisk_result:
image_bytes = whisk_result['image_data']
elif 'image_url' in whisk_result:
import requests
img_resp = requests.get(whisk_result['image_url'])
image_bytes = img_resp.content
if not image_bytes:
raise ValueError("No image data returned from Whisk.")
# Save and process image (Reuse existing logic)
image = Image.open(BytesIO(image_bytes))
png_info = PngImagePlugin.PngInfo()
date_str = datetime.now().strftime("%Y%m%d")
search_pattern = os.path.join(GENERATED_DIR, f"whisk_{date_str}_*.png")
existing_files = glob.glob(search_pattern)
max_id = 0
for f in existing_files:
try:
basename = os.path.basename(f)
name_without_ext = os.path.splitext(basename)[0]
id_part = name_without_ext.split('_')[-1]
id_num = int(id_part)
if id_num > max_id:
max_id = id_num
except ValueError:
continue
next_id = max_id + 1
filename = f"whisk_{date_str}_{next_id}.png"
filepath = os.path.join(GENERATED_DIR, filename)
rel_path = os.path.join('generated', filename)
image_url = url_for('static', filename=rel_path)
metadata = {
'prompt': prompt,
'note': note,
'processed_prompt': api_prompt,
'aspect_ratio': aspect_ratio or 'Auto',
'resolution': resolution,
'reference_images': final_reference_paths,
'model': 'whisk'
}
png_info.add_text('sdvn_meta', json.dumps(metadata))
buffer = BytesIO()
image.save(buffer, format='PNG', pnginfo=png_info)
final_bytes = buffer.getvalue()
with open(filepath, 'wb') as f:
f.write(final_bytes)
image_data = base64.b64encode(final_bytes).decode('utf-8')
return jsonify({
'image': image_url,
'image_data': image_data,
'metadata': metadata,
})
except Exception as e:
print(f"Whisk error: {e}")
return jsonify({'error': f"Whisk Generation Error: {str(e)}"}), 500
# ==================================================================================
# STANDARD GEMINI HANDLING
# ==================================================================================
# Initialize Client here, since API Key is required
client = genai.Client(api_key=api_key)
print(f"Đang tạo với model {model_name}...", flush=True)
response = client.models.generate_content(
model=model_name,

17
docker-compose.yml Normal file
View file

@ -0,0 +1,17 @@
# NOTE(review): recent Docker Compose releases treat the top-level `version`
# key as obsolete -- confirm against the Compose version used for deployment.
version: '3.8'
services:
app:
# Build the image from the Dockerfile in this directory.
build: .
# Pins the image/container platform to amd64.
# NOTE(review): on non-amd64 hosts this presumably runs under emulation -- confirm it is intended.
platform: linux/amd64
ports:
# host port 8558 -> container port 8888 (the port the Dockerfile EXPOSEs)
- "8558:8888"
volumes:
# Bind mounts so generated files under static/ and the JSON stores live on
# the host rather than only inside the container.
# NOTE(review): with this short bind-mount syntax a missing host path is
# created as a directory, so the three JSON files should exist before `up`.
- ./static:/app/static
- ./prompts.json:/app/prompts.json
- ./user_prompts.json:/app/user_prompts.json
- ./gallery_favorites.json:/app/gallery_favorites.json
environment:
# `${VAR:-}` passes the host value through, or an empty string when unset.
- GOOGLE_API_KEY=${GOOGLE_API_KEY:-} # Optional for Whisk
# app.py falls back to WHISK_COOKIES when the request carries no cookies.
- WHISK_COOKIES=${WHISK_COOKIES:-}
restart: unless-stopped

View file

@ -132,10 +132,28 @@ document.addEventListener('DOMContentLoaded', () => {
if (apiModelSelect) {
apiModelSelect.addEventListener('change', () => {
toggleResolutionVisibility();
toggleCookiesVisibility();
persistSettings();
});
}
const whiskCookiesGroup = document.getElementById('whisk-cookies-group');
const whiskCookiesInput = document.getElementById('whisk-cookies');
/**
 * Show the Whisk cookies field only while the "whisk" model is selected.
 * Does nothing when either the cookies group or the model <select> is absent.
 */
function toggleCookiesVisibility() {
    if (!whiskCookiesGroup || !apiModelSelect) {
        return;
    }
    const isWhiskSelected = apiModelSelect.value === 'whisk';
    // force=true adds the class, force=false removes it.
    whiskCookiesGroup.classList.toggle('hidden', !isWhiskSelected);
}
if (whiskCookiesInput) {
whiskCookiesInput.addEventListener('input', persistSettings);
}
// Load Settings
function loadSettings() {
try {
@ -156,6 +174,10 @@ document.addEventListener('DOMContentLoaded', () => {
if (bodyFontSelect && settings.bodyFont) {
bodyFontSelect.value = settings.bodyFont;
}
if (whiskCookiesInput && settings.whiskCookies) {
whiskCookiesInput.value = settings.whiskCookies;
}
toggleCookiesVisibility();
return settings;
}
} catch (e) {
@ -169,7 +191,7 @@ document.addEventListener('DOMContentLoaded', () => {
const referenceImages = (typeof slotManager !== 'undefined' && typeof slotManager.serializeReferenceImages === 'function')
? slotManager.serializeReferenceImages()
: [];
const settings = {
apiKey: apiKeyInput.value,
prompt: promptInput.value,
@ -180,6 +202,7 @@ document.addEventListener('DOMContentLoaded', () => {
referenceImages,
theme: currentTheme || DEFAULT_THEME,
bodyFont: bodyFontSelect ? bodyFontSelect.value : DEFAULT_BODY_FONT,
whiskCookies: whiskCookiesInput ? whiskCookiesInput.value : '',
};
try {
localStorage.setItem(SETTINGS_STORAGE_KEY, JSON.stringify(settings));
@ -199,12 +222,16 @@ document.addEventListener('DOMContentLoaded', () => {
const selectedModel = model || (apiModelSelect ? apiModelSelect.value : 'gemini-3-pro-image-preview');
formData.append('model', selectedModel);
if (whiskCookiesInput && whiskCookiesInput.value) {
formData.append('cookies', whiskCookiesInput.value);
}
// Add reference images using correct slotManager methods
const referenceFiles = slotManager.getReferenceFiles();
referenceFiles.forEach(file => {
formData.append('reference_images', file);
});
const referencePaths = slotManager.getReferencePaths();
if (referencePaths && referencePaths.length > 0) {
formData.append('reference_image_paths', JSON.stringify(referencePaths));
@ -592,14 +619,14 @@ document.addEventListener('DOMContentLoaded', () => {
// 2. Item currently being processed (isProcessingQueue)
// 3. Items waiting for backend response (pendingRequests)
const count = generationQueue.length + (isProcessingQueue ? 1 : 0) + pendingRequests;
console.log('Queue counter update:', {
queue: generationQueue.length,
processing: isProcessingQueue,
console.log('Queue counter update:', {
queue: generationQueue.length,
processing: isProcessingQueue,
pending: pendingRequests,
total: count
total: count
});
if (count > 0) {
if (queueCounter) {
queueCounter.classList.remove('hidden');
@ -623,10 +650,10 @@ document.addEventListener('DOMContentLoaded', () => {
const task = generationQueue.shift();
isProcessingQueue = true;
updateQueueCounter(); // Show counter immediately
try {
setViewState('loading');
// Check if this task already has a result (immediate generation)
if (task.immediateResult) {
// Display the already-generated image
@ -730,7 +757,7 @@ document.addEventListener('DOMContentLoaded', () => {
});
const data = await response.json();
// Mark fetch as completed and decrement pending
// We do this BEFORE adding to queue to avoid double counting
fetchCompleted = true;
@ -785,12 +812,12 @@ document.addEventListener('DOMContentLoaded', () => {
}
} catch (error) {
console.error('Error in addToQueue:', error);
// If fetch failed (didn't complete), we need to decrement pendingRequests
if (!fetchCompleted) {
pendingRequests--;
}
updateQueueCounter();
showError(error.message);
}
@ -816,7 +843,7 @@ document.addEventListener('DOMContentLoaded', () => {
const response = await fetch(url);
const blob = await response.blob();
const blobUrl = window.URL.createObjectURL(blob);
const tempLink = document.createElement('a');
tempLink.href = blobUrl;
tempLink.download = filename;
@ -834,7 +861,7 @@ document.addEventListener('DOMContentLoaded', () => {
if (imageDisplayArea) {
imageDisplayArea.addEventListener('wheel', handleCanvasWheel, { passive: false });
imageDisplayArea.addEventListener('pointerdown', handleCanvasPointerDown);
// Drag and drop support
imageDisplayArea.addEventListener('dragover', (e) => {
e.preventDefault();
@ -849,7 +876,7 @@ document.addEventListener('DOMContentLoaded', () => {
imageDisplayArea.addEventListener('drop', async (e) => {
e.preventDefault();
imageDisplayArea.classList.remove('drag-over');
const files = e.dataTransfer?.files;
if (files && files.length > 0) {
const file = files[0];
@ -858,7 +885,7 @@ document.addEventListener('DOMContentLoaded', () => {
// Display image immediately
const objectUrl = URL.createObjectURL(file);
displayImage(objectUrl);
// Extract and apply metadata
const metadata = await extractMetadataFromBlob(file);
if (metadata) {
@ -965,7 +992,7 @@ document.addEventListener('DOMContentLoaded', () => {
const createTemplateModal = document.getElementById('create-template-modal');
const closeTemplateModalBtn = document.getElementById('close-template-modal');
const saveTemplateBtn = document.getElementById('save-template-btn');
const templateTitleInput = document.getElementById('template-title');
const templatePromptInput = document.getElementById('template-prompt');
const templateNoteInput = document.getElementById('template-note');
@ -1189,11 +1216,11 @@ document.addEventListener('DOMContentLoaded', () => {
}
// Global function for opening edit modal (called from templateGallery.js)
window.openEditTemplateModal = async function(template) {
window.openEditTemplateModal = async function (template) {
editingTemplate = template;
editingTemplateSource = template.isUserTemplate ? 'user' : 'builtin';
editingBuiltinIndex = editingTemplateSource === 'builtin' ? template.builtinTemplateIndex : null;
// Pre-fill with template data
templateTitleInput.value = template.title || '';
templatePromptInput.value = template.prompt || '';
@ -1206,18 +1233,18 @@ document.addEventListener('DOMContentLoaded', () => {
try {
const response = await fetch('/prompts');
const data = await response.json();
if (data.prompts) {
const categories = new Set();
data.prompts.forEach(t => {
if (t.category) {
const categoryText = typeof t.category === 'string'
? t.category
const categoryText = typeof t.category === 'string'
? t.category
: (t.category.vi || t.category.en || '');
if (categoryText) categories.add(categoryText);
}
});
templateCategorySelect.innerHTML = '';
const sortedCategories = Array.from(categories).sort();
sortedCategories.forEach(cat => {
@ -1226,15 +1253,15 @@ document.addEventListener('DOMContentLoaded', () => {
option.textContent = cat;
templateCategorySelect.appendChild(option);
});
const newOption = document.createElement('option');
newOption.value = 'new';
newOption.textContent = '+ New Category';
templateCategorySelect.appendChild(newOption);
// Set to template's category
const templateCategory = typeof template.category === 'string'
? template.category
const templateCategory = typeof template.category === 'string'
? template.category
: (template.category.vi || template.category.en || '');
templateCategorySelect.value = templateCategory || 'User';
}
@ -1263,16 +1290,16 @@ document.addEventListener('DOMContentLoaded', () => {
// Update button text
saveTemplateBtn.innerHTML = '<span>Update Template</span><div class="btn-shine"></div>';
createTemplateModal.classList.remove('hidden');
};
// Global function for opening create modal with empty values (called from templateGallery.js)
window.openCreateTemplateModal = async function() {
window.openCreateTemplateModal = async function () {
editingTemplate = null;
editingTemplateSource = 'user';
editingBuiltinIndex = null;
setTemplateTags([]);
if (templateTagInput) {
templateTagInput.value = '';
@ -1290,18 +1317,18 @@ document.addEventListener('DOMContentLoaded', () => {
try {
const response = await fetch('/prompts');
const data = await response.json();
if (data.prompts) {
const categories = new Set();
data.prompts.forEach(t => {
if (t.category) {
const categoryText = typeof t.category === 'string'
? t.category
const categoryText = typeof t.category === 'string'
? t.category
: (t.category.vi || t.category.en || '');
if (categoryText) categories.add(categoryText);
}
});
templateCategorySelect.innerHTML = '';
const sortedCategories = Array.from(categories).sort();
sortedCategories.forEach(cat => {
@ -1310,12 +1337,12 @@ document.addEventListener('DOMContentLoaded', () => {
option.textContent = cat;
templateCategorySelect.appendChild(option);
});
const newOption = document.createElement('option');
newOption.value = 'new';
newOption.textContent = '+ New Category';
templateCategorySelect.appendChild(newOption);
if (sortedCategories.includes('User')) {
templateCategorySelect.value = 'User';
} else if (sortedCategories.length > 0) {
@ -1335,7 +1362,7 @@ document.addEventListener('DOMContentLoaded', () => {
// Update button text
saveTemplateBtn.innerHTML = '<span>Save Template</span><div class="btn-shine"></div>';
createTemplateModal.classList.remove('hidden');
};
@ -1345,7 +1372,7 @@ document.addEventListener('DOMContentLoaded', () => {
editingTemplate = null;
editingTemplateSource = 'user';
editingBuiltinIndex = null;
// Pre-fill data
templateTitleInput.value = '';
templatePromptInput.value = promptInput.value;
@ -1358,25 +1385,25 @@ document.addEventListener('DOMContentLoaded', () => {
try {
const response = await fetch('/prompts');
const data = await response.json();
if (data.prompts) {
// Extract unique categories
const categories = new Set();
data.prompts.forEach(template => {
if (template.category) {
// Handle both string and object categories
const categoryText = typeof template.category === 'string'
? template.category
const categoryText = typeof template.category === 'string'
? template.category
: (template.category.vi || template.category.en || '');
if (categoryText) {
categories.add(categoryText);
}
}
});
// Clear existing options except "new"
templateCategorySelect.innerHTML = '';
// Add sorted categories
const sortedCategories = Array.from(categories).sort();
sortedCategories.forEach(cat => {
@ -1385,13 +1412,13 @@ document.addEventListener('DOMContentLoaded', () => {
option.textContent = cat;
templateCategorySelect.appendChild(option);
});
// Add "new category" option at the end
const newOption = document.createElement('option');
newOption.value = 'new';
newOption.textContent = '+ New Category';
templateCategorySelect.appendChild(newOption);
// Set default to first category or "User" if it exists
if (sortedCategories.includes('User')) {
templateCategorySelect.value = 'User';
@ -1465,7 +1492,7 @@ document.addEventListener('DOMContentLoaded', () => {
templatePreviewDropzone.addEventListener('click', (e) => {
// Don't toggle if clicking on the input itself
if (e.target === templatePreviewUrlInput) return;
if (!isUrlInputMode) {
// Switch to URL input mode
isUrlInputMode = true;
@ -1520,7 +1547,7 @@ document.addEventListener('DOMContentLoaded', () => {
}
});
}
templatePreviewDropzone.addEventListener('dragover', (e) => {
e.preventDefault();
templatePreviewDropzone.classList.add('drag-over');
@ -1534,7 +1561,7 @@ document.addEventListener('DOMContentLoaded', () => {
templatePreviewDropzone.addEventListener('drop', (e) => {
e.preventDefault();
templatePreviewDropzone.classList.remove('drag-over');
const files = e.dataTransfer.files;
if (files.length > 0) {
const file = files[0];
@ -1559,7 +1586,7 @@ document.addEventListener('DOMContentLoaded', () => {
const note = templateNoteInput.value.trim();
const mode = templateModeSelect.value;
let category = templateCategorySelect.value;
if (category === 'new') {
category = templateCategoryInput.value.trim();
}
@ -1619,10 +1646,10 @@ document.addEventListener('DOMContentLoaded', () => {
// Success
createTemplateModal.classList.add('hidden');
// Reload template gallery
await templateGallery.load();
// Reset editing state
editingTemplate = null;
editingTemplateSource = null;
@ -1666,7 +1693,7 @@ document.addEventListener('DOMContentLoaded', () => {
loadGallery();
loadTemplateGallery();
initializeSidebarResizer(sidebar, resizeHandle);
// Restore last image if available
try {
const lastImage = localStorage.getItem('gemini-app-last-image');
@ -1676,13 +1703,13 @@ document.addEventListener('DOMContentLoaded', () => {
} catch (e) {
console.warn('Failed to restore last image', e);
}
// Setup canvas language toggle
const canvasLangInput = document.getElementById('canvas-lang-input');
if (canvasLangInput) {
// Set initial state
canvasLangInput.checked = i18n.currentLang === 'en';
canvasLangInput.addEventListener('change', (e) => {
i18n.setLanguage(e.target.checked ? 'en' : 'vi');
// Update visual state
@ -1753,7 +1780,7 @@ document.addEventListener('DOMContentLoaded', () => {
if (!btn.classList.contains('history-favorites-btn')) {
btn.addEventListener('click', () => {
const filterType = btn.dataset.filter;
// Remove active from all date filter buttons (not favorites)
historyFilterBtns.forEach(b => {
if (!b.classList.contains('history-favorites-btn')) {
@ -1834,7 +1861,7 @@ document.addEventListener('DOMContentLoaded', () => {
hasGeneratedImage = true; // Mark that we have an image
setViewState('result');
// Persist image URL
try {
localStorage.setItem('gemini-app-last-image', imageUrl);
@ -1864,7 +1891,7 @@ document.addEventListener('DOMContentLoaded', () => {
promptInput.value = metadata.prompt;
refreshPromptHighlight();
}
// If metadata doesn't have 'note' field, set to empty string instead of keeping current value
if (metadata.hasOwnProperty('note')) {
promptNoteInput.value = metadata.note || '';
@ -1872,14 +1899,14 @@ document.addEventListener('DOMContentLoaded', () => {
promptNoteInput.value = '';
}
refreshNoteHighlight();
if (metadata.aspect_ratio) aspectRatioInput.value = metadata.aspect_ratio;
if (metadata.resolution) resolutionInput.value = metadata.resolution;
if (metadata.reference_images && Array.isArray(metadata.reference_images)) {
slotManager.setReferenceImages(metadata.reference_images);
}
persistSettings();
}
@ -1968,9 +1995,9 @@ document.addEventListener('DOMContentLoaded', () => {
const targetTag = event.target?.tagName;
if (targetTag && ['INPUT', 'TEXTAREA', 'SELECT'].includes(targetTag)) return;
if (event.target?.isContentEditable) return;
event.preventDefault();
// Toggle template gallery
if (templateGalleryState.classList.contains('hidden')) {
setViewState('template-gallery');
@ -2140,9 +2167,9 @@ document.addEventListener('DOMContentLoaded', () => {
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url })
});
const data = await response.json();
if (!response.ok) {
throw new Error(data.error || 'Failed to download image');
}
@ -2155,7 +2182,7 @@ document.addEventListener('DOMContentLoaded', () => {
alert('Không còn slot trống cho ảnh tham chiếu.');
}
} else {
throw new Error('No image path returned');
throw new Error('No image path returned');
}
} catch (error) {

View file

@ -46,18 +46,16 @@
<div class="field-action-buttons" data-target="prompt" aria-label="Prompt actions">
<button type="button" class="field-action-btn" data-action="copy" title="Copy prompt"
aria-label="Copy prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<rect x="9" y="9" width="13" height="13" rx="2.5" />
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
</svg>
</button>
<button type="button" class="field-action-btn" data-action="paste" title="Paste"
aria-label="Paste vào prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M8 4h8" />
<path d="M9 2h6a2 2 0 0 1 2 2v1H7V4a2 2 0 0 1 2-2z" />
<rect x="5" y="5" width="14" height="16" rx="2" />
@ -67,9 +65,8 @@
</button>
<button type="button" class="field-action-btn" data-action="clear" title="Clear prompt"
aria-label="Xoá prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M3 6h18" />
<path d="M19 6v12a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6" />
<path d="M10 11v6" />
@ -132,18 +129,16 @@
<div class="field-action-buttons" data-target="note" aria-label="Note actions">
<button type="button" class="field-action-btn" data-action="copy" title="Copy note"
aria-label="Copy note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<rect x="9" y="9" width="13" height="13" rx="2.5" />
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
</svg>
</button>
<button type="button" class="field-action-btn" data-action="paste" title="Paste"
aria-label="Paste vào note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M8 4h8" />
<path d="M9 2h6a2 2 0 0 1 2 2v1H7V4a2 2 0 0 1 2-2z" />
<rect x="5" y="5" width="14" height="16" rx="2" />
@ -153,9 +148,8 @@
</button>
<button type="button" class="field-action-btn" data-action="clear" title="Clear note"
aria-label="Xoá note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round"
stroke-linejoin="round">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
<path d="M3 6h18" />
<path d="M19 6v12a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6" />
<path d="M10 11v6" />
@ -173,7 +167,7 @@
</div>
<div id="image-input-grid" class="image-input-grid" aria-live="polite"></div>
<div class="image-url-input-wrapper" style="margin-top: 0.5rem;">
<input type="text" id="reference-url-input" placeholder="Nhập URL hoặc đường dẫn ảnh..."
<input type="text" id="reference-url-input" placeholder="Nhập URL hoặc đường dẫn ảnh..."
style="width: 100%; padding: 0.5rem; border-radius: 4px; border: 1px solid var(--border-color); background: rgba(0,0,0,0.2); color: var(--text-primary); font-size: 0.85rem;">
</div>
</div>
@ -498,6 +492,15 @@
rel="noreferrer">aistudio.google.com/api-keys</a>
</p>
</div>
<!-- Whisk Cookies Input -->
<div class="input-group api-settings-input-group hidden" id="whisk-cookies-group">
<label for="whisk-cookies">Whisk Cookies (dành cho ImageFX)</label>
<textarea id="whisk-cookies" rows="3" placeholder="Paste toàn bộ cookie string từ labs.google..."
style="width: 100%; padding: 0.5rem; background: rgba(0,0,0,0.2); border: 1px solid var(--border-color); color: var(--text-primary); border-radius: 4px; font-size: 0.8rem;"></textarea>
<p class="input-hint">
F12 trên labs.google > Network > Request bất kỳ > Copy Request Headers > Cookie.
</p>
</div>
<div class="input-group api-settings-input-group">
<label for="api-model">Model</label>
<div class="select-wrapper">
@ -505,6 +508,7 @@
style="width: 100%; padding: 0.75rem; background: rgba(255, 255, 255, 0.05); border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 0.5rem; color: var(--text-primary); font-size: 0.9rem;">
<option value="gemini-3-pro-image-preview">Gemini 3 Pro (Image Preview)</option>
<option value="gemini-2.5-flash-image">Gemini 2.5 Flash Image</option>
<option value="whisk">Whisk (ImageFX) [Experimental]</option>
</select>
</div>
</div>
@ -562,4 +566,4 @@
<script type="module" src="{{ url_for('static', filename='script.js') }}"></script>
</body>
</html>
</html>

254
whisk_client.py Normal file
View file

@ -0,0 +1,254 @@
import requests
import json
import time
import base64
import os
import uuid
import logging
# Configure logging.
# NOTE(review): basicConfig runs at import time, so importing this module
# configures the root logger if the host application has not done so already.
logging.basicConfig(level=logging.INFO, format='%(levelname)s:%(name)s:%(message)s')
logger = logging.getLogger("whisk_client")

# Endpoints obtained by reverse engineering the labs.google web client; they
# are not a documented API.
# Session endpoint: queried with the user's cookies to obtain an access token.
AUTH_ENDPOINT = "https://labs.google/fx/api/auth/session"
# Upload endpoint: receives a reference image as a base64 data URI.
UPLOAD_ENDPOINT = "https://labs.google/fx/api/trpc/backbone.uploadImage"
# Endpoint 1: text-to-image generation (used when no reference image was uploaded).
GENERATE_ENDPOINT = "https://aisandbox-pa.googleapis.com/v1/whisk:generateImage"
# Endpoint 2: "recipe" generation (used when a reference image was uploaded).
RECIPE_ENDPOINT = "https://aisandbox-pa.googleapis.com/v1/whisk:runImageRecipe"

# Headers sent with every request; they present a desktop Chrome browser
# coming from the labs.google origin.
DEFAULT_HEADERS = {
    "Origin": "https://labs.google",
    "Content-Type": "application/json",
    "Referer": "https://labs.google/fx/tools/image-fx",
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
}
class WhiskClientError(Exception):
    """Error raised by this module when a Whisk request cannot be completed.

    Used for authentication, reference-image upload, and image generation
    failures; the message is intended to be shown to the caller as-is.
    """
def parse_cookies(cookie_input):
    """Parse user-supplied cookies into a ``{name: value}`` dict.

    Two input formats are accepted:

    * a JSON array of objects carrying ``name`` and ``value`` keys; entries
      that are not objects, or whose name or value is empty, are skipped;
    * a raw cookie header string such as ``"a=1; b=2"``, optionally still
      carrying the ``Cookie:`` header name in front.

    Args:
        cookie_input: The cookie text, or a falsy value.

    Returns:
        A dict of cookie names to values; empty when nothing usable is found.
    """
    if not cookie_input:
        return {}

    text = cookie_input.strip()

    # Users copying the header from browser dev tools often include the header
    # name; ':' cannot appear in a cookie name, so dropping the prefix is safe.
    if text[:7].lower() == 'cookie:':
        text = text[7:].strip()

    if text.startswith('[') and text.endswith(']'):
        try:
            entries = json.loads(text)
        except json.JSONDecodeError:
            # Looked like JSON but is not; fall through to header-string parsing.
            pass
        else:
            cookies = {}
            for entry in entries:
                # Guard against arrays of scalars, which have no .get().
                if not isinstance(entry, dict):
                    continue
                name = entry.get('name')
                value = entry.get('value')
                if name and value:
                    cookies[name] = value
            return cookies

    cookies = {}
    for item in text.split(';'):
        name, separator, value = item.partition('=')
        name = name.strip()
        # Skip fragments without '=' and fragments with an empty name.
        if separator and name:
            cookies[name] = value.strip()
    return cookies
def get_session_token(cookies):
    """Exchange browser cookies for an access token.

    Sends a GET to ``AUTH_ENDPOINT`` with the given cookies and reads
    ``access_token`` from the JSON response.

    Args:
        cookies: Mapping of cookie names to values (see ``parse_cookies``).

    Returns:
        The ``access_token`` value from the session response.

    Raises:
        WhiskClientError: On any failure (network error, non-2xx status,
            non-JSON body, or a response without ``access_token``). The
            original exception is attached as ``__cause__``.
    """
    logger.info("Fetching session token from labs.google...")
    try:
        response = requests.get(
            AUTH_ENDPOINT,
            headers={**DEFAULT_HEADERS},
            cookies=cookies,
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()
        # A JSON body that is not an object (e.g. null or a list) has no
        # .get(); treat it the same as a missing token.
        token = data.get('access_token') if isinstance(data, dict) else None
        if not token:
            raise WhiskClientError("Session response missing access_token")
        return token
    except Exception as e:
        logger.error("Failed to fetch session token: %s", e)
        raise WhiskClientError(f"Authentication failed: {e}") from e
def upload_reference_image(image_path, cookies):
    """Upload a local image to Whisk and return its media generation id.

    The file is sent to ``UPLOAD_ENDPOINT`` as a base64 data URI with the
    category ``MEDIA_CATEGORY_SUBJECT``.

    Args:
        image_path: Path of the image file on disk.
        cookies: Mapping of cookie names to values used to authenticate.

    Returns:
        The ``uploadMediaGenerationId`` from the response, or ``None`` when
        ``image_path`` is empty or does not exist.

    Raises:
        WhiskClientError: If the server answers with a non-OK status, or the
            response body is not JSON or lacks the expected id.
        Exception: Any other error (file I/O, network) is logged and
            re-raised unchanged.
    """
    # Function-scope import: mimetypes is not among the module-level imports.
    import mimetypes

    if not image_path or not os.path.exists(image_path):
        return None

    logger.info("Uploading reference image: %s", image_path)
    try:
        mime_type, _ = mimetypes.guess_type(image_path)
        if not mime_type:
            mime_type = "image/png"

        # Hold the file open only for the read itself.
        with open(image_path, "rb") as img_file:
            b64_data = base64.b64encode(img_file.read()).decode('utf-8')
        data_uri = f"data:{mime_type};base64,{b64_data}"

        payload = {
            "json": {
                "clientContext": {
                    "workflowId": str(uuid.uuid4()),
                    "sessionId": str(int(time.time() * 1000)),
                },
                "uploadMediaInput": {
                    "mediaCategory": "MEDIA_CATEGORY_SUBJECT",
                    "rawBytes": data_uri,
                    "caption": "",
                },
            }
        }

        response = requests.post(
            UPLOAD_ENDPOINT,
            headers=DEFAULT_HEADERS,
            cookies=cookies,
            json=payload,
            timeout=60,
        )
        if not response.ok:
            raise WhiskClientError(f"Image upload failed: {response.text}")

        try:
            data = response.json()
            media_id = data['result']['data']['json']['result']['uploadMediaGenerationId']
        except (KeyError, TypeError, ValueError) as e:
            # ValueError covers a success status with a non-JSON body.
            raise WhiskClientError("Failed to retrieve uploadMediaGenerationId") from e

        logger.info("Image uploaded successfully. ID: %s", media_id)
        return media_id
    except Exception as e:
        logger.error("Error uploading image: %s", e)
        raise
def _is_safety_rejection(response):
    """Return True when an error response names a safety-filter reason."""
    try:
        err_json = response.json()
    except ValueError:
        # Body is not JSON; cannot be classified as a safety rejection.
        return False
    error = err_json.get('error') if isinstance(err_json, dict) else None
    details = error.get('details') if isinstance(error, dict) else None
    if not isinstance(details, list):
        return False
    safety_reasons = ('PUBLIC_ERROR_UNSAFE_GENERATION', 'PUBLIC_ERROR_SEXUAL')
    return any(
        isinstance(d, dict) and d.get('reason') in safety_reasons
        for d in details
    )


def _extract_encoded_images(json_resp):
    """Collect the base64 ``encodedImage`` strings from a generation response."""
    if not isinstance(json_resp, dict):
        return []
    images = []
    for panel in json_resp.get('imagePanels') or []:
        for img in panel.get('generatedImages', []):
            if 'encodedImage' in img:
                images.append(img['encodedImage'])
    return images


def generate_image_whisk(prompt, cookie_str, **kwargs):
    """Generate one image through Whisk and return its raw bytes.

    Authenticates with the supplied cookies, optionally uploads a reference
    image, then calls ``RECIPE_ENDPOINT`` (reference image uploaded) or
    ``GENERATE_ENDPOINT`` (text only) and decodes the first returned image.

    Args:
        prompt: Text prompt for the image.
        cookie_str: Cookies in any format ``parse_cookies`` accepts.
        **kwargs: Optional settings:
            reference_image_path: Local path of a reference image. An upload
                failure is logged and generation continues without it.
            aspect_ratio: One of "1:1", "9:16", "16:9", "4:3", "3:4", "Auto";
                anything else falls back to square.
            seed: Integer seed; defaults to the current Unix time.
            Other keys (e.g. ``resolution``) are accepted but not read.

    Returns:
        The decoded bytes of the first generated image.

    Raises:
        WhiskClientError: On missing cookies, authentication failure, a
            rejected or failed generation request, a timeout, or a response
            with no images.
    """
    cookies = parse_cookies(cookie_str)
    if not cookies:
        raise WhiskClientError("No valid cookies found")

    access_token = get_session_token(cookies)

    ref_image_path = kwargs.get('reference_image_path')
    media_generation_id = None
    if ref_image_path:
        try:
            media_generation_id = upload_reference_image(ref_image_path, cookies)
        except Exception as e:
            # Deliberate best-effort: fall back to text-to-image.
            logger.warning("Skipping reference image due to upload error: %s", e)

    # NOTE(review): "3:4" maps to the same enum as "9:16"; a dedicated 3:4
    # enum may exist upstream -- confirm against captured traffic.
    aspect_ratio_map = {
        "1:1": "IMAGE_ASPECT_RATIO_SQUARE",
        "9:16": "IMAGE_ASPECT_RATIO_PORTRAIT",
        "16:9": "IMAGE_ASPECT_RATIO_LANDSCAPE",
        "4:3": "IMAGE_ASPECT_RATIO_LANDSCAPE_FOUR_THREE",
        "3:4": "IMAGE_ASPECT_RATIO_PORTRAIT",
        "Auto": "IMAGE_ASPECT_RATIO_SQUARE",
    }
    aspect_ratio_key = kwargs.get('aspect_ratio', 'Auto')
    aspect_ratio_enum = aspect_ratio_map.get(aspect_ratio_key, "IMAGE_ASPECT_RATIO_SQUARE")

    seed = kwargs.get('seed', int(time.time()))

    headers = {
        **DEFAULT_HEADERS,
        "Authorization": f"Bearer {access_token}",
    }

    if media_generation_id:
        # Reference image available: use the recipe endpoint.
        target_endpoint = RECIPE_ENDPOINT
        payload = {
            "clientContext": {
                "workflowId": str(uuid.uuid4()),
                "tool": "BACKBONE",
                "sessionId": str(int(time.time() * 1000)),
            },
            "seed": seed,
            "imageModelSettings": {
                "imageModel": "GEM_PIX",
                "aspectRatio": aspect_ratio_enum,
            },
            "userInstruction": prompt,
            "recipeMediaInputs": [{
                "mediaInput": {
                    "mediaCategory": "MEDIA_CATEGORY_SUBJECT",
                    "mediaGenerationId": media_generation_id,
                }
            }],
        }
    else:
        # Text-to-image. NOTE(review): this payload shape was inferred rather
        # than captured; verify it if generateImage starts rejecting requests.
        target_endpoint = GENERATE_ENDPOINT
        payload = {
            "userInput": {
                # Two candidates are requested although only the first is returned.
                "candidatesCount": 2,
                "prompts": [prompt],
                "seed": seed,
            },
            "clientContext": {
                "workflowId": str(uuid.uuid4()),
                "tool": "IMAGE_FX",
                "sessionId": str(int(time.time() * 1000)),
            },
            "modelInput": {
                "modelNameType": "IMAGEN_3_5",
                "aspectRatio": aspect_ratio_enum,
            },
        }

    logger.info("Generating image. Endpoint: %s, Prompt: %s", target_endpoint, prompt)

    try:
        response = requests.post(
            target_endpoint,
            headers=headers,
            json=payload,
            timeout=120,
        )

        if not response.ok:
            if _is_safety_rejection(response):
                raise WhiskClientError("⚠️ Google Safety Filter Triggered. Prompt bị từ chối do nội dung không an toàn.")
            raise WhiskClientError(f"Generation failed ({response.status_code}): {response.text}")

        json_resp = response.json()
        images = _extract_encoded_images(json_resp)
        if not images:
            keys = list(json_resp) if isinstance(json_resp, dict) else type(json_resp).__name__
            logger.error("Unexpected response structure: %s", keys)
            raise WhiskClientError("No images found in response")

        return base64.b64decode(images[0])
    except WhiskClientError as e:
        # Already in the caller-facing form; log and pass through unwrapped.
        logger.error("Whisk Generation Error: %s", e)
        raise
    except requests.exceptions.Timeout as e:
        raise WhiskClientError("Timeout connecting to Google Whisk.") from e
    except Exception as e:
        logger.error("Whisk Generation Error: %s", e)
        raise WhiskClientError(str(e)) from e