feat: Add Whisk integration and Docker support

This commit is contained in:
Khoa.vo 2025-12-30 19:09:21 +07:00
parent 9e0f3b80b2
commit 63ad7cc21f
9 changed files with 543 additions and 89 deletions

BIN
.DS_Store vendored

Binary file not shown.

21
Dockerfile Normal file
View file

@ -0,0 +1,21 @@
# Container image for the image-generation web app started by app.py.
FROM python:3.10-slim

# Python block-buffers stdout when it is not attached to a TTY, so print()
# calls without flush=True can show up late (or never, on a crash) in
# `docker logs`. Disable buffering for the whole container.
ENV PYTHONUNBUFFERED=1

WORKDIR /app

# Install system dependencies; the apt lists are removed in the same layer so
# they do not end up in the image.
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Install python dependencies before copying the source so this layer stays
# cached until requirements.txt changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY . .

# Container port (docker-compose.yml maps host port 8558 to it).
EXPOSE 8888

# Run the application
CMD ["python", "app.py"]

Binary file not shown.

Binary file not shown.

137
app.py
View file

@ -12,6 +12,7 @@ from google import genai
from google.genai import types from google.genai import types
from PIL import Image, PngImagePlugin from PIL import Image, PngImagePlugin
import threading, time, subprocess, re import threading, time, subprocess, re
import whisk_client
import logging import logging
@ -393,12 +394,15 @@ def generate_image():
if not prompt: if not prompt:
return jsonify({'error': 'Prompt is required'}), 400 return jsonify({'error': 'Prompt is required'}), 400
if not api_key: # Determine if this is a Whisk request
return jsonify({'error': 'API Key is required.'}), 401 is_whisk = 'whisk' in model.lower() or 'imagefx' in model.lower()
if not is_whisk and not api_key:
return jsonify({'error': 'API Key is required for Gemini models.'}), 401
try: try:
print("Đang gửi lệnh...", flush=True) print("Đang gửi lệnh...", flush=True)
client = genai.Client(api_key=api_key) # client initialization moved to Gemini block
image_config_args = {} image_config_args = {}
@ -514,6 +518,133 @@ def generate_image():
continue continue
model_name = model model_name = model
# ==================================================================================
# WHISK (IMAGEFX) HANDLING
# ==================================================================================
if is_whisk:
print(f"Detected Whisk/ImageFX model request: {model_name}", flush=True)
# Extract cookies from request headers or form data
# Priority: Form Data 'cookies' > Request Header 'x-whisk-cookies' > Environment Variable
cookie_str = request.form.get('cookies') or request.headers.get('x-whisk-cookies') or os.environ.get('WHISK_COOKIES')
if not cookie_str:
return jsonify({'error': 'Whisk cookies are required. Please provide them in the "cookies" form field or configuration.'}), 400
print("Sending request to Whisk...", flush=True)
try:
# Check for reference images
reference_image_path = None
# final_reference_paths (populated above) contains URLs/paths to reference images.
# Can be new uploads or history items.
if final_reference_paths:
# Use the first one
ref_url = final_reference_paths[0]
# Convert URL/Path to absolute local path
# ref_url might be "http://.../static/..." or "/static/..."
if '/static/' in ref_url:
rel_path = ref_url.split('/static/')[1]
possible_path = os.path.join(app.static_folder, rel_path)
if os.path.exists(possible_path):
reference_image_path = possible_path
print(f"Whisk: Using reference image at {reference_image_path}", flush=True)
elif os.path.exists(ref_url):
# It's already a path?
reference_image_path = ref_url
# Call the client
try:
whisk_result = whisk_client.generate_image_whisk(
prompt=api_prompt,
cookie_str=cookie_str,
aspect_ratio=aspect_ratio,
resolution=resolution,
reference_image_path=reference_image_path
)
except Exception as e:
# Re-raise to be caught by the outer block
raise e
# Process result - whisk_client returns raw bytes
image_bytes = None
if isinstance(whisk_result, bytes):
image_bytes = whisk_result
elif isinstance(whisk_result, dict):
# Fallback if I ever change the client to return dict
if 'image_data' in whisk_result:
image_bytes = whisk_result['image_data']
elif 'image_url' in whisk_result:
import requests
img_resp = requests.get(whisk_result['image_url'])
image_bytes = img_resp.content
if not image_bytes:
raise ValueError("No image data returned from Whisk.")
# Save and process image (Reuse existing logic)
image = Image.open(BytesIO(image_bytes))
png_info = PngImagePlugin.PngInfo()
date_str = datetime.now().strftime("%Y%m%d")
search_pattern = os.path.join(GENERATED_DIR, f"whisk_{date_str}_*.png")
existing_files = glob.glob(search_pattern)
max_id = 0
for f in existing_files:
try:
basename = os.path.basename(f)
name_without_ext = os.path.splitext(basename)[0]
id_part = name_without_ext.split('_')[-1]
id_num = int(id_part)
if id_num > max_id:
max_id = id_num
except ValueError:
continue
next_id = max_id + 1
filename = f"whisk_{date_str}_{next_id}.png"
filepath = os.path.join(GENERATED_DIR, filename)
rel_path = os.path.join('generated', filename)
image_url = url_for('static', filename=rel_path)
metadata = {
'prompt': prompt,
'note': note,
'processed_prompt': api_prompt,
'aspect_ratio': aspect_ratio or 'Auto',
'resolution': resolution,
'reference_images': final_reference_paths,
'model': 'whisk'
}
png_info.add_text('sdvn_meta', json.dumps(metadata))
buffer = BytesIO()
image.save(buffer, format='PNG', pnginfo=png_info)
final_bytes = buffer.getvalue()
with open(filepath, 'wb') as f:
f.write(final_bytes)
image_data = base64.b64encode(final_bytes).decode('utf-8')
return jsonify({
'image': image_url,
'image_data': image_data,
'metadata': metadata,
})
except Exception as e:
print(f"Whisk error: {e}")
return jsonify({'error': f"Whisk Generation Error: {str(e)}"}), 500
# ==================================================================================
# STANDARD GEMINI HANDLING
# ==================================================================================
# Initialize Client here, since API Key is required
client = genai.Client(api_key=api_key)
print(f"Đang tạo với model {model_name}...", flush=True) print(f"Đang tạo với model {model_name}...", flush=True)
response = client.models.generate_content( response = client.models.generate_content(
model=model_name, model=model_name,

17
docker-compose.yml Normal file
View file

@ -0,0 +1,17 @@
# Compose definition for the image-generation app built from ./Dockerfile.
version: '3.8'
services:
  app:
    build: .
    platform: linux/amd64
    ports:
      # host:container
      - "8558:8888"
    volumes:
      # Bind mounts keep generated images and prompt/favorite data on the host.
      # NOTE: the three JSON files must already exist on the host before the
      # first `up`; if a bind-mount source path is missing, Docker creates a
      # directory with that name instead of a file.
      - ./static:/app/static
      - ./prompts.json:/app/prompts.json
      - ./user_prompts.json:/app/user_prompts.json
      - ./gallery_favorites.json:/app/gallery_favorites.json
    environment:
      # Both values default to an empty string when unset in the host
      # environment / .env file.
      - GOOGLE_API_KEY=${GOOGLE_API_KEY:-} # Optional for Whisk
      - WHISK_COOKIES=${WHISK_COOKIES:-}
    restart: unless-stopped

View file

@ -132,10 +132,28 @@ document.addEventListener('DOMContentLoaded', () => {
if (apiModelSelect) { if (apiModelSelect) {
apiModelSelect.addEventListener('change', () => { apiModelSelect.addEventListener('change', () => {
toggleResolutionVisibility(); toggleResolutionVisibility();
toggleCookiesVisibility();
persistSettings(); persistSettings();
}); });
} }
const whiskCookiesGroup = document.getElementById('whisk-cookies-group');
const whiskCookiesInput = document.getElementById('whisk-cookies');
/**
 * Show the Whisk cookies field only while a Whisk/ImageFX model is selected.
 *
 * The check mirrors the backend's routing rule (case-insensitive substring
 * match on "whisk" or "imagefx") so the field is visible for every model the
 * server will send to Whisk, not just the exact value "whisk".
 * Does nothing when either the cookies group or the model select is missing.
 */
function toggleCookiesVisibility() {
    if (!whiskCookiesGroup || !apiModelSelect) return;

    const selectedModel = String(apiModelSelect.value || '').toLowerCase();
    const isWhiskModel = selectedModel.includes('whisk') || selectedModel.includes('imagefx');

    // force=true adds "hidden", force=false removes it.
    whiskCookiesGroup.classList.toggle('hidden', !isWhiskModel);
}
if (whiskCookiesInput) {
whiskCookiesInput.addEventListener('input', persistSettings);
}
// Load Settings // Load Settings
function loadSettings() { function loadSettings() {
try { try {
@ -156,6 +174,10 @@ document.addEventListener('DOMContentLoaded', () => {
if (bodyFontSelect && settings.bodyFont) { if (bodyFontSelect && settings.bodyFont) {
bodyFontSelect.value = settings.bodyFont; bodyFontSelect.value = settings.bodyFont;
} }
if (whiskCookiesInput && settings.whiskCookies) {
whiskCookiesInput.value = settings.whiskCookies;
}
toggleCookiesVisibility();
return settings; return settings;
} }
} catch (e) { } catch (e) {
@ -169,7 +191,7 @@ document.addEventListener('DOMContentLoaded', () => {
const referenceImages = (typeof slotManager !== 'undefined' && typeof slotManager.serializeReferenceImages === 'function') const referenceImages = (typeof slotManager !== 'undefined' && typeof slotManager.serializeReferenceImages === 'function')
? slotManager.serializeReferenceImages() ? slotManager.serializeReferenceImages()
: []; : [];
const settings = { const settings = {
apiKey: apiKeyInput.value, apiKey: apiKeyInput.value,
prompt: promptInput.value, prompt: promptInput.value,
@ -180,6 +202,7 @@ document.addEventListener('DOMContentLoaded', () => {
referenceImages, referenceImages,
theme: currentTheme || DEFAULT_THEME, theme: currentTheme || DEFAULT_THEME,
bodyFont: bodyFontSelect ? bodyFontSelect.value : DEFAULT_BODY_FONT, bodyFont: bodyFontSelect ? bodyFontSelect.value : DEFAULT_BODY_FONT,
whiskCookies: whiskCookiesInput ? whiskCookiesInput.value : '',
}; };
try { try {
localStorage.setItem(SETTINGS_STORAGE_KEY, JSON.stringify(settings)); localStorage.setItem(SETTINGS_STORAGE_KEY, JSON.stringify(settings));
@ -199,12 +222,16 @@ document.addEventListener('DOMContentLoaded', () => {
const selectedModel = model || (apiModelSelect ? apiModelSelect.value : 'gemini-3-pro-image-preview'); const selectedModel = model || (apiModelSelect ? apiModelSelect.value : 'gemini-3-pro-image-preview');
formData.append('model', selectedModel); formData.append('model', selectedModel);
if (whiskCookiesInput && whiskCookiesInput.value) {
formData.append('cookies', whiskCookiesInput.value);
}
// Add reference images using correct slotManager methods // Add reference images using correct slotManager methods
const referenceFiles = slotManager.getReferenceFiles(); const referenceFiles = slotManager.getReferenceFiles();
referenceFiles.forEach(file => { referenceFiles.forEach(file => {
formData.append('reference_images', file); formData.append('reference_images', file);
}); });
const referencePaths = slotManager.getReferencePaths(); const referencePaths = slotManager.getReferencePaths();
if (referencePaths && referencePaths.length > 0) { if (referencePaths && referencePaths.length > 0) {
formData.append('reference_image_paths', JSON.stringify(referencePaths)); formData.append('reference_image_paths', JSON.stringify(referencePaths));
@ -592,14 +619,14 @@ document.addEventListener('DOMContentLoaded', () => {
// 2. Item currently being processed (isProcessingQueue) // 2. Item currently being processed (isProcessingQueue)
// 3. Items waiting for backend response (pendingRequests) // 3. Items waiting for backend response (pendingRequests)
const count = generationQueue.length + (isProcessingQueue ? 1 : 0) + pendingRequests; const count = generationQueue.length + (isProcessingQueue ? 1 : 0) + pendingRequests;
console.log('Queue counter update:', { console.log('Queue counter update:', {
queue: generationQueue.length, queue: generationQueue.length,
processing: isProcessingQueue, processing: isProcessingQueue,
pending: pendingRequests, pending: pendingRequests,
total: count total: count
}); });
if (count > 0) { if (count > 0) {
if (queueCounter) { if (queueCounter) {
queueCounter.classList.remove('hidden'); queueCounter.classList.remove('hidden');
@ -623,10 +650,10 @@ document.addEventListener('DOMContentLoaded', () => {
const task = generationQueue.shift(); const task = generationQueue.shift();
isProcessingQueue = true; isProcessingQueue = true;
updateQueueCounter(); // Show counter immediately updateQueueCounter(); // Show counter immediately
try { try {
setViewState('loading'); setViewState('loading');
// Check if this task already has a result (immediate generation) // Check if this task already has a result (immediate generation)
if (task.immediateResult) { if (task.immediateResult) {
// Display the already-generated image // Display the already-generated image
@ -730,7 +757,7 @@ document.addEventListener('DOMContentLoaded', () => {
}); });
const data = await response.json(); const data = await response.json();
// Mark fetch as completed and decrement pending // Mark fetch as completed and decrement pending
// We do this BEFORE adding to queue to avoid double counting // We do this BEFORE adding to queue to avoid double counting
fetchCompleted = true; fetchCompleted = true;
@ -785,12 +812,12 @@ document.addEventListener('DOMContentLoaded', () => {
} }
} catch (error) { } catch (error) {
console.error('Error in addToQueue:', error); console.error('Error in addToQueue:', error);
// If fetch failed (didn't complete), we need to decrement pendingRequests // If fetch failed (didn't complete), we need to decrement pendingRequests
if (!fetchCompleted) { if (!fetchCompleted) {
pendingRequests--; pendingRequests--;
} }
updateQueueCounter(); updateQueueCounter();
showError(error.message); showError(error.message);
} }
@ -816,7 +843,7 @@ document.addEventListener('DOMContentLoaded', () => {
const response = await fetch(url); const response = await fetch(url);
const blob = await response.blob(); const blob = await response.blob();
const blobUrl = window.URL.createObjectURL(blob); const blobUrl = window.URL.createObjectURL(blob);
const tempLink = document.createElement('a'); const tempLink = document.createElement('a');
tempLink.href = blobUrl; tempLink.href = blobUrl;
tempLink.download = filename; tempLink.download = filename;
@ -834,7 +861,7 @@ document.addEventListener('DOMContentLoaded', () => {
if (imageDisplayArea) { if (imageDisplayArea) {
imageDisplayArea.addEventListener('wheel', handleCanvasWheel, { passive: false }); imageDisplayArea.addEventListener('wheel', handleCanvasWheel, { passive: false });
imageDisplayArea.addEventListener('pointerdown', handleCanvasPointerDown); imageDisplayArea.addEventListener('pointerdown', handleCanvasPointerDown);
// Drag and drop support // Drag and drop support
imageDisplayArea.addEventListener('dragover', (e) => { imageDisplayArea.addEventListener('dragover', (e) => {
e.preventDefault(); e.preventDefault();
@ -849,7 +876,7 @@ document.addEventListener('DOMContentLoaded', () => {
imageDisplayArea.addEventListener('drop', async (e) => { imageDisplayArea.addEventListener('drop', async (e) => {
e.preventDefault(); e.preventDefault();
imageDisplayArea.classList.remove('drag-over'); imageDisplayArea.classList.remove('drag-over');
const files = e.dataTransfer?.files; const files = e.dataTransfer?.files;
if (files && files.length > 0) { if (files && files.length > 0) {
const file = files[0]; const file = files[0];
@ -858,7 +885,7 @@ document.addEventListener('DOMContentLoaded', () => {
// Display image immediately // Display image immediately
const objectUrl = URL.createObjectURL(file); const objectUrl = URL.createObjectURL(file);
displayImage(objectUrl); displayImage(objectUrl);
// Extract and apply metadata // Extract and apply metadata
const metadata = await extractMetadataFromBlob(file); const metadata = await extractMetadataFromBlob(file);
if (metadata) { if (metadata) {
@ -965,7 +992,7 @@ document.addEventListener('DOMContentLoaded', () => {
const createTemplateModal = document.getElementById('create-template-modal'); const createTemplateModal = document.getElementById('create-template-modal');
const closeTemplateModalBtn = document.getElementById('close-template-modal'); const closeTemplateModalBtn = document.getElementById('close-template-modal');
const saveTemplateBtn = document.getElementById('save-template-btn'); const saveTemplateBtn = document.getElementById('save-template-btn');
const templateTitleInput = document.getElementById('template-title'); const templateTitleInput = document.getElementById('template-title');
const templatePromptInput = document.getElementById('template-prompt'); const templatePromptInput = document.getElementById('template-prompt');
const templateNoteInput = document.getElementById('template-note'); const templateNoteInput = document.getElementById('template-note');
@ -1189,11 +1216,11 @@ document.addEventListener('DOMContentLoaded', () => {
} }
// Global function for opening edit modal (called from templateGallery.js) // Global function for opening edit modal (called from templateGallery.js)
window.openEditTemplateModal = async function(template) { window.openEditTemplateModal = async function (template) {
editingTemplate = template; editingTemplate = template;
editingTemplateSource = template.isUserTemplate ? 'user' : 'builtin'; editingTemplateSource = template.isUserTemplate ? 'user' : 'builtin';
editingBuiltinIndex = editingTemplateSource === 'builtin' ? template.builtinTemplateIndex : null; editingBuiltinIndex = editingTemplateSource === 'builtin' ? template.builtinTemplateIndex : null;
// Pre-fill with template data // Pre-fill with template data
templateTitleInput.value = template.title || ''; templateTitleInput.value = template.title || '';
templatePromptInput.value = template.prompt || ''; templatePromptInput.value = template.prompt || '';
@ -1206,18 +1233,18 @@ document.addEventListener('DOMContentLoaded', () => {
try { try {
const response = await fetch('/prompts'); const response = await fetch('/prompts');
const data = await response.json(); const data = await response.json();
if (data.prompts) { if (data.prompts) {
const categories = new Set(); const categories = new Set();
data.prompts.forEach(t => { data.prompts.forEach(t => {
if (t.category) { if (t.category) {
const categoryText = typeof t.category === 'string' const categoryText = typeof t.category === 'string'
? t.category ? t.category
: (t.category.vi || t.category.en || ''); : (t.category.vi || t.category.en || '');
if (categoryText) categories.add(categoryText); if (categoryText) categories.add(categoryText);
} }
}); });
templateCategorySelect.innerHTML = ''; templateCategorySelect.innerHTML = '';
const sortedCategories = Array.from(categories).sort(); const sortedCategories = Array.from(categories).sort();
sortedCategories.forEach(cat => { sortedCategories.forEach(cat => {
@ -1226,15 +1253,15 @@ document.addEventListener('DOMContentLoaded', () => {
option.textContent = cat; option.textContent = cat;
templateCategorySelect.appendChild(option); templateCategorySelect.appendChild(option);
}); });
const newOption = document.createElement('option'); const newOption = document.createElement('option');
newOption.value = 'new'; newOption.value = 'new';
newOption.textContent = '+ New Category'; newOption.textContent = '+ New Category';
templateCategorySelect.appendChild(newOption); templateCategorySelect.appendChild(newOption);
// Set to template's category // Set to template's category
const templateCategory = typeof template.category === 'string' const templateCategory = typeof template.category === 'string'
? template.category ? template.category
: (template.category.vi || template.category.en || ''); : (template.category.vi || template.category.en || '');
templateCategorySelect.value = templateCategory || 'User'; templateCategorySelect.value = templateCategory || 'User';
} }
@ -1263,16 +1290,16 @@ document.addEventListener('DOMContentLoaded', () => {
// Update button text // Update button text
saveTemplateBtn.innerHTML = '<span>Update Template</span><div class="btn-shine"></div>'; saveTemplateBtn.innerHTML = '<span>Update Template</span><div class="btn-shine"></div>';
createTemplateModal.classList.remove('hidden'); createTemplateModal.classList.remove('hidden');
}; };
// Global function for opening create modal with empty values (called from templateGallery.js) // Global function for opening create modal with empty values (called from templateGallery.js)
window.openCreateTemplateModal = async function() { window.openCreateTemplateModal = async function () {
editingTemplate = null; editingTemplate = null;
editingTemplateSource = 'user'; editingTemplateSource = 'user';
editingBuiltinIndex = null; editingBuiltinIndex = null;
setTemplateTags([]); setTemplateTags([]);
if (templateTagInput) { if (templateTagInput) {
templateTagInput.value = ''; templateTagInput.value = '';
@ -1290,18 +1317,18 @@ document.addEventListener('DOMContentLoaded', () => {
try { try {
const response = await fetch('/prompts'); const response = await fetch('/prompts');
const data = await response.json(); const data = await response.json();
if (data.prompts) { if (data.prompts) {
const categories = new Set(); const categories = new Set();
data.prompts.forEach(t => { data.prompts.forEach(t => {
if (t.category) { if (t.category) {
const categoryText = typeof t.category === 'string' const categoryText = typeof t.category === 'string'
? t.category ? t.category
: (t.category.vi || t.category.en || ''); : (t.category.vi || t.category.en || '');
if (categoryText) categories.add(categoryText); if (categoryText) categories.add(categoryText);
} }
}); });
templateCategorySelect.innerHTML = ''; templateCategorySelect.innerHTML = '';
const sortedCategories = Array.from(categories).sort(); const sortedCategories = Array.from(categories).sort();
sortedCategories.forEach(cat => { sortedCategories.forEach(cat => {
@ -1310,12 +1337,12 @@ document.addEventListener('DOMContentLoaded', () => {
option.textContent = cat; option.textContent = cat;
templateCategorySelect.appendChild(option); templateCategorySelect.appendChild(option);
}); });
const newOption = document.createElement('option'); const newOption = document.createElement('option');
newOption.value = 'new'; newOption.value = 'new';
newOption.textContent = '+ New Category'; newOption.textContent = '+ New Category';
templateCategorySelect.appendChild(newOption); templateCategorySelect.appendChild(newOption);
if (sortedCategories.includes('User')) { if (sortedCategories.includes('User')) {
templateCategorySelect.value = 'User'; templateCategorySelect.value = 'User';
} else if (sortedCategories.length > 0) { } else if (sortedCategories.length > 0) {
@ -1335,7 +1362,7 @@ document.addEventListener('DOMContentLoaded', () => {
// Update button text // Update button text
saveTemplateBtn.innerHTML = '<span>Save Template</span><div class="btn-shine"></div>'; saveTemplateBtn.innerHTML = '<span>Save Template</span><div class="btn-shine"></div>';
createTemplateModal.classList.remove('hidden'); createTemplateModal.classList.remove('hidden');
}; };
@ -1345,7 +1372,7 @@ document.addEventListener('DOMContentLoaded', () => {
editingTemplate = null; editingTemplate = null;
editingTemplateSource = 'user'; editingTemplateSource = 'user';
editingBuiltinIndex = null; editingBuiltinIndex = null;
// Pre-fill data // Pre-fill data
templateTitleInput.value = ''; templateTitleInput.value = '';
templatePromptInput.value = promptInput.value; templatePromptInput.value = promptInput.value;
@ -1358,25 +1385,25 @@ document.addEventListener('DOMContentLoaded', () => {
try { try {
const response = await fetch('/prompts'); const response = await fetch('/prompts');
const data = await response.json(); const data = await response.json();
if (data.prompts) { if (data.prompts) {
// Extract unique categories // Extract unique categories
const categories = new Set(); const categories = new Set();
data.prompts.forEach(template => { data.prompts.forEach(template => {
if (template.category) { if (template.category) {
// Handle both string and object categories // Handle both string and object categories
const categoryText = typeof template.category === 'string' const categoryText = typeof template.category === 'string'
? template.category ? template.category
: (template.category.vi || template.category.en || ''); : (template.category.vi || template.category.en || '');
if (categoryText) { if (categoryText) {
categories.add(categoryText); categories.add(categoryText);
} }
} }
}); });
// Clear existing options except "new" // Clear existing options except "new"
templateCategorySelect.innerHTML = ''; templateCategorySelect.innerHTML = '';
// Add sorted categories // Add sorted categories
const sortedCategories = Array.from(categories).sort(); const sortedCategories = Array.from(categories).sort();
sortedCategories.forEach(cat => { sortedCategories.forEach(cat => {
@ -1385,13 +1412,13 @@ document.addEventListener('DOMContentLoaded', () => {
option.textContent = cat; option.textContent = cat;
templateCategorySelect.appendChild(option); templateCategorySelect.appendChild(option);
}); });
// Add "new category" option at the end // Add "new category" option at the end
const newOption = document.createElement('option'); const newOption = document.createElement('option');
newOption.value = 'new'; newOption.value = 'new';
newOption.textContent = '+ New Category'; newOption.textContent = '+ New Category';
templateCategorySelect.appendChild(newOption); templateCategorySelect.appendChild(newOption);
// Set default to first category or "User" if it exists // Set default to first category or "User" if it exists
if (sortedCategories.includes('User')) { if (sortedCategories.includes('User')) {
templateCategorySelect.value = 'User'; templateCategorySelect.value = 'User';
@ -1465,7 +1492,7 @@ document.addEventListener('DOMContentLoaded', () => {
templatePreviewDropzone.addEventListener('click', (e) => { templatePreviewDropzone.addEventListener('click', (e) => {
// Don't toggle if clicking on the input itself // Don't toggle if clicking on the input itself
if (e.target === templatePreviewUrlInput) return; if (e.target === templatePreviewUrlInput) return;
if (!isUrlInputMode) { if (!isUrlInputMode) {
// Switch to URL input mode // Switch to URL input mode
isUrlInputMode = true; isUrlInputMode = true;
@ -1520,7 +1547,7 @@ document.addEventListener('DOMContentLoaded', () => {
} }
}); });
} }
templatePreviewDropzone.addEventListener('dragover', (e) => { templatePreviewDropzone.addEventListener('dragover', (e) => {
e.preventDefault(); e.preventDefault();
templatePreviewDropzone.classList.add('drag-over'); templatePreviewDropzone.classList.add('drag-over');
@ -1534,7 +1561,7 @@ document.addEventListener('DOMContentLoaded', () => {
templatePreviewDropzone.addEventListener('drop', (e) => { templatePreviewDropzone.addEventListener('drop', (e) => {
e.preventDefault(); e.preventDefault();
templatePreviewDropzone.classList.remove('drag-over'); templatePreviewDropzone.classList.remove('drag-over');
const files = e.dataTransfer.files; const files = e.dataTransfer.files;
if (files.length > 0) { if (files.length > 0) {
const file = files[0]; const file = files[0];
@ -1559,7 +1586,7 @@ document.addEventListener('DOMContentLoaded', () => {
const note = templateNoteInput.value.trim(); const note = templateNoteInput.value.trim();
const mode = templateModeSelect.value; const mode = templateModeSelect.value;
let category = templateCategorySelect.value; let category = templateCategorySelect.value;
if (category === 'new') { if (category === 'new') {
category = templateCategoryInput.value.trim(); category = templateCategoryInput.value.trim();
} }
@ -1619,10 +1646,10 @@ document.addEventListener('DOMContentLoaded', () => {
// Success // Success
createTemplateModal.classList.add('hidden'); createTemplateModal.classList.add('hidden');
// Reload template gallery // Reload template gallery
await templateGallery.load(); await templateGallery.load();
// Reset editing state // Reset editing state
editingTemplate = null; editingTemplate = null;
editingTemplateSource = null; editingTemplateSource = null;
@ -1666,7 +1693,7 @@ document.addEventListener('DOMContentLoaded', () => {
loadGallery(); loadGallery();
loadTemplateGallery(); loadTemplateGallery();
initializeSidebarResizer(sidebar, resizeHandle); initializeSidebarResizer(sidebar, resizeHandle);
// Restore last image if available // Restore last image if available
try { try {
const lastImage = localStorage.getItem('gemini-app-last-image'); const lastImage = localStorage.getItem('gemini-app-last-image');
@ -1676,13 +1703,13 @@ document.addEventListener('DOMContentLoaded', () => {
} catch (e) { } catch (e) {
console.warn('Failed to restore last image', e); console.warn('Failed to restore last image', e);
} }
// Setup canvas language toggle // Setup canvas language toggle
const canvasLangInput = document.getElementById('canvas-lang-input'); const canvasLangInput = document.getElementById('canvas-lang-input');
if (canvasLangInput) { if (canvasLangInput) {
// Set initial state // Set initial state
canvasLangInput.checked = i18n.currentLang === 'en'; canvasLangInput.checked = i18n.currentLang === 'en';
canvasLangInput.addEventListener('change', (e) => { canvasLangInput.addEventListener('change', (e) => {
i18n.setLanguage(e.target.checked ? 'en' : 'vi'); i18n.setLanguage(e.target.checked ? 'en' : 'vi');
// Update visual state // Update visual state
@ -1753,7 +1780,7 @@ document.addEventListener('DOMContentLoaded', () => {
if (!btn.classList.contains('history-favorites-btn')) { if (!btn.classList.contains('history-favorites-btn')) {
btn.addEventListener('click', () => { btn.addEventListener('click', () => {
const filterType = btn.dataset.filter; const filterType = btn.dataset.filter;
// Remove active from all date filter buttons (not favorites) // Remove active from all date filter buttons (not favorites)
historyFilterBtns.forEach(b => { historyFilterBtns.forEach(b => {
if (!b.classList.contains('history-favorites-btn')) { if (!b.classList.contains('history-favorites-btn')) {
@ -1834,7 +1861,7 @@ document.addEventListener('DOMContentLoaded', () => {
hasGeneratedImage = true; // Mark that we have an image hasGeneratedImage = true; // Mark that we have an image
setViewState('result'); setViewState('result');
// Persist image URL // Persist image URL
try { try {
localStorage.setItem('gemini-app-last-image', imageUrl); localStorage.setItem('gemini-app-last-image', imageUrl);
@ -1864,7 +1891,7 @@ document.addEventListener('DOMContentLoaded', () => {
promptInput.value = metadata.prompt; promptInput.value = metadata.prompt;
refreshPromptHighlight(); refreshPromptHighlight();
} }
// If metadata doesn't have 'note' field, set to empty string instead of keeping current value // If metadata doesn't have 'note' field, set to empty string instead of keeping current value
if (metadata.hasOwnProperty('note')) { if (metadata.hasOwnProperty('note')) {
promptNoteInput.value = metadata.note || ''; promptNoteInput.value = metadata.note || '';
@ -1872,14 +1899,14 @@ document.addEventListener('DOMContentLoaded', () => {
promptNoteInput.value = ''; promptNoteInput.value = '';
} }
refreshNoteHighlight(); refreshNoteHighlight();
if (metadata.aspect_ratio) aspectRatioInput.value = metadata.aspect_ratio; if (metadata.aspect_ratio) aspectRatioInput.value = metadata.aspect_ratio;
if (metadata.resolution) resolutionInput.value = metadata.resolution; if (metadata.resolution) resolutionInput.value = metadata.resolution;
if (metadata.reference_images && Array.isArray(metadata.reference_images)) { if (metadata.reference_images && Array.isArray(metadata.reference_images)) {
slotManager.setReferenceImages(metadata.reference_images); slotManager.setReferenceImages(metadata.reference_images);
} }
persistSettings(); persistSettings();
} }
@ -1968,9 +1995,9 @@ document.addEventListener('DOMContentLoaded', () => {
const targetTag = event.target?.tagName; const targetTag = event.target?.tagName;
if (targetTag && ['INPUT', 'TEXTAREA', 'SELECT'].includes(targetTag)) return; if (targetTag && ['INPUT', 'TEXTAREA', 'SELECT'].includes(targetTag)) return;
if (event.target?.isContentEditable) return; if (event.target?.isContentEditable) return;
event.preventDefault(); event.preventDefault();
// Toggle template gallery // Toggle template gallery
if (templateGalleryState.classList.contains('hidden')) { if (templateGalleryState.classList.contains('hidden')) {
setViewState('template-gallery'); setViewState('template-gallery');
@ -2140,9 +2167,9 @@ document.addEventListener('DOMContentLoaded', () => {
headers: { 'Content-Type': 'application/json' }, headers: { 'Content-Type': 'application/json' },
body: JSON.stringify({ url }) body: JSON.stringify({ url })
}); });
const data = await response.json(); const data = await response.json();
if (!response.ok) { if (!response.ok) {
throw new Error(data.error || 'Failed to download image'); throw new Error(data.error || 'Failed to download image');
} }
@ -2155,7 +2182,7 @@ document.addEventListener('DOMContentLoaded', () => {
alert('Không còn slot trống cho ảnh tham chiếu.'); alert('Không còn slot trống cho ảnh tham chiếu.');
} }
} else { } else {
throw new Error('No image path returned'); throw new Error('No image path returned');
} }
} catch (error) { } catch (error) {

View file

@ -46,18 +46,16 @@
<div class="field-action-buttons" data-target="prompt" aria-label="Prompt actions"> <div class="field-action-buttons" data-target="prompt" aria-label="Prompt actions">
<button type="button" class="field-action-btn" data-action="copy" title="Copy prompt" <button type="button" class="field-action-btn" data-action="copy" title="Copy prompt"
aria-label="Copy prompt"> aria-label="Copy prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" <svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
stroke-linejoin="round">
<rect x="9" y="9" width="13" height="13" rx="2.5" /> <rect x="9" y="9" width="13" height="13" rx="2.5" />
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" /> <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
</svg> </svg>
</button> </button>
<button type="button" class="field-action-btn" data-action="paste" title="Paste" <button type="button" class="field-action-btn" data-action="paste" title="Paste"
aria-label="Paste vào prompt"> aria-label="Paste vào prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" <svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
stroke-linejoin="round">
<path d="M8 4h8" /> <path d="M8 4h8" />
<path d="M9 2h6a2 2 0 0 1 2 2v1H7V4a2 2 0 0 1 2-2z" /> <path d="M9 2h6a2 2 0 0 1 2 2v1H7V4a2 2 0 0 1 2-2z" />
<rect x="5" y="5" width="14" height="16" rx="2" /> <rect x="5" y="5" width="14" height="16" rx="2" />
@ -67,9 +65,8 @@
</button> </button>
<button type="button" class="field-action-btn" data-action="clear" title="Clear prompt" <button type="button" class="field-action-btn" data-action="clear" title="Clear prompt"
aria-label="Xoá prompt"> aria-label="Xoá prompt">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" <svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
stroke-linejoin="round">
<path d="M3 6h18" /> <path d="M3 6h18" />
<path d="M19 6v12a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6" /> <path d="M19 6v12a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6" />
<path d="M10 11v6" /> <path d="M10 11v6" />
@ -132,18 +129,16 @@
<div class="field-action-buttons" data-target="note" aria-label="Note actions"> <div class="field-action-buttons" data-target="note" aria-label="Note actions">
<button type="button" class="field-action-btn" data-action="copy" title="Copy note" <button type="button" class="field-action-btn" data-action="copy" title="Copy note"
aria-label="Copy note"> aria-label="Copy note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" <svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
stroke-linejoin="round">
<rect x="9" y="9" width="13" height="13" rx="2.5" /> <rect x="9" y="9" width="13" height="13" rx="2.5" />
<path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" /> <path d="M5 15H4a2 2 0 0 1-2-2V4a2 2 0 0 1 2-2h9a2 2 0 0 1 2 2v1" />
</svg> </svg>
</button> </button>
<button type="button" class="field-action-btn" data-action="paste" title="Paste" <button type="button" class="field-action-btn" data-action="paste" title="Paste"
aria-label="Paste vào note"> aria-label="Paste vào note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" <svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
stroke-linejoin="round">
<path d="M8 4h8" /> <path d="M8 4h8" />
<path d="M9 2h6a2 2 0 0 1 2 2v1H7V4a2 2 0 0 1 2-2z" /> <path d="M9 2h6a2 2 0 0 1 2 2v1H7V4a2 2 0 0 1 2-2z" />
<rect x="5" y="5" width="14" height="16" rx="2" /> <rect x="5" y="5" width="14" height="16" rx="2" />
@ -153,9 +148,8 @@
</button> </button>
<button type="button" class="field-action-btn" data-action="clear" title="Clear note" <button type="button" class="field-action-btn" data-action="clear" title="Clear note"
aria-label="Xoá note"> aria-label="Xoá note">
<svg viewBox="0 0 24 24" width="16" height="16" fill="none" <svg viewBox="0 0 24 24" width="16" height="16" fill="none" stroke="currentColor"
stroke="currentColor" stroke-width="1.8" stroke-linecap="round" stroke-width="1.8" stroke-linecap="round" stroke-linejoin="round">
stroke-linejoin="round">
<path d="M3 6h18" /> <path d="M3 6h18" />
<path d="M19 6v12a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6" /> <path d="M19 6v12a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6" />
<path d="M10 11v6" /> <path d="M10 11v6" />
@ -173,7 +167,7 @@
</div> </div>
<div id="image-input-grid" class="image-input-grid" aria-live="polite"></div> <div id="image-input-grid" class="image-input-grid" aria-live="polite"></div>
<div class="image-url-input-wrapper" style="margin-top: 0.5rem;"> <div class="image-url-input-wrapper" style="margin-top: 0.5rem;">
<input type="text" id="reference-url-input" placeholder="Nhập URL hoặc đường dẫn ảnh..." <input type="text" id="reference-url-input" placeholder="Nhập URL hoặc đường dẫn ảnh..."
style="width: 100%; padding: 0.5rem; border-radius: 4px; border: 1px solid var(--border-color); background: rgba(0,0,0,0.2); color: var(--text-primary); font-size: 0.85rem;"> style="width: 100%; padding: 0.5rem; border-radius: 4px; border: 1px solid var(--border-color); background: rgba(0,0,0,0.2); color: var(--text-primary); font-size: 0.85rem;">
</div> </div>
</div> </div>
@ -498,6 +492,15 @@
rel="noreferrer">aistudio.google.com/api-keys</a> rel="noreferrer">aistudio.google.com/api-keys</a>
</p> </p>
</div> </div>
<!-- Whisk Cookies Input -->
<div class="input-group api-settings-input-group hidden" id="whisk-cookies-group">
<label for="whisk-cookies">Whisk Cookies (dành cho ImageFX)</label>
<textarea id="whisk-cookies" rows="3" placeholder="Paste toàn bộ cookie string từ labs.google..."
style="width: 100%; padding: 0.5rem; background: rgba(0,0,0,0.2); border: 1px solid var(--border-color); color: var(--text-primary); border-radius: 4px; font-size: 0.8rem;"></textarea>
<p class="input-hint">
F12 trên labs.google > Network > Request bất kỳ > Copy Request Headers > Cookie.
</p>
</div>
<div class="input-group api-settings-input-group"> <div class="input-group api-settings-input-group">
<label for="api-model">Model</label> <label for="api-model">Model</label>
<div class="select-wrapper"> <div class="select-wrapper">
@ -505,6 +508,7 @@
style="width: 100%; padding: 0.75rem; background: rgba(255, 255, 255, 0.05); border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 0.5rem; color: var(--text-primary); font-size: 0.9rem;"> style="width: 100%; padding: 0.75rem; background: rgba(255, 255, 255, 0.05); border: 1px solid rgba(255, 255, 255, 0.1); border-radius: 0.5rem; color: var(--text-primary); font-size: 0.9rem;">
<option value="gemini-3-pro-image-preview">Gemini 3 Pro (Image Preview)</option> <option value="gemini-3-pro-image-preview">Gemini 3 Pro (Image Preview)</option>
<option value="gemini-2.5-flash-image">Gemini 2.5 Flash Image</option> <option value="gemini-2.5-flash-image">Gemini 2.5 Flash Image</option>
<option value="whisk">Whisk (ImageFX) [Experimental]</option>
</select> </select>
</div> </div>
</div> </div>
@ -562,4 +566,4 @@
<script type="module" src="{{ url_for('static', filename='script.js') }}"></script> <script type="module" src="{{ url_for('static', filename='script.js') }}"></script>
</body> </body>
</html> </html>

254
whisk_client.py Normal file
View file

@ -0,0 +1,254 @@
import requests
import json
import time
import base64
import os
import uuid
import logging
# Module-wide logging setup: INFO level, "LEVEL:logger-name:message" lines.
logging.basicConfig(
    level=logging.INFO,
    format='%(levelname)s:%(name)s:%(message)s',
)
logger = logging.getLogger("whisk_client")

# Endpoints below were obtained by reverse engineering the labs.google web
# client; they are not part of a documented public API.

# Returns the session (including the bearer access token) for a cookie jar.
AUTH_ENDPOINT = "https://labs.google/fx/api/auth/session"

# Accepts a reference image upload.
UPLOAD_ENDPOINT = "https://labs.google/fx/api/trpc/backbone.uploadImage"

# Text-to-image generation.
GENERATE_ENDPOINT = "https://aisandbox-pa.googleapis.com/v1/whisk:generateImage"

# Generation driven by an uploaded reference image ("recipe").
RECIPE_ENDPOINT = "https://aisandbox-pa.googleapis.com/v1/whisk:runImageRecipe"

# Browser-like headers attached to outgoing requests.
DEFAULT_HEADERS = {
    "Origin": "https://labs.google",
    "Content-Type": "application/json",
    "Referer": "https://labs.google/fx/tools/image-fx",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/120.0.0.0 Safari/537.36"
    ),
}
class WhiskClientError(Exception):
    """Error raised by this module when a Whisk / ImageFX operation fails."""
def parse_cookies(cookie_input):
    """Convert user-supplied cookie text into a ``{name: value}`` dict.

    Two input formats are accepted:

    * a JSON array of objects carrying ``name`` and ``value`` keys; entries
      that are not objects, or whose name/value is missing or empty, are
      skipped;
    * a raw ``Cookie`` request-header string such as ``"a=1; b=2"``. A leading
      ``Cookie:`` header name (as copied from browser dev tools) is ignored.

    Text that looks like a JSON array but fails to decode is parsed as a
    header string instead.

    Args:
        cookie_input: Cookie text; may be empty or ``None``.

    Returns:
        A dict of cookie names to values; empty when nothing could be parsed.
    """
    if not cookie_input:
        return {}

    text = cookie_input.strip()

    if text.startswith('[') and text.endswith(']'):
        try:
            entries = json.loads(text)
        except json.JSONDecodeError:
            entries = None  # Not JSON after all; fall back to header parsing.
        if entries is not None:
            cookies = {}
            for entry in entries:
                # Guard against stray scalars/nulls inside the exported array.
                if not isinstance(entry, dict):
                    continue
                name = entry.get('name')
                value = entry.get('value')
                if isinstance(name, str) and name and value:
                    cookies[name] = value
            return cookies

    # Users often paste the whole header line, including its name.
    header_name = 'cookie:'
    if text[:len(header_name)].lower() == header_name:
        text = text[len(header_name):]

    cookies = {}
    for item in text.split(';'):
        name, sep, value = item.partition('=')
        name = name.strip()
        # Require both an "=" and a non-empty name; "=x" is not a cookie.
        if sep and name:
            cookies[name] = value.strip()
    return cookies
def get_session_token(cookies):
    """Exchange labs.google session cookies for an access token.

    Sends a GET to ``AUTH_ENDPOINT`` with the default headers and the given
    cookies, and reads ``access_token`` from the JSON reply.

    Args:
        cookies: Cookie mapping passed to ``requests`` for the session call.

    Returns:
        The ``access_token`` value from the session response.

    Raises:
        WhiskClientError: On any failure (network error, non-2xx status,
            undecodable body, or a reply without ``access_token``). The
            message is prefixed with ``"Authentication failed: "`` and the
            underlying exception is attached as ``__cause__``.
    """
    logger.info("Fetching session token from labs.google...")
    try:
        response = requests.get(
            AUTH_ENDPOINT,
            headers={**DEFAULT_HEADERS},
            cookies=cookies,
            timeout=30,
        )
        response.raise_for_status()
        token = response.json().get('access_token')
        if not token:
            raise WhiskClientError("Session response missing access_token")
        return token
    except Exception as e:
        logger.error(f"Failed to fetch session token: {e}")
        # Chain the cause so the original traceback is not lost.
        raise WhiskClientError(f"Authentication failed: {str(e)}") from e
def upload_reference_image(image_path, cookies):
    """Upload a local image as a subject reference and return its media id.

    The file is read, base64-encoded into a ``data:`` URI (MIME type guessed
    from the file name, defaulting to ``image/png``) and posted to
    ``UPLOAD_ENDPOINT``.

    Args:
        image_path: Filesystem path of the image to upload.
        cookies: Cookie mapping sent with the upload request.

    Returns:
        The ``uploadMediaGenerationId`` found in the reply, or ``None`` when
        ``image_path`` is empty or does not exist.

    Raises:
        WhiskClientError: If the endpoint answers with a non-OK status or the
            id cannot be found in the reply.
        Exception: Any other failure is logged and re-raised unchanged.
    """
    if not (image_path and os.path.exists(image_path)):
        return None

    logger.info(f"Uploading reference image: {image_path}")
    try:
        with open(image_path, "rb") as img_file:
            import mimetypes

            guessed_type = mimetypes.guess_type(image_path)[0]
            mime_type = guessed_type if guessed_type else "image/png"
            encoded = base64.b64encode(img_file.read()).decode('utf-8')

        client_context = {
            "workflowId": str(uuid.uuid4()),
            "sessionId": str(int(time.time() * 1000)),
        }
        media_input = {
            "mediaCategory": "MEDIA_CATEGORY_SUBJECT",
            "rawBytes": f"data:{mime_type};base64,{encoded}",
            "caption": "",
        }
        payload = {
            "json": {
                "clientContext": client_context,
                "uploadMediaInput": media_input,
            }
        }

        reply = requests.post(
            UPLOAD_ENDPOINT,
            headers=DEFAULT_HEADERS,
            cookies=cookies,
            json=payload,
            timeout=60,
        )
        if not reply.ok:
            raise WhiskClientError(f"Image upload failed: {reply.text}")

        body = reply.json()
        try:
            media_id = body['result']['data']['json']['result']['uploadMediaGenerationId']
        except (KeyError, TypeError):
            raise WhiskClientError("Failed to retrieve uploadMediaGenerationId")

        logger.info(f"Image uploaded successfully. ID: {media_id}")
        return media_id
    except Exception as e:
        logger.error(f"Error uploading image: {e}")
        raise
def _build_recipe_payload(prompt, seed, aspect_ratio_enum, media_generation_id):
    """Build the request body for the reference-image (recipe) endpoint."""
    return {
        "clientContext": {
            "workflowId": str(uuid.uuid4()),
            "tool": "BACKBONE",
            "sessionId": str(int(time.time() * 1000)),
        },
        "seed": seed,
        "imageModelSettings": {
            "imageModel": "GEM_PIX",
            "aspectRatio": aspect_ratio_enum,
        },
        "userInstruction": prompt,
        "recipeMediaInputs": [{
            "mediaInput": {
                "mediaCategory": "MEDIA_CATEGORY_SUBJECT",
                "mediaGenerationId": media_generation_id,
            }
        }],
    }


def _build_generate_payload(prompt, seed, aspect_ratio_enum):
    """Build the request body for the text-to-image endpoint."""
    # NOTE(review): this payload shape was inferred rather than captured;
    # the "tool" and "modelNameType" values are presumed correct for
    # ImageFX text-to-image and should be confirmed against live traffic.
    return {
        "userInput": {
            "candidatesCount": 2,
            "prompts": [prompt],
            "seed": seed,
        },
        "clientContext": {
            "workflowId": str(uuid.uuid4()),
            "tool": "IMAGE_FX",
            "sessionId": str(int(time.time() * 1000)),
        },
        "modelInput": {
            "modelNameType": "IMAGEN_3_5",
            "aspectRatio": aspect_ratio_enum,
        },
    }


def _raise_for_whisk_error(response):
    """Raise a WhiskClientError describing a non-OK generation response."""
    safety_reasons = ('PUBLIC_ERROR_UNSAFE_GENERATION', 'PUBLIC_ERROR_SEXUAL')
    try:
        details = response.json().get('error', {}).get('details', [])
        blocked = any(d.get('reason') in safety_reasons for d in details)
    except (ValueError, AttributeError, TypeError):
        # Body is not JSON, or not shaped as expected: fall through to the
        # generic message, which carries the status code and raw body.
        blocked = False

    if blocked:
        raise WhiskClientError("⚠️ Google Safety Filter Triggered. Prompt bị từ chối do nội dung không an toàn.")
    raise WhiskClientError(f"Generation failed ({response.status_code}): {response.text}")


def _extract_encoded_images(json_resp):
    """Collect base64 ``encodedImage`` strings from a generation reply."""
    images = []
    if not isinstance(json_resp, dict):
        return images
    for panel in json_resp.get('imagePanels', []):
        for img in panel.get('generatedImages', []):
            if 'encodedImage' in img:
                images.append(img['encodedImage'])
    return images


def generate_image_whisk(prompt, cookie_str, **kwargs):
    """Generate one image through Whisk / ImageFX and return its raw bytes.

    The cookies are parsed and exchanged for an access token. If a reference
    image is supplied and uploads successfully, the recipe endpoint is used;
    otherwise the text-to-image endpoint is used.

    Args:
        prompt: Text prompt for the generation.
        cookie_str: Cookie text accepted by ``parse_cookies``.
        **kwargs: Optional settings:
            reference_image_path: Local path of a reference image.
            aspect_ratio: One of ``"1:1"``, ``"9:16"``, ``"16:9"``, ``"4:3"``,
                ``"3:4"``, ``"Auto"``; unknown values fall back to square.
            seed: Integer seed; defaults to the current Unix time when
                omitted or ``None``.

    Returns:
        The decoded bytes of the first image in the response.

    Raises:
        WhiskClientError: If no cookies could be parsed, authentication
            fails, the request times out or is rejected (including by the
            safety filter), or the reply contains no image.
    """
    cookies = parse_cookies(cookie_str)
    if not cookies:
        raise WhiskClientError("No valid cookies found")

    access_token = get_session_token(cookies)

    media_generation_id = None
    ref_image_path = kwargs.get('reference_image_path')
    if ref_image_path:
        try:
            media_generation_id = upload_reference_image(ref_image_path, cookies)
        except Exception as e:
            # Deliberate best effort: a failed upload degrades to
            # text-to-image instead of aborting the whole request.
            logger.warning(f"Skipping reference image due to upload error: {e}")

    # NOTE(review): "3:4" is sent as the same enum as "9:16" — confirm
    # whether the backend offers a dedicated 3:4 value.
    aspect_ratio_map = {
        "1:1": "IMAGE_ASPECT_RATIO_SQUARE",
        "9:16": "IMAGE_ASPECT_RATIO_PORTRAIT",
        "16:9": "IMAGE_ASPECT_RATIO_LANDSCAPE",
        "4:3": "IMAGE_ASPECT_RATIO_LANDSCAPE_FOUR_THREE",
        "3:4": "IMAGE_ASPECT_RATIO_PORTRAIT",
        "Auto": "IMAGE_ASPECT_RATIO_SQUARE",
    }
    aspect_ratio_enum = aspect_ratio_map.get(
        kwargs.get('aspect_ratio', 'Auto'),
        "IMAGE_ASPECT_RATIO_SQUARE",
    )

    # Treat an explicit seed=None like an omitted seed instead of sending null.
    seed = kwargs.get('seed')
    if seed is None:
        seed = int(time.time())

    headers = {
        **DEFAULT_HEADERS,
        "Authorization": f"Bearer {access_token}",
    }

    if media_generation_id:
        target_endpoint = RECIPE_ENDPOINT
        payload = _build_recipe_payload(prompt, seed, aspect_ratio_enum, media_generation_id)
    else:
        target_endpoint = GENERATE_ENDPOINT
        payload = _build_generate_payload(prompt, seed, aspect_ratio_enum)

    logger.info(f"Generating image. Endpoint: {target_endpoint}, Prompt: {prompt}")

    try:
        response = requests.post(
            target_endpoint,
            headers=headers,
            json=payload,
            timeout=120,
        )
        if not response.ok:
            _raise_for_whisk_error(response)

        json_resp = response.json()
        images = _extract_encoded_images(json_resp)
        if not images:
            top_level = list(json_resp) if isinstance(json_resp, dict) else type(json_resp).__name__
            logger.error(f"Unexpected response structure: {top_level}")
            raise WhiskClientError("No images found in response")

        return base64.b64decode(images[0])
    except WhiskClientError as e:
        # Already the right type: log and propagate without re-wrapping.
        logger.error(f"Whisk Generation Error: {e}")
        raise
    except requests.exceptions.Timeout as e:
        raise WhiskClientError("Timeout connecting to Google Whisk.") from e
    except Exception as e:
        logger.error(f"Whisk Generation Error: {e}")
        raise WhiskClientError(str(e)) from e