feat: Add client-side AI subtitle generation with Whisper

- Add webai.js with Transformers.js Whisper integration
- Add Generate Subs button to watch page
- Fix 403 video playback with IPv4 adapter
- Update streaming proxy headers
This commit is contained in:
KV-Tube Deployer 2026-01-12 16:18:43 +07:00
parent 92acf81362
commit a93a875ce2
11 changed files with 399 additions and 216 deletions

View file

@ -15,6 +15,8 @@ import time
import random
import concurrent.futures
import yt_dlp
import tempfile
import threading
logger = logging.getLogger(__name__)
@ -28,6 +30,10 @@ DB_NAME = os.path.join(DATA_DIR, "kvtube.db")
API_CACHE = {}
CACHE_TIMEOUT = 600 # 10 minutes
# AI Models
WHISPER_MODEL = None
WHISPER_LOCK = threading.Lock()
def get_db_connection():
"""Get database connection with row factory."""
@ -436,10 +442,10 @@ def get_stream_info():
"format": "best[ext=mp4]/best",
"noplaylist": True,
"quiet": True,
"no_warnings": True,
"skip_download": True,
"force_ipv4": True,
"socket_timeout": 10,
"force_ipv4": True,
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
@ -453,6 +459,10 @@ def get_stream_info():
if not stream_url:
return jsonify({"error": "No stream URL found"}), 500
# Log the headers yt-dlp expects us to use
expected_headers = info.get("http_headers", {})
logger.info(f"YT-DLP Expected Headers: {expected_headers}")
# Extract subtitles
subtitle_url = None
subs = info.get("subtitles") or {}
@ -466,6 +476,33 @@ def get_stream_info():
subtitle_url = auto_subs[lang][0]["url"]
break
# Extract best audio-only URL for AI transcription
audio_url = None
try:
formats = info.get("formats", [])
# Debug: Log format details to understand why we aren't matching
# logger.info(f"Scanning {len(formats)} formats for audio-only...")
audio_formats = []
for f in formats:
vcodec = f.get("vcodec")
acodec = f.get("acodec")
# Check for audio-only: vcodec should be none/None, acodec should be something
if (vcodec == "none" or vcodec is None) and (acodec != "none" and acodec is not None):
audio_formats.append(f)
if audio_formats:
# Prefer m4a (itag 140) for best compatibility, or webm (251)
# Sort by filesize (smaller is faster for whisper) or bitrate?
# For now simply pick the first one that looks like m4a, else first available
chosen_audio = next((f for f in audio_formats if f.get("ext") == "m4a"), audio_formats[0])
audio_url = chosen_audio.get("url")
logger.info(f"Found audio-only URL: {audio_url[:30]}...")
else:
logger.warning("No audio-only formats found in valid stream info.")
except Exception as e:
logger.error(f"Failed to extract audio url: {e}")
response_data = {
"original_url": stream_url,
"title": info.get("title", "Unknown"),
@ -477,8 +514,16 @@ def get_stream_info():
"view_count": info.get("view_count", 0),
"related": [],
"subtitle_url": subtitle_url,
"audio_url": None # Placeholder, filled below
}
from urllib.parse import quote
proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
response_data["stream_url"] = proxied_url
if audio_url:
response_data["audio_url"] = f"/video_proxy?url={quote(audio_url, safe='')}"
# Cache it
expiry = current_time + 3600
conn.execute(
@ -488,10 +533,6 @@ def get_stream_info():
conn.commit()
conn.close()
from urllib.parse import quote
proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
response_data["stream_url"] = proxied_url
response = jsonify(response_data)
response.headers["X-Cache"] = "MISS"
return response
@ -513,7 +554,12 @@ def search():
if url_match:
video_id = url_match.group(1)
# Fetch single video info
ydl_opts = {"quiet": True, "no_warnings": True, "noplaylist": True}
ydl_opts = {
"quiet": True,
"no_warnings": True,
"noplaylist": True,
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
}
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
info = ydl.extract_info(f"https://www.youtube.com/watch?v={video_id}", download=False)
return jsonify([{
@ -539,17 +585,20 @@ def search():
def get_channel_videos_simple():
"""Get videos from a channel."""
channel_id = request.args.get("id")
filter_type = request.args.get("filter_type", "video")
if not channel_id:
return jsonify({"error": "No channel ID provided"}), 400
try:
# Construct URL
suffix = "shorts" if filter_type == "shorts" else "videos"
if channel_id.startswith("UC"):
url = f"https://www.youtube.com/channel/{channel_id}/videos"
url = f"https://www.youtube.com/channel/{channel_id}/{suffix}"
elif channel_id.startswith("@"):
url = f"https://www.youtube.com/{channel_id}/videos"
url = f"https://www.youtube.com/{channel_id}/{suffix}"
else:
url = f"https://www.youtube.com/channel/{channel_id}/videos"
url = f"https://www.youtube.com/channel/{channel_id}/{suffix}"
cmd = [
sys.executable, "-m", "yt_dlp",
@ -743,6 +792,85 @@ def get_transcript():
return jsonify({"success": False, "error": f"Could not load transcript: {str(e)}"})
@api_bp.route("/generate_subtitles", methods=["POST"])
def generate_subtitles():
    """Generate WebVTT subtitles for a video using server-side Whisper.

    Expects a JSON body of ``{"video_id": "<youtube id>"}``.  Lazily loads
    the Whisper "tiny" model on first use, resolves a direct audio URL via
    yt-dlp, downloads the audio to a temp file, and transcribes it.

    Returns ``{"success": True, "vtt": "<WEBVTT text>"}`` on success,
    or ``{"error": ...}`` with a 400/500 status on failure.
    """
    global WHISPER_MODEL

    # silent=True: a malformed or missing JSON body yields None instead of
    # raising a 400 from inside Flask before we can produce a clean error.
    data = request.get_json(silent=True) or {}
    video_id = data.get("video_id")
    if not video_id:
        return jsonify({"error": "No video ID provided"}), 400

    temp_path = None
    try:
        # Lazy-load the model exactly once; the lock stops two concurrent
        # requests from both loading it.
        with WHISPER_LOCK:
            if WHISPER_MODEL is None:
                import whisper
                logger.info("Loading Whisper model (tiny)...")
                WHISPER_MODEL = whisper.load_model("tiny")

        # Resolve a direct audio-only stream URL (no download via yt-dlp).
        # Options mirror the other extraction routes in this module.
        url = f"https://www.youtube.com/watch?v={video_id}"
        ydl_opts = {
            "format": "bestaudio[ext=m4a]/bestaudio/best",
            "noplaylist": True,
            "quiet": True,
            "no_warnings": True,
            "force_ipv4": True,
            "socket_timeout": 10,
        }
        audio_url = None
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
            audio_url = info.get("url")

        if not audio_url:
            return jsonify({"error": "Could not extract audio URL"}), 500

        # Download audio to a temp file (removed in the finally block).
        import requests
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        }
        logger.info(f"Downloading audio for transcription: {audio_url[:30]}...")
        # timeout guards against a stalled upstream connection pinning a
        # worker forever (stream=True downloads can otherwise block).
        with requests.get(audio_url, headers=headers, stream=True, timeout=60) as r:
            r.raise_for_status()
            with tempfile.NamedTemporaryFile(suffix=".m4a", delete=False) as f:
                temp_path = f.name
                for chunk in r.iter_content(chunk_size=8192):
                    f.write(chunk)

        # Transcribe under the lock: the shared Whisper model is not safe
        # for concurrent use from multiple request threads.
        logger.info("Transcribing...")
        with WHISPER_LOCK:
            result = WHISPER_MODEL.transcribe(temp_path)

        def _format_timestamp(total_seconds):
            """Format seconds as HH:MM:SS.mmm for a WebVTT cue time."""
            hours = int(total_seconds // 3600)
            minutes = int((total_seconds % 3600) // 60)
            secs = total_seconds % 60
            return f"{hours:02d}:{minutes:02d}:{secs:06.3f}"

        # Build the VTT document with a join instead of quadratic +=.
        cues = ["WEBVTT", ""]
        for segment in result["segments"]:
            start = _format_timestamp(segment["start"])
            end = _format_timestamp(segment["end"])
            cues.append(f"{start} --> {end}")
            cues.append(segment["text"].strip())
            cues.append("")
        vtt_output = "\n".join(cues) + "\n"

        return jsonify({"success": True, "vtt": vtt_output})
    except Exception as e:
        logger.error(f"Subtitle generation failed: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        # Always clean up the temporary audio file.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
@api_bp.route("/update_ytdlp", methods=["POST"])
def update_ytdlp():
"""Update yt-dlp to latest version."""

View file

@ -6,6 +6,14 @@ from flask import Blueprint, request, Response, stream_with_context, send_from_d
import requests
import os
import logging
import socket
import urllib3.util.connection as urllib3_cn
# Force IPv4 for requests (which uses urllib3)
def allowed_gai_family():
return socket.AF_INET
urllib3_cn.allowed_gai_family = allowed_gai_family
logger = logging.getLogger(__name__)
@ -30,7 +38,8 @@ def video_proxy():
# Forward headers to mimic browser and support seeking
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
# "Referer": "https://www.youtube.com/", # Removed to test if it fixes 403
}
# Support Range requests (scrubbing)
@ -39,7 +48,13 @@ def video_proxy():
headers["Range"] = range_header
try:
logger.info(f"Proxying URL: {url}")
# logger.info(f"Proxy Request Headers: {headers}")
req = requests.get(url, headers=headers, stream=True, timeout=30)
logger.info(f"Upstream Status: {req.status_code}")
if req.status_code != 200:
logger.error(f"Upstream Error Body: {req.text[:500]}")
# Handle HLS (M3U8) Rewriting - CRITICAL for 1080p+ and proper sync
content_type = req.headers.get("content-type", "").lower()
@ -50,7 +65,7 @@ def video_proxy():
or "application/vnd.apple.mpegurl" in content_type
)
if is_manifest:
if is_manifest and req.status_code == 200:
content = req.text
base_url = url.rsplit("/", 1)[0]
new_lines = []

43
deploy.py Normal file → Executable file
View file

@ -1,7 +1,6 @@
#!/usr/bin/env python3
"""Check git status and redeploy."""
"""Build and push multi-platform Docker image."""
import subprocess
import os
def run_cmd(cmd):
print(f"\n>>> {cmd}")
@ -13,37 +12,17 @@ def run_cmd(cmd):
return result.returncode == 0
print("="*50)
print("Checking git status...")
print("Building Multi-Platform Docker Image")
print("(linux/amd64 + linux/arm64)")
print("="*50)
run_cmd("git status")
# Create buildx builder if it doesn't exist
run_cmd("docker buildx create --name multiplatform --use 2>/dev/null || docker buildx use multiplatform")
# Build and push multi-platform image
print("\nBuilding and pushing...")
run_cmd("docker buildx build --platform linux/amd64,linux/arm64 -t vndangkhoa/kv-tube:latest --push .")
print("\n" + "="*50)
print("Staging all changes...")
print("="*50)
run_cmd("git add .")
print("\n" + "="*50)
print("Committing...")
print("="*50)
run_cmd('git commit -m "Latest local changes"')
print("\n" + "="*50)
print("Pushing to GitHub...")
print("="*50)
run_cmd("git push origin main")
print("\n" + "="*50)
print("Pushing to Forgejo...")
print("="*50)
run_cmd("git push private main")
print("\n" + "="*50)
print("Building Docker image...")
print("="*50)
if run_cmd("docker build -t vndangkhoa/kv-tube:latest ."):
print("\nPushing Docker image...")
run_cmd("docker push vndangkhoa/kv-tube:latest")
print("\n" + "="*50)
print("DEPLOYMENT COMPLETE!")
print("DONE! Image now supports both amd64 and arm64")
print("="*50)

2
kv_server.py Normal file → Executable file
View file

@ -8,7 +8,7 @@ try:
except NameError:
base_dir = os.getcwd()
venv_dirs = ['env', '.venv']
venv_dirs = ['.venv', 'env']
activated = False
for venv_name in venv_dirs:

View file

@ -4,3 +4,5 @@ yt-dlp>=2024.1.0
werkzeug
gunicorn
python-dotenv
openai-whisper
numpy

28
start.sh Normal file → Executable file
View file

@ -3,11 +3,26 @@ cd "$(dirname "$0")"
echo "=== Diagnostic Start Script ==="
# Activate env
if [ -d "env" ]; then
# Activate env
if [ -d ".venv_clean" ]; then
echo "Activating .venv_clean..."
export PYTHONPATH="$(pwd)/.venv_clean/lib/python3.14/site-packages"
# Use system python with PYTHONPATH if bindir is missing/broken
PYTHON_EXEC="/Library/Frameworks/Python.framework/Versions/3.14/bin/python3"
export FLASK_APP=wsgi.py
export FLASK_RUN_PORT=5002
echo "--- Starting with System Python + PYTHONPATH ---"
$PYTHON_EXEC -m flask run --host=0.0.0.0 --port=5002
exit 0
elif [ -d ".venv" ]; then
echo "Activating .venv..."
source .venv/bin/activate
elif [ -d "env" ]; then
echo "Activating env..."
source env/bin/activate
else
echo "No 'env' directory found!"
echo "No '.venv' or 'env' directory found!"
exit 1
fi
@ -23,13 +38,14 @@ else
fi
echo "--- Attempting to start with Gunicorn ---"
if [ -f "env/bin/gunicorn" ]; then
./env/bin/gunicorn -b 0.0.0.0:5002 wsgi:app
echo "--- Attempting to start with Gunicorn ---"
if command -v gunicorn &> /dev/null; then
gunicorn -b 0.0.0.0:5002 wsgi:app
else
echo "Gunicorn not found."
echo "Gunicorn not found in path."
fi
echo "--- Attempting to start with Flask explicitly ---"
export FLASK_APP=wsgi.py
export FLASK_RUN_PORT=5002
./env/bin/flask run --host=0.0.0.0
python -m flask run --host=0.0.0.0

View file

@ -1,144 +1,119 @@
/**
* KV-Tube WebAI Service
* Local AI chatbot for transcript Q&A using WebLLM
*
* Runs entirely in-browser, no server required after model download
*/
// WebLLM is pulled from this CDN on demand (lazy loaded) so the model
// download cost is only paid when the user actually opens the AI chat.
var WEBLLM_CDN = 'https://esm.run/@mlc-ai/web-llm';
// Model options - using verified WebLLM model IDs
var AI_MODELS = {
    small: { id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC', name: 'Qwen2 (0.5B)', size: '350MB' },
    medium: { id: 'Qwen2-1.5B-Instruct-q4f16_1-MLC', name: 'Qwen2 (1.5B)', size: '1GB' },
};
// Default to small model
var DEFAULT_MODEL = AI_MODELS.small;
// Define the class only once, even if this script is injected multiple
// times (e.g. by SPA-style page navigation re-running the tag).
if (typeof TranscriptAI === 'undefined') {
    window.TranscriptAI = class TranscriptAI {
        constructor() {
            this.engine = null;            // MLC engine, null until init() completes
            this.isLoading = false;        // true while the model is downloading
            this.isReady = false;          // true once the engine is usable
            this.transcript = '';          // transcript context for Q&A prompts
            this.onProgressCallback = null;
            this.onReadyCallback = null;
        }

        // Store the transcript used as system-prompt context for ask().
        setTranscript(text) {
            this.transcript = text.slice(0, 8000); // Limit context size
        }

        // Register UI callbacks for load progress and readiness.
        setCallbacks({ onProgress, onReady }) {
            this.onProgressCallback = onProgress;
            this.onReadyCallback = onReady;
        }

        // Download and initialise the WebLLM engine. Safe to call repeatedly:
        // returns immediately if already ready or currently loading.
        async init() {
            if (this.isReady || this.isLoading) return;
            this.isLoading = true;
            try {
                // Dynamic import WebLLM
                const { CreateMLCEngine } = await import(WEBLLM_CDN);
                // Initialize engine with progress callback
                this.engine = await CreateMLCEngine(DEFAULT_MODEL.id, {
                    initProgressCallback: (report) => {
                        if (this.onProgressCallback) {
                            this.onProgressCallback(report);
                        }
                        console.log('AI Load Progress:', report.text);
                    }
                });
                this.isReady = true;
                this.isLoading = false;
                if (this.onReadyCallback) {
                    this.onReadyCallback();
                }
                console.log('TranscriptAI ready with model:', DEFAULT_MODEL.name);
            } catch (err) {
                this.isLoading = false;
                console.error('Failed to load AI model:', err);
                throw err;
            }
        }

        // Ask a single question; resolves with the full answer string.
        // Throws if init() has not completed.
        async ask(question) {
            if (!this.isReady) {
                throw new Error('AI not initialized');
            }
            // With a transcript, answers are grounded in it; otherwise the
            // assistant acts as a general helper for the app.
            const systemPrompt = this.transcript
                ? `You are a helpful AI assistant analyzing a video transcript. Answer the user's question based ONLY on the transcript content below. Be concise and direct. If the answer is not in the transcript, say so.\n\nTRANSCRIPT:\n${this.transcript}`
                : `You are a helpful AI assistant for KV-Tube, a lightweight YouTube client. You can help the user with general questions, explain features of the app, or chat casually. Be concise and helpful.`;
            try {
                const response = await this.engine.chat.completions.create({
                    messages: [
                        { role: 'system', content: systemPrompt },
                        { role: 'user', content: question }
                    ],
                    max_tokens: 256,
                    temperature: 0.7,
                });
                return response.choices[0].message.content;
            } catch (err) {
                console.error('AI response error:', err);
                throw err;
            }
        }

        // Streaming variant of ask(): an async generator yielding text deltas
        // as the model produces them.
        async *askStreaming(question) {
            if (!this.isReady) {
                throw new Error('AI not initialized');
            }
            const systemPrompt = this.transcript
                ? `You are a helpful AI assistant analyzing a video transcript. Answer the user's question based ONLY on the transcript content below. Be concise and direct. If the answer is not in the transcript, say so.\n\nTRANSCRIPT:\n${this.transcript}`
                : `You are a helpful AI assistant for KV-Tube, a lightweight YouTube client. You can help the user with general questions, explain features of the app, or chat casually. Be concise and helpful.`;
            const chunks = await this.engine.chat.completions.create({
                messages: [
                    { role: 'system', content: systemPrompt },
                    { role: 'user', content: question }
                ],
                max_tokens: 256,
                temperature: 0.7,
                stream: true,
            });
            for await (const chunk of chunks) {
                const delta = chunk.choices[0]?.delta?.content;
                if (delta) {
                    yield delta;
                }
            }
        }

        // Expose the model descriptor ({id, name, size}) for UI display.
        getModelInfo() {
            return DEFAULT_MODEL;
        }

        isModelReady() {
            return this.isReady;
        }

        isModelLoading() {
            return this.isLoading;
        }
    }

    // Global instance
    window.transcriptAI = new TranscriptAI();
}
/**
* WebAI - Client-side AI features using Transformers.js
*/
// Suppress ONNX Runtime warnings
if (typeof ort !== 'undefined') {
ort.env.logLevel = 'fatal';
}
class SubtitleGenerator {
    /**
     * Browser-side subtitle generator backed by Transformers.js Whisper
     * (tiny model). Call init() once to download/load the model, then
     * generate() with a fetchable audio URL to obtain WebVTT text.
     */
    constructor() {
        this.pipeline = null;   // ASR pipeline, created lazily by init()
        this.isLoading = false; // prevents concurrent init() runs
    }

    /**
     * Download and initialise the Whisper pipeline.
     * No-op if already initialised or currently loading.
     * @param {function(string)=} progressCallback - receives status text.
     */
    async init(progressCallback) {
        if (this.pipeline || this.isLoading) return;
        this.isLoading = true;
        try {
            // Silence ONNX Runtime's verbose warnings if it is present.
            if (typeof ort !== 'undefined') {
                ort.env.logLevel = 'fatal';
            }

            progressCallback?.('Loading AI model...');
            const { pipeline, env } = await import('https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2');

            // Always fetch models from the hub, but cache them in-browser.
            env.allowLocalModels = false;
            env.useBrowserCache = true;

            progressCallback?.('Downloading Whisper model (~40MB)...');
            this.pipeline = await pipeline(
                'automatic-speech-recognition',
                'Xenova/whisper-tiny',
                {
                    progress_callback: (progress) => {
                        if (progress.status === 'downloading') {
                            // progress.total can be absent; avoid "NaN%".
                            const pct = progress.total
                                ? Math.round((progress.loaded / progress.total) * 100)
                                : 0;
                            progressCallback?.(`Downloading: ${pct}%`);
                        } else if (progress.status === 'loading') {
                            progressCallback?.('Loading model...');
                        }
                    }
                }
            );

            progressCallback?.('Model ready!');
        } catch (e) {
            console.error('Failed to load Whisper:', e);
            throw e;
        } finally {
            this.isLoading = false;
        }
    }

    /**
     * Transcribe audio from a URL and return subtitles as a WebVTT string.
     * @param {string} audioUrl - fetchable URL of the audio stream.
     * @param {function(string)=} progressCallback - receives status text.
     * @throws {Error} if init() has not been called successfully.
     */
    async generate(audioUrl, progressCallback) {
        if (!this.pipeline) {
            throw new Error('Model not initialized. Call init() first.');
        }
        progressCallback?.('Transcribing audio...');
        try {
            const result = await this.pipeline(audioUrl, {
                chunk_length_s: 30,
                stride_length_s: 5,
                return_timestamps: true,
            });
            progressCallback?.('Formatting subtitles...');
            // Convert the timestamped chunks to VTT format.
            return this.toVTT(result.chunks || []);
        } catch (e) {
            console.error('Transcription failed:', e);
            throw e;
        }
    }

    /**
     * Convert timestamped transcription chunks to a WebVTT document.
     * Chunks whose text is empty after trimming are skipped (their cue
     * numbers are still consumed, matching the chunk index).
     */
    toVTT(chunks) {
        let vtt = 'WEBVTT\n\n';
        chunks.forEach((chunk, i) => {
            const start = this.formatTime(chunk.timestamp[0]);
            const end = this.formatTime(chunk.timestamp[1]);
            const text = chunk.text.trim();
            if (text) {
                vtt += `${i + 1}\n${start} --> ${end}\n${text}\n\n`;
            }
        });
        return vtt;
    }

    /**
     * Format seconds as HH:MM:SS.mmm for a VTT cue time.
     * null/undefined timestamps (open-ended chunks) are treated as 0.
     */
    formatTime(seconds) {
        if (seconds === null || seconds === undefined) seconds = 0;
        const h = Math.floor(seconds / 3600);
        const m = Math.floor((seconds % 3600) / 60);
        const s = (seconds % 60).toFixed(3);
        return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.padStart(6, '0')}`;
    }
}
// Export a shared singleton so pages can use window.subtitleGenerator
// directly without constructing (and re-downloading the model for) a
// new instance per page.
window.subtitleGenerator = new SubtitleGenerator();

View file

@ -32,26 +32,28 @@
<div class="yt-section">
<div class="yt-section-header">
<div class="yt-tabs">
<a href="#" onclick="changeChannelTab('video', this); return false;" class="active">
<a href="javascript:void(0)" onclick="changeChannelTab('video', this); return false;"
class="active no-spa">
<i class="fas fa-video"></i>
<span>Videos</span>
</a>
<a href="#" onclick="changeChannelTab('shorts', this); return false;">
<a href="javascript:void(0)" onclick="changeChannelTab('shorts', this); return false;" class="no-spa">
<i class="fas fa-bolt"></i>
<span>Shorts</span>
</a>
</div>
<div class="yt-sort-options">
<a href="#" onclick="changeChannelSort('latest', this); return false;" class="active">
<a href="javascript:void(0)" onclick="changeChannelSort('latest', this); return false;"
class="active no-spa">
<i class="fas fa-clock"></i>
<span>Latest</span>
</a>
<a href="#" onclick="changeChannelSort('popular', this); return false;">
<a href="javascript:void(0)" onclick="changeChannelSort('popular', this); return false;" class="no-spa">
<i class="fas fa-fire"></i>
<span>Popular</span>
</a>
<a href="#" onclick="changeChannelSort('oldest', this); return false;">
<a href="javascript:void(0)" onclick="changeChannelSort('oldest', this); return false;" class="no-spa">
<i class="fas fa-history"></i>
<span>Oldest</span>
</a>
@ -343,8 +345,8 @@
}
try {
console.log(`Fetching: /api/channel/videos?id=${channelId}&page=${currentChannelPage}`);
const response = await fetch(`/api/channel/videos?id=${channelId}&page=${currentChannelPage}&sort=${currentChannelSort}&filter_type=${currentFilterType}`);
console.log(`Fetching: /api/channel?id=${channelId}&page=${currentChannelPage}`);
const response = await fetch(`/api/channel?id=${channelId}&page=${currentChannelPage}&sort=${currentChannelSort}&filter_type=${currentFilterType}`);
const videos = await response.json();
console.log("Channel Videos Response:", videos);

View file

@ -554,6 +554,7 @@
<!-- WebAI Script -->
<script src="{{ url_for('static', filename='js/webai.js') }}"></script>
<script>
// AI Chat Toggle and Message Handler
var aiChatVisible = false;

View file

@ -352,22 +352,35 @@
}
}
// Run on initial page load
document.addEventListener('DOMContentLoaded', () => {
// Run on initial page load and SPA navigation
if (document.readyState === 'loading') {
document.addEventListener('DOMContentLoaded', () => {
loadLibraryContent();
initTabs();
});
} else {
// Document already loaded (SPA navigation)
loadLibraryContent();
initTabs();
}
function initTabs() {
// Intercept tab clicks for client-side navigation
document.querySelectorAll('.library-tab').forEach(tab => {
tab.addEventListener('click', (e) => {
// Remove old listeners to be safe (optional but good practice in SPA)
const newTab = tab.cloneNode(true);
tab.parentNode.replaceChild(newTab, tab);
newTab.addEventListener('click', (e) => {
e.preventDefault();
const newUrl = tab.getAttribute('href');
const newUrl = newTab.getAttribute('href');
// Update URL without reloading
history.pushState(null, '', newUrl);
// Immediately load the new content
loadLibraryContent();
});
});
});
}
// Handle browser back/forward buttons
window.addEventListener('popstate', () => {

View file

@ -39,13 +39,6 @@
<!-- Actions -->
<div class="yt-video-actions">
<button class="yt-action-btn" id="likeBtn">
<i class="fas fa-thumbs-up"></i>
<span id="likeCount">Like</span>
</button>
<button class="yt-action-btn" id="dislikeBtn">
<i class="fas fa-thumbs-down"></i>
</button>
<button class="yt-action-btn" id="shareBtn">
<i class="fas fa-share"></i>
Share
@ -54,7 +47,7 @@
<i class="fas fa-download"></i>
Download
</button>
<button class="yt-action-btn" id="saveBtn">
<button class="yt-action-btn" id="saveBtn" onclick="toggleSaveToLibrary()">
<i class="far fa-bookmark"></i>
Save
</button>
@ -67,6 +60,10 @@
Queue
<span id="queueBadge" class="queue-badge" style="display:none;">0</span>
</button>
<button class="yt-action-btn" id="genSubBtn" onclick="generateSubtitles()" style="display:none;">
<i class="fas fa-closed-captioning"></i>
Generate Subs
</button>
<!-- View Mode Buttons -->
<div class="view-mode-buttons">
@ -1247,7 +1244,8 @@
thumbnail: `https://i.ytimg.com/vi/${videoId}/mqdefault.jpg`,
uploader: data.uploader || 'Unknown',
channel_id: data.channel_id || data.uploader_id || '',
duration: data.duration
duration: data.duration,
audioUrl: data.audio_url
};
// Check if video is already in queue
@ -1255,6 +1253,12 @@
updateQueueCount();
updateQueueBadge();
// Enable AI Subtitles (Always show for testing if audio is available)
if (data.audio_url) {
const genBtn = document.getElementById('genSubBtn');
if (genBtn) genBtn.style.display = 'inline-flex';
}
if (data.error) {
loading.innerHTML = `<p style="color:#f00; text-align:center;">${data.error}</p>`;
if (infoSkeleton) infoSkeleton.style.display = 'none';
@ -1394,6 +1398,54 @@
return div.innerHTML;
}
/**
 * Generate AI subtitles for the current video in-browser and attach them
 * to the Artplayer instance as a blob-backed WebVTT track. Progress is
 * reported by rewriting the "Generate Subs" button label.
 */
async function generateSubtitles() {
    const btn = document.getElementById('genSubBtn');
    if (!currentVideoData.audioUrl) {
        showToast("No audio source available for AI", "error");
        return;
    }

    btn.disabled = true;
    const originalHtml = btn.innerHTML;

    try {
        // Load (or reuse) the Whisper pipeline; show progress on the button.
        await window.subtitleGenerator.init((msg) => {
            btn.innerHTML = `<i class="fas fa-spinner fa-spin"></i> ${msg}`;
        });

        // Transcribe the proxied audio stream.
        btn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Transcribing...';
        const vttContent = await window.subtitleGenerator.generate(currentVideoData.audioUrl, (msg) => {
            btn.innerHTML = `<i class="fas fa-circle-notch fa-spin"></i> ${msg}`;
        });

        console.log("Generated VTT:", vttContent);

        // Inject into Player
        if (window.player) {
            const blob = new Blob([vttContent], { type: 'text/vtt' });
            const url = URL.createObjectURL(blob);

            // Artplayer subtitle API
            window.player.subtitle.url = url;
            window.player.subtitle.show = true;
            window.player.notice.show = 'AI Subtitles Generated';

            showToast("Subtitles generated successfully!", "success");
            btn.style.display = 'none'; // Hide button after success
        } else {
            // No player to attach to: restore the button instead of leaving
            // it permanently disabled in the spinner state.
            showToast("Subtitles generated, but no player found", "error");
            btn.innerHTML = originalHtml;
            btn.disabled = false;
        }
    } catch (e) {
        console.error(e);
        showToast("Subtitle generation failed: " + e.message, "error");
        btn.innerHTML = originalHtml;
        btn.disabled = false;
    }
}
async function summarizeVideo() {
const videoId = "{{ video_id }}";
const btn = document.getElementById('summarizeBtn');