feat: Add client-side AI subtitle generation with Whisper
- Add webai.js with Transformers.js Whisper integration
- Add Generate Subs button to watch page
- Fix 403 video playback with IPv4 adapter
- Update streaming proxy headers
This commit is contained in:
parent
92acf81362
commit
a93a875ce2
11 changed files with 399 additions and 216 deletions
|
|
@ -15,6 +15,8 @@ import time
|
|||
import random
|
||||
import concurrent.futures
|
||||
import yt_dlp
|
||||
import tempfile
|
||||
import threading
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -28,6 +30,10 @@ DB_NAME = os.path.join(DATA_DIR, "kvtube.db")
|
|||
API_CACHE = {}
|
||||
CACHE_TIMEOUT = 600 # 10 minutes
|
||||
|
||||
# AI Models
|
||||
WHISPER_MODEL = None
|
||||
WHISPER_LOCK = threading.Lock()
|
||||
|
||||
|
||||
def get_db_connection():
|
||||
"""Get database connection with row factory."""
|
||||
|
|
@ -436,10 +442,10 @@ def get_stream_info():
|
|||
"format": "best[ext=mp4]/best",
|
||||
"noplaylist": True,
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"skip_download": True,
|
||||
"force_ipv4": True,
|
||||
"socket_timeout": 10,
|
||||
"force_ipv4": True,
|
||||
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||
}
|
||||
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
|
|
@ -453,6 +459,10 @@ def get_stream_info():
|
|||
if not stream_url:
|
||||
return jsonify({"error": "No stream URL found"}), 500
|
||||
|
||||
# Log the headers yt-dlp expects us to use
|
||||
expected_headers = info.get("http_headers", {})
|
||||
logger.info(f"YT-DLP Expected Headers: {expected_headers}")
|
||||
|
||||
# Extract subtitles
|
||||
subtitle_url = None
|
||||
subs = info.get("subtitles") or {}
|
||||
|
|
@ -466,6 +476,33 @@ def get_stream_info():
|
|||
subtitle_url = auto_subs[lang][0]["url"]
|
||||
break
|
||||
|
||||
# Extract best audio-only URL for AI transcription
|
||||
audio_url = None
|
||||
try:
|
||||
formats = info.get("formats", [])
|
||||
# Debug: Log format details to understand why we aren't matching
|
||||
# logger.info(f"Scanning {len(formats)} formats for audio-only...")
|
||||
|
||||
audio_formats = []
|
||||
for f in formats:
|
||||
vcodec = f.get("vcodec")
|
||||
acodec = f.get("acodec")
|
||||
# Check for audio-only: vcodec should be none/None, acodec should be something
|
||||
if (vcodec == "none" or vcodec is None) and (acodec != "none" and acodec is not None):
|
||||
audio_formats.append(f)
|
||||
|
||||
if audio_formats:
|
||||
# Prefer m4a (itag 140) for best compatibility, or webm (251)
|
||||
# Sort by filesize (smaller is faster for whisper) or bitrate?
|
||||
# For now simply pick the first one that looks like m4a, else first available
|
||||
chosen_audio = next((f for f in audio_formats if f.get("ext") == "m4a"), audio_formats[0])
|
||||
audio_url = chosen_audio.get("url")
|
||||
logger.info(f"Found audio-only URL: {audio_url[:30]}...")
|
||||
else:
|
||||
logger.warning("No audio-only formats found in valid stream info.")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to extract audio url: {e}")
|
||||
|
||||
response_data = {
|
||||
"original_url": stream_url,
|
||||
"title": info.get("title", "Unknown"),
|
||||
|
|
@ -477,8 +514,16 @@ def get_stream_info():
|
|||
"view_count": info.get("view_count", 0),
|
||||
"related": [],
|
||||
"subtitle_url": subtitle_url,
|
||||
"audio_url": None # Placeholder, filled below
|
||||
}
|
||||
|
||||
from urllib.parse import quote
|
||||
proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
|
||||
response_data["stream_url"] = proxied_url
|
||||
|
||||
if audio_url:
|
||||
response_data["audio_url"] = f"/video_proxy?url={quote(audio_url, safe='')}"
|
||||
|
||||
# Cache it
|
||||
expiry = current_time + 3600
|
||||
conn.execute(
|
||||
|
|
@ -488,10 +533,6 @@ def get_stream_info():
|
|||
conn.commit()
|
||||
conn.close()
|
||||
|
||||
from urllib.parse import quote
|
||||
proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
|
||||
response_data["stream_url"] = proxied_url
|
||||
|
||||
response = jsonify(response_data)
|
||||
response.headers["X-Cache"] = "MISS"
|
||||
return response
|
||||
|
|
@ -513,7 +554,12 @@ def search():
|
|||
if url_match:
|
||||
video_id = url_match.group(1)
|
||||
# Fetch single video info
|
||||
ydl_opts = {"quiet": True, "no_warnings": True, "noplaylist": True}
|
||||
ydl_opts = {
|
||||
"quiet": True,
|
||||
"no_warnings": True,
|
||||
"noplaylist": True,
|
||||
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||
}
|
||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||
info = ydl.extract_info(f"https://www.youtube.com/watch?v={video_id}", download=False)
|
||||
return jsonify([{
|
||||
|
|
@ -539,17 +585,20 @@ def search():
|
|||
def get_channel_videos_simple():
|
||||
"""Get videos from a channel."""
|
||||
channel_id = request.args.get("id")
|
||||
filter_type = request.args.get("filter_type", "video")
|
||||
if not channel_id:
|
||||
return jsonify({"error": "No channel ID provided"}), 400
|
||||
|
||||
try:
|
||||
# Construct URL
|
||||
suffix = "shorts" if filter_type == "shorts" else "videos"
|
||||
|
||||
if channel_id.startswith("UC"):
|
||||
url = f"https://www.youtube.com/channel/{channel_id}/videos"
|
||||
url = f"https://www.youtube.com/channel/{channel_id}/{suffix}"
|
||||
elif channel_id.startswith("@"):
|
||||
url = f"https://www.youtube.com/{channel_id}/videos"
|
||||
url = f"https://www.youtube.com/{channel_id}/{suffix}"
|
||||
else:
|
||||
url = f"https://www.youtube.com/channel/{channel_id}/videos"
|
||||
url = f"https://www.youtube.com/channel/{channel_id}/{suffix}"
|
||||
|
||||
cmd = [
|
||||
sys.executable, "-m", "yt_dlp",
|
||||
|
|
@ -743,6 +792,85 @@ def get_transcript():
|
|||
return jsonify({"success": False, "error": f"Could not load transcript: {str(e)}"})
|
||||
|
||||
|
||||
@api_bp.route("/generate_subtitles", methods=["POST"])
def generate_subtitles():
    """Generate WebVTT subtitles for a YouTube video using server-side Whisper.

    Expects a JSON body of the form ``{"video_id": "<id>"}``. The audio stream
    is resolved with yt-dlp, downloaded to a temporary file, transcribed with
    the lazily loaded module-level Whisper model, and rendered as WebVTT.

    Returns:
        ``{"success": True, "vtt": <str>}`` on success;
        ``({"error": <str>}, 400)`` when the video ID is missing or malformed;
        ``({"error": <str>}, 500)`` when extraction, download or transcription fails.
    """
    global WHISPER_MODEL

    # silent=True: a missing or malformed JSON body yields None instead of
    # raising, so the request is answered with the explicit 400 below.
    data = request.get_json(silent=True)
    video_id = data.get("video_id") if isinstance(data, dict) else None

    if not video_id:
        return jsonify({"error": "No video ID provided"}), 400

    # video_id comes from the client and is interpolated into a URL, so only
    # accept the characters that appear in YouTube video IDs.
    if not (
        isinstance(video_id, str)
        and video_id.isascii()
        and all(ch.isalnum() or ch in "-_" for ch in video_id)
    ):
        return jsonify({"error": "Invalid video ID"}), 400

    temp_path = None
    try:
        # Lazy load model; the lock keeps concurrent first requests from
        # loading it more than once.
        with WHISPER_LOCK:
            if WHISPER_MODEL is None:
                import whisper
                logger.info("Loading Whisper model (tiny)...")
                WHISPER_MODEL = whisper.load_model("tiny")

        # Extract Audio URL
        url = f"https://www.youtube.com/watch?v={video_id}"
        ydl_opts = {
            "format": "bestaudio[ext=m4a]/bestaudio/best",
            "noplaylist": True,
            "quiet": True,
            "force_ipv4": True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False) or {}
        audio_url = info.get("url")

        if not audio_url:
            return jsonify({"error": "Could not extract audio URL"}), 500

        # Download audio to temp file
        import requests

        # Prefer the headers yt-dlp reports for this stream; fall back to a
        # browser User-Agent when none are provided.
        headers = dict(info.get("http_headers") or {})
        headers.setdefault(
            "User-Agent",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        )

        logger.info(f"Downloading audio for transcription: {audio_url[:30]}...")
        # timeout bounds connect/read stalls so a hung upstream cannot pin the worker.
        with requests.get(audio_url, headers=headers, stream=True, timeout=30) as r:
            r.raise_for_status()
            suffix = f".{info.get('ext') or 'm4a'}"
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as f:
                # Record the path first so the finally-clause can clean up
                # even if the download fails midway.
                temp_path = f.name
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Transcribe
        logger.info("Transcribing...")
        result = WHISPER_MODEL.transcribe(temp_path)

        # Convert to VTT
        def format_timestamp(seconds):
            """Render seconds as HH:MM:SS.mmm.

            Works in whole milliseconds so rounding carries into the minutes
            and hours fields instead of producing a seconds field of 60.000.
            """
            total_ms = max(0, int(round(float(seconds or 0) * 1000)))
            hours, rem = divmod(total_ms, 3_600_000)
            minutes, rem = divmod(rem, 60_000)
            secs, millis = divmod(rem, 1000)
            return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

        cues = ["WEBVTT\n\n"]
        for segment in result.get("segments", []):
            start = format_timestamp(segment["start"])
            end = format_timestamp(segment["end"])
            text = segment["text"].strip()
            cues.append(f"{start} --> {end}\n{text}\n\n")

        return jsonify({"success": True, "vtt": "".join(cues)})

    except Exception as e:
        logger.error(f"Subtitle generation failed: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        # The temp file is created with delete=False, so remove it explicitly.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
|
||||
|
||||
|
||||
@api_bp.route("/update_ytdlp", methods=["POST"])
|
||||
def update_ytdlp():
|
||||
"""Update yt-dlp to latest version."""
|
||||
|
|
|
|||
|
|
@ -6,6 +6,14 @@ from flask import Blueprint, request, Response, stream_with_context, send_from_d
|
|||
import requests
|
||||
import os
|
||||
import logging
|
||||
import socket
|
||||
import urllib3.util.connection as urllib3_cn
|
||||
|
||||
# Force IPv4 for requests (which uses urllib3)
|
||||
def allowed_gai_family():
|
||||
return socket.AF_INET
|
||||
|
||||
urllib3_cn.allowed_gai_family = allowed_gai_family
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
|
@ -30,7 +38,8 @@ def video_proxy():
|
|||
|
||||
# Forward headers to mimic browser and support seeking
|
||||
headers = {
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||
# "Referer": "https://www.youtube.com/", # Removed to test if it fixes 403
|
||||
}
|
||||
|
||||
# Support Range requests (scrubbing)
|
||||
|
|
@ -39,7 +48,13 @@ def video_proxy():
|
|||
headers["Range"] = range_header
|
||||
|
||||
try:
|
||||
logger.info(f"Proxying URL: {url}")
|
||||
# logger.info(f"Proxy Request Headers: {headers}")
|
||||
req = requests.get(url, headers=headers, stream=True, timeout=30)
|
||||
|
||||
logger.info(f"Upstream Status: {req.status_code}")
|
||||
if req.status_code != 200:
|
||||
logger.error(f"Upstream Error Body: {req.text[:500]}")
|
||||
|
||||
# Handle HLS (M3U8) Rewriting - CRITICAL for 1080p+ and proper sync
|
||||
content_type = req.headers.get("content-type", "").lower()
|
||||
|
|
@ -50,7 +65,7 @@ def video_proxy():
|
|||
or "application/vnd.apple.mpegurl" in content_type
|
||||
)
|
||||
|
||||
if is_manifest:
|
||||
if is_manifest and req.status_code == 200:
|
||||
content = req.text
|
||||
base_url = url.rsplit("/", 1)[0]
|
||||
new_lines = []
|
||||
|
|
|
|||
43
deploy.py
Normal file → Executable file
43
deploy.py
Normal file → Executable file
|
|
@ -1,7 +1,6 @@
|
|||
#!/usr/bin/env python3
|
||||
"""Check git status and redeploy."""
|
||||
"""Build and push multi-platform Docker image."""
|
||||
import subprocess
|
||||
import os
|
||||
|
||||
def run_cmd(cmd):
|
||||
print(f"\n>>> {cmd}")
|
||||
|
|
@ -13,37 +12,17 @@ def run_cmd(cmd):
|
|||
return result.returncode == 0
|
||||
|
||||
print("="*50)
|
||||
print("Checking git status...")
|
||||
print("Building Multi-Platform Docker Image")
|
||||
print("(linux/amd64 + linux/arm64)")
|
||||
print("="*50)
|
||||
run_cmd("git status")
|
||||
|
||||
# Create buildx builder if it doesn't exist
|
||||
run_cmd("docker buildx create --name multiplatform --use 2>/dev/null || docker buildx use multiplatform")
|
||||
|
||||
# Build and push multi-platform image
|
||||
print("\nBuilding and pushing...")
|
||||
run_cmd("docker buildx build --platform linux/amd64,linux/arm64 -t vndangkhoa/kv-tube:latest --push .")
|
||||
|
||||
print("\n" + "="*50)
|
||||
print("Staging all changes...")
|
||||
print("="*50)
|
||||
run_cmd("git add .")
|
||||
|
||||
print("\n" + "="*50)
|
||||
print("Committing...")
|
||||
print("="*50)
|
||||
run_cmd('git commit -m "Latest local changes"')
|
||||
|
||||
print("\n" + "="*50)
|
||||
print("Pushing to GitHub...")
|
||||
print("="*50)
|
||||
run_cmd("git push origin main")
|
||||
|
||||
print("\n" + "="*50)
|
||||
print("Pushing to Forgejo...")
|
||||
print("="*50)
|
||||
run_cmd("git push private main")
|
||||
|
||||
print("\n" + "="*50)
|
||||
print("Building Docker image...")
|
||||
print("="*50)
|
||||
if run_cmd("docker build -t vndangkhoa/kv-tube:latest ."):
|
||||
print("\nPushing Docker image...")
|
||||
run_cmd("docker push vndangkhoa/kv-tube:latest")
|
||||
|
||||
print("\n" + "="*50)
|
||||
print("DEPLOYMENT COMPLETE!")
|
||||
print("DONE! Image now supports both amd64 and arm64")
|
||||
print("="*50)
|
||||
|
|
|
|||
2
kv_server.py
Normal file → Executable file
2
kv_server.py
Normal file → Executable file
|
|
@ -8,7 +8,7 @@ try:
|
|||
except NameError:
|
||||
base_dir = os.getcwd()
|
||||
|
||||
venv_dirs = ['env', '.venv']
|
||||
venv_dirs = ['.venv', 'env']
|
||||
activated = False
|
||||
|
||||
for venv_name in venv_dirs:
|
||||
|
|
|
|||
|
|
@ -4,3 +4,5 @@ yt-dlp>=2024.1.0
|
|||
werkzeug
|
||||
gunicorn
|
||||
python-dotenv
|
||||
openai-whisper
|
||||
numpy
|
||||
|
|
|
|||
28
start.sh
Normal file → Executable file
28
start.sh
Normal file → Executable file
|
|
@ -3,11 +3,26 @@ cd "$(dirname "$0")"
|
|||
echo "=== Diagnostic Start Script ==="
|
||||
|
||||
# Activate env
|
||||
if [ -d "env" ]; then
|
||||
# Activate env
|
||||
if [ -d ".venv_clean" ]; then
|
||||
echo "Activating .venv_clean..."
|
||||
export PYTHONPATH="$(pwd)/.venv_clean/lib/python3.14/site-packages"
|
||||
# Use system python with PYTHONPATH if bindir is missing/broken
|
||||
PYTHON_EXEC="/Library/Frameworks/Python.framework/Versions/3.14/bin/python3"
|
||||
export FLASK_APP=wsgi.py
|
||||
export FLASK_RUN_PORT=5002
|
||||
|
||||
echo "--- Starting with System Python + PYTHONPATH ---"
|
||||
$PYTHON_EXEC -m flask run --host=0.0.0.0 --port=5002
|
||||
exit 0
|
||||
elif [ -d ".venv" ]; then
|
||||
echo "Activating .venv..."
|
||||
source .venv/bin/activate
|
||||
elif [ -d "env" ]; then
|
||||
echo "Activating env..."
|
||||
source env/bin/activate
|
||||
else
|
||||
echo "No 'env' directory found!"
|
||||
echo "No '.venv' or 'env' directory found!"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
|
|
@ -23,13 +38,14 @@ else
|
|||
fi
|
||||
|
||||
echo "--- Attempting to start with Gunicorn ---"
|
||||
if [ -f "env/bin/gunicorn" ]; then
|
||||
./env/bin/gunicorn -b 0.0.0.0:5002 wsgi:app
|
||||
echo "--- Attempting to start with Gunicorn ---"
|
||||
if command -v gunicorn &> /dev/null; then
|
||||
gunicorn -b 0.0.0.0:5002 wsgi:app
|
||||
else
|
||||
echo "Gunicorn not found."
|
||||
echo "Gunicorn not found in path."
|
||||
fi
|
||||
|
||||
echo "--- Attempting to start with Flask explicitly ---"
|
||||
export FLASK_APP=wsgi.py
|
||||
export FLASK_RUN_PORT=5002
|
||||
./env/bin/flask run --host=0.0.0.0
|
||||
python -m flask run --host=0.0.0.0
|
||||
|
|
|
|||
|
|
@ -1,144 +1,119 @@
|
|||
/**
|
||||
* KV-Tube WebAI Service
|
||||
* Local AI chatbot for transcript Q&A using WebLLM
|
||||
*
|
||||
* Runs entirely in-browser, no server required after model download
|
||||
*/
|
||||
|
||||
// WebLLM CDN import (lazy loaded)
|
||||
var WEBLLM_CDN = 'https://esm.run/@mlc-ai/web-llm';
|
||||
|
||||
// Model options - using verified WebLLM model IDs
|
||||
var AI_MODELS = {
|
||||
small: { id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC', name: 'Qwen2 (0.5B)', size: '350MB' },
|
||||
medium: { id: 'Qwen2-1.5B-Instruct-q4f16_1-MLC', name: 'Qwen2 (1.5B)', size: '1GB' },
|
||||
};
|
||||
|
||||
// Default to small model
|
||||
var DEFAULT_MODEL = AI_MODELS.small;
|
||||
|
||||
if (typeof TranscriptAI === 'undefined') {
|
||||
window.TranscriptAI = class TranscriptAI {
|
||||
constructor() {
|
||||
this.engine = null;
|
||||
this.isLoading = false;
|
||||
this.isReady = false;
|
||||
this.transcript = '';
|
||||
this.onProgressCallback = null;
|
||||
this.onReadyCallback = null;
|
||||
}
|
||||
|
||||
setTranscript(text) {
|
||||
this.transcript = text.slice(0, 8000); // Limit context size
|
||||
}
|
||||
|
||||
setCallbacks({ onProgress, onReady }) {
|
||||
this.onProgressCallback = onProgress;
|
||||
this.onReadyCallback = onReady;
|
||||
}
|
||||
|
||||
async init() {
|
||||
if (this.isReady || this.isLoading) return;
|
||||
|
||||
this.isLoading = true;
|
||||
|
||||
try {
|
||||
// Dynamic import WebLLM
|
||||
const { CreateMLCEngine } = await import(WEBLLM_CDN);
|
||||
|
||||
// Initialize engine with progress callback
|
||||
this.engine = await CreateMLCEngine(DEFAULT_MODEL.id, {
|
||||
initProgressCallback: (report) => {
|
||||
if (this.onProgressCallback) {
|
||||
this.onProgressCallback(report);
|
||||
}
|
||||
console.log('AI Load Progress:', report.text);
|
||||
}
|
||||
});
|
||||
|
||||
this.isReady = true;
|
||||
this.isLoading = false;
|
||||
|
||||
if (this.onReadyCallback) {
|
||||
this.onReadyCallback();
|
||||
}
|
||||
|
||||
console.log('TranscriptAI ready with model:', DEFAULT_MODEL.name);
|
||||
|
||||
} catch (err) {
|
||||
this.isLoading = false;
|
||||
console.error('Failed to load AI model:', err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
async ask(question) {
|
||||
if (!this.isReady) {
|
||||
throw new Error('AI not initialized');
|
||||
}
|
||||
|
||||
const systemPrompt = this.transcript
|
||||
? `You are a helpful AI assistant analyzing a video transcript. Answer the user's question based ONLY on the transcript content below. Be concise and direct. If the answer is not in the transcript, say so.\n\nTRANSCRIPT:\n${this.transcript}`
|
||||
: `You are a helpful AI assistant for KV-Tube, a lightweight YouTube client. You can help the user with general questions, explain features of the app, or chat casually. Be concise and helpful.`;
|
||||
|
||||
try {
|
||||
const response = await this.engine.chat.completions.create({
|
||||
messages: [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: question }
|
||||
],
|
||||
max_tokens: 256,
|
||||
temperature: 0.7,
|
||||
});
|
||||
|
||||
return response.choices[0].message.content;
|
||||
|
||||
} catch (err) {
|
||||
console.error('AI response error:', err);
|
||||
throw err;
|
||||
}
|
||||
}
|
||||
|
||||
async *askStreaming(question) {
|
||||
if (!this.isReady) {
|
||||
throw new Error('AI not initialized');
|
||||
}
|
||||
|
||||
const systemPrompt = this.transcript
|
||||
? `You are a helpful AI assistant analyzing a video transcript. Answer the user's question based ONLY on the transcript content below. Be concise and direct. If the answer is not in the transcript, say so.\n\nTRANSCRIPT:\n${this.transcript}`
|
||||
: `You are a helpful AI assistant for KV-Tube, a lightweight YouTube client. You can help the user with general questions, explain features of the app, or chat casually. Be concise and helpful.`;
|
||||
|
||||
const chunks = await this.engine.chat.completions.create({
|
||||
messages: [
|
||||
{ role: 'system', content: systemPrompt },
|
||||
{ role: 'user', content: question }
|
||||
],
|
||||
max_tokens: 256,
|
||||
temperature: 0.7,
|
||||
stream: true,
|
||||
});
|
||||
|
||||
for await (const chunk of chunks) {
|
||||
const delta = chunk.choices[0]?.delta?.content;
|
||||
if (delta) {
|
||||
yield delta;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
getModelInfo() {
|
||||
return DEFAULT_MODEL;
|
||||
}
|
||||
|
||||
isModelReady() {
|
||||
return this.isReady;
|
||||
}
|
||||
|
||||
isModelLoading() {
|
||||
return this.isLoading;
|
||||
}
|
||||
}
|
||||
|
||||
// Global instance
|
||||
window.transcriptAI = new TranscriptAI();
|
||||
}
|
||||
/**
|
||||
* WebAI - Client-side AI features using Transformers.js
|
||||
*/
|
||||
|
||||
// Suppress ONNX Runtime warnings
|
||||
if (typeof ort !== 'undefined') {
|
||||
ort.env.logLevel = 'fatal';
|
||||
}
|
||||
|
||||
/**
 * Client-side subtitle generator built on the Transformers.js Whisper pipeline.
 *
 * Usage: `await init(cb)` once to download/load the model, then
 * `await generate(audioUrl, cb)` to obtain a WebVTT string.
 */
class SubtitleGenerator {
    constructor() {
        // Loaded automatic-speech-recognition pipeline, or null until init() succeeds.
        this.pipeline = null;
        this.isLoading = false;
        // In-flight load, shared so concurrent init() callers wait for the same model.
        this.initPromise = null;
    }

    /**
     * Load the Whisper model if it is not loaded yet.
     *
     * Concurrent calls share one load and all resolve once the model is ready.
     *
     * @param {(message: string) => void} [progressCallback] Receives status text.
     * @returns {Promise<void>}
     * @throws Re-throws any error raised while importing or loading the model.
     */
    async init(progressCallback) {
        if (this.pipeline) return;
        if (this.initPromise) return this.initPromise;

        this.isLoading = true;
        this.initPromise = this.loadPipeline(progressCallback).finally(() => {
            this.isLoading = false;
            this.initPromise = null;
        });
        return this.initPromise;
    }

    /**
     * Import Transformers.js and build the speech-recognition pipeline.
     *
     * @param {(message: string) => void} [progressCallback]
     * @returns {Promise<void>}
     */
    async loadPipeline(progressCallback) {
        try {
            progressCallback?.('Loading AI model...');

            const { pipeline, env } = await import('https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2');

            // Configure environment
            env.allowLocalModels = false;
            env.useBrowserCache = true;

            // Suppress ONNX Runtime warnings when a global `ort` is present.
            if (typeof ort !== 'undefined') {
                ort.env.logLevel = 'fatal';
            }

            progressCallback?.('Downloading Whisper model (~40MB)...');

            this.pipeline = await pipeline(
                'automatic-speech-recognition',
                'Xenova/whisper-tiny',
                {
                    progress_callback: (progress) => {
                        const message = this.describeProgress(progress);
                        if (message) progressCallback?.(message);
                    },
                },
            );

            progressCallback?.('Model ready!');
        } catch (e) {
            console.error('Failed to load Whisper:', e);
            throw e;
        }
    }

    /**
     * Turn a Transformers.js progress event into user-facing text.
     *
     * Byte progress is reported with status 'progress' (the original code only
     * checked 'downloading', which is still accepted here).
     *
     * @param {{status?: string, progress?: number, loaded?: number, total?: number}} progress
     * @returns {string|null} Message to display, or null for events with nothing to show.
     */
    describeProgress(progress) {
        const status = progress?.status;
        if (status === 'progress' || status === 'downloading') {
            let pct = NaN;
            if (typeof progress.progress === 'number') {
                pct = progress.progress;
            } else if (progress.total > 0) {
                pct = (progress.loaded / progress.total) * 100;
            }
            // Unknown size: avoid showing "NaN%".
            return Number.isFinite(pct) ? `Downloading: ${Math.round(pct)}%` : 'Downloading...';
        }
        if (status === 'loading') {
            return 'Loading model...';
        }
        return null;
    }

    /**
     * Transcribe an audio source and return it as WebVTT.
     *
     * @param {string} audioUrl URL of the audio to transcribe.
     * @param {(message: string) => void} [progressCallback] Receives status text.
     * @returns {Promise<string>} WebVTT document.
     * @throws {Error} If init() has not completed, or if transcription fails.
     */
    async generate(audioUrl, progressCallback) {
        if (!this.pipeline) {
            throw new Error('Model not initialized. Call init() first.');
        }

        progressCallback?.('Transcribing audio...');

        try {
            const result = await this.pipeline(audioUrl, {
                chunk_length_s: 30,
                stride_length_s: 5,
                return_timestamps: true,
            });

            progressCallback?.('Formatting subtitles...');

            // Convert to VTT format
            return this.toVTT(result?.chunks ?? []);
        } catch (e) {
            console.error('Transcription failed:', e);
            throw e;
        }
    }

    /**
     * Render timestamped chunks as a WebVTT document.
     *
     * @param {Array<{timestamp: [number|null, number|null], text: string}>} chunks
     * @returns {string} WebVTT text; chunks with empty text are omitted.
     */
    toVTT(chunks) {
        // Used when a chunk has no usable end time and no following chunk.
        const fallbackDurationSeconds = 5;
        const list = chunks ?? [];
        let vtt = 'WEBVTT\n\n';

        list.forEach((chunk, i) => {
            const text = (chunk?.text ?? '')
                .trim()
                // A blank line or "-->" inside cue text would break the cue.
                .replace(/\s*\n\s*/g, ' ')
                .replaceAll('-->', '->');
            if (!text) return;

            const [rawStart, rawEnd] = chunk.timestamp ?? [];
            const startSeconds = Number.isFinite(rawStart) ? rawStart : 0;

            // The end time can be null; an end that is not after the start
            // makes an invalid cue, so fall back to the next chunk's start or
            // a fixed duration.
            let endSeconds = rawEnd;
            if (!Number.isFinite(endSeconds) || endSeconds <= startSeconds) {
                const nextStart = list[i + 1]?.timestamp?.[0];
                endSeconds = Number.isFinite(nextStart) && nextStart > startSeconds
                    ? nextStart
                    : startSeconds + fallbackDurationSeconds;
            }

            vtt += `${i + 1}\n`;
            vtt += `${this.formatTime(startSeconds)} --> ${this.formatTime(endSeconds)}\n`;
            vtt += `${text}\n\n`;
        });

        return vtt;
    }

    /**
     * Format seconds as a WebVTT timestamp (HH:MM:SS.mmm).
     *
     * Computed in whole milliseconds so rounding carries into minutes/hours
     * rather than yielding a seconds field of "60.000".
     *
     * @param {number|null|undefined} seconds Null, undefined, NaN and negatives map to 0.
     * @returns {string}
     */
    formatTime(seconds) {
        const totalMs = Number.isFinite(seconds) && seconds > 0 ? Math.round(seconds * 1000) : 0;
        const h = Math.floor(totalMs / 3600000);
        const m = Math.floor((totalMs % 3600000) / 60000);
        const s = Math.floor((totalMs % 60000) / 1000);
        const ms = totalMs % 1000;
        const pad = (value, width) => String(value).padStart(width, '0');
        return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
    }
}
|
||||
|
||||
// Export singleton
|
||||
window.subtitleGenerator = new SubtitleGenerator();
|
||||
|
|
|
|||
|
|
@ -32,26 +32,28 @@
|
|||
<div class="yt-section">
|
||||
<div class="yt-section-header">
|
||||
<div class="yt-tabs">
|
||||
<a href="#" onclick="changeChannelTab('video', this); return false;" class="active">
|
||||
<a href="javascript:void(0)" onclick="changeChannelTab('video', this); return false;"
|
||||
class="active no-spa">
|
||||
<i class="fas fa-video"></i>
|
||||
<span>Videos</span>
|
||||
</a>
|
||||
<a href="#" onclick="changeChannelTab('shorts', this); return false;">
|
||||
<a href="javascript:void(0)" onclick="changeChannelTab('shorts', this); return false;" class="no-spa">
|
||||
<i class="fas fa-bolt"></i>
|
||||
<span>Shorts</span>
|
||||
</a>
|
||||
</div>
|
||||
|
||||
<div class="yt-sort-options">
|
||||
<a href="#" onclick="changeChannelSort('latest', this); return false;" class="active">
|
||||
<a href="javascript:void(0)" onclick="changeChannelSort('latest', this); return false;"
|
||||
class="active no-spa">
|
||||
<i class="fas fa-clock"></i>
|
||||
<span>Latest</span>
|
||||
</a>
|
||||
<a href="#" onclick="changeChannelSort('popular', this); return false;">
|
||||
<a href="javascript:void(0)" onclick="changeChannelSort('popular', this); return false;" class="no-spa">
|
||||
<i class="fas fa-fire"></i>
|
||||
<span>Popular</span>
|
||||
</a>
|
||||
<a href="#" onclick="changeChannelSort('oldest', this); return false;">
|
||||
<a href="javascript:void(0)" onclick="changeChannelSort('oldest', this); return false;" class="no-spa">
|
||||
<i class="fas fa-history"></i>
|
||||
<span>Oldest</span>
|
||||
</a>
|
||||
|
|
@ -343,8 +345,8 @@
|
|||
}
|
||||
|
||||
try {
|
||||
console.log(`Fetching: /api/channel/videos?id=${channelId}&page=${currentChannelPage}`);
|
||||
const response = await fetch(`/api/channel/videos?id=${channelId}&page=${currentChannelPage}&sort=${currentChannelSort}&filter_type=${currentFilterType}`);
|
||||
console.log(`Fetching: /api/channel?id=${channelId}&page=${currentChannelPage}`);
|
||||
const response = await fetch(`/api/channel?id=${channelId}&page=${currentChannelPage}&sort=${currentChannelSort}&filter_type=${currentFilterType}`);
|
||||
const videos = await response.json();
|
||||
console.log("Channel Videos Response:", videos);
|
||||
|
||||
|
|
|
|||
|
|
@ -554,6 +554,7 @@
|
|||
|
||||
<!-- WebAI Script -->
|
||||
<script src="{{ url_for('static', filename='js/webai.js') }}"></script>
|
||||
|
||||
<script>
|
||||
// AI Chat Toggle and Message Handler
|
||||
var aiChatVisible = false;
|
||||
|
|
|
|||
|
|
@ -352,22 +352,35 @@
|
|||
}
|
||||
}
|
||||
|
||||
// Run on initial page load
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
// Run on initial page load and SPA navigation
|
||||
if (document.readyState === 'loading') {
|
||||
document.addEventListener('DOMContentLoaded', () => {
|
||||
loadLibraryContent();
|
||||
initTabs();
|
||||
});
|
||||
} else {
|
||||
// Document already loaded (SPA navigation)
|
||||
loadLibraryContent();
|
||||
initTabs();
|
||||
}
|
||||
|
||||
function initTabs() {
|
||||
// Intercept tab clicks for client-side navigation
|
||||
document.querySelectorAll('.library-tab').forEach(tab => {
|
||||
tab.addEventListener('click', (e) => {
|
||||
// Remove old listeners to be safe (optional but good practice in SPA)
|
||||
const newTab = tab.cloneNode(true);
|
||||
tab.parentNode.replaceChild(newTab, tab);
|
||||
|
||||
newTab.addEventListener('click', (e) => {
|
||||
e.preventDefault();
|
||||
const newUrl = tab.getAttribute('href');
|
||||
const newUrl = newTab.getAttribute('href');
|
||||
// Update URL without reloading
|
||||
history.pushState(null, '', newUrl);
|
||||
// Immediately load the new content
|
||||
loadLibraryContent();
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Handle browser back/forward buttons
|
||||
window.addEventListener('popstate', () => {
|
||||
|
|
|
|||
|
|
@ -39,13 +39,6 @@
|
|||
|
||||
<!-- Actions -->
|
||||
<div class="yt-video-actions">
|
||||
<button class="yt-action-btn" id="likeBtn">
|
||||
<i class="fas fa-thumbs-up"></i>
|
||||
<span id="likeCount">Like</span>
|
||||
</button>
|
||||
<button class="yt-action-btn" id="dislikeBtn">
|
||||
<i class="fas fa-thumbs-down"></i>
|
||||
</button>
|
||||
<button class="yt-action-btn" id="shareBtn">
|
||||
<i class="fas fa-share"></i>
|
||||
Share
|
||||
|
|
@ -54,7 +47,7 @@
|
|||
<i class="fas fa-download"></i>
|
||||
Download
|
||||
</button>
|
||||
<button class="yt-action-btn" id="saveBtn">
|
||||
<button class="yt-action-btn" id="saveBtn" onclick="toggleSaveToLibrary()">
|
||||
<i class="far fa-bookmark"></i>
|
||||
Save
|
||||
</button>
|
||||
|
|
@ -67,6 +60,10 @@
|
|||
Queue
|
||||
<span id="queueBadge" class="queue-badge" style="display:none;">0</span>
|
||||
</button>
|
||||
<button class="yt-action-btn" id="genSubBtn" onclick="generateSubtitles()" style="display:none;">
|
||||
<i class="fas fa-closed-captioning"></i>
|
||||
Generate Subs
|
||||
</button>
|
||||
|
||||
<!-- View Mode Buttons -->
|
||||
<div class="view-mode-buttons">
|
||||
|
|
@ -1247,7 +1244,8 @@
|
|||
thumbnail: `https://i.ytimg.com/vi/${videoId}/mqdefault.jpg`,
|
||||
uploader: data.uploader || 'Unknown',
|
||||
channel_id: data.channel_id || data.uploader_id || '',
|
||||
duration: data.duration
|
||||
duration: data.duration,
|
||||
audioUrl: data.audio_url
|
||||
};
|
||||
|
||||
// Check if video is already in queue
|
||||
|
|
@ -1255,6 +1253,12 @@
|
|||
updateQueueCount();
|
||||
updateQueueBadge();
|
||||
|
||||
// Enable AI Subtitles (Always show for testing if audio is available)
|
||||
if (data.audio_url) {
|
||||
const genBtn = document.getElementById('genSubBtn');
|
||||
if (genBtn) genBtn.style.display = 'inline-flex';
|
||||
}
|
||||
|
||||
if (data.error) {
|
||||
loading.innerHTML = `<p style="color:#f00; text-align:center;">${data.error}</p>`;
|
||||
if (infoSkeleton) infoSkeleton.style.display = 'none';
|
||||
|
|
@ -1394,6 +1398,54 @@
|
|||
return div.innerHTML;
|
||||
}
|
||||
|
||||
/**
 * Click handler for the "Generate Subs" button.
 *
 * Loads the Whisper model through window.subtitleGenerator, transcribes the
 * current video's audio URL, and attaches the resulting WebVTT to
 * window.player as a blob URL. Progress text is shown on the button.
 *
 * The button's label and enabled state are restored on every exit path. On
 * success it is hidden, but left usable if it is shown again later.
 *
 * @returns {Promise<void>}
 */
async function generateSubtitles() {
    const btn = document.getElementById('genSubBtn');
    const audioUrl = currentVideoData?.audioUrl;

    if (!audioUrl) {
        showToast("No audio source available for AI", "error");
        return;
    }

    btn.disabled = true;
    const originalHtml = btn.innerHTML;
    let subtitleUrl = null;

    try {
        // Initialize/Load
        await window.subtitleGenerator.init((msg) => {
            btn.innerHTML = `<i class="fas fa-spinner fa-spin"></i> ${msg}`;
        });

        // Generate
        btn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Transcribing...';

        const vttContent = await window.subtitleGenerator.generate(audioUrl, (msg) => {
            btn.innerHTML = `<i class="fas fa-circle-notch fa-spin"></i> ${msg}`;
        });

        console.log("Generated VTT:", vttContent);

        // Without a player there is nowhere to attach the subtitles; report
        // it through the catch block instead of leaving the button stuck.
        if (!window.player) {
            throw new Error('Video player is not ready');
        }

        // Inject into Player
        const blob = new Blob([vttContent], { type: 'text/vtt' });
        subtitleUrl = URL.createObjectURL(blob);

        // Artplayer subtitle API
        window.player.subtitle.url = subtitleUrl;
        window.player.subtitle.show = true;
        window.player.notice.show = 'AI Subtitles Generated';

        showToast("Subtitles generated successfully!", "success");
        btn.style.display = 'none'; // Hide button after success
    } catch (e) {
        // Release the blob if attaching it to the player failed.
        if (subtitleUrl) URL.revokeObjectURL(subtitleUrl);
        console.error(e);
        showToast("Subtitle generation failed: " + e.message, "error");
    } finally {
        btn.innerHTML = originalHtml;
        btn.disabled = false;
    }
}
|
||||
|
||||
async function summarizeVideo() {
|
||||
const videoId = "{{ video_id }}";
|
||||
const btn = document.getElementById('summarizeBtn');
|
||||
|
|
|
|||
Loading…
Reference in a new issue