feat: Add client-side AI subtitle generation with Whisper
- Add webai.js with Transformers.js Whisper integration
- Add Generate Subs button to watch page
- Fix 403 video playback with IPv4 adapter
- Update streaming proxy headers
This commit is contained in:
parent
92acf81362
commit
a93a875ce2
11 changed files with 399 additions and 216 deletions
|
|
@ -15,6 +15,8 @@ import time
|
||||||
import random
|
import random
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
import yt_dlp
|
import yt_dlp
|
||||||
|
import tempfile
|
||||||
|
import threading
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -28,6 +30,10 @@ DB_NAME = os.path.join(DATA_DIR, "kvtube.db")
|
||||||
API_CACHE = {}
|
API_CACHE = {}
|
||||||
CACHE_TIMEOUT = 600 # 10 minutes
|
CACHE_TIMEOUT = 600 # 10 minutes
|
||||||
|
|
||||||
|
# AI Models
|
||||||
|
WHISPER_MODEL = None
|
||||||
|
WHISPER_LOCK = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
def get_db_connection():
|
def get_db_connection():
|
||||||
"""Get database connection with row factory."""
|
"""Get database connection with row factory."""
|
||||||
|
|
@ -436,10 +442,10 @@ def get_stream_info():
|
||||||
"format": "best[ext=mp4]/best",
|
"format": "best[ext=mp4]/best",
|
||||||
"noplaylist": True,
|
"noplaylist": True,
|
||||||
"quiet": True,
|
"quiet": True,
|
||||||
"no_warnings": True,
|
|
||||||
"skip_download": True,
|
"skip_download": True,
|
||||||
"force_ipv4": True,
|
|
||||||
"socket_timeout": 10,
|
"socket_timeout": 10,
|
||||||
|
"force_ipv4": True,
|
||||||
|
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||||
}
|
}
|
||||||
|
|
||||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
|
|
@ -453,6 +459,10 @@ def get_stream_info():
|
||||||
if not stream_url:
|
if not stream_url:
|
||||||
return jsonify({"error": "No stream URL found"}), 500
|
return jsonify({"error": "No stream URL found"}), 500
|
||||||
|
|
||||||
|
# Log the headers yt-dlp expects us to use
|
||||||
|
expected_headers = info.get("http_headers", {})
|
||||||
|
logger.info(f"YT-DLP Expected Headers: {expected_headers}")
|
||||||
|
|
||||||
# Extract subtitles
|
# Extract subtitles
|
||||||
subtitle_url = None
|
subtitle_url = None
|
||||||
subs = info.get("subtitles") or {}
|
subs = info.get("subtitles") or {}
|
||||||
|
|
@ -466,6 +476,33 @@ def get_stream_info():
|
||||||
subtitle_url = auto_subs[lang][0]["url"]
|
subtitle_url = auto_subs[lang][0]["url"]
|
||||||
break
|
break
|
||||||
|
|
||||||
|
# Extract best audio-only URL for AI transcription
|
||||||
|
audio_url = None
|
||||||
|
try:
|
||||||
|
formats = info.get("formats", [])
|
||||||
|
# Debug: Log format details to understand why we aren't matching
|
||||||
|
# logger.info(f"Scanning {len(formats)} formats for audio-only...")
|
||||||
|
|
||||||
|
audio_formats = []
|
||||||
|
for f in formats:
|
||||||
|
vcodec = f.get("vcodec")
|
||||||
|
acodec = f.get("acodec")
|
||||||
|
# Check for audio-only: vcodec should be none/None, acodec should be something
|
||||||
|
if (vcodec == "none" or vcodec is None) and (acodec != "none" and acodec is not None):
|
||||||
|
audio_formats.append(f)
|
||||||
|
|
||||||
|
if audio_formats:
|
||||||
|
# Prefer m4a (itag 140) for best compatibility, or webm (251)
|
||||||
|
# Sort by filesize (smaller is faster for whisper) or bitrate?
|
||||||
|
# For now simply pick the first one that looks like m4a, else first available
|
||||||
|
chosen_audio = next((f for f in audio_formats if f.get("ext") == "m4a"), audio_formats[0])
|
||||||
|
audio_url = chosen_audio.get("url")
|
||||||
|
logger.info(f"Found audio-only URL: {audio_url[:30]}...")
|
||||||
|
else:
|
||||||
|
logger.warning("No audio-only formats found in valid stream info.")
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Failed to extract audio url: {e}")
|
||||||
|
|
||||||
response_data = {
|
response_data = {
|
||||||
"original_url": stream_url,
|
"original_url": stream_url,
|
||||||
"title": info.get("title", "Unknown"),
|
"title": info.get("title", "Unknown"),
|
||||||
|
|
@ -477,8 +514,16 @@ def get_stream_info():
|
||||||
"view_count": info.get("view_count", 0),
|
"view_count": info.get("view_count", 0),
|
||||||
"related": [],
|
"related": [],
|
||||||
"subtitle_url": subtitle_url,
|
"subtitle_url": subtitle_url,
|
||||||
|
"audio_url": None # Placeholder, filled below
|
||||||
}
|
}
|
||||||
|
|
||||||
|
from urllib.parse import quote
|
||||||
|
proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
|
||||||
|
response_data["stream_url"] = proxied_url
|
||||||
|
|
||||||
|
if audio_url:
|
||||||
|
response_data["audio_url"] = f"/video_proxy?url={quote(audio_url, safe='')}"
|
||||||
|
|
||||||
# Cache it
|
# Cache it
|
||||||
expiry = current_time + 3600
|
expiry = current_time + 3600
|
||||||
conn.execute(
|
conn.execute(
|
||||||
|
|
@ -488,10 +533,6 @@ def get_stream_info():
|
||||||
conn.commit()
|
conn.commit()
|
||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
from urllib.parse import quote
|
|
||||||
proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
|
|
||||||
response_data["stream_url"] = proxied_url
|
|
||||||
|
|
||||||
response = jsonify(response_data)
|
response = jsonify(response_data)
|
||||||
response.headers["X-Cache"] = "MISS"
|
response.headers["X-Cache"] = "MISS"
|
||||||
return response
|
return response
|
||||||
|
|
@ -513,7 +554,12 @@ def search():
|
||||||
if url_match:
|
if url_match:
|
||||||
video_id = url_match.group(1)
|
video_id = url_match.group(1)
|
||||||
# Fetch single video info
|
# Fetch single video info
|
||||||
ydl_opts = {"quiet": True, "no_warnings": True, "noplaylist": True}
|
ydl_opts = {
|
||||||
|
"quiet": True,
|
||||||
|
"no_warnings": True,
|
||||||
|
"noplaylist": True,
|
||||||
|
"user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||||
|
}
|
||||||
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
||||||
info = ydl.extract_info(f"https://www.youtube.com/watch?v={video_id}", download=False)
|
info = ydl.extract_info(f"https://www.youtube.com/watch?v={video_id}", download=False)
|
||||||
return jsonify([{
|
return jsonify([{
|
||||||
|
|
@ -539,17 +585,20 @@ def search():
|
||||||
def get_channel_videos_simple():
|
def get_channel_videos_simple():
|
||||||
"""Get videos from a channel."""
|
"""Get videos from a channel."""
|
||||||
channel_id = request.args.get("id")
|
channel_id = request.args.get("id")
|
||||||
|
filter_type = request.args.get("filter_type", "video")
|
||||||
if not channel_id:
|
if not channel_id:
|
||||||
return jsonify({"error": "No channel ID provided"}), 400
|
return jsonify({"error": "No channel ID provided"}), 400
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Construct URL
|
# Construct URL
|
||||||
|
suffix = "shorts" if filter_type == "shorts" else "videos"
|
||||||
|
|
||||||
if channel_id.startswith("UC"):
|
if channel_id.startswith("UC"):
|
||||||
url = f"https://www.youtube.com/channel/{channel_id}/videos"
|
url = f"https://www.youtube.com/channel/{channel_id}/{suffix}"
|
||||||
elif channel_id.startswith("@"):
|
elif channel_id.startswith("@"):
|
||||||
url = f"https://www.youtube.com/{channel_id}/videos"
|
url = f"https://www.youtube.com/{channel_id}/{suffix}"
|
||||||
else:
|
else:
|
||||||
url = f"https://www.youtube.com/channel/{channel_id}/videos"
|
url = f"https://www.youtube.com/channel/{channel_id}/{suffix}"
|
||||||
|
|
||||||
cmd = [
|
cmd = [
|
||||||
sys.executable, "-m", "yt_dlp",
|
sys.executable, "-m", "yt_dlp",
|
||||||
|
|
@ -743,6 +792,85 @@ def get_transcript():
|
||||||
return jsonify({"success": False, "error": f"Could not load transcript: {str(e)}"})
|
return jsonify({"success": False, "error": f"Could not load transcript: {str(e)}"})
|
||||||
|
|
||||||
|
|
||||||
|
def _format_vtt_timestamp(seconds):
    """Render a time offset in seconds as a WebVTT ``HH:MM:SS.mmm`` timestamp."""
    # Round once, in whole milliseconds, so a value such as 59.9996 carries
    # into the minutes field instead of being printed as "60.000" seconds.
    total_ms = max(0, int(round(float(seconds or 0) * 1000)))
    hours, remainder = divmod(total_ms, 3600000)
    minutes, remainder = divmod(remainder, 60000)
    secs, millis = divmod(remainder, 1000)
    return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"


@api_bp.route("/generate_subtitles", methods=["POST"])
def generate_subtitles():
    """Generate WebVTT subtitles for a YouTube video using server-side Whisper.

    Expects a JSON body of the form ``{"video_id": "<id>"}``. The handler
    resolves an audio stream URL with yt-dlp, downloads it to a temporary
    file, transcribes it with the lazily loaded Whisper "tiny" model and
    converts the resulting segments to WebVTT.

    Returns:
        ``{"success": True, "vtt": <text>}`` on success;
        ``{"error": <message>}`` with status 400 when no video ID is supplied,
        or with status 500 when extraction, download or transcription fails.
    """
    global WHISPER_MODEL

    # silent=True turns a missing or malformed JSON body into None instead of
    # an exception, so the caller always receives the JSON 400 response below.
    data = request.get_json(silent=True)
    video_id = data.get("video_id") if isinstance(data, dict) else None

    if not video_id:
        return jsonify({"error": "No video ID provided"}), 400

    temp_path = None
    try:
        # Lazy load the model once; the lock keeps concurrent first requests
        # from loading it twice.
        with WHISPER_LOCK:
            if WHISPER_MODEL is None:
                import whisper
                logger.info("Loading Whisper model (tiny)...")
                WHISPER_MODEL = whisper.load_model("tiny")

        # Extract Audio URL
        url = f"https://www.youtube.com/watch?v={video_id}"
        ydl_opts = {
            "format": "bestaudio[ext=m4a]/bestaudio/best",
            "noplaylist": True,
            "quiet": True,
            "force_ipv4": True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
        # Treat an empty extraction result the same as a missing URL.
        audio_url = (info or {}).get("url")

        if not audio_url:
            return jsonify({"error": "Could not extract audio URL"}), 500

        # Download audio to temp file
        import requests
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        }

        logger.info(f"Downloading audio for transcription: {audio_url[:30]}...")
        # The timeout stops a stalled upstream from pinning this worker forever.
        with requests.get(audio_url, headers=headers, stream=True, timeout=30) as r:
            r.raise_for_status()
            # delete=False: the file must outlive this `with` so Whisper can
            # read it by path; the `finally` clause below removes it.
            with tempfile.NamedTemporaryFile(suffix=".m4a", delete=False) as f:
                temp_path = f.name
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Transcribe
        # NOTE(review): transcribe() runs outside WHISPER_LOCK, so concurrent
        # requests share one model instance -- confirm that is intended.
        logger.info("Transcribing...")
        result = WHISPER_MODEL.transcribe(temp_path)

        # Convert to VTT: header, then one "start --> end / text" cue per segment.
        cues = [
            f"{_format_vtt_timestamp(segment['start'])} --> "
            f"{_format_vtt_timestamp(segment['end'])}\n"
            f"{segment['text'].strip()}\n\n"
            for segment in result["segments"]
        ]
        vtt_output = "WEBVTT\n\n" + "".join(cues)

        return jsonify({"success": True, "vtt": vtt_output})

    except Exception as e:
        logger.error(f"Subtitle generation failed: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        # Always remove the downloaded audio, on success and on failure.
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
|
||||||
|
|
||||||
|
|
||||||
@api_bp.route("/update_ytdlp", methods=["POST"])
|
@api_bp.route("/update_ytdlp", methods=["POST"])
|
||||||
def update_ytdlp():
|
def update_ytdlp():
|
||||||
"""Update yt-dlp to latest version."""
|
"""Update yt-dlp to latest version."""
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,14 @@ from flask import Blueprint, request, Response, stream_with_context, send_from_d
|
||||||
import requests
|
import requests
|
||||||
import os
|
import os
|
||||||
import logging
|
import logging
|
||||||
|
import socket
|
||||||
|
import urllib3.util.connection as urllib3_cn
|
||||||
|
|
||||||
|
# Force IPv4 for outbound `requests` calls. `requests` connects through
# urllib3, and this function is installed below as urllib3's
# `allowed_gai_family` hook, so address lookups are limited to AF_INET (IPv4).
|
||||||
|
def allowed_gai_family():
|
||||||
|
return socket.AF_INET
|
||||||
|
|
||||||
|
urllib3_cn.allowed_gai_family = allowed_gai_family
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
@ -30,7 +38,8 @@ def video_proxy():
|
||||||
|
|
||||||
# Forward headers to mimic browser and support seeking
|
# Forward headers to mimic browser and support seeking
|
||||||
headers = {
|
headers = {
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
|
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
|
||||||
|
# "Referer": "https://www.youtube.com/", # Removed to test if it fixes 403
|
||||||
}
|
}
|
||||||
|
|
||||||
# Support Range requests (scrubbing)
|
# Support Range requests (scrubbing)
|
||||||
|
|
@ -39,8 +48,14 @@ def video_proxy():
|
||||||
headers["Range"] = range_header
|
headers["Range"] = range_header
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
logger.info(f"Proxying URL: {url}")
|
||||||
|
# logger.info(f"Proxy Request Headers: {headers}")
|
||||||
req = requests.get(url, headers=headers, stream=True, timeout=30)
|
req = requests.get(url, headers=headers, stream=True, timeout=30)
|
||||||
|
|
||||||
|
logger.info(f"Upstream Status: {req.status_code}")
|
||||||
|
if req.status_code != 200:
|
||||||
|
logger.error(f"Upstream Error Body: {req.text[:500]}")
|
||||||
|
|
||||||
# Handle HLS (M3U8) Rewriting - CRITICAL for 1080p+ and proper sync
|
# Handle HLS (M3U8) Rewriting - CRITICAL for 1080p+ and proper sync
|
||||||
content_type = req.headers.get("content-type", "").lower()
|
content_type = req.headers.get("content-type", "").lower()
|
||||||
url_path = url.split("?")[0]
|
url_path = url.split("?")[0]
|
||||||
|
|
@ -50,7 +65,7 @@ def video_proxy():
|
||||||
or "application/vnd.apple.mpegurl" in content_type
|
or "application/vnd.apple.mpegurl" in content_type
|
||||||
)
|
)
|
||||||
|
|
||||||
if is_manifest:
|
if is_manifest and req.status_code == 200:
|
||||||
content = req.text
|
content = req.text
|
||||||
base_url = url.rsplit("/", 1)[0]
|
base_url = url.rsplit("/", 1)[0]
|
||||||
new_lines = []
|
new_lines = []
|
||||||
|
|
|
||||||
43
deploy.py
Normal file → Executable file
43
deploy.py
Normal file → Executable file
|
|
@ -1,7 +1,6 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
"""Check git status and redeploy."""
|
"""Build and push multi-platform Docker image."""
|
||||||
import subprocess
|
import subprocess
|
||||||
import os
|
|
||||||
|
|
||||||
def run_cmd(cmd):
|
def run_cmd(cmd):
|
||||||
print(f"\n>>> {cmd}")
|
print(f"\n>>> {cmd}")
|
||||||
|
|
@ -13,37 +12,17 @@ def run_cmd(cmd):
|
||||||
return result.returncode == 0
|
return result.returncode == 0
|
||||||
|
|
||||||
print("="*50)
|
print("="*50)
|
||||||
print("Checking git status...")
|
print("Building Multi-Platform Docker Image")
|
||||||
|
print("(linux/amd64 + linux/arm64)")
|
||||||
print("="*50)
|
print("="*50)
|
||||||
run_cmd("git status")
|
|
||||||
|
# Create buildx builder if it doesn't exist
|
||||||
|
run_cmd("docker buildx create --name multiplatform --use 2>/dev/null || docker buildx use multiplatform")
|
||||||
|
|
||||||
|
# Build and push multi-platform image
|
||||||
|
print("\nBuilding and pushing...")
|
||||||
|
run_cmd("docker buildx build --platform linux/amd64,linux/arm64 -t vndangkhoa/kv-tube:latest --push .")
|
||||||
|
|
||||||
print("\n" + "="*50)
|
print("\n" + "="*50)
|
||||||
print("Staging all changes...")
|
print("DONE! Image now supports both amd64 and arm64")
|
||||||
print("="*50)
|
|
||||||
run_cmd("git add .")
|
|
||||||
|
|
||||||
print("\n" + "="*50)
|
|
||||||
print("Committing...")
|
|
||||||
print("="*50)
|
|
||||||
run_cmd('git commit -m "Latest local changes"')
|
|
||||||
|
|
||||||
print("\n" + "="*50)
|
|
||||||
print("Pushing to GitHub...")
|
|
||||||
print("="*50)
|
|
||||||
run_cmd("git push origin main")
|
|
||||||
|
|
||||||
print("\n" + "="*50)
|
|
||||||
print("Pushing to Forgejo...")
|
|
||||||
print("="*50)
|
|
||||||
run_cmd("git push private main")
|
|
||||||
|
|
||||||
print("\n" + "="*50)
|
|
||||||
print("Building Docker image...")
|
|
||||||
print("="*50)
|
|
||||||
if run_cmd("docker build -t vndangkhoa/kv-tube:latest ."):
|
|
||||||
print("\nPushing Docker image...")
|
|
||||||
run_cmd("docker push vndangkhoa/kv-tube:latest")
|
|
||||||
|
|
||||||
print("\n" + "="*50)
|
|
||||||
print("DEPLOYMENT COMPLETE!")
|
|
||||||
print("="*50)
|
print("="*50)
|
||||||
|
|
|
||||||
2
kv_server.py
Normal file → Executable file
2
kv_server.py
Normal file → Executable file
|
|
@ -8,7 +8,7 @@ try:
|
||||||
except NameError:
|
except NameError:
|
||||||
base_dir = os.getcwd()
|
base_dir = os.getcwd()
|
||||||
|
|
||||||
venv_dirs = ['env', '.venv']
|
venv_dirs = ['.venv', 'env']
|
||||||
activated = False
|
activated = False
|
||||||
|
|
||||||
for venv_name in venv_dirs:
|
for venv_name in venv_dirs:
|
||||||
|
|
|
||||||
|
|
@ -4,3 +4,5 @@ yt-dlp>=2024.1.0
|
||||||
werkzeug
|
werkzeug
|
||||||
gunicorn
|
gunicorn
|
||||||
python-dotenv
|
python-dotenv
|
||||||
|
openai-whisper
|
||||||
|
numpy
|
||||||
|
|
|
||||||
28
start.sh
Normal file → Executable file
28
start.sh
Normal file → Executable file
|
|
@ -3,11 +3,26 @@ cd "$(dirname "$0")"
|
||||||
echo "=== Diagnostic Start Script ==="
|
echo "=== Diagnostic Start Script ==="
|
||||||
|
|
||||||
# Activate env
|
# Activate env
|
||||||
if [ -d "env" ]; then
|
# Activate env
|
||||||
|
if [ -d ".venv_clean" ]; then
|
||||||
|
echo "Activating .venv_clean..."
|
||||||
|
export PYTHONPATH="$(pwd)/.venv_clean/lib/python3.14/site-packages"
|
||||||
|
# Use system python with PYTHONPATH if bindir is missing/broken
|
||||||
|
PYTHON_EXEC="/Library/Frameworks/Python.framework/Versions/3.14/bin/python3"
|
||||||
|
export FLASK_APP=wsgi.py
|
||||||
|
export FLASK_RUN_PORT=5002
|
||||||
|
|
||||||
|
echo "--- Starting with System Python + PYTHONPATH ---"
|
||||||
|
$PYTHON_EXEC -m flask run --host=0.0.0.0 --port=5002
|
||||||
|
exit 0
|
||||||
|
elif [ -d ".venv" ]; then
|
||||||
|
echo "Activating .venv..."
|
||||||
|
source .venv/bin/activate
|
||||||
|
elif [ -d "env" ]; then
|
||||||
echo "Activating env..."
|
echo "Activating env..."
|
||||||
source env/bin/activate
|
source env/bin/activate
|
||||||
else
|
else
|
||||||
echo "No 'env' directory found!"
|
echo "No '.venv' or 'env' directory found!"
|
||||||
exit 1
|
exit 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
@ -23,13 +38,14 @@ else
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "--- Attempting to start with Gunicorn ---"
|
echo "--- Attempting to start with Gunicorn ---"
|
||||||
if [ -f "env/bin/gunicorn" ]; then
|
echo "--- Attempting to start with Gunicorn ---"
|
||||||
./env/bin/gunicorn -b 0.0.0.0:5002 wsgi:app
|
if command -v gunicorn &> /dev/null; then
|
||||||
|
gunicorn -b 0.0.0.0:5002 wsgi:app
|
||||||
else
|
else
|
||||||
echo "Gunicorn not found."
|
echo "Gunicorn not found in path."
|
||||||
fi
|
fi
|
||||||
|
|
||||||
echo "--- Attempting to start with Flask explicitly ---"
|
echo "--- Attempting to start with Flask explicitly ---"
|
||||||
export FLASK_APP=wsgi.py
|
export FLASK_APP=wsgi.py
|
||||||
export FLASK_RUN_PORT=5002
|
export FLASK_RUN_PORT=5002
|
||||||
./env/bin/flask run --host=0.0.0.0
|
python -m flask run --host=0.0.0.0
|
||||||
|
|
|
||||||
|
|
@ -1,144 +1,119 @@
|
||||||
/**
|
/**
|
||||||
* KV-Tube WebAI Service
|
* WebAI - Client-side AI features using Transformers.js
|
||||||
* Local AI chatbot for transcript Q&A using WebLLM
|
|
||||||
*
|
|
||||||
* Runs entirely in-browser, no server required after model download
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// WebLLM CDN import (lazy loaded)
|
// Suppress ONNX Runtime warnings
|
||||||
var WEBLLM_CDN = 'https://esm.run/@mlc-ai/web-llm';
|
if (typeof ort !== 'undefined') {
|
||||||
|
ort.env.logLevel = 'fatal';
|
||||||
|
}
|
||||||
|
|
||||||
// Model options - using verified WebLLM model IDs
|
/**
 * Client-side subtitle generator backed by the Transformers.js Whisper
 * speech-recognition pipeline. The model is loaded lazily by `init()` and
 * `generate()` turns an audio URL into WebVTT text.
 */
class SubtitleGenerator {
    constructor() {
        this.pipeline = null;
        this.isLoading = false;
        // Promise of the load currently in flight, shared by concurrent init() calls.
        this.loadPromise = null;
    }

    /**
     * Set the ONNX Runtime log level to 'fatal' when the global `ort` object exists.
     */
    static silenceOnnxRuntime() {
        if (typeof ort !== 'undefined') {
            ort.env.logLevel = 'fatal';
        }
    }

    /**
     * Load the Whisper pipeline once.
     *
     * Calls made while a load is already running wait for that same load
     * instead of returning early with no model, so `generate()` is safe to
     * call as soon as any `init()` promise resolves.
     *
     * @param {(message: string) => void} [progressCallback] - receives status text.
     * @returns {Promise<void>} resolves when the pipeline is ready.
     * @throws rethrows whatever error the import or the model load raised.
     */
    async init(progressCallback) {
        if (this.pipeline) return;
        if (this.loadPromise) return this.loadPromise;

        this.isLoading = true;
        this.loadPromise = this.loadModel(progressCallback).finally(() => {
            this.isLoading = false;
            this.loadPromise = null;
        });
        return this.loadPromise;
    }

    /**
     * Import Transformers.js and build the speech-recognition pipeline.
     *
     * @param {(message: string) => void} [progressCallback] - receives status text.
     * @returns {Promise<void>}
     */
    async loadModel(progressCallback) {
        try {
            // Suppress ONNX warnings at import time
            SubtitleGenerator.silenceOnnxRuntime();

            progressCallback?.('Loading AI model...');

            const { pipeline, env } = await import('https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2');

            // Configure environment
            env.allowLocalModels = false;
            env.useBrowserCache = true;

            // Repeat the suppression in case `ort` only exists after the import.
            SubtitleGenerator.silenceOnnxRuntime();

            progressCallback?.('Downloading Whisper model (~40MB)...');

            this.pipeline = await pipeline(
                'automatic-speech-recognition',
                'Xenova/whisper-tiny',
                {
                    progress_callback: (progress) => {
                        const message = this.describeProgress(progress);
                        if (message !== null) {
                            progressCallback?.(message);
                        }
                    },
                },
            );

            progressCallback?.('Model ready!');
        } catch (e) {
            console.error('Failed to load Whisper:', e);
            throw e;
        }
    }

    /**
     * Translate a Transformers.js progress event into user-facing text.
     *
     * Transformers.js v2 reports download progress with status 'progress'
     * (carrying `progress` as a percentage plus `loaded`/`total`) and status
     * 'done' when a file finishes. The older 'downloading' / 'loading' names
     * are still accepted so existing callers keep working.
     *
     * @param {{status?: string, progress?: number, loaded?: number, total?: number}} progress
     * @returns {string|null} message to show, or null when there is nothing to report.
     */
    describeProgress(progress) {
        const status = progress?.status;

        if (status === 'progress' || status === 'downloading') {
            const { progress: percent, loaded, total } = progress;
            if (Number.isFinite(percent)) {
                return `Downloading: ${Math.round(percent)}%`;
            }
            // Guard against a missing or zero total, which would print "NaN%".
            if (Number.isFinite(loaded) && total > 0) {
                return `Downloading: ${Math.round((loaded / total) * 100)}%`;
            }
            return null;
        }

        if (status === 'loading' || status === 'done') {
            return 'Loading model...';
        }

        return null;
    }

    /**
     * Transcribe audio and return it as WebVTT text.
     *
     * @param {string} audioUrl - URL of the audio to transcribe.
     * @param {(message: string) => void} [progressCallback] - receives status text.
     * @returns {Promise<string>} WebVTT document.
     * @throws {Error} when `init()` has not completed, or when transcription fails.
     */
    async generate(audioUrl, progressCallback) {
        if (!this.pipeline) {
            throw new Error('Model not initialized. Call init() first.');
        }

        progressCallback?.('Transcribing audio...');

        try {
            const result = await this.pipeline(audioUrl, {
                chunk_length_s: 30,
                stride_length_s: 5,
                return_timestamps: true,
            });

            progressCallback?.('Formatting subtitles...');

            // Convert to VTT format
            return this.toVTT(result?.chunks ?? []);
        } catch (e) {
            console.error('Transcription failed:', e);
            throw e;
        }
    }

    /**
     * Convert Whisper chunks (`{ timestamp: [start, end], text }`) to WebVTT.
     *
     * Chunks whose text is empty after trimming are skipped. Cue identifiers
     * keep the chunk's 1-based position in the input.
     *
     * @param {Array<{timestamp: Array<number|null>, text: string}>} chunks
     * @param {number} [fallbackDurationSeconds=5] - cue length to use when a
     *   chunk has no usable end time (Whisper can leave the last end as null).
     * @returns {string} WebVTT document.
     */
    toVTT(chunks, fallbackDurationSeconds = 5) {
        let vtt = 'WEBVTT\n\n';

        (chunks ?? []).forEach((chunk, i) => {
            const text = (chunk?.text ?? '').trim();
            if (!text) return;

            const [rawStart, rawEnd] = chunk.timestamp ?? [];
            const startSeconds = Number.isFinite(rawStart) ? rawStart : 0;
            // A missing end, or one not after the start, would yield a cue
            // that is never shown; give it a fallback duration instead.
            const endSeconds = Number.isFinite(rawEnd) && rawEnd > startSeconds
                ? rawEnd
                : startSeconds + fallbackDurationSeconds;

            vtt += `${i + 1}\n`;
            vtt += `${this.formatTime(startSeconds)} --> ${this.formatTime(endSeconds)}\n`;
            vtt += `${text}\n\n`;
        });

        return vtt;
    }

    /**
     * Format seconds as a WebVTT `HH:MM:SS.mmm` timestamp.
     *
     * @param {number|null|undefined} seconds - null, undefined, NaN and
     *   negative values are treated as 0.
     * @returns {string}
     */
    formatTime(seconds) {
        const value = Number(seconds);
        // Round once, in whole milliseconds, so 59.9996 becomes 00:01:00.000
        // rather than the invalid 00:00:60.000.
        const totalMs = Number.isFinite(value) && value > 0 ? Math.round(value * 1000) : 0;

        const h = Math.floor(totalMs / 3600000);
        const m = Math.floor((totalMs % 3600000) / 60000);
        const s = Math.floor((totalMs % 60000) / 1000);
        const ms = totalMs % 1000;

        const pad = (n, width) => String(n).padStart(width, '0');
        return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
    }
}
|
||||||
|
|
||||||
getModelInfo() {
|
// Export singleton
|
||||||
return DEFAULT_MODEL;
|
window.subtitleGenerator = new SubtitleGenerator();
|
||||||
}
|
|
||||||
|
|
||||||
isModelReady() {
|
|
||||||
return this.isReady;
|
|
||||||
}
|
|
||||||
|
|
||||||
isModelLoading() {
|
|
||||||
return this.isLoading;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Global instance
|
|
||||||
window.transcriptAI = new TranscriptAI();
|
|
||||||
}
|
|
||||||
|
|
|
||||||
|
|
@ -32,26 +32,28 @@
|
||||||
<div class="yt-section">
|
<div class="yt-section">
|
||||||
<div class="yt-section-header">
|
<div class="yt-section-header">
|
||||||
<div class="yt-tabs">
|
<div class="yt-tabs">
|
||||||
<a href="#" onclick="changeChannelTab('video', this); return false;" class="active">
|
<a href="javascript:void(0)" onclick="changeChannelTab('video', this); return false;"
|
||||||
|
class="active no-spa">
|
||||||
<i class="fas fa-video"></i>
|
<i class="fas fa-video"></i>
|
||||||
<span>Videos</span>
|
<span>Videos</span>
|
||||||
</a>
|
</a>
|
||||||
<a href="#" onclick="changeChannelTab('shorts', this); return false;">
|
<a href="javascript:void(0)" onclick="changeChannelTab('shorts', this); return false;" class="no-spa">
|
||||||
<i class="fas fa-bolt"></i>
|
<i class="fas fa-bolt"></i>
|
||||||
<span>Shorts</span>
|
<span>Shorts</span>
|
||||||
</a>
|
</a>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="yt-sort-options">
|
<div class="yt-sort-options">
|
||||||
<a href="#" onclick="changeChannelSort('latest', this); return false;" class="active">
|
<a href="javascript:void(0)" onclick="changeChannelSort('latest', this); return false;"
|
||||||
|
class="active no-spa">
|
||||||
<i class="fas fa-clock"></i>
|
<i class="fas fa-clock"></i>
|
||||||
<span>Latest</span>
|
<span>Latest</span>
|
||||||
</a>
|
</a>
|
||||||
<a href="#" onclick="changeChannelSort('popular', this); return false;">
|
<a href="javascript:void(0)" onclick="changeChannelSort('popular', this); return false;" class="no-spa">
|
||||||
<i class="fas fa-fire"></i>
|
<i class="fas fa-fire"></i>
|
||||||
<span>Popular</span>
|
<span>Popular</span>
|
||||||
</a>
|
</a>
|
||||||
<a href="#" onclick="changeChannelSort('oldest', this); return false;">
|
<a href="javascript:void(0)" onclick="changeChannelSort('oldest', this); return false;" class="no-spa">
|
||||||
<i class="fas fa-history"></i>
|
<i class="fas fa-history"></i>
|
||||||
<span>Oldest</span>
|
<span>Oldest</span>
|
||||||
</a>
|
</a>
|
||||||
|
|
@ -343,8 +345,8 @@
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
console.log(`Fetching: /api/channel/videos?id=${channelId}&page=${currentChannelPage}`);
|
console.log(`Fetching: /api/channel?id=${channelId}&page=${currentChannelPage}`);
|
||||||
const response = await fetch(`/api/channel/videos?id=${channelId}&page=${currentChannelPage}&sort=${currentChannelSort}&filter_type=${currentFilterType}`);
|
const response = await fetch(`/api/channel?id=${channelId}&page=${currentChannelPage}&sort=${currentChannelSort}&filter_type=${currentFilterType}`);
|
||||||
const videos = await response.json();
|
const videos = await response.json();
|
||||||
console.log("Channel Videos Response:", videos);
|
console.log("Channel Videos Response:", videos);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -554,6 +554,7 @@
|
||||||
|
|
||||||
<!-- WebAI Script -->
|
<!-- WebAI Script -->
|
||||||
<script src="{{ url_for('static', filename='js/webai.js') }}"></script>
|
<script src="{{ url_for('static', filename='js/webai.js') }}"></script>
|
||||||
|
|
||||||
<script>
|
<script>
|
||||||
// AI Chat Toggle and Message Handler
|
// AI Chat Toggle and Message Handler
|
||||||
var aiChatVisible = false;
|
var aiChatVisible = false;
|
||||||
|
|
|
||||||
|
|
@ -352,22 +352,35 @@
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Run on initial page load
|
// Run on initial page load and SPA navigation
|
||||||
|
if (document.readyState === 'loading') {
|
||||||
document.addEventListener('DOMContentLoaded', () => {
|
document.addEventListener('DOMContentLoaded', () => {
|
||||||
loadLibraryContent();
|
loadLibraryContent();
|
||||||
|
initTabs();
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// Document already loaded (SPA navigation)
|
||||||
|
loadLibraryContent();
|
||||||
|
initTabs();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Wire up the library tabs for client-side navigation.
 *
 * Each `.library-tab` element is swapped for a clone, which drops any click
 * listeners attached earlier, and the clone gets one handler that pushes the
 * tab's href onto the history stack and reloads the library content.
 */
function initTabs() {
    const staleTabs = document.querySelectorAll('.library-tab');

    for (const staleTab of staleTabs) {
        // Cloning discards listeners from a previous visit to this page.
        const freshTab = staleTab.cloneNode(true);
        staleTab.parentNode.replaceChild(freshTab, staleTab);

        freshTab.addEventListener('click', (event) => {
            event.preventDefault();

            // Change the address bar without a full page load, then refresh the view.
            const targetUrl = freshTab.getAttribute('href');
            history.pushState(null, '', targetUrl);
            loadLibraryContent();
        });
    }
}
|
||||||
|
|
||||||
// Handle browser back/forward buttons
|
// Handle browser back/forward buttons
|
||||||
window.addEventListener('popstate', () => {
|
window.addEventListener('popstate', () => {
|
||||||
|
|
|
||||||
|
|
@ -39,13 +39,6 @@
|
||||||
|
|
||||||
<!-- Actions -->
|
<!-- Actions -->
|
||||||
<div class="yt-video-actions">
|
<div class="yt-video-actions">
|
||||||
<button class="yt-action-btn" id="likeBtn">
|
|
||||||
<i class="fas fa-thumbs-up"></i>
|
|
||||||
<span id="likeCount">Like</span>
|
|
||||||
</button>
|
|
||||||
<button class="yt-action-btn" id="dislikeBtn">
|
|
||||||
<i class="fas fa-thumbs-down"></i>
|
|
||||||
</button>
|
|
||||||
<button class="yt-action-btn" id="shareBtn">
|
<button class="yt-action-btn" id="shareBtn">
|
||||||
<i class="fas fa-share"></i>
|
<i class="fas fa-share"></i>
|
||||||
Share
|
Share
|
||||||
|
|
@ -54,7 +47,7 @@
|
||||||
<i class="fas fa-download"></i>
|
<i class="fas fa-download"></i>
|
||||||
Download
|
Download
|
||||||
</button>
|
</button>
|
||||||
<button class="yt-action-btn" id="saveBtn">
|
<button class="yt-action-btn" id="saveBtn" onclick="toggleSaveToLibrary()">
|
||||||
<i class="far fa-bookmark"></i>
|
<i class="far fa-bookmark"></i>
|
||||||
Save
|
Save
|
||||||
</button>
|
</button>
|
||||||
|
|
@ -67,6 +60,10 @@
|
||||||
Queue
|
Queue
|
||||||
<span id="queueBadge" class="queue-badge" style="display:none;">0</span>
|
<span id="queueBadge" class="queue-badge" style="display:none;">0</span>
|
||||||
</button>
|
</button>
|
||||||
|
<button class="yt-action-btn" id="genSubBtn" onclick="generateSubtitles()" style="display:none;">
|
||||||
|
<i class="fas fa-closed-captioning"></i>
|
||||||
|
Generate Subs
|
||||||
|
</button>
|
||||||
|
|
||||||
<!-- View Mode Buttons -->
|
<!-- View Mode Buttons -->
|
||||||
<div class="view-mode-buttons">
|
<div class="view-mode-buttons">
|
||||||
|
|
@ -1247,7 +1244,8 @@
|
||||||
thumbnail: `https://i.ytimg.com/vi/${videoId}/mqdefault.jpg`,
|
thumbnail: `https://i.ytimg.com/vi/${videoId}/mqdefault.jpg`,
|
||||||
uploader: data.uploader || 'Unknown',
|
uploader: data.uploader || 'Unknown',
|
||||||
channel_id: data.channel_id || data.uploader_id || '',
|
channel_id: data.channel_id || data.uploader_id || '',
|
||||||
duration: data.duration
|
duration: data.duration,
|
||||||
|
audioUrl: data.audio_url
|
||||||
};
|
};
|
||||||
|
|
||||||
// Check if video is already in queue
|
// Check if video is already in queue
|
||||||
|
|
@ -1255,6 +1253,12 @@
|
||||||
updateQueueCount();
|
updateQueueCount();
|
||||||
updateQueueBadge();
|
updateQueueBadge();
|
||||||
|
|
||||||
|
// Enable AI Subtitles (Always show for testing if audio is available)
|
||||||
|
if (data.audio_url) {
|
||||||
|
const genBtn = document.getElementById('genSubBtn');
|
||||||
|
if (genBtn) genBtn.style.display = 'inline-flex';
|
||||||
|
}
|
||||||
|
|
||||||
if (data.error) {
|
if (data.error) {
|
||||||
loading.innerHTML = `<p style="color:#f00; text-align:center;">${data.error}</p>`;
|
loading.innerHTML = `<p style="color:#f00; text-align:center;">${data.error}</p>`;
|
||||||
if (infoSkeleton) infoSkeleton.style.display = 'none';
|
if (infoSkeleton) infoSkeleton.style.display = 'none';
|
||||||
|
|
@ -1394,6 +1398,54 @@
|
||||||
return div.innerHTML;
|
return div.innerHTML;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
async function generateSubtitles() {
|
||||||
|
const btn = document.getElementById('genSubBtn');
|
||||||
|
|
||||||
|
if (!currentVideoData.audioUrl) {
|
||||||
|
showToast("No audio source available for AI", "error");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
btn.disabled = true;
|
||||||
|
const originalHtml = btn.innerHTML;
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Initialize/Load
|
||||||
|
await window.subtitleGenerator.init((msg) => {
|
||||||
|
btn.innerHTML = `<i class="fas fa-spinner fa-spin"></i> ${msg}`;
|
||||||
|
});
|
||||||
|
|
||||||
|
// Generate
|
||||||
|
btn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Transcribing...';
|
||||||
|
|
||||||
|
const vttContent = await window.subtitleGenerator.generate(currentVideoData.audioUrl, (msg) => {
|
||||||
|
btn.innerHTML = `<i class="fas fa-circle-notch fa-spin"></i> ${msg}`;
|
||||||
|
});
|
||||||
|
|
||||||
|
console.log("Generated VTT:", vttContent);
|
||||||
|
|
||||||
|
// Inject into Player
|
||||||
|
if (window.player) {
|
||||||
|
const blob = new Blob([vttContent], { type: 'text/vtt' });
|
||||||
|
const url = URL.createObjectURL(blob);
|
||||||
|
|
||||||
|
// Artplayer subtitle API
|
||||||
|
window.player.subtitle.url = url;
|
||||||
|
window.player.subtitle.show = true;
|
||||||
|
window.player.notice.show = 'AI Subtitles Generated';
|
||||||
|
|
||||||
|
showToast("Subtitles generated successfully!", "success");
|
||||||
|
btn.style.display = 'none'; // Hide button after success
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (e) {
|
||||||
|
console.error(e);
|
||||||
|
showToast("Subtitle generation failed: " + e.message, "error");
|
||||||
|
btn.innerHTML = originalHtml;
|
||||||
|
btn.disabled = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
async function summarizeVideo() {
|
async function summarizeVideo() {
|
||||||
const videoId = "{{ video_id }}";
|
const videoId = "{{ video_id }}";
|
||||||
const btn = document.getElementById('summarizeBtn');
|
const btn = document.getElementById('summarizeBtn');
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue