diff --git a/app/routes/api.py b/app/routes/api.py index 6e124e0..0b1d6dc 100755 --- a/app/routes/api.py +++ b/app/routes/api.py @@ -15,6 +15,8 @@ import time import random import concurrent.futures import yt_dlp +import tempfile +import threading logger = logging.getLogger(__name__) @@ -28,6 +30,10 @@ DB_NAME = os.path.join(DATA_DIR, "kvtube.db") API_CACHE = {} CACHE_TIMEOUT = 600 # 10 minutes +# AI Models +WHISPER_MODEL = None +WHISPER_LOCK = threading.Lock() + def get_db_connection(): """Get database connection with row factory.""" @@ -436,10 +442,10 @@ def get_stream_info(): "format": "best[ext=mp4]/best", "noplaylist": True, "quiet": True, - "no_warnings": True, "skip_download": True, - "force_ipv4": True, "socket_timeout": 10, + "force_ipv4": True, + "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", } with yt_dlp.YoutubeDL(ydl_opts) as ydl: @@ -453,6 +459,10 @@ def get_stream_info(): if not stream_url: return jsonify({"error": "No stream URL found"}), 500 + # Log the headers yt-dlp expects us to use + expected_headers = info.get("http_headers", {}) + logger.info(f"YT-DLP Expected Headers: {expected_headers}") + # Extract subtitles subtitle_url = None subs = info.get("subtitles") or {} @@ -466,6 +476,33 @@ def get_stream_info(): subtitle_url = auto_subs[lang][0]["url"] break + # Extract best audio-only URL for AI transcription + audio_url = None + try: + formats = info.get("formats", []) + # Debug: Log format details to understand why we aren't matching + # logger.info(f"Scanning {len(formats)} formats for audio-only...") + + audio_formats = [] + for f in formats: + vcodec = f.get("vcodec") + acodec = f.get("acodec") + # Check for audio-only: vcodec should be none/None, acodec should be something + if (vcodec == "none" or vcodec is None) and (acodec != "none" and acodec is not None): + audio_formats.append(f) + + if audio_formats: + # Prefer m4a (itag 140) for best compatibility, or webm (251) + # Sort by filesize (smaller is faster for whisper) or bitrate? + # For now simply pick the first one that looks like m4a, else first available + chosen_audio = next((f for f in audio_formats if f.get("ext") == "m4a"), audio_formats[0]) + audio_url = chosen_audio.get("url") + logger.info(f"Found audio-only URL: {audio_url[:30]}...") + else: + logger.warning("No audio-only formats found in valid stream info.") + except Exception as e: + logger.error(f"Failed to extract audio url: {e}") + response_data = { "original_url": stream_url, "title": info.get("title", "Unknown"), @@ -477,8 +514,16 @@ def get_stream_info(): "view_count": info.get("view_count", 0), "related": [], "subtitle_url": subtitle_url, + "audio_url": None # Placeholder, filled below } + from urllib.parse import quote + proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}" + response_data["stream_url"] = proxied_url + + if audio_url: + response_data["audio_url"] = f"/video_proxy?url={quote(audio_url, safe='')}" + # Cache it expiry = current_time + 3600 conn.execute( @@ -488,10 +533,6 @@ def get_stream_info(): conn.commit() conn.close() - from urllib.parse import quote - proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}" - response_data["stream_url"] = proxied_url - response = jsonify(response_data) response.headers["X-Cache"] = "MISS" return response @@ -513,7 +554,12 @@ def search(): if url_match: video_id = url_match.group(1) # Fetch single video info - ydl_opts = {"quiet": True, "no_warnings": True, "noplaylist": True} + ydl_opts = { + "quiet": True, + "no_warnings": True, + "noplaylist": True, + "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", + } with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(f"https://www.youtube.com/watch?v={video_id}", download=False) return jsonify([{ @@ -539,17 +585,20 @@ def search(): def get_channel_videos_simple(): """Get videos from a channel.""" channel_id = request.args.get("id") + filter_type = request.args.get("filter_type", "video") if not channel_id: return jsonify({"error": "No channel ID provided"}), 400 try: # Construct URL + suffix = "shorts" if filter_type == "shorts" else "videos" + if channel_id.startswith("UC"): - url = f"https://www.youtube.com/channel/{channel_id}/videos" + url = f"https://www.youtube.com/channel/{channel_id}/{suffix}" elif channel_id.startswith("@"): - url = f"https://www.youtube.com/{channel_id}/videos" + url = f"https://www.youtube.com/{channel_id}/{suffix}" else: - url = f"https://www.youtube.com/channel/{channel_id}/videos" + url = f"https://www.youtube.com/channel/{channel_id}/{suffix}" cmd = [ sys.executable, "-m", "yt_dlp", @@ -743,6 +792,85 @@ def get_transcript(): return jsonify({"success": False, "error": f"Could not load transcript: {str(e)}"}) +@api_bp.route("/generate_subtitles", methods=["POST"]) +def generate_subtitles(): + """Generate subtitles using server-side Whisper.""" + global WHISPER_MODEL + + data = request.get_json() + video_id = data.get("video_id") + + if not video_id: + return jsonify({"error": "No video ID provided"}), 400 + + temp_path = None + try: + # Lazy load model + with WHISPER_LOCK: + if WHISPER_MODEL is None: + import whisper + logger.info("Loading Whisper model (tiny)...") + WHISPER_MODEL = whisper.load_model("tiny") + + # Extract Audio URL + url = f"https://www.youtube.com/watch?v={video_id}" + ydl_opts = { + "format": "bestaudio[ext=m4a]/bestaudio/best", + "noplaylist": True, + "quiet": True, + "force_ipv4": True, + } + + audio_url = None + with yt_dlp.YoutubeDL(ydl_opts) as ydl: + info = ydl.extract_info(url, download=False) + audio_url = info.get("url") + + if not audio_url: + return jsonify({"error": "Could not extract audio URL"}), 500 + + # Download audio to temp file + import requests + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", + } + + logger.info(f"Downloading audio for transcription: {audio_url[:30]}...") + with requests.get(audio_url, headers=headers, stream=True) as r: + r.raise_for_status() + with tempfile.NamedTemporaryFile(suffix=".m4a", delete=False) as f: + temp_path = f.name + for chunk in r.iter_content(chunk_size=8192): + f.write(chunk) + + # Transcribe + logger.info("Transcribing...") + result = WHISPER_MODEL.transcribe(temp_path) + + # Convert to VTT + def format_timestamp(seconds): + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + seconds = seconds % 60 + return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}" + + vtt_output = "WEBVTT\n\n" + for segment in result["segments"]: + start = format_timestamp(segment["start"]) + end = format_timestamp(segment["end"]) + text = segment["text"].strip() + vtt_output += f"{start} --> {end}\n{text}\n\n" + + return jsonify({"success": True, "vtt": vtt_output}) + + except Exception as e: + logger.error(f"Subtitle generation failed: {e}") + return jsonify({"error": str(e)}), 500 + finally: + if temp_path and os.path.exists(temp_path): + os.remove(temp_path) + + @api_bp.route("/update_ytdlp", methods=["POST"]) def update_ytdlp(): """Update yt-dlp to latest version.""" diff --git a/app/routes/streaming.py b/app/routes/streaming.py index 3659167..7eeb924 100755 --- a/app/routes/streaming.py +++ b/app/routes/streaming.py @@ -6,6 +6,14 @@ from flask import Blueprint, request, Response, stream_with_context, send_from_d import requests import os import logging +import socket +import urllib3.util.connection as urllib3_cn + +# Force IPv4 for requests (which uses urllib3) +def allowed_gai_family(): + return socket.AF_INET + +urllib3_cn.allowed_gai_family = allowed_gai_family logger = logging.getLogger(__name__) @@ -30,7 +38,8 @@ def video_proxy(): # Forward headers to mimic browser and support seeking headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36", + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", + # "Referer": "https://www.youtube.com/", # Removed to test if it fixes 403 } # Support Range requests (scrubbing) @@ -39,7 +48,13 @@ def video_proxy(): headers["Range"] = range_header try: + logger.info(f"Proxying URL: {url}") + # logger.info(f"Proxy Request Headers: {headers}") req = requests.get(url, headers=headers, stream=True, timeout=30) + + logger.info(f"Upstream Status: {req.status_code}") + if req.status_code != 200: + logger.error(f"Upstream Error Body: {req.text[:500]}") # Handle HLS (M3U8) Rewriting - CRITICAL for 1080p+ and proper sync content_type = req.headers.get("content-type", "").lower() @@ -50,7 +65,7 @@ def video_proxy(): or "application/vnd.apple.mpegurl" in content_type ) - if is_manifest: + if is_manifest and req.status_code == 200: content = req.text base_url = url.rsplit("/", 1)[0] new_lines = [] diff --git a/deploy.py b/deploy.py old mode 100644 new mode 100755 index 1ccf0b1..5cd72b3 --- a/deploy.py +++ b/deploy.py @@ -1,7 +1,6 @@ #!/usr/bin/env python3 -"""Check git status and redeploy.""" +"""Build and push multi-platform Docker image.""" import subprocess -import os def run_cmd(cmd): print(f"\n>>> {cmd}") @@ -13,37 +12,17 @@ def run_cmd(cmd): return result.returncode == 0 print("="*50) -print("Checking git status...") +print("Building Multi-Platform Docker Image") +print("(linux/amd64 + linux/arm64)") print("="*50) -run_cmd("git status") + +# Create buildx builder if it doesn't exist +run_cmd("docker buildx create --name multiplatform --use 2>/dev/null || docker buildx use multiplatform") + +# Build and push multi-platform image +print("\nBuilding and pushing...") +run_cmd("docker buildx build --platform linux/amd64,linux/arm64 -t vndangkhoa/kv-tube:latest --push .") print("\n" + "="*50) -print("Staging all changes...") -print("="*50) -run_cmd("git add .") - -print("\n" + "="*50) -print("Committing...") -print("="*50) -run_cmd('git commit -m "Latest local changes"') - -print("\n" + "="*50) -print("Pushing to GitHub...") -print("="*50) -run_cmd("git push origin main") - -print("\n" + "="*50) -print("Pushing to Forgejo...") -print("="*50) -run_cmd("git push private main") - -print("\n" + "="*50) -print("Building Docker image...") -print("="*50) -if run_cmd("docker build -t vndangkhoa/kv-tube:latest ."): - print("\nPushing Docker image...") - run_cmd("docker push vndangkhoa/kv-tube:latest") - -print("\n" + "="*50) -print("DEPLOYMENT COMPLETE!") +print("DONE! Image now supports both amd64 and arm64") print("="*50) diff --git a/kv_server.py b/kv_server.py old mode 100644 new mode 100755 index f978615..2022214 --- a/kv_server.py +++ b/kv_server.py @@ -8,7 +8,7 @@ try: except NameError: base_dir = os.getcwd() -venv_dirs = ['env', '.venv'] +venv_dirs = ['.venv', 'env'] activated = False for venv_name in venv_dirs: diff --git a/requirements.txt b/requirements.txt index bf221b9..07fe367 100755 --- a/requirements.txt +++ b/requirements.txt @@ -4,3 +4,5 @@ yt-dlp>=2024.1.0 werkzeug gunicorn python-dotenv +openai-whisper +numpy diff --git a/start.sh b/start.sh old mode 100644 new mode 100755 index 10e2696..1076652 --- a/start.sh +++ b/start.sh @@ -3,11 +3,26 @@ cd "$(dirname "$0")" echo "=== Diagnostic Start Script ===" # Activate env -if [ -d "env" ]; then +# Activate env +if [ -d ".venv_clean" ]; then + echo "Activating .venv_clean..." + export PYTHONPATH="$(pwd)/.venv_clean/lib/python3.14/site-packages" + # Use system python with PYTHONPATH if bindir is missing/broken + PYTHON_EXEC="/Library/Frameworks/Python.framework/Versions/3.14/bin/python3" + export FLASK_APP=wsgi.py + export FLASK_RUN_PORT=5002 + + echo "--- Starting with System Python + PYTHONPATH ---" + $PYTHON_EXEC -m flask run --host=0.0.0.0 --port=5002 + exit 0 +elif [ -d ".venv" ]; then + echo "Activating .venv..." + source .venv/bin/activate +elif [ -d "env" ]; then echo "Activating env..." source env/bin/activate else - echo "No 'env' directory found!" + echo "No '.venv' or 'env' directory found!" exit 1 fi @@ -23,13 +38,14 @@ else fi echo "--- Attempting to start with Gunicorn ---" -if [ -f "env/bin/gunicorn" ]; then - ./env/bin/gunicorn -b 0.0.0.0:5002 wsgi:app +echo "--- Attempting to start with Gunicorn ---" +if command -v gunicorn &> /dev/null; then + gunicorn -b 0.0.0.0:5002 wsgi:app else - echo "Gunicorn not found." + echo "Gunicorn not found in path." fi echo "--- Attempting to start with Flask explicitly ---" export FLASK_APP=wsgi.py export FLASK_RUN_PORT=5002 -./env/bin/flask run --host=0.0.0.0 +python -m flask run --host=0.0.0.0 diff --git a/static/js/webai.js b/static/js/webai.js index b0b5eee..f3fc261 100755 --- a/static/js/webai.js +++ b/static/js/webai.js @@ -1,144 +1,119 @@ -/** - * KV-Tube WebAI Service - * Local AI chatbot for transcript Q&A using WebLLM - * - * Runs entirely in-browser, no server required after model download - */ - -// WebLLM CDN import (lazy loaded) -var WEBLLM_CDN = 'https://esm.run/@mlc-ai/web-llm'; - -// Model options - using verified WebLLM model IDs -var AI_MODELS = { - small: { id: 'Qwen2-0.5B-Instruct-q4f16_1-MLC', name: 'Qwen2 (0.5B)', size: '350MB' }, - medium: { id: 'Qwen2-1.5B-Instruct-q4f16_1-MLC', name: 'Qwen2 (1.5B)', size: '1GB' }, -}; - -// Default to small model -var DEFAULT_MODEL = AI_MODELS.small; - -if (typeof TranscriptAI === 'undefined') { - window.TranscriptAI = class TranscriptAI { - constructor() { - this.engine = null; - this.isLoading = false; - this.isReady = false; - this.transcript = ''; - this.onProgressCallback = null; - this.onReadyCallback = null; - } - - setTranscript(text) { - this.transcript = text.slice(0, 8000); // Limit context size - } - - setCallbacks({ onProgress, onReady }) { - this.onProgressCallback = onProgress; - this.onReadyCallback = onReady; - } - - async init() { - if (this.isReady || this.isLoading) return; - - this.isLoading = true; - - try { - // Dynamic import WebLLM - const { CreateMLCEngine } = await import(WEBLLM_CDN); - - // Initialize engine with progress callback - this.engine = await CreateMLCEngine(DEFAULT_MODEL.id, { - initProgressCallback: (report) => { - if (this.onProgressCallback) { - this.onProgressCallback(report); - } - console.log('AI Load Progress:', report.text); - } - }); - - this.isReady = true; - this.isLoading = false; - - if (this.onReadyCallback) { - this.onReadyCallback(); - } - - console.log('TranscriptAI ready with model:', DEFAULT_MODEL.name); - - } catch (err) { - this.isLoading = false; - console.error('Failed to load AI model:', err); - throw err; - } - } - - async ask(question) { - if (!this.isReady) { - throw new Error('AI not initialized'); - } - - const systemPrompt = this.transcript - ? `You are a helpful AI assistant analyzing a video transcript. Answer the user's question based ONLY on the transcript content below. Be concise and direct. If the answer is not in the transcript, say so.\n\nTRANSCRIPT:\n${this.transcript}` - : `You are a helpful AI assistant for KV-Tube, a lightweight YouTube client. You can help the user with general questions, explain features of the app, or chat casually. Be concise and helpful.`; - - try { - const response = await this.engine.chat.completions.create({ - messages: [ - { role: 'system', content: systemPrompt }, - { role: 'user', content: question } - ], - max_tokens: 256, - temperature: 0.7, - }); - - return response.choices[0].message.content; - - } catch (err) { - console.error('AI response error:', err); - throw err; - } - } - - async *askStreaming(question) { - if (!this.isReady) { - throw new Error('AI not initialized'); - } - - const systemPrompt = this.transcript - ? `You are a helpful AI assistant analyzing a video transcript. Answer the user's question based ONLY on the transcript content below. Be concise and direct. If the answer is not in the transcript, say so.\n\nTRANSCRIPT:\n${this.transcript}` - : `You are a helpful AI assistant for KV-Tube, a lightweight YouTube client. You can help the user with general questions, explain features of the app, or chat casually. Be concise and helpful.`; - - const chunks = await this.engine.chat.completions.create({ - messages: [ - { role: 'system', content: systemPrompt }, - { role: 'user', content: question } - ], - max_tokens: 256, - temperature: 0.7, - stream: true, - }); - - for await (const chunk of chunks) { - const delta = chunk.choices[0]?.delta?.content; - if (delta) { - yield delta; - } - } - } - - getModelInfo() { - return DEFAULT_MODEL; - } - - isModelReady() { - return this.isReady; - } - - isModelLoading() { - return this.isLoading; - } - } - - // Global instance - window.transcriptAI = new TranscriptAI(); -} +/** + * WebAI - Client-side AI features using Transformers.js + */ + +// Suppress ONNX Runtime warnings +if (typeof ort !== 'undefined') { + ort.env.logLevel = 'fatal'; +} + +class SubtitleGenerator { + constructor() { + this.pipeline = null; + this.isLoading = false; + } + + async init(progressCallback) { + if (this.pipeline) return; + if (this.isLoading) return; + + this.isLoading = true; + + try { + // Suppress ONNX warnings at import time + if (typeof ort !== 'undefined') { + ort.env.logLevel = 'fatal'; + } + + progressCallback?.('Loading AI model...'); + + const { pipeline, env } = await import('https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'); + + // Configure environment + env.allowLocalModels = false; + env.useBrowserCache = true; + + // Suppress ONNX Runtime warnings + if (typeof ort !== 'undefined') { + ort.env.logLevel = 'fatal'; + } + + progressCallback?.('Downloading Whisper model (~40MB)...'); + + this.pipeline = await pipeline( + 'automatic-speech-recognition', + 'Xenova/whisper-tiny', + { + progress_callback: (progress) => { + if (progress.status === 'downloading') { + const pct = Math.round((progress.loaded / progress.total) * 100); + progressCallback?.(`Downloading: ${pct}%`); + } else if (progress.status === 'loading') { + progressCallback?.('Loading model...'); + } + } + } + ); + + progressCallback?.('Model ready!'); + } catch (e) { + console.error('Failed to load Whisper:', e); + throw e; + } finally { + this.isLoading = false; + } + } + + async generate(audioUrl, progressCallback) { + if (!this.pipeline) { + throw new Error('Model not initialized. Call init() first.'); + } + + progressCallback?.('Transcribing audio...'); + + try { + const result = await this.pipeline(audioUrl, { + chunk_length_s: 30, + stride_length_s: 5, + return_timestamps: true, + }); + + progressCallback?.('Formatting subtitles...'); + + // Convert to VTT format + return this.toVTT(result.chunks || []); + } catch (e) { + console.error('Transcription failed:', e); + throw e; + } + } + + toVTT(chunks) { + let vtt = 'WEBVTT\n\n'; + + chunks.forEach((chunk, i) => { + const start = this.formatTime(chunk.timestamp[0]); + const end = this.formatTime(chunk.timestamp[1]); + const text = chunk.text.trim(); + + if (text) { + vtt += `${i + 1}\n`; + vtt += `${start} --> ${end}\n`; + vtt += `${text}\n\n`; + } + }); + + return vtt; + } + + formatTime(seconds) { + if (seconds === null || seconds === undefined) seconds = 0; + const h = Math.floor(seconds / 3600); + const m = Math.floor((seconds % 3600) / 60); + const s = (seconds % 60).toFixed(3); + return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.padStart(6, '0')}`; + } +} + +// Export singleton +window.subtitleGenerator = new SubtitleGenerator(); diff --git a/templates/channel.html b/templates/channel.html index 3a8fd52..5f02539 100755 --- a/templates/channel.html +++ b/templates/channel.html @@ -32,26 +32,28 @@