From 6c1f459cd6c459fc9563075af552eb850d4506af Mon Sep 17 00:00:00 2001 From: KV-Tube Deployer Date: Mon, 12 Jan 2026 17:40:31 +0700 Subject: [PATCH] chore: cleanup project files and remove CC functionality --- .dockerignore | 23 ++-- .gitignore | 4 + app/routes/api.py | 182 +------------------------------- requirements.txt | 3 +- static/css/modules/captions.css | 73 ------------- static/js/webai.js | 119 --------------------- templates/watch.html | 80 ++------------ update_and_restart.sh | 18 ---- 8 files changed, 26 insertions(+), 476 deletions(-) delete mode 100755 static/css/modules/captions.css delete mode 100755 static/js/webai.js delete mode 100755 update_and_restart.sh diff --git a/.dockerignore b/.dockerignore index 3211b7b..0abe5eb 100755 --- a/.dockerignore +++ b/.dockerignore @@ -1,12 +1,11 @@ -__pycache__ -.venv -.git -.env -*.mp4 -*.webm -*.mp3 -videos/ -data/ -temp/ -deployment_package/ -kvtube.db +.venv/ +.venv_clean/ +env/ +__pycache__/ +.git/ +.DS_Store +*.pyc +*.pyo +*.pyd +.idea/ +.vscode/ diff --git a/.gitignore b/.gitignore index a8cc60b..90b501b 100755 --- a/.gitignore +++ b/.gitignore @@ -2,7 +2,11 @@ __pycache__/ *.pyc venv/ +.venv/ +.venv_clean/ .env data/ videos/ *.db +server.log +.ruff_cache/ diff --git a/app/routes/api.py b/app/routes/api.py index 0b1d6dc..e924f19 100755 --- a/app/routes/api.py +++ b/app/routes/api.py @@ -15,8 +15,7 @@ import time import random import concurrent.futures import yt_dlp -import tempfile -import threading + logger = logging.getLogger(__name__) @@ -30,9 +29,6 @@ DB_NAME = os.path.join(DATA_DIR, "kvtube.db") API_CACHE = {} CACHE_TIMEOUT = 600 # 10 minutes -# AI Models -WHISPER_MODEL = None -WHISPER_LOCK = threading.Lock() def get_db_connection(): @@ -463,45 +459,9 @@ def get_stream_info(): expected_headers = info.get("http_headers", {}) logger.info(f"YT-DLP Expected Headers: {expected_headers}") - # Extract subtitles - subtitle_url = None - subs = info.get("subtitles") or {} - auto_subs = info.get("automatic_captions") or {} - - for lang in ["en", "vi"]: - if lang in subs and subs[lang]: - subtitle_url = subs[lang][0]["url"] - break - if lang in auto_subs and auto_subs[lang]: - subtitle_url = auto_subs[lang][0]["url"] - break - # Extract best audio-only URL for AI transcription - audio_url = None - try: - formats = info.get("formats", []) - # Debug: Log format details to understand why we aren't matching - # logger.info(f"Scanning {len(formats)} formats for audio-only...") - - audio_formats = [] - for f in formats: - vcodec = f.get("vcodec") - acodec = f.get("acodec") - # Check for audio-only: vcodec should be none/None, acodec should be something - if (vcodec == "none" or vcodec is None) and (acodec != "none" and acodec is not None): - audio_formats.append(f) - if audio_formats: - # Prefer m4a (itag 140) for best compatibility, or webm (251) - # Sort by filesize (smaller is faster for whisper) or bitrate? - # For now simply pick the first one that looks like m4a, else first available - chosen_audio = next((f for f in audio_formats if f.get("ext") == "m4a"), audio_formats[0]) - audio_url = chosen_audio.get("url") - logger.info(f"Found audio-only URL: {audio_url[:30]}...") - else: - logger.warning("No audio-only formats found in valid stream info.") - except Exception as e: - logger.error(f"Failed to extract audio url: {e}") + response_data = { "original_url": stream_url, @@ -513,16 +473,14 @@ def get_stream_info(): "upload_date": info.get("upload_date", ""), "view_count": info.get("view_count", 0), "related": [], - "subtitle_url": subtitle_url, - "audio_url": None # Placeholder, filled below + } from urllib.parse import quote proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}" response_data["stream_url"] = proxied_url - if audio_url: - response_data["audio_url"] = f"/video_proxy?url={quote(audio_url, safe='')}" + # Cache it expiry = current_time + 3600 @@ -759,116 +717,10 @@ def summarize_video(): return jsonify({"success": False, "message": f"Could not summarize: {str(e)}"}) -@api_bp.route("/transcript") -def get_transcript(): - """Get video transcript.""" - video_id = request.args.get("v") - if not video_id: - return jsonify({"success": False, "error": "No video ID provided"}), 400 - - try: - from youtube_transcript_api import YouTubeTranscriptApi - - transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) - - try: - transcript = transcript_list.find_transcript(["en", "vi"]) - except Exception: - transcript = transcript_list.find_generated_transcript(["en", "vi"]) - - transcript_data = transcript.fetch() - full_text = " ".join([entry["text"] for entry in transcript_data]) - - return jsonify({ - "success": True, - "video_id": video_id, - "transcript": transcript_data, - "language": "en", - "is_generated": True, - "full_text": full_text[:10000], - }) - - except Exception as e: - return jsonify({"success": False, "error": f"Could not load transcript: {str(e)}"}) -@api_bp.route("/generate_subtitles", methods=["POST"]) -def generate_subtitles(): - """Generate subtitles using server-side Whisper.""" - global WHISPER_MODEL - - data = request.get_json() - video_id = data.get("video_id") - if not video_id: - return jsonify({"error": "No video ID provided"}), 400 - temp_path = None - try: - # Lazy load model - with WHISPER_LOCK: - if WHISPER_MODEL is None: - import whisper - logger.info("Loading Whisper model (tiny)...") - WHISPER_MODEL = whisper.load_model("tiny") - - # Extract Audio URL - url = f"https://www.youtube.com/watch?v={video_id}" - ydl_opts = { - "format": "bestaudio[ext=m4a]/bestaudio/best", - "noplaylist": True, - "quiet": True, - "force_ipv4": True, - } - - audio_url = None - with yt_dlp.YoutubeDL(ydl_opts) as ydl: - info = ydl.extract_info(url, download=False) - audio_url = info.get("url") - - if not audio_url: - return jsonify({"error": "Could not extract audio URL"}), 500 - - # Download audio to temp file - import requests - headers = { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36", - } - - logger.info(f"Downloading audio for transcription: {audio_url[:30]}...") - with requests.get(audio_url, headers=headers, stream=True) as r: - r.raise_for_status() - with tempfile.NamedTemporaryFile(suffix=".m4a", delete=False) as f: - temp_path = f.name - for chunk in r.iter_content(chunk_size=8192): - f.write(chunk) - - # Transcribe - logger.info("Transcribing...") - result = WHISPER_MODEL.transcribe(temp_path) - - # Convert to VTT - def format_timestamp(seconds): - hours = int(seconds // 3600) - minutes = int((seconds % 3600) // 60) - seconds = seconds % 60 - return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}" - - vtt_output = "WEBVTT\n\n" - for segment in result["segments"]: - start = format_timestamp(segment["start"]) - end = format_timestamp(segment["end"]) - text = segment["text"].strip() - vtt_output += f"{start} --> {end}\n{text}\n\n" - - return jsonify({"success": True, "vtt": vtt_output}) - - except Exception as e: - logger.error(f"Subtitle generation failed: {e}") - return jsonify({"error": str(e)}), 500 - finally: - if temp_path and os.path.exists(temp_path): - os.remove(temp_path) @api_bp.route("/update_ytdlp", methods=["POST"]) @@ -933,30 +785,4 @@ def get_comments(): return jsonify({"comments": [], "count": 0, "error": str(e)}) -@api_bp.route("/captions.vtt") -def get_captions_vtt(): - """Get captions in WebVTT format.""" - video_id = request.args.get("v") - if not video_id: - return "WEBVTT\n\n", 400, {'Content-Type': 'text/vtt'} - try: - from youtube_transcript_api import YouTubeTranscriptApi - from youtube_transcript_api.formatters import WebVTTFormatter - - transcript_list = YouTubeTranscriptApi.list_transcripts(video_id) - - try: - transcript = transcript_list.find_transcript(["en", "vi"]) - except Exception: - transcript = transcript_list.find_generated_transcript(["en", "vi"]) - - transcript_data = transcript.fetch() - formatter = WebVTTFormatter() - vtt_formatted = formatter.format_transcript(transcript_data) - - return Response(vtt_formatted, mimetype='text/vtt') - - except Exception as e: - logger.warning(f"Caption Error: {e}") - return "WEBVTT\n\n", 200, {'Content-Type': 'text/vtt'} diff --git a/requirements.txt b/requirements.txt index 07fe367..dad5ba7 100755 --- a/requirements.txt +++ b/requirements.txt @@ -4,5 +4,4 @@ yt-dlp>=2024.1.0 werkzeug gunicorn python-dotenv -openai-whisper -numpy + diff --git a/static/css/modules/captions.css b/static/css/modules/captions.css deleted file mode 100755 index 770d982..0000000 --- a/static/css/modules/captions.css +++ /dev/null @@ -1,73 +0,0 @@ -/** - * KV-Tube Closed Captions Styles - * Styling for CC overlay and controls - */ - -/* CC Overlay Container */ -.cc-overlay { - position: absolute; - bottom: 60px; - left: 50%; - transform: translateX(-50%); - max-width: 90%; - z-index: 100; - pointer-events: none; - transition: opacity 0.3s ease; -} - -.cc-overlay.hidden { - opacity: 0; -} - -/* CC Text */ -.cc-text { - background: rgba(0, 0, 0, 0.75); - color: #fff; - padding: 8px 16px; - border-radius: 4px; - font-size: 18px; - line-height: 1.4; - text-align: center; - max-width: 800px; - word-wrap: break-word; - text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5); -} - -/* CC Button State */ -.yt-action-btn.cc-active { - color: #fff !important; - background: #3ea6ff !important; -} - -/* CC Button Icon */ -.cc-btn-icon { - display: flex; - align-items: center; - gap: 4px; -} - -/* Loading state */ -.cc-loading { - font-style: italic; - opacity: 0.7; -} - -/* Mobile adjustments */ -@media (max-width: 768px) { - .cc-overlay { - bottom: 50px; - max-width: 95%; - } - - .cc-text { - font-size: 14px; - padding: 6px 12px; - } -} - -/* Large screen */ -@media (min-width: 1200px) { - .cc-text { - font-size: 22px; - } -} \ No newline at end of file diff --git a/static/js/webai.js b/static/js/webai.js deleted file mode 100755 index f3fc261..0000000 --- a/static/js/webai.js +++ /dev/null @@ -1,119 +0,0 @@ -/** - * WebAI - Client-side AI features using Transformers.js - */ - -// Suppress ONNX Runtime warnings -if (typeof ort !== 'undefined') { - ort.env.logLevel = 'fatal'; -} - -class SubtitleGenerator { - constructor() { - this.pipeline = null; - this.isLoading = false; - } - - async init(progressCallback) { - if (this.pipeline) return; - if (this.isLoading) return; - - this.isLoading = true; - - try { - // Suppress ONNX warnings at import time - if (typeof ort !== 'undefined') { - ort.env.logLevel = 'fatal'; - } - - progressCallback?.('Loading AI model...'); - - const { pipeline, env } = await import('https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2'); - - // Configure environment - env.allowLocalModels = false; - env.useBrowserCache = true; - - // Suppress ONNX Runtime warnings - if (typeof ort !== 'undefined') { - ort.env.logLevel = 'fatal'; - } - - progressCallback?.('Downloading Whisper model (~40MB)...'); - - this.pipeline = await pipeline( - 'automatic-speech-recognition', - 'Xenova/whisper-tiny', - { - progress_callback: (progress) => { - if (progress.status === 'downloading') { - const pct = Math.round((progress.loaded / progress.total) * 100); - progressCallback?.(`Downloading: ${pct}%`); - } else if (progress.status === 'loading') { - progressCallback?.('Loading model...'); - } - } - } - ); - - progressCallback?.('Model ready!'); - } catch (e) { - console.error('Failed to load Whisper:', e); - throw e; - } finally { - this.isLoading = false; - } - } - - async generate(audioUrl, progressCallback) { - if (!this.pipeline) { - throw new Error('Model not initialized. Call init() first.'); - } - - progressCallback?.('Transcribing audio...'); - - try { - const result = await this.pipeline(audioUrl, { - chunk_length_s: 30, - stride_length_s: 5, - return_timestamps: true, - }); - - progressCallback?.('Formatting subtitles...'); - - // Convert to VTT format - return this.toVTT(result.chunks || []); - } catch (e) { - console.error('Transcription failed:', e); - throw e; - } - } - - toVTT(chunks) { - let vtt = 'WEBVTT\n\n'; - - chunks.forEach((chunk, i) => { - const start = this.formatTime(chunk.timestamp[0]); - const end = this.formatTime(chunk.timestamp[1]); - const text = chunk.text.trim(); - - if (text) { - vtt += `${i + 1}\n`; - vtt += `${start} --> ${end}\n`; - vtt += `${text}\n\n`; - } - }); - - return vtt; - } - - formatTime(seconds) { - if (seconds === null || seconds === undefined) seconds = 0; - const h = Math.floor(seconds / 3600); - const m = Math.floor((seconds % 3600) / 60); - const s = (seconds % 60).toFixed(3); - return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.padStart(6, '0')}`; - } -} - -// Export singleton -window.subtitleGenerator = new SubtitleGenerator(); diff --git a/templates/watch.html b/templates/watch.html index 5198a92..7c65ebd 100755 --- a/templates/watch.html +++ b/templates/watch.html @@ -12,10 +12,7 @@
- - +
@@ -60,10 +57,7 @@ Queue - +
@@ -190,7 +184,7 @@ - + @@ -380,10 +374,7 @@ // --- Custom Mini Player Logic --- setupMiniPlayer(); - // --- Closed Captions sync --- - art.on('video:timeupdate', () => { - updateCaptions(art.currentTime); - }); + // --- Auto Play Next --- art.on('video:ended', () => { @@ -1253,11 +1244,7 @@ updateQueueCount(); updateQueueBadge(); - // Enable AI Subtitles (Always show for testing if audio is available) - if (data.audio_url) { - const genBtn = document.getElementById('genSubBtn'); - if (genBtn) genBtn.style.display = 'inline-flex'; - } + if (data.error) { loading.innerHTML = `

${data.error}

`; @@ -1329,16 +1316,7 @@ }) }).catch(() => { }); // Ignore errors for anon users - // Subtitle Config - // Disable ArtPlayer's built-in subtitle to avoid CORS - // Our custom CC system uses instead - /* -/api/transcript player.subtitle.url = data.subtitle_url || ''; - if (data.subtitle_url) { - player.subtitle.show = true; - player.notice.show = 'CC Enabled'; - } - */ + // Save Button - Local Storage based // Save Button handler is setup in DOMContentLoaded below @@ -1398,53 +1376,7 @@ return div.innerHTML; } - async function generateSubtitles() { - const btn = document.getElementById('genSubBtn'); - if (!currentVideoData.audioUrl) { - showToast("No audio source available for AI", "error"); - return; - } - - btn.disabled = true; - const originalHtml = btn.innerHTML; - - try { - // Initialize/Load - await window.subtitleGenerator.init((msg) => { - btn.innerHTML = ` ${msg}`; - }); - - // Generate - btn.innerHTML = ' Transcribing...'; - - const vttContent = await window.subtitleGenerator.generate(currentVideoData.audioUrl, (msg) => { - btn.innerHTML = ` ${msg}`; - }); - - console.log("Generated VTT:", vttContent); - - // Inject into Player - if (window.player) { - const blob = new Blob([vttContent], { type: 'text/vtt' }); - const url = URL.createObjectURL(blob); - - // Artplayer subtitle API - window.player.subtitle.url = url; - window.player.subtitle.show = true; - window.player.notice.show = 'AI Subtitles Generated'; - - showToast("Subtitles generated successfully!", "success"); - btn.style.display = 'none'; // Hide button after success - } - - } catch (e) { - console.error(e); - showToast("Subtitle generation failed: " + e.message, "error"); - btn.innerHTML = originalHtml; - btn.disabled = false; - } - } async function summarizeVideo() { const videoId = "{{ video_id }}"; diff --git a/update_and_restart.sh b/update_and_restart.sh deleted file mode 100755 index 85f771d..0000000 --- a/update_and_restart.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash - -# KV-Tube Updater Script -# This script pulls the latest code and Docker images, then restarts the service. - -echo "--- 1. Pulling latest code changes... ---" -git pull origin main - -echo "--- 2. Pulling latest Docker image (v2.0)... ---" -docker-compose pull - -echo "--- 3. Restarting service with new configuration... ---" -# We down it first to ensure port bindings (5001 -> 5000) are updated -docker-compose down -docker-compose up -d --force-recreate - -echo "--- Done! Checking logs... ---" -docker-compose logs --tail=20 -f kv-tube