chore: clean up project files and remove CC functionality

2026-01-12 17:40:31 +07:00 · 2026-01-12 17:40:31 +07:00 · 6c1f459cd6
commit 6c1f459cd6
parent a93a875ce2
8 changed files with 26 additions and 476 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,12 +1,11 @@
-__pycache__
-.venv
-.git
-.env
-*.mp4
-*.webm
-*.mp3
-videos/
-data/
-temp/
-deployment_package/
-kvtube.db
+.venv/
+.venv_clean/
+env/
+__pycache__/
+.git/
+.DS_Store
+*.pyc
+*.pyo
+*.pyd
+.idea/
+.vscode/
--- a/.gitignore
+++ b/.gitignore
@@ -2,7 +2,11 @@
 __pycache__/
 *.pyc
 venv/
+.venv/
+.venv_clean/
 .env
 data/
 videos/
 *.db
+server.log
+.ruff_cache/
--- a/app/routes/api.py
+++ b/app/routes/api.py
@@ -15,8 +15,7 @@ import time
 import random
 import concurrent.futures
 import yt_dlp
-import tempfile
-import threading
+

 logger = logging.getLogger(__name__)

@@ -30,9 +29,6 @@ DB_NAME = os.path.join(DATA_DIR, "kvtube.db")
 API_CACHE = {}
 CACHE_TIMEOUT = 600  # 10 minutes

-# AI Models
-WHISPER_MODEL = None
-WHISPER_LOCK = threading.Lock()


 def get_db_connection():
@@ -463,45 +459,9 @@ def get_stream_info():
        expected_headers = info.get("http_headers", {})
        logger.info(f"YT-DLP Expected Headers: {expected_headers}")

-        # Extract subtitles
-        subtitle_url = None
-        subs = info.get("subtitles") or {}
-        auto_subs = info.get("automatic_captions") or {}
-        
-        for lang in ["en", "vi"]:
-            if lang in subs and subs[lang]:
-                subtitle_url = subs[lang][0]["url"]
-                break
-            if lang in auto_subs and auto_subs[lang]:
-                subtitle_url = auto_subs[lang][0]["url"]
-                break

-        # Extract best audio-only URL for AI transcription
-        audio_url = None
-        try:
-            formats = info.get("formats", [])
-            # Debug: Log format details to understand why we aren't matching
-            # logger.info(f"Scanning {len(formats)} formats for audio-only...")
-            
-            audio_formats = []
-            for f in formats:
-                vcodec = f.get("vcodec")
-                acodec = f.get("acodec")
-                # Check for audio-only: vcodec should be none/None, acodec should be something
-                if (vcodec == "none" or vcodec is None) and (acodec != "none" and acodec is not None):
-                     audio_formats.append(f)

-            if audio_formats:
-                # Prefer m4a (itag 140) for best compatibility, or webm (251)
-                # Sort by filesize (smaller is faster for whisper) or bitrate?
-                # For now simply pick the first one that looks like m4a, else first available
-                chosen_audio = next((f for f in audio_formats if f.get("ext") == "m4a"), audio_formats[0])
-                audio_url = chosen_audio.get("url")
-                logger.info(f"Found audio-only URL: {audio_url[:30]}...")
-            else:
-                logger.warning("No audio-only formats found in valid stream info.")
-        except Exception as e:
-            logger.error(f"Failed to extract audio url: {e}")
+

        response_data = {
            "original_url": stream_url,
@@ -513,16 +473,14 @@ def get_stream_info():
            "upload_date": info.get("upload_date", ""),
            "view_count": info.get("view_count", 0),
            "related": [],
-            "subtitle_url": subtitle_url,
-            "audio_url": None # Placeholder, filled below
+
        }

        from urllib.parse import quote
        proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
        response_data["stream_url"] = proxied_url
        
-        if audio_url:
-             response_data["audio_url"] = f"/video_proxy?url={quote(audio_url, safe='')}"
+

        # Cache it
        expiry = current_time + 3600
@@ -759,116 +717,10 @@ def summarize_video():
        return jsonify({"success": False, "message": f"Could not summarize: {str(e)}"})


-@api_bp.route("/transcript")
-def get_transcript():
-    """Get video transcript."""
-    video_id = request.args.get("v")
-    if not video_id:
-        return jsonify({"success": False, "error": "No video ID provided"}), 400
-
-    try:
-        from youtube_transcript_api import YouTubeTranscriptApi
-        
-        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-        
-        try:
-            transcript = transcript_list.find_transcript(["en", "vi"])
-        except Exception:
-            transcript = transcript_list.find_generated_transcript(["en", "vi"])
-
-        transcript_data = transcript.fetch()
-        full_text = " ".join([entry["text"] for entry in transcript_data])
-
-        return jsonify({
-            "success": True,
-            "video_id": video_id,
-            "transcript": transcript_data,
-            "language": "en",
-            "is_generated": True,
-            "full_text": full_text[:10000],
-        })
-
-    except Exception as e:
-        return jsonify({"success": False, "error": f"Could not load transcript: {str(e)}"})


-@api_bp.route("/generate_subtitles", methods=["POST"])
-def generate_subtitles():
-    """Generate subtitles using server-side Whisper."""
-    global WHISPER_MODEL
-    
-    data = request.get_json()
-    video_id = data.get("video_id")

-    if not video_id:
-        return jsonify({"error": "No video ID provided"}), 400

-    temp_path = None
-    try:
-        # Lazy load model
-        with WHISPER_LOCK:
-            if WHISPER_MODEL is None:
-                import whisper
-                logger.info("Loading Whisper model (tiny)...")
-                WHISPER_MODEL = whisper.load_model("tiny")
-
-        # Extract Audio URL
-        url = f"https://www.youtube.com/watch?v={video_id}"
-        ydl_opts = {
-            "format": "bestaudio[ext=m4a]/bestaudio/best",
-            "noplaylist": True,
-            "quiet": True,
-            "force_ipv4": True,
-        }
-        
-        audio_url = None
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            info = ydl.extract_info(url, download=False)
-            audio_url = info.get("url")
-            
-        if not audio_url:
-             return jsonify({"error": "Could not extract audio URL"}), 500
-
-        # Download audio to temp file
-        import requests
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
-        }
-        
-        logger.info(f"Downloading audio for transcription: {audio_url[:30]}...")
-        with requests.get(audio_url, headers=headers, stream=True) as r:
-            r.raise_for_status()
-            with tempfile.NamedTemporaryFile(suffix=".m4a", delete=False) as f:
-                temp_path = f.name
-                for chunk in r.iter_content(chunk_size=8192): 
-                    f.write(chunk)
-        
-        # Transcribe
-        logger.info("Transcribing...")
-        result = WHISPER_MODEL.transcribe(temp_path)
-        
-        # Convert to VTT
-        def format_timestamp(seconds):
-            hours = int(seconds // 3600)
-            minutes = int((seconds % 3600) // 60)
-            seconds = seconds % 60
-            return f"{hours:02d}:{minutes:02d}:{seconds:06.3f}"
-
-        vtt_output = "WEBVTT\n\n"
-        for segment in result["segments"]:
-            start = format_timestamp(segment["start"])
-            end = format_timestamp(segment["end"])
-            text = segment["text"].strip()
-            vtt_output += f"{start} --> {end}\n{text}\n\n"
-            
-        return jsonify({"success": True, "vtt": vtt_output})
-
-    except Exception as e:
-        logger.error(f"Subtitle generation failed: {e}")
-        return jsonify({"error": str(e)}), 500
-    finally:
-        if temp_path and os.path.exists(temp_path):
-            os.remove(temp_path)


@api_bp.route("/update_ytdlp", methods=["POST"])
@@ -933,30 +785,4 @@ def get_comments():
        return jsonify({"comments": [], "count": 0, "error": str(e)})


-@api_bp.route("/captions.vtt")
-def get_captions_vtt():
-    """Get captions in WebVTT format."""
-    video_id = request.args.get("v")
-    if not video_id:
-        return "WEBVTT\n\n", 400, {'Content-Type': 'text/vtt'}

-    try:
-        from youtube_transcript_api import YouTubeTranscriptApi
-        from youtube_transcript_api.formatters import WebVTTFormatter
-        
-        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
-        
-        try:
-            transcript = transcript_list.find_transcript(["en", "vi"])
-        except Exception:
-            transcript = transcript_list.find_generated_transcript(["en", "vi"])
-        
-        transcript_data = transcript.fetch()
-        formatter = WebVTTFormatter()
-        vtt_formatted = formatter.format_transcript(transcript_data)
-        
-        return Response(vtt_formatted, mimetype='text/vtt')
-
-    except Exception as e:
-        logger.warning(f"Caption Error: {e}")
-        return "WEBVTT\n\n", 200, {'Content-Type': 'text/vtt'}
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,5 +4,4 @@ yt-dlp>=2024.1.0
 werkzeug
 gunicorn
 python-dotenv
-openai-whisper
-numpy
+
--- a/static/css/modules/captions.css
+++ b/static/css/modules/captions.css
@@ -1,73 +0,0 @@
-/**
- * KV-Tube Closed Captions Styles
- * Styling for CC overlay and controls
- */
-
-/* CC Overlay Container */
-.cc-overlay {
-    position: absolute;
-    bottom: 60px;
-    left: 50%;
-    transform: translateX(-50%);
-    max-width: 90%;
-    z-index: 100;
-    pointer-events: none;
-    transition: opacity 0.3s ease;
-}
-
-.cc-overlay.hidden {
-    opacity: 0;
-}
-
-/* CC Text */
-.cc-text {
-    background: rgba(0, 0, 0, 0.75);
-    color: #fff;
-    padding: 8px 16px;
-    border-radius: 4px;
-    font-size: 18px;
-    line-height: 1.4;
-    text-align: center;
-    max-width: 800px;
-    word-wrap: break-word;
-    text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5);
-}
-
-/* CC Button State */
-.yt-action-btn.cc-active {
-    color: #fff !important;
-    background: #3ea6ff !important;
-}
-
-/* CC Button Icon */
-.cc-btn-icon {
-    display: flex;
-    align-items: center;
-    gap: 4px;
-}
-
-/* Loading state */
-.cc-loading {
-    font-style: italic;
-    opacity: 0.7;
-}
-
-/* Mobile adjustments */
-@media (max-width: 768px) {
-    .cc-overlay {
-        bottom: 50px;
-        max-width: 95%;
-    }
-
-    .cc-text {
-        font-size: 14px;
-        padding: 6px 12px;
-    }
-}
-
-/* Large screen */
-@media (min-width: 1200px) {
-    .cc-text {
-        font-size: 22px;
-    }
-}
--- a/static/js/webai.js
+++ b/static/js/webai.js
@@ -1,119 +0,0 @@
-/**
- * WebAI - Client-side AI features using Transformers.js
- */
-
-// Suppress ONNX Runtime warnings
-if (typeof ort !== 'undefined') {
-    ort.env.logLevel = 'fatal';
-}
-
-class SubtitleGenerator {
-    constructor() {
-        this.pipeline = null;
-        this.isLoading = false;
-    }
-
-    async init(progressCallback) {
-        if (this.pipeline) return;
-        if (this.isLoading) return;
-
-        this.isLoading = true;
-
-        try {
-            // Suppress ONNX warnings at import time
-            if (typeof ort !== 'undefined') {
-                ort.env.logLevel = 'fatal';
-            }
-
-            progressCallback?.('Loading AI model...');
-
-            const { pipeline, env } = await import('https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2');
-
-            // Configure environment
-            env.allowLocalModels = false;
-            env.useBrowserCache = true;
-
-            // Suppress ONNX Runtime warnings
-            if (typeof ort !== 'undefined') {
-                ort.env.logLevel = 'fatal';
-            }
-
-            progressCallback?.('Downloading Whisper model (~40MB)...');
-
-            this.pipeline = await pipeline(
-                'automatic-speech-recognition',
-                'Xenova/whisper-tiny',
-                {
-                    progress_callback: (progress) => {
-                        if (progress.status === 'downloading') {
-                            const pct = Math.round((progress.loaded / progress.total) * 100);
-                            progressCallback?.(`Downloading: ${pct}%`);
-                        } else if (progress.status === 'loading') {
-                            progressCallback?.('Loading model...');
-                        }
-                    }
-                }
-            );
-
-            progressCallback?.('Model ready!');
-        } catch (e) {
-            console.error('Failed to load Whisper:', e);
-            throw e;
-        } finally {
-            this.isLoading = false;
-        }
-    }
-
-    async generate(audioUrl, progressCallback) {
-        if (!this.pipeline) {
-            throw new Error('Model not initialized. Call init() first.');
-        }
-
-        progressCallback?.('Transcribing audio...');
-
-        try {
-            const result = await this.pipeline(audioUrl, {
-                chunk_length_s: 30,
-                stride_length_s: 5,
-                return_timestamps: true,
-            });
-
-            progressCallback?.('Formatting subtitles...');
-
-            // Convert to VTT format
-            return this.toVTT(result.chunks || []);
-        } catch (e) {
-            console.error('Transcription failed:', e);
-            throw e;
-        }
-    }
-
-    toVTT(chunks) {
-        let vtt = 'WEBVTT\n\n';
-
-        chunks.forEach((chunk, i) => {
-            const start = this.formatTime(chunk.timestamp[0]);
-            const end = this.formatTime(chunk.timestamp[1]);
-            const text = chunk.text.trim();
-
-            if (text) {
-                vtt += `${i + 1}\n`;
-                vtt += `${start} --> ${end}\n`;
-                vtt += `${text}\n\n`;
-            }
-        });
-
-        return vtt;
-    }
-
-    formatTime(seconds) {
-        if (seconds === null || seconds === undefined) seconds = 0;
-        const h = Math.floor(seconds / 3600);
-        const m = Math.floor((seconds % 3600) / 60);
-        const s = (seconds % 60).toFixed(3);
-        return `${h.toString().padStart(2, '0')}:${m.toString().padStart(2, '0')}:${s.padStart(6, '0')}`;
-    }
-}
-
-// Export singleton
-window.subtitleGenerator = new SubtitleGenerator();
--- a/templates/watch.html
+++ b/templates/watch.html
@@ -12,10 +12,7 @@
            <div id="artplayer-app" style="width: 100%; height: 100%;"></div>
            <!-- Loading State (Confined to Player) -->
            <div id="loading" class="yt-loader"></div>
-            <!-- Closed Captions Overlay -->
-            <div id="ccOverlay" class="cc-overlay hidden">
-                <div id="ccText" class="cc-text"></div>
-            </div>
+
        </div>
        <!-- Placeholder for Mini Mode -->
        <div id="playerPlaceholder" class="yt-player-placeholder"></div>
@@ -60,10 +57,7 @@
                    Queue
                    <span id="queueBadge" class="queue-badge" style="display:none;">0</span>
                </button>
-                <button class="yt-action-btn" id="genSubBtn" onclick="generateSubtitles()" style="display:none;">
-                    <i class="fas fa-closed-captioning"></i>
-                    Generate Subs
-                </button>
+

                <!-- View Mode Buttons -->
                <div class="view-mode-buttons">
@@ -190,7 +184,7 @@

    <!-- Watch page styles extracted to external file for better caching -->
    <link rel="stylesheet" href="{{ url_for('static', filename='css/modules/watch.css') }}">
-    <link rel="stylesheet" href="{{ url_for('static', filename='css/modules/captions.css') }}">
+
    <link rel="stylesheet" href="{{ url_for('static', filename='css/modules/downloads.css') }}">

    <!-- HLS Support (Local) -->
@@ -380,10 +374,7 @@
                    // --- Custom Mini Player Logic ---
                    setupMiniPlayer();

-                    // --- Closed Captions sync ---
-                    art.on('video:timeupdate', () => {
-                        updateCaptions(art.currentTime);
-                    });
+

                    // --- Auto Play Next ---
                    art.on('video:ended', () => {
@@ -1253,11 +1244,7 @@
            updateQueueCount();
            updateQueueBadge();

-            // Enable AI Subtitles (Always show for testing if audio is available)
-            if (data.audio_url) {
-                const genBtn = document.getElementById('genSubBtn');
-                if (genBtn) genBtn.style.display = 'inline-flex';
-            }
+

            if (data.error) {
                loading.innerHTML = `<p style="color:#f00; text-align:center;">${data.error}</p>`;
@@ -1329,16 +1316,7 @@
                })
            }).catch(() => { }); // Ignore errors for anon users

-            // Subtitle Config
-            // Disable ArtPlayer's built-in subtitle to avoid CORS
-            // Our custom CC system uses instead
-            /*
-/api/transcript                player.subtitle.url = data.subtitle_url || '';
-            if (data.subtitle_url) {
-                player.subtitle.show = true;
-                player.notice.show = 'CC Enabled';
-            }
-            */
+

            // Save Button - Local Storage based
            // Save Button handler is setup in DOMContentLoaded below
@@ -1398,53 +1376,7 @@
            return div.innerHTML;
        }

-        async function generateSubtitles() {
-            const btn = document.getElementById('genSubBtn');

-            if (!currentVideoData.audioUrl) {
-                showToast("No audio source available for AI", "error");
-                return;
-            }
-
-            btn.disabled = true;
-            const originalHtml = btn.innerHTML;
-
-            try {
-                // Initialize/Load
-                await window.subtitleGenerator.init((msg) => {
-                    btn.innerHTML = `<i class="fas fa-spinner fa-spin"></i> ${msg}`;
-                });
-
-                // Generate
-                btn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Transcribing...';
-
-                const vttContent = await window.subtitleGenerator.generate(currentVideoData.audioUrl, (msg) => {
-                    btn.innerHTML = `<i class="fas fa-circle-notch fa-spin"></i> ${msg}`;
-                });
-
-                console.log("Generated VTT:", vttContent);
-
-                // Inject into Player
-                if (window.player) {
-                    const blob = new Blob([vttContent], { type: 'text/vtt' });
-                    const url = URL.createObjectURL(blob);
-
-                    // Artplayer subtitle API
-                    window.player.subtitle.url = url;
-                    window.player.subtitle.show = true;
-                    window.player.notice.show = 'AI Subtitles Generated';
-
-                    showToast("Subtitles generated successfully!", "success");
-                    btn.style.display = 'none'; // Hide button after success
-                }
-
-            } catch (e) {
-                console.error(e);
-                showToast("Subtitle generation failed: " + e.message, "error");
-                btn.innerHTML = originalHtml;
-                btn.disabled = false;
-            }
-        }

        async function summarizeVideo() {
            const videoId = "{{ video_id }}";
--- a/update_and_restart.sh
+++ b/update_and_restart.sh
@@ -1,18 +0,0 @@
-#!/bin/bash
-
-# KV-Tube Updater Script
-# This script pulls the latest code and Docker images, then restarts the service.
-
-echo "--- 1. Pulling latest code changes... ---"
-git pull origin main
-
-echo "--- 2. Pulling latest Docker image (v2.0)... ---"
-docker-compose pull
-
-echo "--- 3. Restarting service with new configuration... ---"
-# We down it first to ensure port bindings (5001 -> 5000) are updated
-docker-compose down
-docker-compose up -d --force-recreate
-
-echo "--- Done! Checking logs... ---"
-docker-compose logs --tail=20 -f kv-tube