chore: cleanup project files and remove CC functionality

This commit is contained in:
KV-Tube Deployer 2026-01-12 17:40:31 +07:00
parent a93a875ce2
commit 6c1f459cd6
8 changed files with 26 additions and 476 deletions

View file

@ -1,12 +1,11 @@
__pycache__ .venv/
.venv .venv_clean/
.git env/
.env __pycache__/
*.mp4 .git/
*.webm .DS_Store
*.mp3 *.pyc
videos/ *.pyo
data/ *.pyd
temp/ .idea/
deployment_package/ .vscode/
kvtube.db

4
.gitignore vendored
View file

@ -2,7 +2,11 @@
__pycache__/ __pycache__/
*.pyc *.pyc
venv/ venv/
.venv/
.venv_clean/
.env .env
data/ data/
videos/ videos/
*.db *.db
server.log
.ruff_cache/

View file

@ -15,8 +15,7 @@ import time
import random import random
import concurrent.futures import concurrent.futures
import yt_dlp import yt_dlp
import tempfile
import threading
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -30,9 +29,6 @@ DB_NAME = os.path.join(DATA_DIR, "kvtube.db")
API_CACHE = {} API_CACHE = {}
CACHE_TIMEOUT = 600 # 10 minutes CACHE_TIMEOUT = 600 # 10 minutes
# AI Models
WHISPER_MODEL = None
WHISPER_LOCK = threading.Lock()
def get_db_connection(): def get_db_connection():
@ -463,45 +459,9 @@ def get_stream_info():
expected_headers = info.get("http_headers", {}) expected_headers = info.get("http_headers", {})
logger.info(f"YT-DLP Expected Headers: {expected_headers}") logger.info(f"YT-DLP Expected Headers: {expected_headers}")
# Extract subtitles
subtitle_url = None
subs = info.get("subtitles") or {}
auto_subs = info.get("automatic_captions") or {}
for lang in ["en", "vi"]:
if lang in subs and subs[lang]:
subtitle_url = subs[lang][0]["url"]
break
if lang in auto_subs and auto_subs[lang]:
subtitle_url = auto_subs[lang][0]["url"]
break
# Extract best audio-only URL for AI transcription
audio_url = None
try:
formats = info.get("formats", [])
# Debug: Log format details to understand why we aren't matching
# logger.info(f"Scanning {len(formats)} formats for audio-only...")
audio_formats = []
for f in formats:
vcodec = f.get("vcodec")
acodec = f.get("acodec")
# Check for audio-only: vcodec should be none/None, acodec should be something
if (vcodec == "none" or vcodec is None) and (acodec != "none" and acodec is not None):
audio_formats.append(f)
if audio_formats:
# Prefer m4a (itag 140) for best compatibility, or webm (251)
# Sort by filesize (smaller is faster for whisper) or bitrate?
# For now simply pick the first one that looks like m4a, else first available
chosen_audio = next((f for f in audio_formats if f.get("ext") == "m4a"), audio_formats[0])
audio_url = chosen_audio.get("url")
logger.info(f"Found audio-only URL: {audio_url[:30]}...")
else:
logger.warning("No audio-only formats found in valid stream info.")
except Exception as e:
logger.error(f"Failed to extract audio url: {e}")
response_data = { response_data = {
"original_url": stream_url, "original_url": stream_url,
@ -513,16 +473,14 @@ def get_stream_info():
"upload_date": info.get("upload_date", ""), "upload_date": info.get("upload_date", ""),
"view_count": info.get("view_count", 0), "view_count": info.get("view_count", 0),
"related": [], "related": [],
"subtitle_url": subtitle_url,
"audio_url": None # Placeholder, filled below
} }
from urllib.parse import quote from urllib.parse import quote
proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}" proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
response_data["stream_url"] = proxied_url response_data["stream_url"] = proxied_url
if audio_url:
response_data["audio_url"] = f"/video_proxy?url={quote(audio_url, safe='')}"
# Cache it # Cache it
expiry = current_time + 3600 expiry = current_time + 3600
@ -759,116 +717,10 @@ def summarize_video():
return jsonify({"success": False, "message": f"Could not summarize: {str(e)}"}) return jsonify({"success": False, "message": f"Could not summarize: {str(e)}"})
@api_bp.route("/transcript")
def get_transcript():
    """Return a video's transcript as JSON.

    Query parameters:
        v: ID of the video whose transcript is requested (required).

    The transcript is looked up for the languages ``en`` and ``vi`` with
    ``find_transcript``; if that raises, ``find_generated_transcript`` is
    tried for the same languages.

    Returns:
        On success, JSON with ``success``, ``video_id``, ``transcript``
        (the fetched entries), ``language``, ``is_generated`` and
        ``full_text`` (all entry texts joined by single spaces, truncated
        to 10000 characters).
        A missing video ID yields HTTP 400 with an error payload.
        Any failure while loading the transcript yields
        ``{"success": False, "error": ...}`` with the default status code.
    """
    video_id = request.args.get("v")
    if not video_id:
        return jsonify({"success": False, "error": "No video ID provided"}), 400

    try:
        # Imported lazily so a missing/broken package only affects this route.
        from youtube_transcript_api import YouTubeTranscriptApi

        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        try:
            transcript = transcript_list.find_transcript(["en", "vi"])
        except Exception:
            transcript = transcript_list.find_generated_transcript(["en", "vi"])

        transcript_data = transcript.fetch()
        full_text = " ".join(entry["text"] for entry in transcript_data)

        return jsonify({
            "success": True,
            "video_id": video_id,
            "transcript": transcript_data,
            # Report what was actually selected instead of always claiming
            # an auto-generated English transcript; the previous constants
            # remain as defaults if the attributes are unavailable.
            "language": getattr(transcript, "language_code", "en"),
            "is_generated": getattr(transcript, "is_generated", True),
            "full_text": full_text[:10000],
        })
    except Exception as e:
        return jsonify({"success": False, "error": f"Could not load transcript: {str(e)}"})
@api_bp.route("/generate_subtitles", methods=["POST"])
def generate_subtitles():
    """Generate WebVTT subtitles for a video using server-side Whisper.

    Expects a JSON body with a ``video_id`` key. The handler resolves an
    audio URL with yt-dlp, downloads the audio to a temporary file,
    transcribes it with the lazily loaded Whisper ``tiny`` model and
    converts the resulting segments to WebVTT.

    Returns:
        ``{"success": True, "vtt": <WebVTT text>}`` on success.
        HTTP 400 with ``{"error": ...}`` when no video ID is supplied.
        HTTP 500 with ``{"error": ...}`` when the audio URL cannot be
        extracted or any later step fails.

    The temporary audio file is removed in all cases.
    """
    global WHISPER_MODEL

    # silent=True: a missing or malformed JSON body yields None instead of
    # raising, so the request ends in the 400 below rather than an unhandled
    # error. A non-object JSON payload (list, string, ...) is treated the same.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    video_id = data.get("video_id")
    if not video_id:
        return jsonify({"error": "No video ID provided"}), 400

    def format_timestamp(seconds):
        """Format a duration in seconds as ``HH:MM:SS.mmm``."""
        # Round once to whole milliseconds and split from there; formatting
        # the float seconds directly can round 59.9996 up to "60.000".
        total_ms = int(round(seconds * 1000))
        hours, remainder = divmod(total_ms, 3600000)
        minutes, remainder = divmod(remainder, 60000)
        secs, millis = divmod(remainder, 1000)
        return f"{hours:02d}:{minutes:02d}:{secs:02d}.{millis:03d}"

    temp_path = None
    try:
        # Lazy load model; the lock keeps concurrent requests from loading
        # it more than once.
        with WHISPER_LOCK:
            if WHISPER_MODEL is None:
                import whisper
                logger.info("Loading Whisper model (tiny)...")
                WHISPER_MODEL = whisper.load_model("tiny")

        # Extract Audio URL
        url = f"https://www.youtube.com/watch?v={video_id}"
        ydl_opts = {
            "format": "bestaudio[ext=m4a]/bestaudio/best",
            "noplaylist": True,
            "quiet": True,
            "force_ipv4": True,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
            audio_url = info.get("url")

        if not audio_url:
            return jsonify({"error": "Could not extract audio URL"}), 500

        # Download audio to temp file
        import requests
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        }

        logger.info(f"Downloading audio for transcription: {audio_url[:30]}...")
        # (connect, read) timeouts so a stalled upstream cannot hold the
        # worker indefinitely.
        with requests.get(audio_url, headers=headers, stream=True, timeout=(10, 60)) as r:
            r.raise_for_status()
            # delete=False: the file must outlive this handle so it can be
            # read by path below; it is removed in the finally block.
            with tempfile.NamedTemporaryFile(suffix=".m4a", delete=False) as f:
                temp_path = f.name
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)

        # Transcribe
        logger.info("Transcribing...")
        result = WHISPER_MODEL.transcribe(temp_path)

        # Convert to VTT: one cue per segment, each followed by a blank line.
        cues = []
        for segment in result["segments"]:
            start = format_timestamp(segment["start"])
            end = format_timestamp(segment["end"])
            text = segment["text"].strip()
            cues.append(f"{start} --> {end}\n{text}\n\n")
        vtt_output = "WEBVTT\n\n" + "".join(cues)

        return jsonify({"success": True, "vtt": vtt_output})

    except Exception as e:
        logger.error(f"Subtitle generation failed: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        if temp_path and os.path.exists(temp_path):
            os.remove(temp_path)
@api_bp.route("/update_ytdlp", methods=["POST"]) @api_bp.route("/update_ytdlp", methods=["POST"])
@ -933,30 +785,4 @@ def get_comments():
return jsonify({"comments": [], "count": 0, "error": str(e)}) return jsonify({"comments": [], "count": 0, "error": str(e)})
@api_bp.route("/captions.vtt")
def get_captions_vtt():
    """Serve a video's captions as a WebVTT document.

    Reads the video ID from the ``v`` query parameter. Without it, an
    empty WebVTT document is returned with HTTP 400. If the captions
    cannot be loaded for any reason, the failure is logged as a warning
    and an empty WebVTT document is returned with HTTP 200.
    """
    vtt_headers = {'Content-Type': 'text/vtt'}

    requested_id = request.args.get("v")
    if not requested_id:
        return "WEBVTT\n\n", 400, vtt_headers

    try:
        # Imports stay inside the try so an import failure also falls back
        # to the empty document below.
        from youtube_transcript_api import YouTubeTranscriptApi
        from youtube_transcript_api.formatters import WebVTTFormatter

        available = YouTubeTranscriptApi.list_transcripts(requested_id)
        try:
            chosen = available.find_transcript(["en", "vi"])
        except Exception:
            chosen = available.find_generated_transcript(["en", "vi"])

        entries = chosen.fetch()
        vtt_body = WebVTTFormatter().format_transcript(entries)
        return Response(vtt_body, mimetype='text/vtt')
    except Exception as e:
        logger.warning(f"Caption Error: {e}")
        return "WEBVTT\n\n", 200, vtt_headers

View file

@ -4,5 +4,4 @@ yt-dlp>=2024.1.0
werkzeug werkzeug
gunicorn gunicorn
python-dotenv python-dotenv
openai-whisper
numpy

View file

@ -1,73 +0,0 @@
/**
 * KV-Tube Closed Captions Styles
 * Styling for CC overlay and controls
 */

/* CC Overlay Container: horizontally centred near the bottom of its
   positioned ancestor; pointer-events are disabled so clicks pass through. */
.cc-overlay {
    position: absolute;
    bottom: 60px;
    left: 50%;
    transform: translateX(-50%);
    max-width: 90%;
    z-index: 100;
    pointer-events: none;
    transition: opacity 0.3s ease;
}

/* Hidden state fades the overlay out via the opacity transition above. */
.cc-overlay.hidden {
    opacity: 0;
}

/* CC Text */
.cc-text {
    background: rgba(0, 0, 0, 0.75);
    color: #fff;
    padding: 8px 16px;
    border-radius: 4px;
    font-size: 18px;
    line-height: 1.4;
    text-align: center;
    max-width: 800px;
    /* word-wrap is the legacy alias; overflow-wrap is the standard name.
       Both are declared so older and current browsers break long words. */
    word-wrap: break-word;
    overflow-wrap: break-word;
    text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5);
}

/* CC Button State (!important overrides the base action-button colours) */
.yt-action-btn.cc-active {
    color: #fff !important;
    background: #3ea6ff !important;
}

/* CC Button Icon */
.cc-btn-icon {
    display: flex;
    align-items: center;
    gap: 4px;
}

/* Loading state */
.cc-loading {
    font-style: italic;
    opacity: 0.7;
}

/* Mobile adjustments */
@media (max-width: 768px) {
    .cc-overlay {
        bottom: 50px;
        max-width: 95%;
    }

    .cc-text {
        font-size: 14px;
        padding: 6px 12px;
    }
}

/* Large screen */
@media (min-width: 1200px) {
    .cc-text {
        font-size: 22px;
    }
}

View file

@ -1,119 +0,0 @@
/**
* WebAI - Client-side AI features using Transformers.js
*/
// Suppress ONNX Runtime warnings
if (typeof ort !== 'undefined') {
ort.env.logLevel = 'fatal';
}
/**
 * Client-side subtitle generator built on a Transformers.js
 * automatic-speech-recognition pipeline (Xenova/whisper-tiny).
 *
 * Usage: await init(), then await generate(audioUrl) to obtain WebVTT text.
 */
class SubtitleGenerator {
    constructor() {
        this.pipeline = null;     // ASR pipeline once loaded
        this.isLoading = false;   // true while a load is in flight
        this.loadPromise = null;  // shared promise for the in-flight load
    }

    /** Lower ONNX Runtime log verbosity when the global `ort` is present. */
    static _silenceOnnx() {
        if (typeof ort !== 'undefined') {
            ort.env.logLevel = 'fatal';
        }
    }

    /**
     * Load the speech-recognition pipeline (idempotent).
     *
     * A call made while another load is in flight waits for that same load
     * instead of returning early with no pipeline available.
     *
     * @param {(message: string) => void} [progressCallback] receives status text
     * @returns {Promise<void>} rejects if the model fails to load
     */
    async init(progressCallback) {
        if (this.pipeline) return;
        if (this.loadPromise) return this.loadPromise;

        this.isLoading = true;
        this.loadPromise = this._load(progressCallback);
        try {
            await this.loadPromise;
        } finally {
            this.isLoading = false;
            this.loadPromise = null;
        }
    }

    /** Import the library, configure it and build the pipeline. */
    async _load(progressCallback) {
        try {
            // Suppress ONNX warnings at import time
            SubtitleGenerator._silenceOnnx();

            progressCallback?.('Loading AI model...');

            const { pipeline, env } = await import('https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2');

            // Configure environment
            env.allowLocalModels = false;
            env.useBrowserCache = true;

            // `ort` may only exist after the import, so apply it again.
            SubtitleGenerator._silenceOnnx();

            progressCallback?.('Downloading Whisper model (~40MB)...');

            this.pipeline = await pipeline(
                'automatic-speech-recognition',
                'Xenova/whisper-tiny',
                {
                    progress_callback: (progress) => {
                        // NOTE(review): the library appears to report byte counts
                        // with status 'progress'; 'downloading' is kept so the
                        // previous behaviour is preserved — confirm against the
                        // pinned version.
                        const isDownload = progress.status === 'downloading'
                            || progress.status === 'progress';
                        if (isDownload) {
                            if (progress.total > 0) {
                                const pct = Math.round((progress.loaded / progress.total) * 100);
                                progressCallback?.(`Downloading: ${pct}%`);
                            } else {
                                // Unknown size: avoid showing "NaN%".
                                progressCallback?.('Downloading...');
                            }
                        } else if (progress.status === 'loading') {
                            progressCallback?.('Loading model...');
                        }
                    }
                }
            );

            progressCallback?.('Model ready!');
        } catch (e) {
            console.error('Failed to load Whisper:', e);
            throw e;
        }
    }

    /**
     * Transcribe the audio at `audioUrl` and return it as WebVTT text.
     *
     * @param {string} audioUrl audio source passed to the pipeline
     * @param {(message: string) => void} [progressCallback] receives status text
     * @returns {Promise<string>} WebVTT document
     * @throws {Error} if init() has not completed, or transcription fails
     */
    async generate(audioUrl, progressCallback) {
        if (!this.pipeline) {
            throw new Error('Model not initialized. Call init() first.');
        }

        progressCallback?.('Transcribing audio...');

        try {
            const result = await this.pipeline(audioUrl, {
                chunk_length_s: 30,
                stride_length_s: 5,
                return_timestamps: true,
            });

            progressCallback?.('Formatting subtitles...');

            // Convert to VTT format
            return this.toVTT(result.chunks || []);
        } catch (e) {
            console.error('Transcription failed:', e);
            throw e;
        }
    }

    /**
     * Convert timestamped chunks to a WebVTT document.
     *
     * Chunks whose trimmed text is empty are skipped; cue identifiers are
     * the 1-based chunk index.
     *
     * @param {Array<{timestamp: Array<number|null>, text: string}>} chunks
     * @returns {string} WebVTT document
     */
    toVTT(chunks) {
        // Used when a chunk has no end time, so the cue does not end at 0
        // (i.e. before it starts).
        const FALLBACK_CUE_SECONDS = 3;

        let vtt = 'WEBVTT\n\n';
        chunks.forEach((chunk, i) => {
            const [rawStart, rawEnd] = chunk.timestamp ?? [];
            const startSec = rawStart ?? 0;
            const endSec = rawEnd ?? (startSec + FALLBACK_CUE_SECONDS);
            const text = (chunk.text ?? '').trim();
            if (text) {
                vtt += `${i + 1}\n`;
                vtt += `${this.formatTime(startSec)} --> ${this.formatTime(endSec)}\n`;
                vtt += `${text}\n\n`;
            }
        });
        return vtt;
    }

    /**
     * Format seconds as a WebVTT timestamp `HH:MM:SS.mmm`.
     *
     * The value is rounded to whole milliseconds first so the seconds field
     * can never render as "60.000". null/undefined/NaN are treated as 0 and
     * negative values are clamped to 0.
     *
     * @param {number|null|undefined} seconds
     * @returns {string}
     */
    formatTime(seconds) {
        if (seconds === null || seconds === undefined || Number.isNaN(seconds)) seconds = 0;
        const totalMs = Math.max(0, Math.round(seconds * 1000));
        const h = Math.floor(totalMs / 3600000);
        const m = Math.floor((totalMs % 3600000) / 60000);
        const s = Math.floor((totalMs % 60000) / 1000);
        const ms = totalMs % 1000;
        const pad = (value, width) => String(value).padStart(width, '0');
        return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
    }
}
// Export singleton
window.subtitleGenerator = new SubtitleGenerator();

View file

@ -12,10 +12,7 @@
<div id="artplayer-app" style="width: 100%; height: 100%;"></div> <div id="artplayer-app" style="width: 100%; height: 100%;"></div>
<!-- Loading State (Confined to Player) --> <!-- Loading State (Confined to Player) -->
<div id="loading" class="yt-loader"></div> <div id="loading" class="yt-loader"></div>
<!-- Closed Captions Overlay -->
<div id="ccOverlay" class="cc-overlay hidden">
<div id="ccText" class="cc-text"></div>
</div>
</div> </div>
<!-- Placeholder for Mini Mode --> <!-- Placeholder for Mini Mode -->
<div id="playerPlaceholder" class="yt-player-placeholder"></div> <div id="playerPlaceholder" class="yt-player-placeholder"></div>
@ -60,10 +57,7 @@
Queue Queue
<span id="queueBadge" class="queue-badge" style="display:none;">0</span> <span id="queueBadge" class="queue-badge" style="display:none;">0</span>
</button> </button>
<button class="yt-action-btn" id="genSubBtn" onclick="generateSubtitles()" style="display:none;">
<i class="fas fa-closed-captioning"></i>
Generate Subs
</button>
<!-- View Mode Buttons --> <!-- View Mode Buttons -->
<div class="view-mode-buttons"> <div class="view-mode-buttons">
@ -190,7 +184,7 @@
<!-- Watch page styles extracted to external file for better caching --> <!-- Watch page styles extracted to external file for better caching -->
<link rel="stylesheet" href="{{ url_for('static', filename='css/modules/watch.css') }}"> <link rel="stylesheet" href="{{ url_for('static', filename='css/modules/watch.css') }}">
<link rel="stylesheet" href="{{ url_for('static', filename='css/modules/captions.css') }}">
<link rel="stylesheet" href="{{ url_for('static', filename='css/modules/downloads.css') }}"> <link rel="stylesheet" href="{{ url_for('static', filename='css/modules/downloads.css') }}">
<!-- HLS Support (Local) --> <!-- HLS Support (Local) -->
@ -380,10 +374,7 @@
// --- Custom Mini Player Logic --- // --- Custom Mini Player Logic ---
setupMiniPlayer(); setupMiniPlayer();
// --- Closed Captions sync ---
art.on('video:timeupdate', () => {
updateCaptions(art.currentTime);
});
// --- Auto Play Next --- // --- Auto Play Next ---
art.on('video:ended', () => { art.on('video:ended', () => {
@ -1253,11 +1244,7 @@
updateQueueCount(); updateQueueCount();
updateQueueBadge(); updateQueueBadge();
// Enable AI Subtitles (Always show for testing if audio is available)
if (data.audio_url) {
const genBtn = document.getElementById('genSubBtn');
if (genBtn) genBtn.style.display = 'inline-flex';
}
if (data.error) { if (data.error) {
loading.innerHTML = `<p style="color:#f00; text-align:center;">${data.error}</p>`; loading.innerHTML = `<p style="color:#f00; text-align:center;">${data.error}</p>`;
@ -1329,16 +1316,7 @@
}) })
}).catch(() => { }); // Ignore errors for anon users }).catch(() => { }); // Ignore errors for anon users
// Subtitle Config
// Disable ArtPlayer's built-in subtitle to avoid CORS
// Our custom CC system uses instead
/*
/api/transcript player.subtitle.url = data.subtitle_url || '';
if (data.subtitle_url) {
player.subtitle.show = true;
player.notice.show = 'CC Enabled';
}
*/
// Save Button - Local Storage based // Save Button - Local Storage based
// Save Button handler is setup in DOMContentLoaded below // Save Button handler is setup in DOMContentLoaded below
@ -1398,53 +1376,7 @@
return div.innerHTML; return div.innerHTML;
} }
/**
 * Generate AI subtitles for the current video and attach them to the player.
 *
 * Loads the model via window.subtitleGenerator, transcribes
 * currentVideoData.audioUrl, and sets the resulting WebVTT (as a blob URL)
 * as the player's subtitle track. Progress is shown inside the
 * "Generate Subs" button; on any failure the button is restored and a
 * toast reports the error.
 */
async function generateSubtitles() {
    const btn = document.getElementById('genSubBtn');

    if (!currentVideoData.audioUrl) {
        showToast("No audio source available for AI", "error");
        return;
    }
    // Nothing to drive progress on if the button is not in the DOM.
    if (!btn) return;

    btn.disabled = true;
    const originalHtml = btn.innerHTML;

    try {
        // Initialize/Load
        await window.subtitleGenerator.init((msg) => {
            btn.innerHTML = `<i class="fas fa-spinner fa-spin"></i> ${msg}`;
        });

        // Generate
        btn.innerHTML = '<i class="fas fa-spinner fa-spin"></i> Transcribing...';
        const vttContent = await window.subtitleGenerator.generate(currentVideoData.audioUrl, (msg) => {
            btn.innerHTML = `<i class="fas fa-circle-notch fa-spin"></i> ${msg}`;
        });

        console.log("Generated VTT:", vttContent);

        // Without a player there is nowhere to attach the subtitles. Treat it
        // as a failure so the button is restored instead of staying disabled
        // with a spinner.
        if (!window.player) {
            throw new Error("Player is not available");
        }

        // Inject into Player
        const blob = new Blob([vttContent], { type: 'text/vtt' });
        const url = URL.createObjectURL(blob);

        // Artplayer subtitle API
        window.player.subtitle.url = url;
        window.player.subtitle.show = true;
        window.player.notice.show = 'AI Subtitles Generated';

        showToast("Subtitles generated successfully!", "success");
        btn.style.display = 'none'; // Hide button after success

    } catch (e) {
        console.error(e);
        // Rejections are not guaranteed to be Error instances.
        const reason = (e && e.message) ? e.message : String(e);
        showToast("Subtitle generation failed: " + reason, "error");
        btn.innerHTML = originalHtml;
        btn.disabled = false;
    }
}
async function summarizeVideo() { async function summarizeVideo() {
const videoId = "{{ video_id }}"; const videoId = "{{ video_id }}";

View file

@ -1,18 +0,0 @@
#!/bin/bash
# KV-Tube Updater Script
# This script pulls the latest code and Docker images, then restarts the service.

# Fail fast: if pulling code or images fails, stop before the running
# service is taken down, rather than restarting on a half-updated state.
set -euo pipefail

echo "--- 1. Pulling latest code changes... ---"
git pull origin main

echo "--- 2. Pulling latest Docker image (v2.0)... ---"
docker-compose pull

echo "--- 3. Restarting service with new configuration... ---"
# We down it first to ensure port bindings (5001 -> 5000) are updated
docker-compose down
docker-compose up -d --force-recreate

echo "--- Done! Checking logs... ---"
# Follows the log stream until interrupted (Ctrl-C).
docker-compose logs --tail=20 -f kv-tube