788 lines
27 KiB
Python
Executable file
788 lines
27 KiB
Python
Executable file
"""
|
|
KV-Tube API Blueprint
|
|
All JSON API endpoints for the frontend
|
|
"""
|
|
from flask import Blueprint, request, jsonify, Response
|
|
import os
|
|
import sys
|
|
import subprocess
|
|
import json
|
|
import sqlite3
|
|
import re
|
|
import heapq
|
|
import logging
|
|
import time
|
|
import random
|
|
import concurrent.futures
|
|
import yt_dlp
|
|
|
|
|
|
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Every route registered on this blueprint is served beneath the /api prefix.
api_bp = Blueprint("api", __name__, url_prefix="/api")

# SQLite database location; the directory can be overridden through the
# KVTUBE_DATA_DIR environment variable.
DATA_DIR = os.getenv("KVTUBE_DATA_DIR", "data")
DB_NAME = os.path.join(DATA_DIR, "kvtube.db")

# In-process response cache and the number of seconds an entry stays fresh.
API_CACHE = {}
CACHE_TIMEOUT = 600  # 10 minutes
|
|
|
|
|
|
|
|
def get_db_connection():
    """Open the application's SQLite database.

    Returns:
        A ``sqlite3.Connection`` to ``DB_NAME`` whose rows are ``sqlite3.Row``
        objects, so columns can be read by name. The caller must close it.
    """
    connection = sqlite3.connect(DB_NAME)
    connection.row_factory = sqlite3.Row
    return connection
|
|
|
|
|
|
# --- Helper Functions ---
|
|
|
|
def extractive_summary(text, num_sentences=5):
    """Summarize *text* by returning its highest-scoring sentences.

    Each non-stop word is weighted by its frequency relative to the most
    frequent word; a sentence's score is the sum of the weights of its words.

    Args:
        text: The text to summarize. ``None`` or an empty string is accepted.
        num_sentences: Maximum number of sentences to return.

    Returns:
        The selected sentences joined by single spaces, ordered from highest
        to lowest score, or "Not enough content to summarize." when the text
        contains no scoreable words.
    """
    if not text:
        return "Not enough content to summarize."

    # Remove anything in square brackets and flatten newlines.
    clean_text = re.sub(r"\[.*?\]", "", text).replace("\n", " ")

    # Split after "." or "?" followed by whitespace, skipping abbreviations
    # such as "e.g." and titles such as "Mr.".
    sentences = re.split(r"(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s", clean_text)

    stop_words = {
        "the", "a", "an", "and", "or", "but", "is", "are", "was", "were",
        "to", "of", "in", "on", "at", "for", "with", "that", "this", "it",
        "you", "i", "we", "they", "he", "she",
    }

    # Count occurrences of every non-stop word.
    word_frequencies = {}
    for word in re.findall(r"\w+", clean_text.lower()):
        if word not in stop_words:
            word_frequencies[word] = word_frequencies.get(word, 0) + 1

    if not word_frequencies:
        return "Not enough content to summarize."

    # Normalize so the most frequent word has weight 1.0.
    max_freq = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] /= max_freq

    # Score sentences; sentences without any weighted word are left out so
    # they can never be selected.
    sentence_scores = {}
    for sent in sentences:
        for word in re.findall(r"\w+", sent.lower()):
            if word in word_frequencies:
                sentence_scores[sent] = sentence_scores.get(sent, 0) + word_frequencies[word]

    summary_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    return " ".join(summary_sentences)
|
|
|
|
|
|
def fetch_videos(query, limit=20, filter_type=None, playlist_start=1, playlist_end=None):
    """Search YouTube through the yt-dlp command line and return result summaries.

    Args:
        query: Free-text search terms.
        limit: Maximum number of results wanted.
        filter_type: When ``"video"``, drop entries lasting 70 seconds or less
            and entries whose title contains "#shorts".
        playlist_start: 1-based index of the first search result to return.
        playlist_end: 1-based index of the last search result to return
            (inclusive). Defaults to ``playlist_start + limit - 1``.

    Returns:
        A list of dicts with the keys ``id``, ``title``, ``uploader``,
        ``channel_id``, ``uploader_id``, ``thumbnail``, ``view_count``,
        ``upload_date`` and ``duration`` ("M:SS" / "H:MM:SS" or ``None``).
        An empty list is returned when nothing matches or any error occurs.
    """
    try:
        if limit <= 0:
            # A zero-result search cannot return anything; skip the subprocess.
            return []

        if not playlist_end:
            # The range is inclusive, so a window of `limit` items ends here.
            playlist_end = playlist_start + limit - 1

        # "ytsearchN:" only produces N entries, and the playlist window is
        # applied on top of them. Requesting fewer than `playlist_end` entries
        # would leave every page after the first empty.
        # NOTE: the cost of a search therefore grows with the page offset.
        search_count = max(limit, playlist_end)

        cmd = [
            sys.executable, "-m", "yt_dlp",
            f"ytsearch{search_count}:{query}",
            "--dump-json",
            "--flat-playlist",
            "--no-playlist",
            "--playlist-start", str(playlist_start),
            "--playlist-end", str(playlist_end),
        ]

        proc = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        results = []
        # yt-dlp prints one JSON document per line.
        for line in proc.stdout.splitlines():
            try:
                data = json.loads(line)
            except json.JSONDecodeError:
                continue

            video_id = data.get("id")
            if not video_id:
                continue

            duration_secs = data.get("duration")

            if filter_type == "video":
                # Heuristics for excluding Shorts: very short runtime or an
                # explicit "#shorts" tag in the title.
                if duration_secs and int(duration_secs) <= 70:
                    continue
                if "#shorts" in (data.get("title") or "").lower():
                    continue

            duration = None
            if duration_secs:
                m, s = divmod(int(duration_secs), 60)
                h, m = divmod(m, 60)
                duration = f"{h}:{m:02d}:{s:02d}" if h else f"{m}:{s:02d}"

            results.append({
                "id": video_id,
                "title": data.get("title", "Unknown"),
                "uploader": data.get("uploader") or data.get("channel") or "Unknown",
                "channel_id": data.get("channel_id"),
                "uploader_id": data.get("uploader_id"),
                "thumbnail": f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg",
                "view_count": data.get("view_count", 0),
                "upload_date": data.get("upload_date", ""),
                "duration": duration,
            })
        return results
    except Exception as e:
        # Best-effort helper: callers treat an empty list as "no results".
        logger.error(f"Error fetching videos: {e}")
        return []
|
|
|
|
|
|
# --- API Routes ---
|
|
|
|
@api_bp.route("/save_video", methods=["POST"])
def save_video():
    """Deprecated endpoint kept for old clients; saving is done client-side.

    Returns:
        A fixed JSON success payload telling the client to use local storage.
    """
    payload = {"success": True, "message": "Use local storage"}
    return jsonify(payload)
|
|
|
|
|
|
@api_bp.route("/history")
def get_history():
    """Return the 50 most recent watch-history entries from the database.

    Returns:
        A JSON list of objects with the keys ``id``, ``title`` and
        ``thumbnail``, newest first.
    """
    conn = get_db_connection()
    try:
        # The value is bound as a parameter: a double-quoted "history" is an
        # identifier in standard SQL and only works through a SQLite fallback.
        rows = conn.execute(
            "SELECT video_id as id, title, thumbnail FROM user_videos "
            "WHERE type = ? ORDER BY timestamp DESC LIMIT 50",
            ("history",),
        ).fetchall()
    finally:
        # Release the connection even when the query raises.
        conn.close()
    return jsonify([dict(row) for row in rows])
|
|
|
|
|
|
@api_bp.route("/suggested")
def get_suggested():
    """Suggest videos based on recently watched titles and channels.

    Query parameters:
        titles: Comma-separated watched titles (the first 5 are considered).
        channels: Comma-separated channel names (the first 3 are considered).

    When no titles are supplied, the five most recent history titles stored
    in the database are used instead; when there is no history at all, a
    generic "trending" search is returned.

    Returns:
        A JSON list of up to 30 de-duplicated video dicts in random order.
    """
    client_titles = request.args.get("titles", "")
    client_channels = request.args.get("channels", "")

    history_titles = [t.strip() for t in client_titles.split(",") if t.strip()][:5]
    history_channels = [c.strip() for c in client_channels.split(",") if c.strip()][:3]

    # Server-side fallback
    if not history_titles:
        try:
            conn = get_db_connection()
            try:
                rows = conn.execute(
                    "SELECT title FROM user_videos WHERE type = ? "
                    "ORDER BY timestamp DESC LIMIT 5",
                    ("history",),
                ).fetchall()
            finally:
                # Close the connection even when the query fails.
                conn.close()
            # Skip NULL/empty titles; they cannot be turned into a query.
            history_titles = [row["title"] for row in rows if row["title"]]
        except Exception as e:
            # Deliberately best-effort: without history we fall back below.
            logger.debug(f"History fetch failed: {e}")

    if not history_titles:
        return jsonify(fetch_videos("trending", limit=20))

    queries = []
    for title in history_titles[:3]:
        # Only the first four words are used to keep the search broad.
        query_base = " ".join(title.split()[:4])
        queries.append(f"{query_base} related -shorts")
    for channel in history_channels[:2]:
        queries.append(f"{channel} latest videos -shorts")

    # Run the searches concurrently; each one is a blocking subprocess call.
    with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
        results = list(executor.map(lambda q: fetch_videos(q, limit=8, filter_type="video"), queries))

    all_suggestions = []
    for res in results:
        all_suggestions.extend(res)

    # De-duplicate by video id.
    final_list = list({v["id"]: v for v in all_suggestions}.values())
    random.shuffle(final_list)

    return jsonify(final_list[:30])
|
|
|
|
|
|
@api_bp.route("/related")
def get_related_videos():
    """Return videos related to a given video, mixing topic and channel results.

    Query parameters:
        v: ID of the current video (excluded from the results).
        title: Title of the current video, used to build the topic search.
        uploader: Channel name, used for the channel search when present.
        page: 1-based page number (default 1, clamped to 1..50).
        limit: Results per page (default 10, clamped to 1..50).

    Returns:
        A JSON list of unique video dicts in random order; HTTP 400 when
        neither ``v`` nor ``title`` is supplied, HTTP 500 on failure.
    """
    max_page = 50
    max_limit = 50

    video_id = request.args.get("v")
    title = request.args.get("title")
    uploader = request.args.get("uploader", "")
    # type=int makes non-numeric input fall back to the default instead of
    # raising; the clamps keep offsets positive and searches bounded.
    page = min(max(request.args.get("page", 1, type=int), 1), max_page)
    limit = min(max(request.args.get("limit", 10, type=int), 1), max_limit)

    if not title and not video_id:
        return jsonify({"error": "Video ID or Title required"}), 400

    try:
        # Half of the page comes from the topic search, the rest from the channel.
        topic_limit = limit // 2
        channel_limit = limit - topic_limit

        # Each search is paged with its own page size so consecutive pages
        # neither overlap nor skip entries when the two sizes differ.
        topic_start = (page - 1) * topic_limit + 1
        channel_start = (page - 1) * channel_limit + 1

        topic_query = f"{title} related" if title else f"{video_id} related"
        channel_query = uploader if uploader else topic_query

        topic_videos = []
        channel_videos = []
        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
            future_topic = None
            if topic_limit > 0:  # limit=1 leaves nothing for the topic search
                future_topic = executor.submit(
                    fetch_videos, topic_query, limit=topic_limit, playlist_start=topic_start
                )
            future_channel = executor.submit(
                fetch_videos, channel_query, limit=channel_limit, playlist_start=channel_start
            )
            if future_topic is not None:
                topic_videos = future_topic.result()
            channel_videos = future_channel.result()

        combined = channel_videos + topic_videos

        # Drop duplicates and the video currently being watched.
        seen = set()
        if video_id:
            seen.add(video_id)

        unique_videos = []
        for v in combined:
            if v["id"] not in seen:
                seen.add(v["id"])
                unique_videos.append(v)

        random.shuffle(unique_videos)
        return jsonify(unique_videos)
    except Exception as e:
        logger.error(f"Error fetching related: {e}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
@api_bp.route("/download")
def get_download_url():
    """Resolve a direct (non-HLS) download link for a YouTube video.

    Query parameters:
        v: YouTube video ID (required).

    Returns:
        JSON with ``url``, ``title`` and ``ext`` when a direct link is found.
        Otherwise JSON with an ``error`` message and a ``fallback_url``
        pointing at the watch page (HTTP 200). HTTP 400 when ``v`` is
        missing, HTTP 500 when extraction raises.
    """
    video_id = request.args.get("v")
    if not video_id:
        return jsonify({"error": "No video ID"}), 400

    try:
        url = f"https://www.youtube.com/watch?v={video_id}"
        extractor_options = {
            "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best[protocol!*=m3u8]/best",
            "noplaylist": True,
            "quiet": True,
            "no_warnings": True,
            "skip_download": True,
            "youtube_include_dash_manifest": False,
            "youtube_include_hls_manifest": False,
        }

        with yt_dlp.YoutubeDL(extractor_options) as ydl:
            info = ydl.extract_info(url, download=False)
            direct_url = info.get("url", "")

            needs_fallback = ".m3u8" in direct_url or not direct_url
            if needs_fallback:
                # Walk the format list from the end and take the first mp4
                # entry whose URL is not an HLS playlist.
                for candidate in reversed(info.get("formats", [])):
                    candidate_url = candidate.get("url", "")
                    if not candidate_url or "m3u8" in candidate_url:
                        continue
                    if candidate.get("ext") != "mp4":
                        continue
                    direct_url = candidate_url
                    break

            title = info.get("title", "video")

            if not direct_url or ".m3u8" in direct_url:
                unavailable = {
                    "error": "Direct download not available. Try a video downloader site.",
                    "fallback_url": url,
                }
                return jsonify(unavailable), 200

            return jsonify({"url": direct_url, "title": title, "ext": "mp4"})

    except Exception as e:
        logger.error(f"Download URL error: {e}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
@api_bp.route("/download/formats")
def get_download_formats():
    """List the downloadable mp4/webm formats of a YouTube video.

    Query parameters:
        v: YouTube video ID (required).

    Returns:
        JSON with ``success``, ``video_id``, ``title``, ``duration``,
        ``thumbnail`` and ``formats`` (up to 10 ``video`` and 5 ``audio``
        entries, sorted from highest to lowest recognised resolution).
        HTTP 400 when ``v`` is missing, HTTP 500 when extraction raises.
    """
    video_id = request.args.get("v")
    if not video_id:
        return jsonify({"success": False, "error": "No video ID"}), 400

    # Resolution markers in descending order; the index is the sort rank.
    resolution_markers = ["4k", "2160", "1080", "720", "480", "360", "240", "144"]

    def parse_quality(f):
        """Return the sort rank of a format entry (99 when unrecognised)."""
        label = f["quality"].lower()
        return next(
            (rank for rank, marker in enumerate(resolution_markers) if marker in label),
            99,
        )

    def describe_size(num_bytes):
        """Render a byte count as GB/MB/KB text, or "" when unknown or tiny."""
        if not num_bytes:
            return ""
        if num_bytes > 1024**3:
            return f"{num_bytes / 1024**3:.1f} GB"
        if num_bytes > 1024**2:
            return f"{num_bytes / 1024**2:.1f} MB"
        if num_bytes > 1024:
            return f"{num_bytes / 1024:.1f} KB"
        return ""

    try:
        url = f"https://www.youtube.com/watch?v={video_id}"
        extractor_options = {
            "format": "best",
            "noplaylist": True,
            "quiet": True,
            "no_warnings": True,
            "skip_download": True,
            "youtube_include_dash_manifest": False,
            "youtube_include_hls_manifest": False,
        }

        with yt_dlp.YoutubeDL(extractor_options) as ydl:
            info = ydl.extract_info(url, download=False)

        title = info.get("title", "Unknown")
        duration = info.get("duration", 0)
        thumbnail = info.get("thumbnail", "")

        video_formats = []
        audio_formats = []

        for fmt in info.get("formats", []):
            stream_url = fmt.get("url", "")
            # Formats without a URL and HLS playlists cannot be downloaded directly.
            if not stream_url or "m3u8" in stream_url:
                continue

            extension = fmt.get("ext", "")
            quality = fmt.get("format_note", "") or fmt.get("format", "") or "Unknown"
            size_str = describe_size(fmt.get("filesize", 0) or fmt.get("filesize_approx", 0))

            # Only mp4 and webm containers are offered.
            if extension not in ["mp4", "webm"]:
                continue

            has_video = fmt.get("vcodec", "none") != "none"
            has_audio = fmt.get("acodec", "none") != "none"
            if not has_video and not has_audio:
                continue

            entry = {
                "quality": f"{quality} (with audio)" if has_video and has_audio else quality,
                "ext": extension,
                "size": size_str,
                "url": stream_url,
            }
            if has_video:
                entry["type"] = "combined" if has_audio else "video"
                entry["has_audio"] = has_audio
                video_formats.append(entry)
            else:
                entry["type"] = "audio"
                audio_formats.append(entry)

        video_formats.sort(key=parse_quality)
        audio_formats.sort(key=parse_quality)

        return jsonify({
            "success": True,
            "video_id": video_id,
            "title": title,
            "duration": duration,
            "thumbnail": thumbnail,
            "formats": {"video": video_formats[:10], "audio": audio_formats[:5]},
        })

    except Exception as e:
        logger.error(f"Download formats error: {e}")
        return jsonify({"success": False, "error": str(e)}), 500
|
|
|
|
|
|
@api_bp.route("/get_stream_info")
def get_stream_info():
    """Return stream metadata for a video, served from a database cache when fresh.

    Query parameters:
        v: YouTube video ID (required).

    The extracted data is stored in the ``video_cache`` table for one hour.
    The response carries an ``X-Cache`` header set to ``HIT`` or ``MISS`` and
    a ``stream_url`` that routes the original URL through ``/video_proxy``.

    Returns:
        JSON with ``original_url``, ``stream_url``, ``title``,
        ``description``, ``uploader``, ``uploader_id``, ``channel_id``,
        ``upload_date``, ``view_count`` and ``related``. HTTP 400 when ``v``
        is missing, HTTP 500 when extraction fails.
    """
    from urllib.parse import quote

    video_id = request.args.get("v")
    if not video_id:
        return jsonify({"error": "No video ID"}), 400

    conn = None
    try:
        conn = get_db_connection()
        cached = conn.execute(
            "SELECT data, expires_at FROM video_cache WHERE video_id = ?", (video_id,)
        ).fetchone()

        current_time = time.time()
        if cached:
            try:
                if current_time < float(cached["expires_at"]):
                    data = json.loads(cached["data"])
                    data["stream_url"] = f"/video_proxy?url={quote(data['original_url'], safe='')}"
                    response = jsonify(data)
                    response.headers["X-Cache"] = "HIT"
                    return response
            except (ValueError, KeyError, TypeError) as e:
                # A malformed cache row is not fatal: extract again and let
                # the INSERT OR REPLACE below overwrite it. The connection
                # stays open for that write.
                logger.debug(f"Ignoring unusable cache entry for {video_id}: {e}")

        url = f"https://www.youtube.com/watch?v={video_id}"
        ydl_opts = {
            "format": "best[ext=mp4]/best",
            "noplaylist": True,
            "quiet": True,
            "skip_download": True,
            "socket_timeout": 10,
            "force_ipv4": True,
            "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            try:
                info = ydl.extract_info(url, download=False)
            except Exception as e:
                logger.warning(f"yt-dlp error for {video_id}: {str(e)}")
                return jsonify({"error": f"Stream extraction failed: {str(e)}"}), 500

        stream_url = info.get("url")
        if not stream_url:
            return jsonify({"error": "No stream URL found"}), 500

        # Log the headers yt-dlp expects us to use
        expected_headers = info.get("http_headers", {})
        logger.info(f"YT-DLP Expected Headers: {expected_headers}")

        response_data = {
            "original_url": stream_url,
            "title": info.get("title", "Unknown"),
            "description": info.get("description", ""),
            "uploader": info.get("uploader", ""),
            "uploader_id": info.get("uploader_id", ""),
            "channel_id": info.get("channel_id", ""),
            "upload_date": info.get("upload_date", ""),
            "view_count": info.get("view_count", 0),
            "related": [],
        }
        response_data["stream_url"] = f"/video_proxy?url={quote(stream_url, safe='')}"

        # Cache it for one hour.
        expiry = current_time + 3600
        conn.execute(
            "INSERT OR REPLACE INTO video_cache (video_id, data, expires_at) VALUES (?, ?, ?)",
            (video_id, json.dumps(response_data), expiry),
        )
        conn.commit()

        response = jsonify(response_data)
        response.headers["X-Cache"] = "MISS"
        return response

    except Exception as e:
        logger.error(f"Stream info error for {video_id}: {e}")
        return jsonify({"error": str(e)}), 500
    finally:
        # Single exit point for the connection: covers the cache hit, both
        # early error returns and every exception path.
        if conn is not None:
            conn.close()
|
|
|
|
|
|
@api_bp.route("/search")
def search():
    """Search for videos, or look up a single video when given a YouTube URL.

    Query parameters:
        q: Search terms or a youtube.com/watch or youtu.be URL (required).

    Returns:
        A JSON list of video dicts: one element for a URL lookup, otherwise
        the results of a 20-item search with Shorts filtered out. HTTP 400
        when ``q`` is missing, HTTP 500 on failure.
    """
    query = request.args.get("q")
    if not query:
        return jsonify({"error": "No query provided"}), 400

    try:
        # A pasted watch/short link carries an 11-character video id.
        url_match = re.match(
            r"(?:https?://)?(?:www\.)?(?:youtube\.com/watch\?v=|youtu\.be/)([a-zA-Z0-9_-]{11})",
            query,
        )

        if not url_match:
            # Plain text: run a standard search.
            return jsonify(fetch_videos(query, limit=20, filter_type="video"))

        # URL: fetch the metadata of that single video.
        video_id = url_match.group(1)
        lookup_options = {
            "quiet": True,
            "no_warnings": True,
            "noplaylist": True,
            "user_agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
        }
        with yt_dlp.YoutubeDL(lookup_options) as ydl:
            info = ydl.extract_info(f"https://www.youtube.com/watch?v={video_id}", download=False)
            single_result = {
                "id": video_id,
                "title": info.get("title", "Unknown"),
                "uploader": info.get("uploader", "Unknown"),
                "thumbnail": f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg",
                "view_count": info.get("view_count", 0),
                "upload_date": info.get("upload_date", ""),
                "duration": None,
            }
            return jsonify([single_result])

    except Exception as e:
        logger.error(f"Search Error: {e}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
@api_bp.route("/channel")
def get_channel_videos_simple():
    """List up to 20 entries from a channel's videos or shorts tab.

    Query parameters:
        id: Channel ID or ``@handle`` (required).
        filter_type: ``"shorts"`` for the shorts tab; anything else selects
            the videos tab (default ``"video"``).

    Returns:
        A JSON list of dicts with ``id``, ``title``, ``thumbnail``,
        ``view_count``, ``duration``, ``upload_date`` and ``uploader``.
        HTTP 400 when ``id`` is missing, HTTP 500 on failure.
    """
    channel_id = request.args.get("id")
    filter_type = request.args.get("filter_type", "video")
    if not channel_id:
        return jsonify({"error": "No channel ID provided"}), 400

    try:
        tab = "shorts" if filter_type == "shorts" else "videos"

        # Handles live directly under the site root; every other identifier
        # is addressed through /channel/.
        if channel_id.startswith("@"):
            url = f"https://www.youtube.com/{channel_id}/{tab}"
        else:
            url = f"https://www.youtube.com/channel/{channel_id}/{tab}"

        command = [
            sys.executable, "-m", "yt_dlp",
            url,
            "--dump-json",
            "--flat-playlist",
            "--playlist-end", "20",
            "--no-warnings",
        ]
        completed = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True
        )

        videos = []
        # yt-dlp prints one JSON document per line; skip lines that are not JSON.
        for raw_line in completed.stdout.splitlines():
            try:
                entry = json.loads(raw_line)
            except json.JSONDecodeError:
                continue

            duration_text = None
            if entry.get("duration"):
                minutes, seconds = divmod(int(entry["duration"]), 60)
                hours, minutes = divmod(minutes, 60)
                if hours:
                    duration_text = f"{hours}:{minutes:02d}:{seconds:02d}"
                else:
                    duration_text = f"{minutes}:{seconds:02d}"

            videos.append({
                "id": entry.get("id"),
                "title": entry.get("title"),
                "thumbnail": f"https://i.ytimg.com/vi/{entry.get('id')}/mqdefault.jpg",
                "view_count": entry.get("view_count") or 0,
                "duration": duration_text,
                "upload_date": entry.get("upload_date"),
                "uploader": entry.get("uploader") or entry.get("channel") or "",
            })

        return jsonify(videos)

    except Exception as e:
        logger.error(f"Channel Fetch Error: {e}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
@api_bp.route("/trending")
def trending():
    """Return trending-style search results, cached in memory for 10 minutes.

    Query parameters:
        category: ``all`` (default) mixes several searches; ``music``,
            ``gaming``, ``news``, ``tech``, ``movies`` and ``sports`` run a
            single search. Unknown values use the generic trending query.
        page: 1-based page number (default 1, clamped to 1..50).
        sort: Accepted and part of the cache key, but not otherwise used.
        region: ``vietnam`` (default) appends " vietnam" to every query.

    Returns:
        A JSON list of video dicts, or HTTP 500 with an ``error`` message.
    """
    max_page = 50

    category = request.args.get("category", "all")
    # type=int makes non-numeric input fall back to the default instead of
    # raising; the clamp keeps playlist offsets positive and bounded.
    page = min(max(request.args.get("page", 1, type=int), 1), max_page)
    sort = request.args.get("sort", "newest")
    region = request.args.get("region", "vietnam")

    cache_key = f"trending_{category}_{page}_{sort}_{region}"

    # Check cache
    cached_entry = API_CACHE.get(cache_key)
    if cached_entry is not None:
        cached_time, cached_data = cached_entry
        if time.time() - cached_time < CACHE_TIMEOUT:
            return jsonify(cached_data)

    try:
        # For 'all', always fetch from multiple categories for diverse content.
        if category == "all":
            region_suffix = " vietnam" if region == "vietnam" else ""

            # Rotate through different query sets based on page for variety.
            query_sets = [
                [f"trending videos 2024{region_suffix}", f"music trending{region_suffix}", f"tech reviews 2024{region_suffix}"],
                [f"movie trailers 2024{region_suffix}", f"gaming trending{region_suffix}", f"sports highlights{region_suffix}"],
                [f"trending music 2024{region_suffix}", f"viral videos{region_suffix}", f"entertainment news{region_suffix}"],
                [f"tech gadgets{region_suffix}", f"comedy videos{region_suffix}", f"documentary{region_suffix}"],
            ]
            current_queries = query_sets[(page - 1) % len(query_sets)]

            # Once every set has been used, move 7 results deeper into each.
            start_offset = ((page - 1) // len(query_sets)) * 7 + 1

            # Fetch from multiple categories in parallel.
            with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
                futures = [
                    executor.submit(fetch_videos, q, limit=7, filter_type="video", playlist_start=start_offset)
                    for q in current_queries
                ]
                results = [f.result() for f in futures]

            # Combine all videos and deduplicate.
            all_videos = []
            seen_ids = set()
            for video_list in results:
                for vid in video_list:
                    if vid["id"] not in seen_ids:
                        seen_ids.add(vid["id"])
                        all_videos.append(vid)

            # Shuffle for variety.
            random.shuffle(all_videos)

            # An empty list usually means the fetch failed; caching it would
            # serve nothing for the whole cache lifetime.
            if all_videos:
                API_CACHE[cache_key] = (time.time(), all_videos)
            return jsonify(all_videos)

        # Single category - support proper pagination.
        queries = {
            "all": "trending videos 2024",
            "music": "music trending",
            "gaming": "gaming trending",
            "news": "news today",
            "tech": "technology reviews 2024",
            "movies": "movie trailers 2024",
            "sports": "sports highlights",
        }
        query = queries.get(category, queries["all"])
        if region == "vietnam":
            query += " vietnam"

        videos = fetch_videos(query, limit=20, filter_type="video", playlist_start=(page - 1) * 20 + 1)

        # Same rule as above: never cache an empty (likely failed) result.
        if videos:
            API_CACHE[cache_key] = (time.time(), videos)

        return jsonify(videos)

    except Exception as e:
        logger.error(f"Trending error: {e}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
@api_bp.route("/summarize")
def summarize_video():
    """Summarize a video from its English or Vietnamese transcript.

    Query parameters:
        v: YouTube video ID (required).

    Returns:
        JSON ``{"success": True, "summary": ...}`` on success, or
        ``{"success": False, "message": ...}`` when the transcript cannot be
        fetched or summarized. HTTP 400 when ``v`` is missing.
    """
    video_id = request.args.get("v")
    if not video_id:
        return jsonify({"error": "No video ID"}), 400

    try:
        from youtube_transcript_api import YouTubeTranscriptApi
        from youtube_transcript_api._errors import TranscriptsDisabled

        languages = ["en", "vi"]
        available = YouTubeTranscriptApi.list_transcripts(video_id)

        # Try find_transcript first; if it fails for any reason, ask for an
        # auto-generated transcript in the same languages.
        try:
            transcript = available.find_transcript(languages)
        except Exception:
            transcript = available.find_generated_transcript(languages)

        segments = transcript.fetch()
        full_text = " ".join(segment["text"] for segment in segments)
        summary = extractive_summary(full_text, num_sentences=7)

        return jsonify({"success": True, "summary": summary})

    except Exception as e:
        return jsonify({"success": False, "message": f"Could not summarize: {str(e)}"})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@api_bp.route("/update_ytdlp", methods=["POST"])
def update_ytdlp():
    """Upgrade yt-dlp with pip and report the installed version.

    Returns:
        JSON ``{"success": True, "message": "Updated to <version>"}`` when pip
        exits with 0; otherwise ``{"success": False, "message": ...}`` with
        HTTP 500.
    """
    try:
        upgrade = subprocess.run(
            [sys.executable, "-m", "pip", "install", "-U", "yt-dlp"],
            capture_output=True,
            text=True,
        )

        if upgrade.returncode != 0:
            return jsonify({"success": False, "message": f"Update failed: {upgrade.stderr}"}), 500

        # Ask the freshly installed module for its version string.
        version_check = subprocess.run(
            [sys.executable, "-m", "yt_dlp", "--version"],
            capture_output=True,
            text=True,
        )
        version = version_check.stdout.strip()
        return jsonify({"success": True, "message": f"Updated to {version}"})
    except Exception as e:
        return jsonify({"success": False, "message": str(e)}), 500
|
|
|
|
|
|
@api_bp.route("/comments")
def get_comments():
    """Fetch up to 50 comments for a video through the yt-dlp command line.

    Query parameters:
        v: YouTube video ID (required).

    Returns:
        JSON with ``comments`` (a list of dicts with ``author``,
        ``author_thumbnail``, ``text``, ``likes``, ``time`` and ``is_pinned``)
        and ``count``. On failure or a 30-second timeout the list is empty
        and an ``error`` message is included. HTTP 400 when ``v`` is missing.
    """
    video_id = request.args.get("v")
    if not video_id:
        return jsonify({"error": "No video ID"}), 400

    try:
        url = f"https://www.youtube.com/watch?v={video_id}"
        command = [
            sys.executable, "-m", "yt_dlp",
            url,
            "--write-comments",
            "--skip-download",
            "--dump-json",
        ]

        completed = subprocess.run(command, capture_output=True, text=True, timeout=30)

        if completed.returncode != 0:
            return jsonify({"comments": [], "count": 0, "error": "Could not load comments"})

        data = json.loads(completed.stdout)
        raw_comments = data.get("comments", [])

        # Keep only the first 50 comments and the fields the frontend reads.
        comments = [
            {
                "author": item.get("author", "Unknown"),
                "author_thumbnail": item.get("author_thumbnail", ""),
                "text": item.get("text", ""),
                "likes": item.get("like_count", 0),
                "time": item.get("time_text", ""),
                "is_pinned": item.get("is_pinned", False),
            }
            for item in raw_comments[:50]
        ]

        return jsonify({"comments": comments, "count": data.get("comment_count", len(comments))})

    except subprocess.TimeoutExpired:
        return jsonify({"comments": [], "count": 0, "error": "Comments loading timed out"})
    except Exception as e:
        return jsonify({"comments": [], "count": 0, "error": str(e)})
|
|
|
|
|
|
|