"""Feed API routes with an LRU video cache for mobile optimization."""
|
|
|
|
from fastapi import APIRouter, Query, HTTPException, Request
|
|
from fastapi.responses import StreamingResponse, FileResponse
|
|
from pydantic import BaseModel
|
|
from typing import Optional
|
|
import httpx
|
|
import os
|
|
import json
|
|
import tempfile
|
|
import asyncio
|
|
import hashlib
|
|
import time
|
|
import shutil
|
|
|
|
from core.playwright_manager import PlaywrightManager
|
|
|
|
# Router for feed endpoints; mounted by the application under the feed prefix.
router = APIRouter()


# ========== LRU VIDEO CACHE ==========
# On-disk cache of downloaded/re-encoded videos, keyed by an MD5 of the URL.
# Eviction is LRU by file mtime (hits touch the file); see enforce_cache_limits.
CACHE_DIR = os.path.join(tempfile.gettempdir(), "purestream_cache")
MAX_CACHE_SIZE_MB = 500  # Limit cache to 500MB total on disk
MAX_CACHE_FILES = 30  # Keep max 30 videos cached
CACHE_TTL_HOURS = 2  # Videos expire after 2 hours regardless of use
|
|
|
|
def init_cache():
    """Create the cache directory (if missing) and purge expired entries."""
    os.makedirs(CACHE_DIR, exist_ok=True)
    cleanup_old_cache()
|
|
|
|
def get_cache_key(url: str) -> str:
    """Derive a stable 32-char hex cache key from *url* (MD5 digest)."""
    digest = hashlib.md5(url.encode())
    return digest.hexdigest()
|
|
|
|
def get_cached_path(url: str) -> Optional[str]:
    """Return the cached file path for *url*, or None on a miss.

    A hit refreshes the file's mtime so LRU eviction treats it as
    recently used; an entry older than CACHE_TTL_HOURS is deleted and
    reported as a miss.
    """
    path = os.path.join(CACHE_DIR, f"{get_cache_key(url)}.mp4")

    if not os.path.exists(path):
        return None

    age_hours = (time.time() - os.path.getmtime(path)) / 3600
    if age_hours >= CACHE_TTL_HOURS:
        # Past its TTL: drop the stale entry and treat as a miss.
        os.unlink(path)
        return None

    # Fresh hit -- bump mtime so eviction keeps this entry around.
    os.utime(path, None)
    return path
|
|
|
|
def save_to_cache(url: str, source_path: str) -> str:
    """Copy *source_path* into the cache under *url*'s key; return the cached path."""
    dest = os.path.join(CACHE_DIR, f"{get_cache_key(url)}.mp4")

    # copy2 preserves timestamps/metadata of the downloaded file.
    shutil.copy2(source_path, dest)

    # Adding a file may push the cache over its limits -- evict if so.
    enforce_cache_limits()

    return dest
|
|
|
|
def enforce_cache_limits():
    """Evict least-recently-used cache files until count/size limits hold.

    Files are ranked by mtime (refreshed on every cache hit), so the
    smallest mtime is the least recently used. Eviction is best-effort:
    files that cannot be removed are skipped without aborting.
    """
    if not os.path.exists(CACHE_DIR):
        return

    files = []  # (path, mtime, size)
    total_size = 0

    for f in os.listdir(CACHE_DIR):
        fpath = os.path.join(CACHE_DIR, f)
        if os.path.isfile(fpath):
            stat = os.stat(fpath)
            files.append((fpath, stat.st_mtime, stat.st_size))
            total_size += stat.st_size

    # Sort by modification time (oldest first)
    files.sort(key=lambda x: x[1])

    # Remove oldest until under limits
    max_bytes = MAX_CACHE_SIZE_MB * 1024 * 1024

    while (len(files) > MAX_CACHE_FILES or total_size > max_bytes) and files:
        oldest = files.pop(0)
        try:
            os.unlink(oldest[0])
            total_size -= oldest[2]
            print(f"CACHE: Removed {oldest[0]} (LRU)")
        except OSError:
            # BUGFIX: was a bare `except:` that swallowed everything
            # (including KeyboardInterrupt). Only filesystem errors are
            # expected here (file vanished / locked); skip and continue.
            pass
|
|
|
|
def cleanup_old_cache():
    """Delete cache files older than CACHE_TTL_HOURS (run at startup).

    Best-effort: files that cannot be deleted are left in place and will
    be retried on the next startup or evicted by enforce_cache_limits().
    """
    if not os.path.exists(CACHE_DIR):
        return

    now = time.time()
    for f in os.listdir(CACHE_DIR):
        fpath = os.path.join(CACHE_DIR, f)
        if os.path.isfile(fpath):
            age_hours = (now - os.path.getmtime(fpath)) / 3600
            if age_hours > CACHE_TTL_HOURS:
                try:
                    os.unlink(fpath)
                    print(f"CACHE: Expired {f}")
                except OSError:
                    # BUGFIX: was a bare `except:`; only filesystem errors
                    # are expected and safe to ignore here.
                    pass
|
|
|
|
def get_cache_stats() -> dict:
    """Return cache statistics as ``{"files": count, "size_mb": total}``."""
    if not os.path.exists(CACHE_DIR):
        return {"files": 0, "size_mb": 0}

    # Collect sizes of regular files only; subdirectories are ignored.
    sizes = [
        os.path.getsize(os.path.join(CACHE_DIR, name))
        for name in os.listdir(CACHE_DIR)
        if os.path.isfile(os.path.join(CACHE_DIR, name))
    ]

    return {"files": len(sizes), "size_mb": round(sum(sizes) / 1024 / 1024, 2)}
|
|
|
|
# Initialize cache on module load: ensure the directory exists and drop
# entries that expired while the server was down. NOTE: this runs as an
# import-time side effect.
init_cache()
|
|
|
|
# ========== API ROUTES ==========
|
|
|
|
class FeedRequest(BaseModel):
    """POST body for the feed endpoint.

    ``credentials`` is an optional raw JSON credential blob (cookies plus
    user agent); ``None`` means "use the server's stored credentials".
    """

    credentials: Optional[dict] = None
|
|
|
|
|
|
@router.post("")
|
|
async def get_feed(request: FeedRequest = None):
|
|
"""Get TikTok feed using network interception."""
|
|
cookies = None
|
|
user_agent = None
|
|
|
|
if request and request.credentials:
|
|
cookies, user_agent = PlaywrightManager.parse_json_credentials(request.credentials)
|
|
print(f"DEBUG: Using provided credentials ({len(cookies)} cookies)")
|
|
|
|
try:
|
|
videos = await PlaywrightManager.intercept_feed(cookies, user_agent)
|
|
return videos
|
|
except Exception as e:
|
|
print(f"DEBUG: Feed error: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("")
|
|
async def get_feed_simple(fast: bool = False):
|
|
"""Simple GET endpoint to fetch feed using stored credentials."""
|
|
try:
|
|
# Fast mode = 0 scrolls (just initial batch), Normal = 5 scrolls
|
|
scroll_count = 0 if fast else 5
|
|
videos = await PlaywrightManager.intercept_feed(scroll_count=scroll_count)
|
|
return videos
|
|
except Exception as e:
|
|
print(f"DEBUG: Feed error: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("/cache-stats")
|
|
async def cache_stats():
|
|
"""Get video cache statistics."""
|
|
return get_cache_stats()
|
|
|
|
|
|
@router.delete("/cache")
|
|
async def clear_cache():
|
|
"""Clear video cache."""
|
|
if os.path.exists(CACHE_DIR):
|
|
shutil.rmtree(CACHE_DIR, ignore_errors=True)
|
|
os.makedirs(CACHE_DIR, exist_ok=True)
|
|
return {"status": "cleared"}
|
|
|
|
|
|
@router.get("/proxy")
|
|
async def proxy_video(
|
|
url: str = Query(..., description="The TikTok video URL to proxy"),
|
|
download: bool = Query(False, description="Force download with attachment header")
|
|
):
|
|
"""
|
|
Proxy video with LRU caching for mobile optimization.
|
|
Prefers H.264 codec for browser compatibility, re-encodes HEVC if needed.
|
|
"""
|
|
import yt_dlp
|
|
import re
|
|
import subprocess
|
|
|
|
# Check cache first
|
|
cached_path = get_cached_path(url)
|
|
if cached_path:
|
|
print(f"CACHE HIT: {url[:50]}...")
|
|
|
|
response_headers = {}
|
|
if download:
|
|
video_id_match = re.search(r'/video/(\d+)', url)
|
|
video_id = video_id_match.group(1) if video_id_match else "tiktok_video"
|
|
response_headers["Content-Disposition"] = f'attachment; filename="{video_id}.mp4"'
|
|
|
|
return FileResponse(
|
|
cached_path,
|
|
media_type="video/mp4",
|
|
headers=response_headers
|
|
)
|
|
|
|
print(f"CACHE MISS: {url[:50]}... (downloading)")
|
|
|
|
# Load stored credentials
|
|
cookies, user_agent = PlaywrightManager.load_stored_credentials()
|
|
|
|
# Create temp file for download
|
|
temp_dir = tempfile.mkdtemp()
|
|
output_template = os.path.join(temp_dir, "video.%(ext)s")
|
|
|
|
# Create cookies file for yt-dlp
|
|
cookie_file_path = None
|
|
if cookies:
|
|
cookie_file = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False)
|
|
cookie_file.write("# Netscape HTTP Cookie File\n")
|
|
for c in cookies:
|
|
cookie_file.write(f".tiktok.com\tTRUE\t/\tFALSE\t0\t{c['name']}\t{c['value']}\n")
|
|
cookie_file.close()
|
|
cookie_file_path = cookie_file.name
|
|
|
|
# Prefer H.264 (avc1) over HEVC (hvc1/hev1) for browser compatibility
|
|
# Format selection: prefer mp4 with h264, fallback to best mp4
|
|
ydl_opts = {
|
|
'format': 'best[ext=mp4][vcodec^=avc]/best[ext=mp4]/best', # Prefer H.264
|
|
'outtmpl': output_template,
|
|
'quiet': True,
|
|
'no_warnings': True,
|
|
'http_headers': {
|
|
'User-Agent': user_agent,
|
|
'Referer': 'https://www.tiktok.com/'
|
|
}
|
|
}
|
|
|
|
if cookie_file_path:
|
|
ydl_opts['cookiefile'] = cookie_file_path
|
|
|
|
video_path = None
|
|
video_codec = None
|
|
|
|
try:
|
|
loop = asyncio.get_event_loop()
|
|
|
|
def download_video():
|
|
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
|
|
info = ydl.extract_info(url, download=True)
|
|
ext = info.get('ext', 'mp4')
|
|
# Get codec info
|
|
vcodec = info.get('vcodec', '') or ''
|
|
return os.path.join(temp_dir, f"video.{ext}"), vcodec
|
|
|
|
video_path, video_codec = await loop.run_in_executor(None, download_video)
|
|
|
|
if not os.path.exists(video_path):
|
|
raise Exception("Video file not created")
|
|
|
|
print(f"Downloaded codec: {video_codec}")
|
|
|
|
# Check if we need to re-encode HEVC to H.264
|
|
# If codec unknown, probe the actual file
|
|
is_hevc = False
|
|
if video_codec:
|
|
is_hevc = any(x in video_codec.lower() for x in ['hevc', 'hev1', 'hvc1', 'h265', 'h.265'])
|
|
else:
|
|
# Try probing with yt-dlp's ffprobe
|
|
try:
|
|
probe_cmd = ['ffprobe', '-v', 'error', '-select_streams', 'v:0',
|
|
'-show_entries', 'stream=codec_name', '-of', 'csv=p=0', video_path]
|
|
probe_result = subprocess.run(probe_cmd, capture_output=True, timeout=10)
|
|
detected_codec = probe_result.stdout.decode().strip().lower()
|
|
is_hevc = 'hevc' in detected_codec or 'h265' in detected_codec
|
|
print(f"Probed codec: {detected_codec}")
|
|
except Exception as probe_err:
|
|
print(f"Probe failed: {probe_err}")
|
|
|
|
if is_hevc:
|
|
print(f"HEVC detected, re-encoding to H.264 for browser compatibility...")
|
|
h264_path = os.path.join(temp_dir, "video_h264.mp4")
|
|
|
|
def reencode_to_h264():
|
|
# Use ffmpeg to re-encode from HEVC to H.264
|
|
cmd = [
|
|
'ffmpeg', '-y',
|
|
'-i', video_path,
|
|
'-c:v', 'libx264', # H.264 codec
|
|
'-preset', 'fast', # Balance speed and quality
|
|
'-crf', '23', # Quality (lower = better, 18-28 is good)
|
|
'-c:a', 'aac', # AAC audio
|
|
'-movflags', '+faststart', # Web optimization
|
|
h264_path
|
|
]
|
|
try:
|
|
result = subprocess.run(cmd, capture_output=True, timeout=120)
|
|
if result.returncode != 0:
|
|
print(f"FFmpeg error: {result.stderr.decode()[:200]}")
|
|
return None
|
|
return h264_path
|
|
except FileNotFoundError:
|
|
print("FFmpeg not installed, trying yt-dlp postprocessor...")
|
|
# Fallback: re-download with recode postprocessor
|
|
recode_opts = ydl_opts.copy()
|
|
recode_opts['postprocessors'] = [{'key': 'FFmpegVideoConvertor', 'preferedformat': 'mp4'}]
|
|
recode_opts['postprocessor_args'] = ['-c:v', 'libx264', '-preset', 'fast', '-crf', '23']
|
|
recode_opts['outtmpl'] = h264_path.replace('.mp4', '.%(ext)s')
|
|
try:
|
|
with yt_dlp.YoutubeDL(recode_opts) as ydl:
|
|
ydl.download([url])
|
|
return h264_path if os.path.exists(h264_path) else None
|
|
except:
|
|
return None
|
|
|
|
reencoded_path = await loop.run_in_executor(None, reencode_to_h264)
|
|
|
|
if reencoded_path and os.path.exists(reencoded_path):
|
|
video_path = reencoded_path
|
|
print("Re-encoding successful!")
|
|
else:
|
|
print("Re-encoding failed, using original video")
|
|
|
|
# Save to cache for future requests
|
|
cached_path = save_to_cache(url, video_path)
|
|
stats = get_cache_stats()
|
|
print(f"CACHED: {url[:50]}... ({stats['files']} files, {stats['size_mb']}MB total)")
|
|
|
|
except Exception as e:
|
|
print(f"DEBUG: yt-dlp download failed: {e}")
|
|
# Cleanup
|
|
if cookie_file_path and os.path.exists(cookie_file_path):
|
|
os.unlink(cookie_file_path)
|
|
if os.path.exists(temp_dir):
|
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
raise HTTPException(status_code=500, detail=f"Could not download video: {e}")
|
|
|
|
# Cleanup temp (cached file is separate)
|
|
if cookie_file_path and os.path.exists(cookie_file_path):
|
|
os.unlink(cookie_file_path)
|
|
shutil.rmtree(temp_dir, ignore_errors=True)
|
|
|
|
# Return from cache
|
|
response_headers = {}
|
|
if download:
|
|
video_id_match = re.search(r'/video/(\d+)', url)
|
|
video_id = video_id_match.group(1) if video_id_match else "tiktok_video"
|
|
response_headers["Content-Disposition"] = f'attachment; filename="{video_id}.mp4"'
|
|
|
|
return FileResponse(
|
|
cached_path,
|
|
media_type="video/mp4",
|
|
headers=response_headers
|
|
)
|
|
|
|
|
|
@router.get("/thin-proxy")
|
|
async def thin_proxy_video(
|
|
request: Request,
|
|
cdn_url: str = Query(..., description="Direct TikTok CDN URL")
|
|
):
|
|
"""
|
|
Thin proxy - just forwards CDN requests with proper headers.
|
|
Supports Range requests for buffering and seeking.
|
|
"""
|
|
|
|
# Load stored credentials for headers
|
|
cookies, user_agent = PlaywrightManager.load_stored_credentials()
|
|
|
|
headers = {
|
|
"User-Agent": user_agent or PlaywrightManager.DEFAULT_USER_AGENT,
|
|
"Referer": "https://www.tiktok.com/",
|
|
"Accept": "*/*",
|
|
"Accept-Language": "en-US,en;q=0.9",
|
|
"Origin": "https://www.tiktok.com",
|
|
}
|
|
|
|
# Add cookies as header if available
|
|
if cookies:
|
|
cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
|
|
headers["Cookie"] = cookie_str
|
|
|
|
# Forward Range header if present
|
|
client_range = request.headers.get("Range")
|
|
if client_range:
|
|
headers["Range"] = client_range
|
|
|
|
try:
|
|
# Create client outside stream generator to access response headers first
|
|
client = httpx.AsyncClient(timeout=60.0, follow_redirects=True)
|
|
# We need to manually close this client later or use it in the generator
|
|
|
|
# Start the request to get headers (without reading body yet)
|
|
req = client.build_request("GET", cdn_url, headers=headers)
|
|
r = await client.send(req, stream=True)
|
|
|
|
async def stream_from_cdn():
|
|
try:
|
|
async for chunk in r.aiter_bytes(chunk_size=64 * 1024):
|
|
yield chunk
|
|
finally:
|
|
await r.aclose()
|
|
await client.aclose()
|
|
|
|
response_headers = {
|
|
"Accept-Ranges": "bytes",
|
|
"Cache-Control": "public, max-age=3600",
|
|
"Content-Type": r.headers.get("Content-Type", "video/mp4"),
|
|
}
|
|
|
|
# Forward Content-Length and Content-Range
|
|
if "Content-Length" in r.headers:
|
|
response_headers["Content-Length"] = r.headers["Content-Length"]
|
|
if "Content-Range" in r.headers:
|
|
response_headers["Content-Range"] = r.headers["Content-Range"]
|
|
|
|
status_code = r.status_code
|
|
|
|
return StreamingResponse(
|
|
stream_from_cdn(),
|
|
status_code=status_code,
|
|
media_type="video/mp4",
|
|
headers=response_headers
|
|
)
|
|
|
|
except Exception as e:
|
|
print(f"Thin proxy error: {e}")
|
|
# Ensure cleanup if possible
|
|
raise HTTPException(status_code=500, detail=str(e))
|