""" Feed API routes with LRU video cache for mobile optimization. """ from fastapi import APIRouter, Query, HTTPException, Request from fastapi.responses import StreamingResponse, FileResponse from pydantic import BaseModel from typing import Optional import httpx import os import json import tempfile import asyncio import hashlib import time import shutil from core.playwright_manager import PlaywrightManager router = APIRouter() # ========== LRU VIDEO CACHE ========== CACHE_DIR = os.path.join(tempfile.gettempdir(), "purestream_cache") MAX_CACHE_SIZE_MB = 500 # Limit cache to 500MB MAX_CACHE_FILES = 30 # Keep max 30 videos cached CACHE_TTL_HOURS = 2 # Videos expire after 2 hours def init_cache(): """Initialize cache directory.""" os.makedirs(CACHE_DIR, exist_ok=True) cleanup_old_cache() def get_cache_key(url: str) -> str: """Generate cache key from URL.""" return hashlib.md5(url.encode()).hexdigest() def get_cached_path(url: str) -> Optional[str]: """Check if video is cached and not expired.""" cache_key = get_cache_key(url) cached_file = os.path.join(CACHE_DIR, f"{cache_key}.mp4") if os.path.exists(cached_file): # Check TTL file_age_hours = (time.time() - os.path.getmtime(cached_file)) / 3600 if file_age_hours < CACHE_TTL_HOURS: # Touch file to update LRU os.utime(cached_file, None) return cached_file else: # Expired, delete os.unlink(cached_file) return None def save_to_cache(url: str, source_path: str) -> str: """Save video to cache, return cached path.""" cache_key = get_cache_key(url) cached_file = os.path.join(CACHE_DIR, f"{cache_key}.mp4") # Copy to cache shutil.copy2(source_path, cached_file) # Enforce cache limits enforce_cache_limits() return cached_file def enforce_cache_limits(): """Remove old files if cache exceeds limits.""" if not os.path.exists(CACHE_DIR): return files = [] total_size = 0 for f in os.listdir(CACHE_DIR): fpath = os.path.join(CACHE_DIR, f) if os.path.isfile(fpath): stat = os.stat(fpath) files.append((fpath, stat.st_mtime, stat.st_size)) total_size += stat.st_size # Sort by modification time (oldest first) files.sort(key=lambda x: x[1]) # Remove oldest until under limits max_bytes = MAX_CACHE_SIZE_MB * 1024 * 1024 while (len(files) > MAX_CACHE_FILES or total_size > max_bytes) and files: oldest = files.pop(0) try: os.unlink(oldest[0]) total_size -= oldest[2] print(f"CACHE: Removed {oldest[0]} (LRU)") except: pass def cleanup_old_cache(): """Remove expired files on startup.""" if not os.path.exists(CACHE_DIR): return now = time.time() for f in os.listdir(CACHE_DIR): fpath = os.path.join(CACHE_DIR, f) if os.path.isfile(fpath): age_hours = (now - os.path.getmtime(fpath)) / 3600 if age_hours > CACHE_TTL_HOURS: try: os.unlink(fpath) print(f"CACHE: Expired {f}") except: pass def get_cache_stats() -> dict: """Get cache statistics.""" if not os.path.exists(CACHE_DIR): return {"files": 0, "size_mb": 0} total = 0 count = 0 for f in os.listdir(CACHE_DIR): fpath = os.path.join(CACHE_DIR, f) if os.path.isfile(fpath): total += os.path.getsize(fpath) count += 1 return {"files": count, "size_mb": round(total / 1024 / 1024, 2)} # Initialize cache on module load init_cache() # ========== API ROUTES ========== class FeedRequest(BaseModel): """Request body for feed endpoint with optional JSON credentials.""" credentials: Optional[dict] = None @router.post("") async def get_feed(request: FeedRequest = None): """Get TikTok feed using network interception.""" cookies = None user_agent = None if request and request.credentials: cookies, user_agent = PlaywrightManager.parse_json_credentials(request.credentials) print(f"DEBUG: Using provided credentials ({len(cookies)} cookies)") try: videos = await PlaywrightManager.intercept_feed(cookies, user_agent) return videos except Exception as e: print(f"DEBUG: Feed error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.get("") async def get_feed_simple(fast: bool = False): """Simple GET endpoint to fetch feed using stored credentials.""" try: # Fast mode = 0 scrolls (just initial batch), Normal = 5 scrolls scroll_count = 0 if fast else 5 videos = await PlaywrightManager.intercept_feed(scroll_count=scroll_count) return videos except Exception as e: print(f"DEBUG: Feed error: {e}") raise HTTPException(status_code=500, detail=str(e)) @router.get("/cache-stats") async def cache_stats(): """Get video cache statistics.""" return get_cache_stats() @router.delete("/cache") async def clear_cache(): """Clear video cache.""" if os.path.exists(CACHE_DIR): shutil.rmtree(CACHE_DIR, ignore_errors=True) os.makedirs(CACHE_DIR, exist_ok=True) return {"status": "cleared"} @router.get("/proxy") async def proxy_video( url: str = Query(..., description="The TikTok video URL to proxy"), download: bool = Query(False, description="Force download with attachment header") ): """ Proxy video with LRU caching for mobile optimization. Cache hit = instant playback, cache miss = download and cache. """ import yt_dlp import re # Check cache first cached_path = get_cached_path(url) if cached_path: print(f"CACHE HIT: {url[:50]}...") response_headers = {} if download: video_id_match = re.search(r'/video/(\d+)', url) video_id = video_id_match.group(1) if video_id_match else "tiktok_video" response_headers["Content-Disposition"] = f'attachment; filename="{video_id}.mp4"' return FileResponse( cached_path, media_type="video/mp4", headers=response_headers ) print(f"CACHE MISS: {url[:50]}... (downloading)") # Load stored credentials cookies, user_agent = PlaywrightManager.load_stored_credentials() # Create temp file for download temp_dir = tempfile.mkdtemp() output_template = os.path.join(temp_dir, "video.%(ext)s") # Create cookies file for yt-dlp cookie_file_path = None if cookies: cookie_file = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False) cookie_file.write("# Netscape HTTP Cookie File\n") for c in cookies: cookie_file.write(f".tiktok.com\tTRUE\t/\tFALSE\t0\t{c['name']}\t{c['value']}\n") cookie_file.close() cookie_file_path = cookie_file.name ydl_opts = { 'format': 'best[ext=mp4]/best', 'outtmpl': output_template, 'quiet': True, 'no_warnings': True, 'http_headers': { 'User-Agent': user_agent, 'Referer': 'https://www.tiktok.com/' } } if cookie_file_path: ydl_opts['cookiefile'] = cookie_file_path video_path = None try: loop = asyncio.get_event_loop() def download_video(): with yt_dlp.YoutubeDL(ydl_opts) as ydl: info = ydl.extract_info(url, download=True) ext = info.get('ext', 'mp4') return os.path.join(temp_dir, f"video.{ext}") video_path = await loop.run_in_executor(None, download_video) if not os.path.exists(video_path): raise Exception("Video file not created") # Save to cache for future requests cached_path = save_to_cache(url, video_path) stats = get_cache_stats() print(f"CACHED: {url[:50]}... ({stats['files']} files, {stats['size_mb']}MB total)") except Exception as e: print(f"DEBUG: yt-dlp download failed: {e}") # Cleanup if cookie_file_path and os.path.exists(cookie_file_path): os.unlink(cookie_file_path) if os.path.exists(temp_dir): shutil.rmtree(temp_dir, ignore_errors=True) raise HTTPException(status_code=500, detail=f"Could not download video: {e}") # Cleanup temp (cached file is separate) if cookie_file_path and os.path.exists(cookie_file_path): os.unlink(cookie_file_path) shutil.rmtree(temp_dir, ignore_errors=True) # Return from cache response_headers = {} if download: video_id_match = re.search(r'/video/(\d+)', url) video_id = video_id_match.group(1) if video_id_match else "tiktok_video" response_headers["Content-Disposition"] = f'attachment; filename="{video_id}.mp4"' return FileResponse( cached_path, media_type="video/mp4", headers=response_headers ) @router.get("/thin-proxy") async def thin_proxy_video( request: Request, cdn_url: str = Query(..., description="Direct TikTok CDN URL") ): """ Thin proxy - just forwards CDN requests with proper headers. Supports Range requests for buffering and seeking. """ # Load stored credentials for headers cookies, user_agent = PlaywrightManager.load_stored_credentials() headers = { "User-Agent": user_agent or PlaywrightManager.DEFAULT_USER_AGENT, "Referer": "https://www.tiktok.com/", "Accept": "*/*", "Accept-Language": "en-US,en;q=0.9", "Origin": "https://www.tiktok.com", } # Add cookies as header if available if cookies: cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies]) headers["Cookie"] = cookie_str # Forward Range header if present client_range = request.headers.get("Range") if client_range: headers["Range"] = client_range try: # Create client outside stream generator to access response headers first client = httpx.AsyncClient(timeout=60.0, follow_redirects=True) # We need to manually close this client later or use it in the generator # Start the request to get headers (without reading body yet) req = client.build_request("GET", cdn_url, headers=headers) r = await client.send(req, stream=True) async def stream_from_cdn(): try: async for chunk in r.aiter_bytes(chunk_size=64 * 1024): yield chunk finally: await r.aclose() await client.aclose() response_headers = { "Accept-Ranges": "bytes", "Cache-Control": "public, max-age=3600", "Content-Type": r.headers.get("Content-Type", "video/mp4"), } # Forward Content-Length and Content-Range if "Content-Length" in r.headers: response_headers["Content-Length"] = r.headers["Content-Length"] if "Content-Range" in r.headers: response_headers["Content-Range"] = r.headers["Content-Range"] status_code = r.status_code return StreamingResponse( stream_from_cdn(), status_code=status_code, media_type="video/mp4", headers=response_headers ) except Exception as e: print(f"Thin proxy error: {e}") # Ensure cleanup if possible raise HTTPException(status_code=500, detail=str(e))