"""
|
|
Feed API routes with LRU video cache for mobile optimization.
|
|
"""
|
|
|
|
from fastapi import APIRouter, Query, HTTPException, Request
|
|
from fastapi.responses import StreamingResponse, FileResponse
|
|
from pydantic import BaseModel
|
|
from typing import Optional
|
|
import httpx
|
|
import os
|
|
import json
|
|
import tempfile
|
|
import asyncio
|
|
import hashlib
|
|
import time
|
|
import shutil
|
|
|
|
from core.playwright_manager import PlaywrightManager
|
|
|
|
router = APIRouter()
|
|
|
|
# ========== LRU VIDEO CACHE ==========
|
|
CACHE_DIR = os.path.join(tempfile.gettempdir(), "purestream_cache")
|
|
MAX_CACHE_SIZE_MB = 500 # Limit cache to 500MB
|
|
MAX_CACHE_FILES = 30 # Keep max 30 videos cached
|
|
CACHE_TTL_HOURS = 2 # Videos expire after 2 hours
|
|
|
|
def init_cache():
    """Create the on-disk video cache directory (idempotent) and purge expired entries."""
    os.makedirs(CACHE_DIR, exist_ok=True)
    # Drop anything older than CACHE_TTL_HOURS left over from a previous run.
    cleanup_old_cache()
|
|
|
|
def get_cache_key(url: str) -> str:
    """Derive a stable hex cache key for *url* (MD5 used for keying, not security)."""
    digest = hashlib.md5(url.encode())
    return digest.hexdigest()
|
|
|
|
def get_cached_path(url: str) -> Optional[str]:
    """Return the cached file path for *url* if present and fresh, else None.

    A cache hit refreshes the file's mtime so LRU eviction in
    enforce_cache_limits() treats it as recently used. Expired entries
    are deleted eagerly.
    """
    cache_key = get_cache_key(url)
    cached_file = os.path.join(CACHE_DIR, f"{cache_key}.mp4")

    # EAFP instead of exists()+getmtime(): the file can be evicted by a
    # concurrent request between the check and the use, which previously
    # raised an uncaught OSError.
    try:
        file_age_hours = (time.time() - os.path.getmtime(cached_file)) / 3600
        if file_age_hours < CACHE_TTL_HOURS:
            # Touch file to update LRU ordering.
            os.utime(cached_file, None)
            return cached_file
        # Expired — remove so it no longer counts against cache limits.
        os.unlink(cached_file)
    except FileNotFoundError:
        # Not cached, or removed concurrently: plain cache miss.
        pass
    except OSError:
        # Filesystem hiccup (permissions etc.) — safest to report a miss.
        pass

    return None
|
|
|
|
def save_to_cache(url: str, source_path: str) -> str:
    """Copy *source_path* into the cache under *url*'s key and return the cached path."""
    destination = os.path.join(CACHE_DIR, f"{get_cache_key(url)}.mp4")

    # copy2 preserves timestamps/metadata so LRU bookkeeping starts sane.
    shutil.copy2(source_path, destination)

    # The new file may have pushed the cache over its size/count budget.
    enforce_cache_limits()

    return destination
|
|
|
|
def enforce_cache_limits():
    """Evict least-recently-used cache files until size and count limits hold.

    "Least recently used" is approximated by mtime, which cache hits
    refresh via os.utime() in get_cached_path().
    """
    if not os.path.exists(CACHE_DIR):
        return

    files = []  # (path, mtime, size) tuples
    total_size = 0

    for name in os.listdir(CACHE_DIR):
        fpath = os.path.join(CACHE_DIR, name)
        if not os.path.isfile(fpath):
            continue
        try:
            stat = os.stat(fpath)
        except OSError:
            # File vanished (concurrent eviction) or unreadable — skip it.
            continue
        files.append((fpath, stat.st_mtime, stat.st_size))
        total_size += stat.st_size

    # Sort by modification time (oldest, i.e. least recently used, first).
    files.sort(key=lambda entry: entry[1])

    # Remove oldest until both the file-count and byte-size limits hold.
    max_bytes = MAX_CACHE_SIZE_MB * 1024 * 1024

    while files and (len(files) > MAX_CACHE_FILES or total_size > max_bytes):
        path, _mtime, size = files.pop(0)
        try:
            os.unlink(path)
        except OSError:
            # Deletion raced or failed; leave the size tally unadjusted.
            # Narrowed from a bare `except:` that also swallowed
            # KeyboardInterrupt/SystemExit.
            continue
        total_size -= size
        print(f"CACHE: Removed {path} (LRU)")
|
|
|
|
def cleanup_old_cache():
    """Delete cache files older than CACHE_TTL_HOURS (run once at startup)."""
    if not os.path.exists(CACHE_DIR):
        return

    now = time.time()
    for name in os.listdir(CACHE_DIR):
        fpath = os.path.join(CACHE_DIR, name)
        if not os.path.isfile(fpath):
            continue
        try:
            age_hours = (now - os.path.getmtime(fpath)) / 3600
            if age_hours > CACHE_TTL_HOURS:
                os.unlink(fpath)
                print(f"CACHE: Expired {name}")
        except OSError:
            # File disappeared or is not removable — harmless; it will be
            # retried on the next startup. Narrowed from a bare `except:`.
            pass
|
|
|
|
def get_cache_stats() -> dict:
    """Return cache statistics as {"files": count, "size_mb": rounded total}."""
    if not os.path.exists(CACHE_DIR):
        return {"files": 0, "size_mb": 0}

    # Collect sizes of regular files only; subdirectories are ignored.
    paths = (os.path.join(CACHE_DIR, name) for name in os.listdir(CACHE_DIR))
    sizes = [os.path.getsize(p) for p in paths if os.path.isfile(p)]

    return {"files": len(sizes), "size_mb": round(sum(sizes) / 1024 / 1024, 2)}
|
|
|
|
# Initialize cache on module load (creates the directory and purges expired files).
init_cache()
|
|
|
|
# ========== API ROUTES ==========
|
|
|
|
from typing import Optional, Any, Union, List, Dict
|
|
|
|
# ========== FEED METADATA CACHE ==========
|
|
FEED_METADATA_CACHE = os.path.join(tempfile.gettempdir(), "feed_metadata.json")
|
|
METADATA_TTL_HOURS = 24 # Keep feed order for 24 hours (for instant load)
|
|
|
|
def load_cached_feed() -> Optional[List[dict]]:
    """Return cached feed metadata, or None if absent, stale, or unreadable.

    Used for instant startup: the previously served feed order is replayed
    instead of waiting on a fresh fetch.
    """
    try:
        # A stale feed is worse than none — enforce the TTL first.
        if (time.time() - os.path.getmtime(FEED_METADATA_CACHE)) > (METADATA_TTL_HOURS * 3600):
            return None

        with open(FEED_METADATA_CACHE, "r") as f:
            return json.load(f)
    except FileNotFoundError:
        # No cache written yet (also covers a concurrent delete after the
        # old exists() pre-check would have passed).
        return None
    except (OSError, ValueError):
        # Unreadable file or corrupt JSON (json.JSONDecodeError is a
        # ValueError): treat as a miss. Narrowed from a bare `except:`.
        return None
|
|
|
|
def save_cached_feed(videos: List[dict]):
    """Persist feed metadata for instant startup; failures are logged, never raised."""
    try:
        with open(FEED_METADATA_CACHE, "w") as handle:
            json.dump(videos, handle)
    except Exception as e:
        # Best-effort cache: a failed write must not break the request.
        print(f"DEBUG: Failed to save feed metadata: {e}")
|
|
|
|
# Import services for fallback aggregation
|
|
from core.tiktok_api_service import TikTokAPIService
|
|
from api.routes.user import get_fallback_accounts
|
|
|
|
async def generate_fallback_feed(limit: int = 5) -> List[dict]:
    """
    Generate a feed by aggregating latest videos from verified creators.
    Used when cache is empty and we want to avoid Playwright startup headers.
    """
    import random

    print("DEBUG: Generating fallback feed from verified users...")
    cookies, user_agent = PlaywrightManager.load_stored_credentials()

    # Randomize the verified-creator pool for variety, then query at most 8.
    accounts = get_fallback_accounts()
    random.shuffle(accounts)
    selected = accounts[:8]

    # One recent video per creator is enough to assemble a "feed".
    fetches = [
        TikTokAPIService.get_user_videos(
            account['username'],
            cookies=cookies,
            user_agent=user_agent,
            limit=1,
        )
        for account in selected
    ]

    # Fan the requests out in parallel; failures come back as exceptions.
    results = await asyncio.gather(*fetches, return_exceptions=True)

    feed = []
    for outcome in results:
        # Keep only successful, non-empty result lists.
        if isinstance(outcome, list) and outcome:
            feed.extend(outcome)

    # Shuffle so the feed doesn't always lead with the same creators.
    random.shuffle(feed)

    print(f"DEBUG: Generated fallback feed with {len(feed)} videos")
    return feed[:limit]
|
|
|
|
class FeedRequest(BaseModel):
    """Request body for feed endpoint with optional JSON credentials.

    When `credentials` is supplied, it is handed to
    PlaywrightManager.parse_json_credentials(); otherwise stored
    credentials are used. Accepts either a dict- or list-shaped export —
    exact accepted shapes depend on that parser (defined elsewhere).
    """
    # Raw cookie/credential export; None means "use stored credentials".
    credentials: Optional[Union[Dict, List]] = None
|
|
|
|
|
|
@router.post("")
async def get_feed(request: FeedRequest = None):
    """Get TikTok feed using network interception."""
    cookies, user_agent = None, None

    # Per-request credentials (if supplied) override the stored ones.
    if request is not None and request.credentials:
        cookies, user_agent = PlaywrightManager.parse_json_credentials(request.credentials)
        print(f"DEBUG: Using provided credentials ({len(cookies)} cookies)")

    try:
        return await PlaywrightManager.intercept_feed(cookies, user_agent)
    except Exception as e:
        print(f"DEBUG: Feed error: {e}")
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("")
async def get_feed_simple(fast: bool = False, skip_cache: bool = False):
    """Simple GET endpoint to fetch feed using stored credentials.

    Tries progressively slower sources: disk cache -> direct-API
    aggregation -> Playwright interception.

    Args:
        fast: If True, only get initial batch (0 scrolls). If False, scroll 5 times.
        skip_cache: If True, always fetch fresh videos (for infinite scroll).

    Raises:
        HTTPException: 500 when every source fails.
    """
    try:
        # Fast mode = 0 scrolls (just initial batch), Normal = 5 scrolls
        scroll_count = 0 if fast else 5

        # When skipping cache for infinite scroll, do more scrolling to get
        # different videos (overrides the fast/normal choice above).
        if skip_cache:
            scroll_count = 8

        # [OPTIMIZATION] Fast Load Strategy — only when serving from cache is allowed.
        if fast and not skip_cache:
            # 1. Try Memory/Disk Cache first (Instant)
            cached_feed = load_cached_feed()
            if cached_feed:
                print(f"DEBUG: Returning cached feed ({len(cached_feed)} videos)")
                return cached_feed

            # 2. Try Fallback Aggregation (Fast HTTP, no browser)
            # This fetches real latest videos from top creators via direct API.
            # NOTE: the aggregated result is cached in the same metadata file
            # as the real feed, so it will be replayed until it expires.
            try:
                aggregated = await generate_fallback_feed(limit=5)
                if aggregated:
                    save_cached_feed(aggregated)
                    return aggregated
            except Exception as agg_err:
                # Aggregation is best-effort; fall through to Playwright.
                print(f"DEBUG: Aggregation fallback failed: {agg_err}")

        # 3. Playwright Interception (Slowest, but guaranteed 'For You' algorithm)
        videos = await PlaywrightManager.intercept_feed(scroll_count=scroll_count)

        # Save successful result to cache for next time (skip for infinite
        # scroll so repeated fetches stay fresh).
        if videos and len(videos) > 0 and not skip_cache:
            save_cached_feed(videos)

        return videos
    except Exception as e:
        print(f"DEBUG: Feed error: {e}")

        # 4. Ultimate Fallback if everything fails (Verified users static list?)
        # For now just re-raise, as UI handles empty state
        raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("/cache-stats")
async def cache_stats():
    """Get video cache statistics (file count and total size in MB)."""
    return get_cache_stats()
|
|
|
|
|
|
@router.delete("/cache")
async def clear_cache():
    """Clear video cache."""
    # rmtree with ignore_errors tolerates a missing directory, so no
    # exists() guard is needed; recreate the directory so later
    # downloads have somewhere to land.
    shutil.rmtree(CACHE_DIR, ignore_errors=True)
    os.makedirs(CACHE_DIR, exist_ok=True)
    return {"status": "cleared"}
|
|
|
|
|
|
@router.get("/proxy")
async def proxy_video(
    url: str = Query(..., description="The TikTok video URL to proxy"),
    download: bool = Query(False, description="Force download with attachment header")
):
    """
    Proxy video with LRU caching for mobile optimization.
    OPTIMIZED: No server-side transcoding - client handles decoding.
    This reduces server CPU to ~0% during video playback.

    Raises:
        HTTPException: 422 when the yt-dlp download/extraction fails.
    """
    import yt_dlp
    import re

    def _build_headers(extra: Optional[dict] = None) -> dict:
        """Common response headers; adds Content-Disposition when download=True."""
        headers = {
            "Accept-Ranges": "bytes",
            "Cache-Control": "public, max-age=3600",
        }
        if extra:
            headers.update(extra)
        if download:
            video_id_match = re.search(r'/video/(\d+)', url)
            video_id = video_id_match.group(1) if video_id_match else "tiktok_video"
            headers["Content-Disposition"] = f'attachment; filename="{video_id}.mp4"'
        return headers

    # Check cache first
    cached_path = get_cached_path(url)
    if cached_path:
        print(f"CACHE HIT: {url[:50]}...")
        return FileResponse(
            cached_path,
            media_type="video/mp4",
            headers=_build_headers()
        )

    print(f"CACHE MISS: {url[:50]}... (downloading)")

    # Load stored credentials
    cookies, user_agent = PlaywrightManager.load_stored_credentials()

    # Scratch area for the yt-dlp download; always removed in the finally block.
    temp_dir = tempfile.mkdtemp()
    output_template = os.path.join(temp_dir, "video.%(ext)s")

    # yt-dlp wants cookies in Netscape cookie-jar format on disk.
    cookie_file_path = None
    if cookies:
        cookie_file = tempfile.NamedTemporaryFile(mode='w', suffix='.txt', delete=False)
        cookie_file.write("# Netscape HTTP Cookie File\n")
        for c in cookies:
            cookie_file.write(f".tiktok.com\tTRUE\t/\tFALSE\t0\t{c['name']}\t{c['value']}\n")
        cookie_file.close()
        cookie_file_path = cookie_file.name

    # Download best quality - NO TRANSCODING (let client decode).
    # Prefer H.264 when available, but accept any codec.
    ydl_opts = {
        'format': 'best[ext=mp4][vcodec^=avc]/best[ext=mp4]/best',
        'outtmpl': output_template,
        'quiet': True,
        'no_warnings': True,
        'http_headers': {
            'User-Agent': user_agent,
            'Referer': 'https://www.tiktok.com/'
        }
    }
    if cookie_file_path:
        ydl_opts['cookiefile'] = cookie_file_path

    video_codec = "unknown"
    try:
        # Run the blocking yt-dlp download off the event loop.
        # (get_running_loop replaces deprecated get_event_loop in coroutines.)
        loop = asyncio.get_running_loop()

        def download_video():
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                info = ydl.extract_info(url, download=True)
                ext = info.get('ext', 'mp4')
                vcodec = info.get('vcodec', 'unknown') or 'unknown'
                return os.path.join(temp_dir, f"video.{ext}"), vcodec

        video_path, video_codec = await loop.run_in_executor(None, download_video)

        if not os.path.exists(video_path):
            raise Exception("Video file not created")

        print(f"Downloaded codec: {video_codec}")

        # Save to cache directly - client-side player handles all formats
        cached_path = save_to_cache(url, video_path)
        stats = get_cache_stats()
        print(f"CACHED: {url[:50]}... ({stats['files']} files, {stats['size_mb']}MB total)")
    except Exception as e:
        print(f"DEBUG: yt-dlp download failed: {str(e)}")
        # Return 422 for processing failure instead of 500 (server crash)
        raise HTTPException(status_code=422, detail=f"Video processing failed: {str(e)}")
    finally:
        # Temp artifacts are removed on every path (the cached copy is separate).
        if cookie_file_path:
            try:
                os.unlink(cookie_file_path)
            except OSError:
                pass
        shutil.rmtree(temp_dir, ignore_errors=True)

    # Return from cache with codec info header so the client can pick a decoder.
    return FileResponse(
        cached_path,
        media_type="video/mp4",
        headers=_build_headers({"X-Video-Codec": video_codec})
    )
|
|
|
|
|
|
|
|
@router.get("/thin-proxy")
async def thin_proxy_video(
    request: Request,
    cdn_url: str = Query(..., description="Direct TikTok CDN URL")
):
    """
    Thin proxy - just forwards CDN requests with proper headers.
    Supports Range requests for buffering and seeking.

    Streams the upstream body through in 64 KiB chunks and mirrors
    Content-Type / Content-Length / Content-Range plus the upstream
    status code, so clients can seek. Raises HTTPException(502) when the
    upstream request fails.
    """

    # Load stored credentials for headers
    cookies, user_agent = PlaywrightManager.load_stored_credentials()

    # Browser-like request headers; presumably the CDN rejects requests
    # without a plausible User-Agent/Referer — TODO confirm.
    headers = {
        "User-Agent": user_agent or PlaywrightManager.DEFAULT_USER_AGENT,
        "Referer": "https://www.tiktok.com/",
        "Accept": "*/*",
        "Accept-Language": "en-US,en;q=0.9",
        "Origin": "https://www.tiktok.com",
    }

    # Add cookies as header if available
    if cookies:
        cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
        headers["Cookie"] = cookie_str

    # Forward Range header if present (lets the player seek/buffer).
    client_range = request.headers.get("Range")
    if client_range:
        headers["Range"] = client_range

    try:
        # Create client outside stream generator to access response headers first
        client = httpx.AsyncClient(timeout=60.0, follow_redirects=True)
        # The client is closed inside the generator's finally once the body
        # has been fully streamed (or the stream is abandoned).

        # Start the request to get headers (without reading body yet)
        req = client.build_request("GET", cdn_url, headers=headers)
        r = await client.send(req, stream=True)

        async def stream_from_cdn():
            # Relay the upstream body; the finally runs even if the consumer
            # disconnects mid-stream, releasing both response and client.
            try:
                async for chunk in r.aiter_bytes(chunk_size=64 * 1024):
                    yield chunk
            finally:
                await r.aclose()
                await client.aclose()

        response_headers = {
            "Accept-Ranges": "bytes",
            "Cache-Control": "public, max-age=3600",
            "Content-Type": r.headers.get("Content-Type", "video/mp4"),
        }

        # Forward Content-Length and Content-Range
        if "Content-Length" in r.headers:
            response_headers["Content-Length"] = r.headers["Content-Length"]
        if "Content-Range" in r.headers:
            response_headers["Content-Range"] = r.headers["Content-Range"]

        # Preserve the upstream status (e.g. 206 Partial Content for Range replies).
        status_code = r.status_code

        return StreamingResponse(
            stream_from_cdn(),
            status_code=status_code,
            media_type="video/mp4",
            headers=response_headers
        )

    except Exception as e:
        print(f"Thin proxy error: {e}")
        # Ensure cleanup if possible: if we failed before returning the
        # StreamingResponse, the generator's finally never ran.
        if 'r' in locals():
            await r.aclose()
        if 'client' in locals():
            await client.aclose()

        raise HTTPException(status_code=502, detail=f"Upstream Proxy Error: {str(e)}")
|