feat: sync latest local changes (backend, frontend, scripts)

This commit is contained in:
Khoa Vo 2026-01-25 18:49:15 +07:00
parent 437f694cfb
commit 05beadb055
43 changed files with 3460 additions and 1877 deletions

View file

@ -7,14 +7,7 @@ COPY frontend/ ./
RUN npm run build RUN npm run build
# Runtime Stage for Backend # Runtime Stage for Backend
FROM python:3.11-slim FROM mcr.microsoft.com/playwright/python:v1.49.1-jammy
# Install system dependencies required for Playwright and compiled extensions
RUN apt-get update && apt-get install -y \
curl \
git \
build-essential \
&& rm -rf /var/lib/apt/lists/*
WORKDIR /app WORKDIR /app
@ -22,10 +15,6 @@ WORKDIR /app
COPY backend/requirements.txt backend/ COPY backend/requirements.txt backend/
RUN pip install --no-cache-dir -r backend/requirements.txt RUN pip install --no-cache-dir -r backend/requirements.txt
# Install Playwright browsers (Chromium only to save space)
RUN playwright install chromium
RUN playwright install-deps chromium
# Copy Backend Code # Copy Backend Code
COPY backend/ backend/ COPY backend/ backend/

32
Dockerfile.dev Normal file
View file

@ -0,0 +1,32 @@
# PureStream Development Dockerfile
# Copies all files to avoid Synology Drive volume mount issues
FROM mcr.microsoft.com/playwright/python:v1.49.1-jammy
WORKDIR /app
# Copy backend files
COPY backend/ /app/backend/
# Copy pre-built frontend
COPY frontend/dist/ /app/frontend/dist/
# Create directories for cache and session
RUN mkdir -p /app/cache /app/session
# Install Python dependencies
WORKDIR /app/backend
RUN pip install --no-cache-dir -r requirements.txt && \
pip install playwright-stealth && \
playwright install chromium
# Environment variables
ENV PYTHONUNBUFFERED=1
ENV CACHE_DIR=/app/cache
ENV MAX_CACHE_SIZE_MB=500
ENV CACHE_TTL_HOURS=24
ENV ADMIN_PASSWORD=admin123
EXPOSE 8002
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8002"]

View file

@ -134,6 +134,77 @@ init_cache()
from typing import Optional, Any, Union, List, Dict from typing import Optional, Any, Union, List, Dict
# ========== FEED METADATA CACHE ==========
FEED_METADATA_CACHE = os.path.join(tempfile.gettempdir(), "feed_metadata.json")
METADATA_TTL_HOURS = 24 # Keep feed order for 24 hours (for instant load)
def load_cached_feed() -> Optional[List[dict]]:
"""Load cached feed metadata for instant startup."""
if not os.path.exists(FEED_METADATA_CACHE):
return None
try:
if (time.time() - os.path.getmtime(FEED_METADATA_CACHE)) > (METADATA_TTL_HOURS * 3600):
return None
with open(FEED_METADATA_CACHE, "r") as f:
return json.load(f)
except:
return None
def save_cached_feed(videos: List[dict]):
"""Save feed metadata to cache."""
try:
with open(FEED_METADATA_CACHE, "w") as f:
json.dump(videos, f)
except Exception as e:
print(f"DEBUG: Failed to save feed metadata: {e}")
# Import services for fallback aggregation
from core.tiktok_api_service import TikTokAPIService
from api.routes.user import get_fallback_accounts
async def generate_fallback_feed(limit: int = 5) -> List[dict]:
"""
Generate a feed by aggregating latest videos from verified creators.
Used when cache is empty and we want to avoid Playwright startup headers.
"""
print("DEBUG: Generating fallback feed from verified users...")
cookies, user_agent = PlaywrightManager.load_stored_credentials()
# Use verified accounts from fallback list
accounts = get_fallback_accounts()
# Shuffle accounts to get variety
import random
random.shuffle(accounts)
# Select top 5-8 accounts to query
selected = accounts[:8]
tasks = []
# We only need 1 recent video from each to make a "feed"
for acc in selected:
tasks.append(TikTokAPIService.get_user_videos(
acc['username'],
cookies=cookies,
user_agent=user_agent,
limit=1 # Just 1 video per creator
))
# Run in parallel
results = await asyncio.gather(*tasks, return_exceptions=True)
feed = []
for res in results:
if isinstance(res, list) and res:
feed.extend(res)
# Shuffle the resulting feed
random.shuffle(feed)
print(f"DEBUG: Generated fallback feed with {len(feed)} videos")
return feed[:limit]
class FeedRequest(BaseModel): class FeedRequest(BaseModel):
"""Request body for feed endpoint with optional JSON credentials.""" """Request body for feed endpoint with optional JSON credentials."""
credentials: Optional[Union[Dict, List]] = None credentials: Optional[Union[Dict, List]] = None
@ -171,12 +242,39 @@ async def get_feed_simple(fast: bool = False, skip_cache: bool = False):
# When skipping cache for infinite scroll, do more scrolling to get different videos # When skipping cache for infinite scroll, do more scrolling to get different videos
if skip_cache: if skip_cache:
scroll_count = 8 # More scrolling to get fresh content scroll_count = 8
# [OPTIMIZATION] Fast Load Strategy
if fast and not skip_cache:
# 1. Try Memory/Disk Cache first (Instant)
cached_feed = load_cached_feed()
if cached_feed:
print(f"DEBUG: Returning cached feed ({len(cached_feed)} videos)")
return cached_feed
# 2. Try Fallback Aggregation (Fast HTTP, no browser)
# This fetches real latest videos from top creators via direct API
try:
aggregated = await generate_fallback_feed(limit=5)
if aggregated:
save_cached_feed(aggregated)
return aggregated
except Exception as agg_err:
print(f"DEBUG: Aggregation fallback failed: {agg_err}")
# 3. Playwright Interception (Slowest, but guaranteed 'For You' algorithm)
videos = await PlaywrightManager.intercept_feed(scroll_count=scroll_count) videos = await PlaywrightManager.intercept_feed(scroll_count=scroll_count)
# Save successful result to cache for next time
if videos and len(videos) > 0 and not skip_cache:
save_cached_feed(videos)
return videos return videos
except Exception as e: except Exception as e:
print(f"DEBUG: Feed error: {e}") print(f"DEBUG: Feed error: {e}")
# 4. Ultimate Fallback if everything fails (Verified users static list?)
# For now just re-raise, as UI handles empty state
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@ -281,21 +379,29 @@ async def proxy_video(
if not os.path.exists(video_path): if not os.path.exists(video_path):
raise Exception("Video file not created") raise Exception("Video file not created")
print(f"Downloaded codec: {video_codec} (no transcoding - client will decode)") print(f"Downloaded codec: {video_codec}")
# Save to cache directly - NO TRANSCODING # Save to cache directly - client-side player handles all formats
cached_path = save_to_cache(url, video_path) cached_path = save_to_cache(url, video_path)
stats = get_cache_stats() stats = get_cache_stats()
print(f"CACHED: {url[:50]}... ({stats['files']} files, {stats['size_mb']}MB total)") print(f"CACHED: {url[:50]}... ({stats['files']} files, {stats['size_mb']}MB total)")
except Exception as e: except Exception as e:
print(f"DEBUG: yt-dlp download failed: {e}") print(f"DEBUG: yt-dlp download failed: {str(e)}")
# Cleanup # Cleanup
if cookie_file_path and os.path.exists(cookie_file_path): if cookie_file_path and os.path.exists(cookie_file_path):
os.unlink(cookie_file_path) try:
os.unlink(cookie_file_path)
except:
pass
if os.path.exists(temp_dir): if os.path.exists(temp_dir):
shutil.rmtree(temp_dir, ignore_errors=True) try:
raise HTTPException(status_code=500, detail=f"Could not download video: {e}") shutil.rmtree(temp_dir, ignore_errors=True)
except:
pass
# Return 422 for processing failure instead of 500 (server crash)
raise HTTPException(status_code=422, detail=f"Video processing failed: {str(e)}")
# Cleanup temp (cached file is separate) # Cleanup temp (cached file is separate)
if cookie_file_path and os.path.exists(cookie_file_path): if cookie_file_path and os.path.exists(cookie_file_path):
@ -393,4 +499,9 @@ async def thin_proxy_video(
except Exception as e: except Exception as e:
print(f"Thin proxy error: {e}") print(f"Thin proxy error: {e}")
# Ensure cleanup if possible # Ensure cleanup if possible
raise HTTPException(status_code=500, detail=str(e)) if 'r' in locals():
await r.aclose()
if 'client' in locals():
await client.aclose()
raise HTTPException(status_code=502, detail=f"Upstream Proxy Error: {str(e)}")

View file

@ -7,8 +7,12 @@ from pydantic import BaseModel
from typing import Optional, List from typing import Optional, List
import httpx import httpx
import asyncio import asyncio
import time
import re
from typing import Optional, List
from core.playwright_manager import PlaywrightManager from core.playwright_manager import PlaywrightManager
from core.tiktok_api_service import TikTokAPIService
router = APIRouter() router = APIRouter()
@ -112,7 +116,7 @@ async def get_user_videos(
): ):
""" """
Fetch videos from a TikTok user's profile. Fetch videos from a TikTok user's profile.
Uses Playwright to crawl the user's page for reliable results. Uses direct API calls for speed (~100-500ms), with Playwright fallback.
""" """
username = username.replace("@", "") username = username.replace("@", "")
@ -123,10 +127,25 @@ async def get_user_videos(
raise HTTPException(status_code=401, detail="Not authenticated") raise HTTPException(status_code=401, detail="Not authenticated")
print(f"Fetching videos for @{username}...") print(f"Fetching videos for @{username}...")
start_time = time.time()
# Try fast API first
try:
videos = await TikTokAPIService.get_user_videos(username, cookies, user_agent, limit)
if videos:
duration = time.time() - start_time
print(f"[API] Got {len(videos)} videos in {duration:.2f}s")
return {"username": username, "videos": videos, "count": len(videos), "source": "api", "duration_ms": int(duration * 1000)}
except Exception as e:
print(f"[API] Failed for {username}: {e}")
# Fallback to Playwright if API fails or returns empty
print(f"[Fallback] Using Playwright for @{username}...")
try: try:
videos = await PlaywrightManager.fetch_user_videos(username, cookies, user_agent, limit) videos = await PlaywrightManager.fetch_user_videos(username, cookies, user_agent, limit)
return {"username": username, "videos": videos, "count": len(videos)} duration = time.time() - start_time
print(f"[Playwright] Got {len(videos)} videos in {duration:.2f}s")
return {"username": username, "videos": videos, "count": len(videos), "source": "playwright", "duration_ms": int(duration * 1000)}
except Exception as e: except Exception as e:
print(f"Error fetching videos for {username}: {e}") print(f"Error fetching videos for {username}: {e}")
raise HTTPException(status_code=500, detail=str(e)) raise HTTPException(status_code=500, detail=str(e))
@ -140,7 +159,7 @@ async def search_videos(
): ):
""" """
Search for videos by keyword or hashtag. Search for videos by keyword or hashtag.
Uses Playwright to crawl TikTok search results for reliable data. Uses direct API calls for speed (~200-800ms), with Playwright fallback.
""" """
# Load stored credentials # Load stored credentials
cookies, user_agent = PlaywrightManager.load_stored_credentials() cookies, user_agent = PlaywrightManager.load_stored_credentials()
@ -149,13 +168,157 @@ async def search_videos(
raise HTTPException(status_code=401, detail="Not authenticated") raise HTTPException(status_code=401, detail="Not authenticated")
print(f"Searching for: {query} (limit={limit}, cursor={cursor})...") print(f"Searching for: {query} (limit={limit}, cursor={cursor})...")
start_time = time.time()
# [Smart Search] Username Detection Priority
# If query looks like a username (contains dots, underscores, or starts with @),
# try fetching that specific user's videos FIRST.
# This solves issues where searching for exact username returns unrelated content.
clean_query = query.strip()
# Handle @ prefix commonly used by users - STRICT MODE
# If user explicitly types "@", they want a user lookup, NOT a keyword search.
strict_user_lookup = False
if clean_query.startswith("@"):
clean_query = clean_query[1:]
strict_user_lookup = True
# Also treat dots/underscores as likely usernames
is_username_format = bool(re.match(r"^[a-zA-Z0-9_\.]+$", clean_query)) and len(clean_query) > 2
# DEBUG LOGGING TO FILE
try:
with open("search_debug.log", "a", encoding="utf-8") as f:
f.write(f"\n--- Search: {query} ---\n")
f.write(f"Strict: {strict_user_lookup}, Format: {is_username_format}, Clean: {clean_query}\n")
except: pass
if is_username_format or strict_user_lookup:
print(f"[Smart Search] Query '{query}' identified as username. Strict: {strict_user_lookup}")
try:
# Try direct profile fetch via API
videos = await TikTokAPIService.get_user_videos(clean_query, cookies, user_agent, limit)
if videos:
duration = time.time() - start_time
print(f"[API-Profile-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_priority", "duration_ms": int(duration * 1000)}
# Try Playwright fallback BEFORE yt-dlp
# Playwright scraping provides thumbnails and correct metadata, while yt-dlp flat-playlist does not.
print(f"[Smart Search] API failed, trying Playwright for user '{query}'...")
try:
videos = await PlaywrightManager.fetch_user_videos(clean_query, cookies, user_agent, limit)
if videos:
duration = time.time() - start_time
print(f"[Playwright-Profile-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_playwright_priority", "duration_ms": int(duration * 1000)}
except Exception as pw_err:
print(f"[Smart Search] Playwright profile fetch failed: {pw_err}")
# Try yt-dlp fallback if Playwright also fails
print(f"[Smart Search] Playwright failed, trying yt-dlp for user '{query}'...")
# Log we are trying ytdlp
try:
with open("search_debug.log", "a", encoding="utf-8") as f:
f.write(f"Attempting yt-dlp for {clean_query}...\n")
except: pass
videos = await TikTokAPIService.get_user_videos_via_ytdlp(clean_query, limit)
try:
with open("search_debug.log", "a", encoding="utf-8") as f:
f.write(f"yt-dlp Result: {len(videos)} videos\n")
except: pass
if videos:
duration = time.time() - start_time
print(f"[yt-dlp-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_ytdlp", "duration_ms": int(duration * 1000)}
# If strict usage of "@" was used and we found nothing, DO NOT fallback to generic search.
# It's better to show "No videos found" than random unrelated results.
if strict_user_lookup:
print(f"[Smart Search] Strict lookup for '{query}' found no results. Returning empty.")
return {"query": query, "videos": [], "count": 0, "cursor": 0, "source": "user_not_found_strict", "duration_ms": int((time.time() - start_time) * 1000)}
except Exception as e:
print(f"[Smart Search] Priority profile fetch failed: {e}")
if strict_user_lookup:
return {"query": query, "videos": [], "count": 0, "cursor": 0, "source": "error_strict", "duration_ms": int((time.time() - start_time) * 1000)}
# Fall through to normal search only if NOT strict
# Try fast API search
try:
videos = await TikTokAPIService.search_videos(query, cookies, user_agent, limit, cursor)
if videos:
duration = time.time() - start_time
print(f"[API] Found {len(videos)} videos in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "api", "duration_ms": int(duration * 1000)}
except Exception as e:
print(f"[API] Search failed for {query}: {e}")
# Fallback Phase 1: Check if query is a "trending" misspelling and retry API if so
# Regex for: hot, trend, trens, hor, hott, trand, etc.
trend_pattern = r"(hot|hor|hott)\s*(trend|trens|trand|tred)|(trend|trens|trand)"
is_trend_query = bool(re.search(trend_pattern, query.lower()))
if is_trend_query and (not videos):
print(f"[Smart Fallback] Query '{query}' detected as trending request. Retrying with 'hot trend'...")
try:
# Try normalized query on API
videos = await TikTokAPIService.search_videos("hot trend", cookies, user_agent, limit, cursor)
if videos:
duration = time.time() - start_time
print(f"[API-Fallback] Found {len(videos)} videos for 'hot trend' in {duration:.2f}s")
return {"query": "hot trend", "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "api_fallback", "duration_ms": int(duration * 1000)}
except Exception:
pass # Continue to Playwright if this fails
# Fallback Phase 2: Playwright
# Fallback to Playwright if API fails or returns empty
print(f"[Fallback] Using Playwright for search '{query}'...")
try: try:
videos = await PlaywrightManager.search_videos(query, cookies, user_agent, limit, cursor) videos = await PlaywrightManager.search_videos(query, cookies, user_agent, limit, cursor)
return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos)}
# Smart Fallback Phase 3: If Playwright also fails for trending query, try normalized query
if not videos and is_trend_query:
print(f"[Playwright-Fallback] No results for '{query}'. Retrying with 'hot trend'...")
videos = await PlaywrightManager.search_videos("hot trend", cookies, user_agent, limit, cursor)
except Exception as e: except Exception as e:
print(f"Error searching for {query}: {e}") print(f"Error searching for {query}: {e}")
raise HTTPException(status_code=500, detail=str(e)) # Don't raise yet, try user fallback
pass
# Fallback Phase 4: Exact Username Match (Secondary Fallback)
# If generic search failed, and query looks like a username, try fetching their profile directly (if not tried already)
# Note: We already tried this at the top, but we try again here with Playwright as a backup if the API profile fetch failed earlier.
if (not videos) and is_username_format:
print(f"[Smart Fallback] Query '{query}' yielded no search results. Attempting secondary profile fetch (Playwright)...")
try:
# We already tried API profile fetch at start, so try Playwright now
print(f"[Smart Fallback] API failed, trying Playwright for user '{query}'...")
videos = await PlaywrightManager.fetch_user_videos(query, cookies, user_agent, limit)
if videos:
duration = time.time() - start_time
print(f"[Playwright-Profile] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_playwright", "duration_ms": int(duration * 1000)}
except Exception as e:
print(f"[Smart Fallback] Profile fetch failed: {e}")
pass
if not videos:
# Only raise error if we truly found nothing after all attempts
# or return empty list instead of 500?
# A 500 implies server broken. Empty list implies no results.
# Let's return empty structure to be safe for frontend
return {"query": query, "videos": [], "count": 0, "cursor": cursor, "source": "empty"}
return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "playwright", "duration_ms": int((time.time() - start_time) * 1000)}
# Cache for suggested accounts # Cache for suggested accounts
@ -178,7 +341,6 @@ async def get_suggested_accounts(
# Check cache # Check cache
if _suggested_cache["accounts"] and (time.time() - _suggested_cache["updated_at"]) < CACHE_TTL: if _suggested_cache["accounts"] and (time.time() - _suggested_cache["updated_at"]) < CACHE_TTL:
print("Returning cached suggested accounts")
return {"accounts": _suggested_cache["accounts"][:limit], "cached": True} return {"accounts": _suggested_cache["accounts"][:limit], "cached": True}
# Load stored credentials # Load stored credentials
@ -191,17 +353,24 @@ async def get_suggested_accounts(
print("Fetching fresh suggested accounts from TikTok...") print("Fetching fresh suggested accounts from TikTok...")
try: try:
accounts = await PlaywrightManager.fetch_suggested_accounts(cookies, user_agent, limit) # Enforce a strict timeout to prevent hanging or heavy resource usage blocking the server
# If Playwright takes > 15 seconds, we default to fallback.
try:
accounts = await asyncio.wait_for(
PlaywrightManager.fetch_suggested_accounts(cookies, user_agent, limit),
timeout=15.0
)
except asyncio.TimeoutError:
print("Suggest fetch timed out, using fallback.")
accounts = []
if accounts and len(accounts) >= 5: # Need at least 5 accounts from dynamic fetch if accounts and len(accounts) >= 5: # Need at least 5 accounts from dynamic fetch
_suggested_cache["accounts"] = accounts _suggested_cache["accounts"] = accounts
_suggested_cache["updated_at"] = time.time() _suggested_cache["updated_at"] = time.time()
return {"accounts": accounts[:limit], "cached": False} return {"accounts": accounts[:limit], "cached": False}
else: else:
# Fallback: fetch actual profile data with avatars for static list # Just return static accounts directly without API calls - TikTok API is unreliable
print("Dynamic fetch failed, fetching profile data for static accounts...") return {"accounts": get_fallback_accounts()[:limit], "cached": False, "fallback": True}
fallback_list = get_fallback_accounts()[:min(limit, 20)] # Limit to 20 for speed
return await fetch_profiles_with_avatars(fallback_list, cookies, user_agent)
except Exception as e: except Exception as e:
print(f"Error fetching suggested accounts: {e}") print(f"Error fetching suggested accounts: {e}")

View file

@ -7,6 +7,23 @@ class DownloadService:
self.download_dir = "downloads" self.download_dir = "downloads"
if not os.path.exists(self.download_dir): if not os.path.exists(self.download_dir):
os.makedirs(self.download_dir) os.makedirs(self.download_dir)
# Auto-update yt-dlp on startup (Disabled for stability/speed)
# self.update_ytdlp()
def update_ytdlp(self):
"""
Auto-update yt-dlp to the latest nightly/pre-release version.
"""
try:
print("Checking for yt-dlp updates (nightly)...")
import subprocess
import sys
# Use the current python executable to run pip
subprocess.check_call([sys.executable, "-m", "pip", "install", "-U", "--pre", "yt-dlp", "--break-system-packages"])
print("yt-dlp updated successfully.")
except Exception as e:
print(f"Failed to update yt-dlp: {e}")
async def download_video(self, url: str) -> dict: async def download_video(self, url: str) -> dict:
""" """

View file

@ -17,9 +17,14 @@ from playwright.async_api import async_playwright, Response, Browser, BrowserCon
try: try:
from playwright_stealth import stealth_async from playwright_stealth import stealth_async
except ImportError: except ImportError:
print("WARNING: playwright_stealth not found, disabling stealth mode.") try:
async def stealth_async(page): from playwright_stealth import Stealth
pass async def stealth_async(page):
await Stealth().apply_stealth_async(page)
except ImportError:
print("WARNING: playwright_stealth not found, disabling stealth mode.")
async def stealth_async(page):
pass
COOKIES_FILE = "cookies.json" COOKIES_FILE = "cookies.json"
@ -43,10 +48,18 @@ class PlaywrightManager:
"--start-maximized" "--start-maximized"
] ]
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
# Use installed Chrome instead of Playwright's Chromium (avoids slow download) # Use installed Chrome instead of Playwright's Chromium (avoids slow download)
CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" import platform
import os
# Check if running on macOS
if platform.system() == "Darwin" and os.path.exists("/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"):
CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
else:
# On Linux/Docker, use Playwright's bundled Chromium (None lets Playwright decide)
CHROME_PATH = None
# VNC login state (class-level to persist across requests) # VNC login state (class-level to persist across requests)
_vnc_playwright = None _vnc_playwright = None
@ -515,8 +528,26 @@ class PlaywrightManager:
try: try:
data = await response.json() data = await response.json()
# TikTok returns videos in "itemList" or "aweme_list" # TikTok returns videos in various nested formats
items = data.get("itemList", []) or data.get("aweme_list", []) items = []
# Try direct itemList first
if data.get("itemList") and isinstance(data["itemList"], list):
items = data["itemList"]
elif data.get("aweme_list") and isinstance(data["aweme_list"], list):
items = data["aweme_list"]
# Try nested data structure
elif data.get("data"):
nested_data = data["data"]
if isinstance(nested_data, list):
for item in nested_data:
if isinstance(item, dict):
if "item" in item and isinstance(item["item"], dict):
items.append(item["item"])
else:
items.append(item)
elif isinstance(nested_data, dict):
items = nested_data.get("itemList", []) or nested_data.get("aweme_list", [])
for item in items: for item in items:
video_data = PlaywrightManager._extract_video_data(item) video_data = PlaywrightManager._extract_video_data(item)
@ -744,10 +775,68 @@ class PlaywrightManager:
# Wait for videos to load # Wait for videos to load
await asyncio.sleep(2) await asyncio.sleep(2)
# Scroll a bit to trigger more video loading # Scroll loop to ensure we get enough videos
await page.evaluate("window.scrollBy(0, 500)") scroll_attempts = 0
await asyncio.sleep(1) last_count = 0
max_scrolls = 20 # Prevent infinite loops
while len(captured_videos) < limit and scroll_attempts < max_scrolls:
print(f"DEBUG: Scrolling profile (Current: {len(captured_videos)}/{limit})...")
await page.evaluate("window.scrollBy(0, 800)")
await asyncio.sleep(1.5) # Wait for network/DOM
# DOM Fallback check inside loop (for hybrid loading)
if len(captured_videos) == last_count:
# If count didn't increase via network, try scraping DOM again
# This handles cases where TikTok renders new items in DOM without standard API
# (Unlikely for infinite scroll, but good safety)
pass
last_count = len(captured_videos)
scroll_attempts += 1
# DOM Fallback: If no API captured (SSR case), scrape from DOM
if len(captured_videos) == 0:
print("DEBUG: No API response for user videos, trying DOM scrape (SSR)...")
video_elements = await page.locator('div[data-e2e="user-post-item"]').all()
for el in video_elements:
if len(captured_videos) >= limit:
break
try:
# Extract data from DOM attributes/links
url = await el.locator("a").get_attribute("href")
desc = await el.locator("img").get_attribute("alt")
# Try to find specific img for cover
# Often the img alt is the description
if url:
# Parse video ID and author from URL
# Format: https://www.tiktok.com/@user/video/123456...
if "/video/" in url:
parts = url.split("/video/")
vid_id = parts[1].split("?")[0] if len(parts) > 1 else ""
# We already know the author from the function arg, but can verify
# Construct basic video object
dom_video = {
"id": vid_id,
"url": url,
"author": username,
"description": desc or f"Video by @{username}",
"views": 0, # Cannot easily get from list view DOM
"likes": 0
}
# Try to get thumbnail info
thumb = await el.locator("img").get_attribute("src")
if thumb:
dom_video["thumbnail"] = thumb
captured_videos.append(dom_video)
except Exception as el_err:
print(f"DEBUG: Error extracting DOM item: {el_err}")
except Exception as e: except Exception as e:
print(f"DEBUG: Error navigating to profile: {e}") print(f"DEBUG: Error navigating to profile: {e}")
@ -760,59 +849,19 @@ class PlaywrightManager:
async def search_videos(query: str, cookies: list, user_agent: str = None, limit: int = 20, cursor: int = 0) -> list: async def search_videos(query: str, cookies: list, user_agent: str = None, limit: int = 20, cursor: int = 0) -> list:
""" """
Search for videos by keyword or hashtag. Search for videos by keyword or hashtag.
Uses Playwright to intercept TikTok search results API. Optimized: Uses page.evaluate to fetch specific offsets via internal API.
Args:
query: Search query
cookies: Auth cookies
user_agent: Browser user agent
limit: Max videos to capture in this batch
cursor: Starting offset for pagination
""" """
from playwright.async_api import async_playwright, Response from playwright.async_api import async_playwright
from urllib.parse import quote from urllib.parse import quote
import json
if not user_agent: if not user_agent:
user_agent = PlaywrightManager.DEFAULT_USER_AGENT user_agent = PlaywrightManager.DEFAULT_USER_AGENT
if not cookies:
print("DEBUG: No cookies available for search")
return []
print(f"DEBUG: Searching for '{query}' (limit={limit}, cursor={cursor})...") print(f"DEBUG: Searching for '{query}' (limit={limit}, cursor={cursor})...")
captured_videos = [] captured_videos = []
async def handle_response(response: Response):
"""Capture search results API responses."""
nonlocal captured_videos
url = response.url
# Look for search results API
if "search" in url and ("item_list" in url or "video" in url or "general" in url):
try:
data = await response.json()
# Try different response formats
items = data.get("itemList", []) or data.get("data", []) or data.get("item_list", [])
for item in items:
# If we have enough for this specific batch, we don't need more
if len(captured_videos) >= limit:
break
video_data = PlaywrightManager._extract_video_data(item)
if video_data:
# Avoid duplicates within the same capture session
if not any(v['id'] == video_data['id'] for v in captured_videos):
captured_videos.append(video_data)
print(f"DEBUG: Captured {len(items)} videos from search API (Total batch: {len(captured_videos)})")
except Exception as e:
print(f"DEBUG: Error parsing search API response: {e}")
async with async_playwright() as p: async with async_playwright() as p:
browser = await p.chromium.launch( browser = await p.chromium.launch(
headless=True, headless=True,
@ -820,51 +869,69 @@ class PlaywrightManager:
args=PlaywrightManager.BROWSER_ARGS args=PlaywrightManager.BROWSER_ARGS
) )
context = await browser.new_context(user_agent=user_agent) context = await browser.new_context(
await context.add_cookies(cookies) user_agent=user_agent,
viewport={"width": 1280, "height": 720}
)
if cookies:
await context.add_cookies(cookies)
page = await context.new_page() page = await context.new_page()
await stealth_async(page) await stealth_async(page)
page.on("response", handle_response)
try: try:
# Navigate to TikTok search page # 1. Navigate to Search Page to initialize session/state
search_url = f"https://www.tiktok.com/search/video?q={quote(query)}" # We don't need to wait for full load if we are just going to fetch
try: search_url = f"https://www.tiktok.com/search?q={quote(query)}"
await page.goto(search_url, wait_until="domcontentloaded", timeout=15000) await page.goto(search_url, wait_until="domcontentloaded", timeout=20000)
except:
print("DEBUG: Navigation timeout, proceeding anyway")
# Wait for initial results # 2. If cursor > 0 (or always), Try to fetch API directly from browser context
await asyncio.sleep(3) # This leverages the browser's valid session/signature generation
print(f"DEBUG: Executing internal API fetch for offset {cursor}...")
# Scroll based on cursor to reach previous results and then capture new ones api_script = f"""
# Each scroll typically loads 12-20 items async () => {{
# We scroll more as the cursor increases const url = "https://www.tiktok.com/api/search/general/full/?keyword={quote(query)}&offset={cursor}&count={limit}&search_source=normal_search&is_filter_search=0";
scroll_count = (cursor // 10) + 1 try {{
# Limit total scrolls to avoid hanging const res = await fetch(url);
scroll_count = min(scroll_count, 10) return await res.json();
}} catch (e) {{
return {{ error: e.toString() }};
}}
}}
"""
for i in range(scroll_count): data = await page.evaluate(api_script)
await page.evaluate("window.scrollBy(0, 1500)")
await asyncio.sleep(1.5)
# After reaching the offset, scroll a bit more to trigger the specific batch capture
batch_scrolls = (limit // 10) + 2 # Add extra scrolls to be safe
for _ in range(batch_scrolls):
await page.evaluate("window.scrollBy(0, 2000)") # Larger scroll
await asyncio.sleep(1.0) # Faster scroll cadence
# Wait a bit after scrolling for all responses to settle
await asyncio.sleep(2.5)
# 3. Parse Results
if data and "error" not in data:
items = []
# Try data list directly (general search)
if data.get("data") and isinstance(data["data"], list):
for item in data["data"]:
if isinstance(item, dict):
if "item" in item:
items.append(item["item"])
elif "aweme" in item:
items.append(item["aweme"])
elif "type" in item and item["type"] == 1: # Video type
items.append(item)
# Try itemList (item search)
elif data.get("itemList"):
items = data["itemList"]
elif data.get("item_list"):
items = data["item_list"]
except Exception as e: except Exception as e:
print(f"DEBUG: Error during search: {e}") print(f"DEBUG: Search navigation error: {e}")
await browser.close() await browser.close()
print(f"DEBUG: Total captured search videos in this batch: {len(captured_videos)}") print(f"DEBUG: Total captured search videos: {len(captured_videos)}")
return captured_videos return captured_videos
@staticmethod @staticmethod
async def fetch_suggested_accounts(cookies: list, user_agent: str = None, limit: int = 50) -> list: async def fetch_suggested_accounts(cookies: list, user_agent: str = None, limit: int = 50) -> list:

View file

@ -0,0 +1,450 @@
"""
TikTok Direct API Service - Fast API calls without browser automation.
Replaces Playwright crawling with direct HTTP requests to TikTok's internal APIs.
Expected performance: ~100-500ms vs 5-15 seconds with Playwright.
"""
import httpx
import asyncio
from typing import List, Optional, Dict, Any
from urllib.parse import quote
from core.playwright_manager import PlaywrightManager
class TikTokAPIService:
    """
    Direct TikTok API calls for instant data retrieval.

    Key endpoints used:
    - /api/user/detail/?uniqueId={username} - Get user profile and secUid
    - /api/post/item_list/?secUid={secUid}&count={count} - Get user's videos
    - /api/search/general/full/?keyword={query} - Search videos
    """

    BASE_URL = "https://www.tiktok.com"
    DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    @staticmethod
    def _build_headers(cookies: List[dict], user_agent: Optional[str] = None) -> dict:
        """Build request headers (Cookie string, UA, browser-like sec-* hints)."""
        cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
        return {
            "User-Agent": user_agent or TikTokAPIService.DEFAULT_USER_AGENT,
            "Referer": "https://www.tiktok.com/",
            "Cookie": cookie_str,
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "en-US,en;q=0.9",
            "sec-ch-ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
        }

    @staticmethod
    def _extract_video_data(item: dict) -> Optional[dict]:
        """
        Extract video data from a TikTok API response item.

        Matches the format used by PlaywrightManager._extract_video_data().
        Returns None for non-dict input, items without id/author, or on error.
        Stats keys are only present in the result when their value is truthy.
        """
        try:
            if not isinstance(item, dict):
                return None

            video_id = item.get("id") or item.get("aweme_id")

            # Author info: web API uses camelCase, mobile API uses snake_case.
            author_data = item.get("author", {})
            author = author_data.get("uniqueId") or author_data.get("unique_id") or "unknown"

            desc = item.get("desc") or item.get("description") or ""

            # Product/shop videos carry commerce metadata.
            is_shop_video = bool(item.get("products") or item.get("commerce_info") or item.get("poi_info"))

            # Thumbnail/cover image: first available source wins.
            video_data = item.get("video", {})
            thumbnail = None
            for src in (
                video_data.get("cover"),
                video_data.get("dynamicCover"),
                video_data.get("originCover"),
            ):
                if src:
                    thumbnail = src
                    break

            # Direct CDN URL (playable without resolving the page).
            cdn_url = None
            for src in (video_data.get("playAddr"), video_data.get("downloadAddr")):
                if src:
                    cdn_url = src
                    break

            # Canonical video page URL.
            video_url = f"https://www.tiktok.com/@{author}/video/{video_id}"

            # Stats: camelCase (web) or snake_case (mobile) variants.
            stats = item.get("stats", {}) or item.get("statistics", {})
            views = stats.get("playCount") or stats.get("play_count") or 0
            likes = stats.get("diggCount") or stats.get("digg_count") or 0
            comments = stats.get("commentCount") or stats.get("comment_count") or 0
            shares = stats.get("shareCount") or stats.get("share_count") or 0

            if video_id and author:
                result = {
                    "id": str(video_id),
                    "url": video_url,
                    "author": author,
                    "description": desc[:200] if desc else f"Video by @{author}"
                }
                if thumbnail:
                    result["thumbnail"] = thumbnail
                if cdn_url:
                    result["cdn_url"] = cdn_url
                if views:
                    result["views"] = views
                if likes:
                    result["likes"] = likes
                if comments:
                    result["comments"] = comments
                if shares:
                    result["shares"] = shares
                if is_shop_video:
                    result["has_product"] = True
                return result
        except Exception as e:
            print(f"DEBUG: Error extracting video data: {e}")
        return None

    @staticmethod
    async def get_user_sec_uid(username: str, cookies: List[dict], user_agent: Optional[str] = None) -> Optional[str]:
        """
        Get a user's secUid from their profile.

        secUid is required for the video list API. Returns None on any
        HTTP/network/JSON failure.
        """
        headers = TikTokAPIService._build_headers(cookies, user_agent)
        profile_url = f"{TikTokAPIService.BASE_URL}/api/user/detail/?uniqueId={username}"
        try:
            async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
                response = await client.get(profile_url, headers=headers)
                if response.status_code != 200:
                    print(f"DEBUG: Failed to get user profile, status: {response.status_code}")
                    return None
                data = response.json()
                user_info = data.get("userInfo", {})
                user = user_info.get("user", {})
                sec_uid = user.get("secUid")
                if sec_uid:
                    print(f"DEBUG: Got secUid for @{username}: {sec_uid[:20]}...")
                return sec_uid
        except Exception as e:
            print(f"DEBUG: Error getting secUid for {username}: {e}")
        return None

    @staticmethod
    async def get_user_videos(
        username: str,
        cookies: List[dict],
        user_agent: Optional[str] = None,
        limit: int = 20,
        cursor: int = 0
    ) -> List[dict]:
        """
        Fetch videos from a user's profile using a direct API call.

        Args:
            username: TikTok username (without @)
            cookies: Auth cookies list
            user_agent: Browser user agent
            limit: Max videos to return
            cursor: Pagination cursor for more videos

        Returns:
            List of video dictionaries (empty on any failure).
        """
        print(f"DEBUG: [API] Fetching videos for @{username} (limit={limit})...")

        # Step 1: resolve secUid — the item_list endpoint requires it.
        sec_uid = await TikTokAPIService.get_user_sec_uid(username, cookies, user_agent)
        if not sec_uid:
            print(f"DEBUG: [API] Could not get secUid for @{username}")
            return []

        # Step 2: fetch the video list.
        headers = TikTokAPIService._build_headers(cookies, user_agent)
        video_list_url = (
            f"{TikTokAPIService.BASE_URL}/api/post/item_list/?"
            f"secUid={quote(sec_uid)}&"
            f"count={min(limit, 35)}&"  # TikTok max per request is ~35
            f"cursor={cursor}"
        )
        try:
            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
                response = await client.get(video_list_url, headers=headers)
                if response.status_code != 200:
                    print(f"DEBUG: [API] Video list failed, status: {response.status_code}")
                    return []
                data = response.json()
                # Web API returns itemList; mobile-style responses use aweme_list.
                items = data.get("itemList", []) or data.get("aweme_list", [])
                videos = []
                for item in items[:limit]:
                    video_data = TikTokAPIService._extract_video_data(item)
                    if video_data:
                        videos.append(video_data)
                print(f"DEBUG: [API] Successfully fetched {len(videos)} videos for @{username}")
                return videos
        except Exception as e:
            print(f"DEBUG: [API] Error fetching videos for {username}: {e}")
            return []

    @staticmethod
    async def search_videos(
        query: str,
        cookies: List[dict],
        user_agent: Optional[str] = None,
        limit: int = 20,
        cursor: int = 0
    ) -> List[dict]:
        """
        Search for videos using a direct API call.

        Falls back to the item-specific search endpoint (_search_videos_alt)
        when the general endpoint fails or returns no parsable videos.

        Args:
            query: Search keyword or hashtag
            cookies: Auth cookies list
            user_agent: Browser user agent
            limit: Max videos to return
            cursor: Pagination offset

        Returns:
            List of video dictionaries (empty on total failure).
        """
        print(f"DEBUG: [API] Searching for '{query}' (limit={limit}, cursor={cursor})...")

        headers = TikTokAPIService._build_headers(cookies, user_agent)

        # web_search_code mirrors what the web client sends for this endpoint.
        search_url = (
            f"{TikTokAPIService.BASE_URL}/api/search/general/full/?"
            f"keyword={quote(query)}&"
            f"offset={cursor}&"
            f"search_source=normal_search&"
            f"is_filter_search=0&"
            f"web_search_code=%7B%22tiktok%22%3A%7B%22client_params_x%22%3A%7B%22search_engine%22%3A%7B%22ies_mt_user_live_video_card_use_498%22%3A1%7D%7D%7D%7D"
        )
        try:
            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
                response = await client.get(search_url, headers=headers)
                if response.status_code != 200:
                    print(f"DEBUG: [API] Search failed, status: {response.status_code}")
                    return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)

                data = response.json()
                videos = []
                # Response shape varies: data / itemList / item_list.
                item_list = data.get("data", [])
                if not item_list:
                    item_list = data.get("itemList", [])
                if not item_list:
                    item_list = data.get("item_list", [])

                for item in item_list[:limit]:
                    # General search wraps the video in an "item" envelope.
                    video_item = item.get("item", item)
                    video_data = TikTokAPIService._extract_video_data(video_item)
                    if video_data:
                        videos.append(video_data)

                if videos:
                    print(f"DEBUG: [API] Successfully found {len(videos)} videos for '{query}'")
                    return videos
                return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)
        except Exception as e:
            print(f"DEBUG: [API] Error searching for {query}: {e}")
            return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)

    @staticmethod
    async def _search_videos_alt(
        query: str,
        cookies: List[dict],
        user_agent: Optional[str] = None,
        limit: int = 20,
        cursor: int = 0
    ) -> List[dict]:
        """Alternative search using the video-specific (item) endpoint."""
        print(f"DEBUG: [API] Trying alternative search endpoint...")
        headers = TikTokAPIService._build_headers(cookies, user_agent)
        search_url = (
            f"{TikTokAPIService.BASE_URL}/api/search/item/full/?"
            f"keyword={quote(query)}&"
            f"offset={cursor}&"
            f"count={min(limit, 30)}"
        )
        try:
            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
                response = await client.get(search_url, headers=headers)
                if response.status_code != 200:
                    print(f"DEBUG: [API] Alt search also failed, status: {response.status_code}")
                    return []
                data = response.json()
                videos = []
                item_list = data.get("itemList", []) or data.get("item_list", []) or data.get("data", [])
                for item in item_list[:limit]:
                    video_data = TikTokAPIService._extract_video_data(item)
                    if video_data:
                        videos.append(video_data)
                print(f"DEBUG: [API] Alt search found {len(videos)} videos")
                return videos
        except Exception as e:
            print(f"DEBUG: [API] Alt search error: {e}")
            return []

    @staticmethod
    async def get_user_videos_via_ytdlp(username: str, limit: int = 20) -> List[dict]:
        """
        Fetch user videos using yt-dlp (robust fallback when the API fails).

        Shells out to the yt-dlp CLI with --dump-json and parses one JSON
        object per line. Returns [] on process failure or execution error.
        """
        print(f"DEBUG: [yt-dlp] Fetching videos for @{username}...")
        import json
        import sys
        import os

        def get_yt_dlp_path():
            # Prefer the copy installed next to the Python executable (venv);
            # otherwise rely on PATH resolution.
            path = os.path.join(os.path.dirname(sys.executable), 'yt-dlp.exe')
            if os.path.exists(path):
                return path
            return 'yt-dlp'

        # BUGFIX: these arguments were previously not collected into a list,
        # leaving `cmd` undefined at the create_subprocess_exec call below.
        cmd = [
            get_yt_dlp_path(),
            f"https://www.tiktok.com/@{username}",
            # "--flat-playlist",  # Disabled to get full metadata (thumbnails)
            "--skip-download",    # Don't download video files
            "--dump-json",
            "--playlist-end", str(limit),
            "--no-warnings",
            "--ignore-errors",    # Skip private/removed videos
        ]
        try:
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )
            stdout, stderr = await process.communicate()

            if process.returncode != 0:
                print(f"DEBUG: [yt-dlp] Failed: {stderr.decode()}")
                return []

            videos = []
            output = stdout.decode('utf-8')
            for line in output.splitlines():
                try:
                    if not line.strip():
                        continue
                    data = json.loads(line)

                    video_id = data.get('id')

                    # yt-dlp sometimes returns a numeric ID as uploader_id for
                    # profiles; prefer the search username in that case so the
                    # constructed URL stays handle-based.
                    raw_uploader_id = data.get('uploader_id')
                    if raw_uploader_id and raw_uploader_id.isdigit():
                        unique_id = username
                    else:
                        unique_id = raw_uploader_id or username

                    video = {
                        "id": video_id,
                        "url": data.get('url') or f"https://www.tiktok.com/@{unique_id}/video/{video_id}",
                        "author": unique_id,
                        "description": data.get('title') or "",
                        "thumbnail": data.get('thumbnail'),  # may be None in flat-playlist mode
                        "views": data.get('view_count', 0),
                        "likes": data.get('like_count', 0)
                    }
                    videos.append(video)
                except Exception:
                    # Skip lines that are not valid per-video JSON.
                    continue

            print(f"DEBUG: [yt-dlp] Found {len(videos)} videos")
            return videos
        except Exception as e:
            print(f"DEBUG: [yt-dlp] Execution error: {e}")
            return []
# Singleton instance shared by callers (all methods are @staticmethod, so the
# instance itself carries no state).
tiktok_api = TikTokAPIService()

44
backend/debug_api.py Normal file
View file

@ -0,0 +1,44 @@
import asyncio
import httpx
import sys
from core.playwright_manager import PlaywrightManager
async def test_api():
    """Smoke-test TikTok's /api/user/detail endpoint using stored credentials.

    Loads cookies + user agent saved by PlaywrightManager, fetches the profile
    of a hard-coded username, and prints the resolved secUid (or a preview of
    the failing response). Debug script — output goes to stdout only.
    """
    print("Loading credentials...")
    cookies, user_agent = PlaywrightManager.load_stored_credentials()

    headers = {
        "User-Agent": user_agent or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Referer": "https://www.tiktok.com/",
        "Cookie": "; ".join([f"{c['name']}={c['value']}" for c in cookies]),
    }

    username = "x.ka.baongoc"
    url = f"https://www.tiktok.com/api/user/detail/?uniqueId={username}"

    print(f"Fetching {url}...")
    async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
        res = await client.get(url, headers=headers)
        print(f"Status: {res.status_code}")
        if res.status_code == 200:
            try:
                data = res.json()
                user = data.get("userInfo", {}).get("user", {})
                sec_uid = user.get("secUid")
                print(f"SecUid: {sec_uid}")
                if not sec_uid:
                    print("Response body preview:", str(data)[:500])
            except Exception:
                # BUGFIX: was a bare `except:`, which also swallowed
                # KeyboardInterrupt/SystemExit. Only JSON decode failures
                # are expected here.
                print("JSON Decode Failed. Content preview:")
                print(res.text[:500])
        else:
            print("Response:", res.text[:500])
if __name__ == "__main__":
    try:
        # BUGFIX: the original only created `loop` on win32 but called
        # loop.run_until_complete unconditionally, raising NameError on other
        # platforms. Direct ProactorEventLoop() construction is also
        # deprecated; set the policy and let asyncio.run manage the loop.
        if sys.platform == "win32":
            asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
        asyncio.run(test_api())
    except Exception as e:
        print(e)

View file

@ -1,5 +1,8 @@
[ [
"nhythanh_04", "nhythanh_04",
"po.trann77", "po.trann77",
"tieu_hy26" "tieu_hy26",
"phamthuy9722r",
"tlin99",
"mjxdj9"
] ]

View file

@ -1,3 +1,11 @@
import sys
import asyncio
# CRITICAL: Set Windows event loop policy BEFORE any other imports
# Playwright requires ProactorEventLoop for subprocess support on Windows
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
from fastapi import FastAPI from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
@ -5,12 +13,7 @@ from fastapi.responses import FileResponse
from contextlib import asynccontextmanager from contextlib import asynccontextmanager
from pathlib import Path from pathlib import Path
from api.routes import auth, feed, download, following, config, user from api.routes import auth, feed, download, following, config, user
import sys
import asyncio
# Force Proactor on Windows for Playwright
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
@asynccontextmanager @asynccontextmanager
async def lifespan(app: FastAPI): async def lifespan(app: FastAPI):

View file

@ -6,5 +6,6 @@ python-multipart
websockets websockets
python-dotenv python-dotenv
crawl4ai crawl4ai
playwright playwright==1.49.1
playwright-stealth playwright-stealth
httpx

26
backend/run_windows.py Normal file
View file

@ -0,0 +1,26 @@
"""
Windows-compatible startup script for PureStream.
Sets ProactorEventLoop policy BEFORE uvicorn imports anything.
"""
import sys
import asyncio
# CRITICAL: Must be set before importing uvicorn or any async code
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
# Also create the loop early
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
print(f"DEBUG: Forced ProactorEventLoop: {type(loop)}")
# Now import and run uvicorn
import uvicorn
if __name__ == "__main__":
uvicorn.run(
"main:app",
host="0.0.0.0",
port=8002,
reload=False, # Disabled: reload subprocess loses ProactorEventLoop on Windows
loop="asyncio" # Use asyncio, which should now use our ProactorEventLoop
)

View file

@ -0,0 +1,17 @@
import asyncio
from backend.core.playwright_manager import PlaywrightManager
async def test_search():
    """Manually exercise PlaywrightManager.search_videos with the stored session.

    Prints how many videos came back and each result's play address so a
    developer can eyeball whether authenticated search is working.
    """
    print("Testing search_videos with STORED COOKIES...")
    cookies, ua = PlaywrightManager.load_stored_credentials()
    print(f"Loaded {len(cookies)} cookies. UA: {ua[:50]}...")

    videos = await PlaywrightManager.search_videos("gai xinh nhay", cookies=cookies, user_agent=ua, limit=5)
    print(f"Found {len(videos)} videos.")

    for idx, vid in enumerate(videos):
        addr = vid.get("video", {}).get("play_addr")
        print(f"Video {idx} Play Addr: {addr}")


if __name__ == "__main__":
    asyncio.run(test_search())

66
backup_project.ps1 Normal file
View file

@ -0,0 +1,66 @@
# Create a timestamped zip backup of the project under .\backups,
# skipping VCS metadata, virtualenvs, node_modules, caches and old backups.
$ErrorActionPreference = "Stop"

$ProjectDir = Get-Location
$Timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
$BackupDir = Join-Path $ProjectDir "backups"
$BackupFile = Join-Path $BackupDir "kv_tiktok_backup_$Timestamp.zip"

# Create backup directory if it doesn't exist
if (-not (Test-Path $BackupDir)) {
    New-Item -ItemType Directory -Path $BackupDir | Out-Null
    Write-Host "Created backup directory: $BackupDir" -ForegroundColor Cyan
}

Write-Host "Starting backup of $ProjectDir..." -ForegroundColor Cyan
Write-Host "Target file: $BackupFile" -ForegroundColor Cyan

# Exclude list: regex patterns matched against project-relative paths
$ExcludePatterns = @(
    "^\.git",
    "^\.venv",
    "^node_modules",
    "__pycache__",
    "^backups",
    "\.log$",
    "backend\\downloads",
    "backend\\cache",
    "backend\\session",
    "frontend\\dist"
)

# BUGFIX: use -File so directories are never piped to Compress-Archive.
# Passing a directory archives its entire contents, silently re-including
# files that the exclude patterns filtered out (e.g. backend\cache\*).
# NOTE(review): piping a flat file list to Compress-Archive stores entries at
# the archive root (directory structure is flattened) — confirm whether the
# restore workflow depends on preserved paths.
$FilesToZip = Get-ChildItem -Path $ProjectDir -Recurse -File | Where-Object {
    $relativePath = $_.FullName.Substring($ProjectDir.Path.Length + 1)
    $shouldExclude = $false
    foreach ($pattern in $ExcludePatterns) {
        if ($relativePath -match $pattern) {
            $shouldExclude = $true
            break
        }
    }
    # Also exclude anything already inside the backup directory itself
    if ($relativePath -like "backups\*") { $shouldExclude = $true }
    return -not $shouldExclude
}

if ($FilesToZip.Count -eq 0) {
    Write-Error "No files found to backup!"
}

# Compress
Write-Host "Compressing $($FilesToZip.Count) files..." -ForegroundColor Yellow
$FilesToZip | Compress-Archive -DestinationPath $BackupFile -Force

if (Test-Path $BackupFile) {
    $Item = Get-Item $BackupFile
    $SizeMB = [math]::Round($Item.Length / 1MB, 2)
    Write-Host "Backup created successfully!" -ForegroundColor Green
    Write-Host "Location: $BackupFile"
    Write-Host "Size: $SizeMB MB"
}
else {
    Write-Error "Backup failed!"
}

49
backup_project.sh Normal file
View file

@ -0,0 +1,49 @@
#!/bin/bash
#
# Create a timestamped zip backup of the project in ./backups, pruning
# generated and heavyweight paths (VCS, virtualenvs, node_modules, caches,
# build output, previous backups, logs).

PROJECT_DIR="$(pwd)"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
BACKUP_DIR="${PROJECT_DIR}/backups"
BACKUP_FILE="${BACKUP_DIR}/kv_tiktok_backup_${TIMESTAMP}.zip"

# Make sure the destination directory exists before zipping into it.
if [ ! -d "$BACKUP_DIR" ]; then
    echo "Creating backup directory: $BACKUP_DIR"
    mkdir -p "$BACKUP_DIR"
fi

echo "Starting backup of ${PROJECT_DIR}..."
echo "Target file: ${BACKUP_FILE}"

# Archive the tree recursively; -x glob patterns drop excluded paths.
# Success/failure is branched directly on zip's exit status.
if zip -r "$BACKUP_FILE" . \
    -x "*.git*" \
    -x "*.venv*" \
    -x "*node_modules*" \
    -x "*__pycache__*" \
    -x "*.DS_Store" \
    -x "*backend/downloads*" \
    -x "*backend/cache*" \
    -x "*backend/session*" \
    -x "*frontend/dist*" \
    -x "*backups*" \
    -x "*.log"
then
    echo "✅ Backup created successfully!"
    echo "📂 Location: ${BACKUP_FILE}"
    # Report the archive size in MB (stat flags differ on macOS vs Linux).
    if [[ "$OSTYPE" == "darwin"* ]]; then
        SIZE=$(stat -f%z "$BACKUP_FILE")
    else
        SIZE=$(stat -c%s "$BACKUP_FILE")
    fi
    SIZE_MB=$(echo "scale=2; $SIZE / 1024 / 1024" | bc)
    echo "📦 Size: ${SIZE_MB} MB"
else
    echo "❌ Backup failed!"
    exit 1
fi

24
docker-compose.dev.yml Normal file
View file

@ -0,0 +1,24 @@
# PureStream Development Docker Compose
# Uses Dockerfile.dev which COPIES files to avoid Synology Drive filesystem issues
services:
backend:
container_name: purestream_dev
build:
context: .
dockerfile: Dockerfile.dev
ports:
- "8002:8002"
volumes:
# Only mount data directories (not code)
- purestream_cache:/app/cache
- purestream_session:/app/session
shm_size: '2gb'
volumes:
purestream_cache:
purestream_session:
# NOTE:
# - Frontend is served by backend at http://localhost:8002
# - Code changes require rebuild: docker-compose -f docker-compose.dev.yml up --build

File diff suppressed because it is too large Load diff

View file

@ -13,11 +13,14 @@
"artplayer": "^5.3.0", "artplayer": "^5.3.0",
"axios": "^1.13.2", "axios": "^1.13.2",
"clsx": "^2.1.1", "clsx": "^2.1.1",
"esbuild": "^0.27.2",
"framer-motion": "^12.23.26", "framer-motion": "^12.23.26",
"lucide-react": "^0.561.0", "hls.js": "^1.6.15",
"lucide-react": "^0.563.0",
"react": "^18.3.1", "react": "^18.3.1",
"react-dom": "^18.3.1", "react-dom": "^18.3.1",
"react-router-dom": "^6.30.2", "react-router-dom": "^6.30.2",
"rollup": "^4.56.0",
"tailwind-merge": "^3.4.0", "tailwind-merge": "^3.4.0",
"zustand": "^5.0.9" "zustand": "^5.0.9"
}, },
@ -36,6 +39,9 @@
"tailwindcss": "^3.4.10", "tailwindcss": "^3.4.10",
"typescript": "^5.5.3", "typescript": "^5.5.3",
"typescript-eslint": "^8.0.1", "typescript-eslint": "^8.0.1",
"vite": "^5.4.1" "vite": "^5.4.21"
},
"optionalDependencies": {
"@rollup/rollup-win32-x64-msvc": "^4.56.0"
} }
} }

View file

@ -23,7 +23,9 @@ import { Feed } from './components/Feed';
const Dashboard = () => { const Dashboard = () => {
return ( return (
<div className="h-screen bg-black"> <div className="h-screen bg-black">
<Feed /> <Routes>
<Route path="/" element={<Feed />} />
</Routes>
</div> </div>
) )
} }
@ -44,7 +46,7 @@ function App() {
<Route path="/login" element={<Login />} /> <Route path="/login" element={<Login />} />
<Route path="/admin" element={<Admin />} /> <Route path="/admin" element={<Admin />} />
<Route <Route
path="/" path="/*"
element={ element={
<ProtectedRoute> <ProtectedRoute>
<Dashboard /> <Dashboard />

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,83 @@
import React from 'react';
import { Home, Search, Heart, LogOut } from 'lucide-react';
// Props for the desktop navigation sidebar.
interface SidebarProps {
  // Currently selected tab; drives which NavItem renders as active.
  activeTab: 'foryou' | 'search' | 'profile';
  // Called with the tab id when the user clicks a nav item.
  onTabChange: (tab: 'foryou' | 'search' | 'profile') => void;
  // Optional handler for the bottom "Log Out" button.
  onLogout?: () => void;
}

/**
 * Desktop-only navigation sidebar (hidden below the `md` breakpoint).
 * Collapses to an icon rail at `md` width and expands with text labels at `lg`.
 * Contains the app logo, the main nav items, and a log-out action.
 */
export const Sidebar: React.FC<SidebarProps> = ({ activeTab, onTabChange, onLogout }) => {
  return (
    <div className="hidden md:flex flex-col w-20 lg:w-64 h-full glass-panel border-r-0 border-r-white/10 z-50 transition-all duration-300">
      {/* Logo */}
      <div className="p-6 flex items-center gap-3">
        <div className="w-8 h-8 rounded-xl bg-gradient-to-tr from-violet-500 to-fuchsia-500 flex-shrink-0" />
        <h1 className="text-xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-white to-gray-400 hidden lg:block">
          PureStream
        </h1>
      </div>

      {/* Nav Items */}
      <div className="flex-1 flex flex-col gap-2 px-3 py-4">
        <NavItem
          icon={<Home size={24} />}
          label="For You"
          isActive={activeTab === 'foryou'}
          onClick={() => onTabChange('foryou')}
        />
        <NavItem
          icon={<Search size={24} />}
          label="Search"
          isActive={activeTab === 'search'}
          onClick={() => onTabChange('search')}
        />
        {/* Placeholder for future features — never active, no-op click */}
        <NavItem
          icon={<Heart size={24} />}
          label="Likes"
          isActive={false}
          onClick={() => { }}
        />
      </div>

      {/* Bottom Actions */}
      <div className="p-4 border-t border-white/10 space-y-2">
        <button
          onClick={onLogout}
          className="flex items-center gap-4 w-full p-3 rounded-xl text-gray-400 hover:bg-white/5 hover:text-red-400 transition-all group"
        >
          <LogOut size={22} className="group-hover:scale-110 transition-transform" />
          <span className="hidden lg:block font-medium">Log Out</span>
        </button>
      </div>
    </div>
  );
};
// Props for a single sidebar navigation entry.
interface NavItemProps {
  icon: React.ReactNode;
  label: string;
  // Highlights the item and disables the hover scale effect.
  isActive: boolean;
  onClick: () => void;
}

/**
 * One sidebar entry: icon plus a label that is only visible at `lg` width.
 * Active state gets a translucent background and a slightly enlarged icon.
 */
const NavItem: React.FC<NavItemProps> = ({ icon, label, isActive, onClick }) => {
  return (
    <button
      onClick={onClick}
      className={`flex items-center gap-4 w-full p-3 rounded-xl transition-all duration-200 group
        ${isActive
          ? 'bg-white/10 text-white shadow-lg shadow-black/20'
          : 'text-gray-400 hover:bg-white/5 hover:text-white'
        }`}
    >
      <div className={`${isActive ? 'scale-110' : 'group-hover:scale-110'} transition-transform duration-200`}>
        {icon}
      </div>
      <span className={`hidden lg:block font-medium ${isActive ? 'text-white' : ''}`}>
        {label}
      </span>
    </button>
  );
};

View file

@ -0,0 +1,38 @@
import React from 'react';
export const SkeletonFeed: React.FC = () => {
return (
<div className="h-full w-full bg-[#0f0f15] relative overflow-hidden flex items-center justify-center">
{/* Main Video Area Skeleton */}
<div className="absolute inset-0 flex items-center justify-center">
<div className="w-16 h-16 rounded-full skeleton-pulse opacity-50"></div>
</div>
{/* Right Sidebar Action Buttons */}
<div className="absolute right-4 bottom-24 flex flex-col items-center gap-6 z-10">
{[1, 2, 3, 4].map((_, i) => (
<div key={i} className="flex flex-col items-center gap-1">
<div className="w-12 h-12 rounded-full skeleton-pulse bg-white/10" />
<div className="w-8 h-3 rounded-md skeleton-pulse bg-white/10" />
</div>
))}
</div>
{/* Bottom Info Area */}
<div className="absolute bottom-6 left-4 right-20 z-10 space-y-3">
<div className="w-32 h-5 rounded-md skeleton-pulse bg-white/10" />
<div className="w-64 h-4 rounded-md skeleton-pulse bg-white/10" />
<div className="w-48 h-4 rounded-md skeleton-pulse bg-white/10 opacity-70" />
{/* Music Skeleton */}
<div className="flex items-center gap-2 mt-2">
<div className="w-6 h-6 rounded-full skeleton-pulse bg-white/10" />
<div className="w-40 h-4 rounded-md skeleton-pulse bg-white/10" />
</div>
</div>
{/* Overlay Gradient */}
<div className="absolute inset-0 bg-gradient-to-b from-transparent via-transparent to-black/60 pointer-events-none" />
</div>
);
};

View file

@ -0,0 +1,61 @@
import React, { useState } from 'react';
import type { UserProfile } from '../types';
// Props for the expandable user profile card.
interface UserCardProps {
  user: UserProfile;
}

/**
 * Card showing a user's avatar, nickname and @username, with a toggle button
 * that expands to reveal follower/following/like counts and (if present) the
 * bio. Expansion state is local to the card.
 */
const UserCard: React.FC<UserCardProps> = ({ user }) => {
  const [isExpanded, setIsExpanded] = useState(false);

  return (
    <div className="bg-white dark:bg-gray-800 shadow-md rounded-lg p-4 mb-4">
      <div className="flex items-center justify-between">
        <div className="flex items-center">
          <img
            src={user.avatar}
            alt={user.username}
            className="w-16 h-16 rounded-full mr-4"
          />
          <div>
            <h2 className="text-xl font-bold">{user.nickname}</h2>
            <p className="text-gray-500 dark:text-gray-400">@{user.username}</p>
          </div>
        </div>
        <button
          onClick={() => setIsExpanded(!isExpanded)}
          className="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded"
        >
          {isExpanded ? 'Hide Stats' : 'Show Stats'}
        </button>
      </div>

      {isExpanded && (
        <div className="mt-4">
          {/* Counts are optional on UserProfile, hence the ?. before formatting */}
          <div className="grid grid-cols-3 gap-4 text-center">
            <div>
              <p className="font-bold text-lg">{user.followers?.toLocaleString()}</p>
              <p className="text-gray-500 dark:text-gray-400">Followers</p>
            </div>
            <div>
              <p className="font-bold text-lg">{user.following?.toLocaleString()}</p>
              <p className="text-gray-500 dark:text-gray-400">Following</p>
            </div>
            <div>
              <p className="font-bold text-lg">{user.likes?.toLocaleString()}</p>
              <p className="text-gray-500 dark:text-gray-400">Likes</p>
            </div>
          </div>
          {user.bio && (
            <div className="mt-4">
              <h3 className="font-bold">Bio</h3>
              <p className="text-gray-600 dark:text-gray-300">{user.bio}</p>
            </div>
          )}
        </div>
      )}
    </div>
  );
};

export default UserCard;

View file

@ -4,12 +4,7 @@ import type { Video } from '../types';
import { API_BASE_URL } from '../config'; import { API_BASE_URL } from '../config';
import { videoCache } from '../utils/videoCache'; import { videoCache } from '../utils/videoCache';
// Check if browser supports HEVC codec (Safari, Chrome 107+, Edge)
const supportsHEVC = (): boolean => {
if (typeof MediaSource === 'undefined') return false;
return MediaSource.isTypeSupported('video/mp4; codecs="hvc1"') ||
MediaSource.isTypeSupported('video/mp4; codecs="hev1"');
};
interface HeartParticle { interface HeartParticle {
id: number; id: number;
@ -43,7 +38,7 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
const progressBarRef = useRef<HTMLDivElement>(null); const progressBarRef = useRef<HTMLDivElement>(null);
const [isPaused, setIsPaused] = useState(false); const [isPaused, setIsPaused] = useState(false);
const [showControls, setShowControls] = useState(false); const [showControls, setShowControls] = useState(false);
const [objectFit] = useState<'cover' | 'contain'>('cover');
const [progress, setProgress] = useState(0); const [progress, setProgress] = useState(0);
const [duration, setDuration] = useState(0); const [duration, setDuration] = useState(0);
const [isSeeking, setIsSeeking] = useState(false); const [isSeeking, setIsSeeking] = useState(false);
@ -55,7 +50,7 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
const [cachedUrl, setCachedUrl] = useState<string | null>(null); const [cachedUrl, setCachedUrl] = useState<string | null>(null);
const [codecError, setCodecError] = useState(false); // True if video codec not supported const [codecError, setCodecError] = useState(false); // True if video codec not supported
const lastTapRef = useRef<number>(0); const lastTapRef = useRef<number>(0);
const browserSupportsHEVC = useRef(supportsHEVC());
const fullProxyUrl = `${API_BASE_URL}/feed/proxy?url=${encodeURIComponent(video.url)}`; const fullProxyUrl = `${API_BASE_URL}/feed/proxy?url=${encodeURIComponent(video.url)}`;
const thinProxyUrl = video.cdn_url ? `${API_BASE_URL}/feed/thin-proxy?cdn_url=${encodeURIComponent(video.cdn_url)}` : null; const thinProxyUrl = video.cdn_url ? `${API_BASE_URL}/feed/thin-proxy?cdn_url=${encodeURIComponent(video.cdn_url)}` : null;
@ -121,18 +116,16 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
setUseFallback(false); setUseFallback(false);
setIsLoading(true); // Show loading for new video setIsLoading(true); // Show loading for new video
setCodecError(false); // Reset codec error for new video setCodecError(false); // Reset codec error for new video
setCachedUrl(null); // Disable client-side caching for now as it causes partial content issues with Range requests
setShowSidebar(false); // Reset sidebar for new video // The backend has its own LRU cache which is sufficient
const checkCache = async () => { const checkCache = async () => {
const cached = await videoCache.get(video.url); // Force clear any existing cache for this video to ensure we don't serve bad blobs
if (cached) { await videoCache.delete(video.url);
const blob_url = URL.createObjectURL(cached); setCachedUrl(null);
setCachedUrl(blob_url);
}
}; };
checkCache(); checkCache();
}, [video.id]); }, [video.id]);
// Progress tracking // Progress tracking
@ -154,15 +147,21 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
const videoEl = e.target as HTMLVideoElement; const videoEl = e.target as HTMLVideoElement;
const error = videoEl?.error; const error = videoEl?.error;
// Check if this is a codec/decode error (MEDIA_ERR_DECODE = 3) // Check if this is a codec/decode error (MEDIA_ERR_DECODE = 3, MEDIA_ERR_SRC_NOT_SUPPORTED = 4)
if (error?.code === 3 || error?.code === 4) { if (error?.code === 3 || error?.code === 4) {
console.log(`Codec error detected (code ${error.code}):`, error.message); console.log(`Codec error detected (code ${error.code}):`, error.message);
// Only show codec error if browser doesn't support HEVC
if (!browserSupportsHEVC.current) { // Always fall back to full proxy which will transcode to H.264
setCodecError(true); if (!useFallback) {
setIsLoading(false); console.log('Codec not supported, falling back to full proxy (will transcode to H.264)...');
setUseFallback(true);
return; return;
} }
// If even full proxy failed, show error
setCodecError(true);
setIsLoading(false);
return;
} }
if (thinProxyUrl && !useFallback) { if (thinProxyUrl && !useFallback) {
@ -182,25 +181,26 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
}; };
}, [thinProxyUrl, useFallback, cachedUrl]); }, [thinProxyUrl, useFallback, cachedUrl]);
useEffect(() => { // Disable active caching
const cacheVideo = async () => { // useEffect(() => {
if (!cachedUrl || !proxyUrl || proxyUrl === cachedUrl) return; // const cacheVideo = async () => {
// if (!cachedUrl || !proxyUrl || proxyUrl === cachedUrl) return;
try { //
const response = await fetch(proxyUrl); // try {
if (response.ok) { // const response = await fetch(proxyUrl);
const blob = await response.blob(); // if (response.ok) {
await videoCache.set(video.url, blob); // const blob = await response.blob();
} // await videoCache.set(video.url, blob);
} catch (error) { // }
console.debug('Failed to cache video:', error); // } catch (error) {
} // console.debug('Failed to cache video:', error);
}; // }
// };
if (isActive && !isLoading) { //
cacheVideo(); // if (isActive && !isLoading) {
} // cacheVideo();
}, [isActive, isLoading, proxyUrl, cachedUrl, video.url]); // }
// }, [isActive, isLoading, proxyUrl, cachedUrl, video.url]);
const togglePlayPause = () => { const togglePlayPause = () => {
if (!videoRef.current) return; if (!videoRef.current) return;
@ -376,20 +376,32 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
onClick={handleVideoClick} onClick={handleVideoClick}
onTouchStart={handleTouchStart} onTouchStart={handleTouchStart}
> >
{/* Video Element - preload="metadata" for instant player readiness */} {/* Ambient Background (Blurred) */}
<video <div className="absolute inset-0 z-0 overflow-hidden">
ref={videoRef} <video
src={proxyUrl} src={proxyUrl}
loop muted
playsInline loop
preload="metadata" className="w-full h-full object-cover blur-2xl opacity-50 scale-110"
muted={isMuted} />
className="w-full h-full" </div>
style={{ objectFit }}
onCanPlay={() => setIsLoading(false)} {/* Video Element - Main Content */}
onWaiting={() => setIsLoading(true)} <div className="relative z-10 w-full h-full flex items-center justify-center">
onPlaying={() => setIsLoading(false)} <video
/> ref={videoRef}
src={proxyUrl}
loop
playsInline
preload="metadata"
muted={isMuted}
className="max-h-full max-w-full"
style={{ objectFit: 'contain' }}
onCanPlay={() => setIsLoading(false)}
onWaiting={() => setIsLoading(true)}
onPlaying={() => setIsLoading(false)}
/>
</div>
{/* Loading Overlay - Subtle pulsing logo */} {/* Loading Overlay - Subtle pulsing logo */}
{isLoading && !codecError && ( {isLoading && !codecError && (
@ -482,9 +494,9 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
)} )}
</div> </div>
{/* Side Controls - Only show when video is paused */} {/* Side Controls - Always visible on hover or when paused */}
<div <div
className={`absolute bottom-36 right-4 flex flex-col gap-3 transition-all duration-300 transform ${isPaused && showSidebar ? 'translate-x-0 opacity-100' : 'translate-x-[200%] opacity-0' className={`absolute bottom-36 right-4 flex flex-col gap-3 transition-all duration-300 transform ${showControls || isPaused ? 'translate-x-0 opacity-100' : 'translate-x-2 opacity-0'
}`} }`}
> >
{/* Follow Button */} {/* Follow Button */}
@ -505,6 +517,7 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
<a <a
href={downloadUrl} href={downloadUrl}
download download
onClick={(e) => e.stopPropagation()}
className="w-12 h-12 flex items-center justify-center bg-white/10 hover:bg-white/20 backdrop-blur-xl border border-white/10 rounded-full text-white transition-all" className="w-12 h-12 flex items-center justify-center bg-white/10 hover:bg-white/20 backdrop-blur-xl border border-white/10 rounded-full text-white transition-all"
title="Download" title="Download"
> >

View file

@ -1,3 +1 @@
export const API_BASE_URL = import.meta.env.PROD export const API_BASE_URL = '/api';
? '/api'
: (import.meta.env.VITE_API_URL || 'http://localhost:8002/api');

View file

@ -15,6 +15,30 @@
height: 100vh; height: 100vh;
height: 100dvh; height: 100dvh;
} }
body {
@apply bg-[#0f0f15] text-white antialiased;
color-scheme: dark;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
}
}
@layer components {
.glass-panel {
@apply bg-white/5 backdrop-blur-xl border border-white/10;
}
.glass-panel-hover {
@apply hover:bg-white/10 transition-colors duration-200;
}
.btn-primary {
@apply bg-gradient-to-r from-violet-600 to-indigo-600 hover:from-violet-500 hover:to-indigo-500 text-white font-medium px-4 py-2 rounded-xl transition-all active:scale-95 shadow-lg shadow-indigo-500/20;
}
.btn-ghost {
@apply hover:bg-white/10 text-gray-300 hover:text-white px-4 py-2 rounded-xl transition-all active:scale-95;
}
} }
@layer utilities { @layer utilities {
@ -33,33 +57,17 @@
display: none; display: none;
/* Chrome, Safari and Opera */ /* Chrome, Safari and Opera */
} }
}
.text-shadow {
@layer utilities { text-shadow: 0 2px 4px rgba(0,0,0,0.5);
.scrollbar-hide::-webkit-scrollbar {
display: none;
}
.scrollbar-hide {
-ms-overflow-style: none;
scrollbar-width: none;
} }
} }
/* Animations */
@keyframes shake { @keyframes shake {
0%, 100% { transform: translateX(0); }
0%, 25% { transform: translateX(-4px); }
100% { 75% { transform: translateX(4px); }
transform: translateX(0);
}
25% {
transform: translateX(-4px);
}
75% {
transform: translateX(4px);
}
} }
.animate-shake { .animate-shake {
@ -67,34 +75,25 @@
} }
@keyframes heart-float { @keyframes heart-float {
0% { 0% { opacity: 1; transform: scale(0) rotate(-15deg); }
opacity: 1; 25% { opacity: 1; transform: scale(1.2) rotate(10deg); }
transform: scale(0) rotate(-15deg); 50% { opacity: 0.8; transform: scale(1) translateY(-30px) rotate(-5deg); }
} 100% { opacity: 0; transform: scale(0.6) translateY(-80px) rotate(15deg); }
25% {
opacity: 1;
transform: scale(1.2) rotate(10deg);
}
50% {
opacity: 0.8;
transform: scale(1) translateY(-30px) rotate(-5deg);
}
100% {
opacity: 0;
transform: scale(0.6) translateY(-80px) rotate(15deg);
}
} }
.animate-heart-float { .animate-heart-float {
animation: heart-float 1s ease-out forwards; animation: heart-float 1s ease-out forwards;
} }
body { @keyframes shimmer {
@apply bg-black antialiased; 0% { background-position: -200% 0; }
color-scheme: dark; 100% { background-position: 200% 0; }
}
.skeleton-pulse {
background: linear-gradient(90deg, rgba(255,255,255,0.03) 25%, rgba(255,255,255,0.08) 50%, rgba(255,255,255,0.03) 75%);
background-size: 200% 100%;
animation: shimmer 1.5s infinite;
} }
.artplayer-app { .artplayer-app {

View file

@ -33,7 +33,7 @@ class FeedLoader {
} }
const cacheKey = 'feed-full'; const cacheKey = 'feed-full';
// Skip cache check when explicitly requested (for infinite scroll) // Skip cache check when explicitly requested (for infinite scroll)
if (!skipCache) { if (!skipCache) {
const cached = this.getCached(cacheKey); const cached = this.getCached(cacheKey);
@ -43,8 +43,8 @@ class FeedLoader {
} }
} }
const videos = await this.fetchFeed(skipCache); const videos = await this.fetchFeed(skipCache, fast);
// Only cache if not skipping (initial load) // Only cache if not skipping (initial load)
if (!skipCache) { if (!skipCache) {
this.setCached(cacheKey, videos); this.setCached(cacheKey, videos);
@ -62,11 +62,15 @@ class FeedLoader {
} }
} }
private async fetchFeed(skipCache: boolean = false): Promise<Video[]> { private async fetchFeed(skipCache: boolean = false, fast: boolean = false): Promise<Video[]> {
// Add skip_cache parameter to force backend to fetch fresh videos // Add skip_cache parameter to force backend to fetch fresh videos
const url = skipCache let url = `${API_BASE_URL}/feed?`;
? `${API_BASE_URL}/feed?skip_cache=true` if (skipCache) url += 'skip_cache=true&';
: `${API_BASE_URL}/feed`; if (fast) url += 'fast=true&';
// Clean trailing & or ?
url = url.replace(/[?&]$/, '');
const response = await axios.get(url); const response = await axios.get(url);
if (!Array.isArray(response.data)) { if (!Array.isArray(response.data)) {

View file

@ -85,7 +85,8 @@ class VideoPrefetcher {
return; return;
} }
const API_BASE_URL = 'http://localhost:8002/api'; // Hardcoded or imported from config const API_BASE_URL_CONFIG = (await import('../config')).API_BASE_URL;
const API_BASE_URL = API_BASE_URL_CONFIG || 'http://localhost:8002/api'; // Fallback if import fails
const fullProxyUrl = `${API_BASE_URL}/feed/proxy?url=${encodeURIComponent(video.url)}`; const fullProxyUrl = `${API_BASE_URL}/feed/proxy?url=${encodeURIComponent(video.url)}`;
// Use thin proxy if available for better performance // Use thin proxy if available for better performance
const thinProxyUrl = video.cdn_url ? `${API_BASE_URL}/feed/thin-proxy?cdn_url=${encodeURIComponent(video.cdn_url)}` : null; const thinProxyUrl = video.cdn_url ? `${API_BASE_URL}/feed/thin-proxy?cdn_url=${encodeURIComponent(video.cdn_url)}` : null;

View file

@ -1,7 +1,18 @@
import { defineConfig } from 'vite' import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react' import react from '@vitejs/plugin-react'
// https://vite.dev/config/ // https://vitejs.dev/config/
export default defineConfig({ export default defineConfig({
plugins: [react()], plugins: [react()],
server: {
host: '0.0.0.0', // Allow access from outside the container
port: 5173,
proxy: {
'/api': {
target: 'http://localhost:8002',
changeOrigin: true,
timeout: 60000,
},
},
},
}) })

60
manage_app.ps1 Normal file
View file

@ -0,0 +1,60 @@
param (
    [string]$Action = "start"
)

# Ports and filesystem locations shared by the start/stop routines.
# NOTE(review): assumes the script is launched from the repo root — confirm.
$BackendPort = 8002
$FrontendPort = 5173
$RootPath = Get-Location
$BackendDir = Join-Path $RootPath "backend"
$FrontendDir = Join-Path $RootPath "frontend"

function Stop-App {
    # Kill whatever owns the backend/frontend ports; silent if nothing does.
    Write-Host "Stopping PureStream..." -ForegroundColor Yellow
    foreach ($port in @($BackendPort, $FrontendPort)) {
        $owners = Get-NetTCPConnection -LocalPort $port -ErrorAction SilentlyContinue |
            Select-Object -ExpandProperty OwningProcess -Unique
        if (-not $owners) {
            Write-Host "No process found on port $port" -ForegroundColor Gray
            continue
        }
        foreach ($pidVal in $owners) {
            Write-Host "Killing process on port $port (PID: $pidVal)" -ForegroundColor Red
            Stop-Process -Id $pidVal -Force -ErrorAction SilentlyContinue
        }
    }
    Write-Host "Stopped." -ForegroundColor Green
}

function Start-App {
    # If either port is occupied, tear the previous instances down first.
    $backendActive = Get-NetTCPConnection -LocalPort $BackendPort -ErrorAction SilentlyContinue
    $frontendActive = Get-NetTCPConnection -LocalPort $FrontendPort -ErrorAction SilentlyContinue
    if ($backendActive -or $frontendActive) {
        Write-Host "Ports are already in use. Stopping existing instances..." -ForegroundColor Yellow
        Stop-App
    }

    Write-Host "Starting PureStream Backend..." -ForegroundColor Cyan
    # /k keeps each spawned CMD window open so logs stay visible.
    Start-Process "cmd.exe" -ArgumentList "/k title PureStream Backend & cd /d `"$BackendDir`" & `"$RootPath\.venv\Scripts\python.exe`" run_windows.py" -WindowStyle Normal

    Write-Host "Starting PureStream Frontend..." -ForegroundColor Cyan
    Start-Process "cmd.exe" -ArgumentList "/k title PureStream Frontend & cd /d `"$FrontendDir`" & npm run dev" -WindowStyle Normal

    Write-Host "PureStream is starting!" -ForegroundColor Green
    Write-Host "Backend API: http://localhost:$BackendPort"
    Write-Host "Frontend UI: http://localhost:$FrontendPort"
}

# Dispatch on the requested action (case-insensitive); unknown actions fall
# back to starting after printing usage.
$normalized = $Action.ToLower()
if ($normalized -eq "stop") {
    Stop-App
}
elseif ($normalized -eq "start") {
    Start-App
}
elseif ($normalized -eq "restart") {
    Stop-App
    Start-App
}
else {
    Write-Host "Usage: .\manage_app.ps1 [start|stop|restart]" -ForegroundColor Red
    Write-Host "Defaulting to 'start'..." -ForegroundColor Yellow
    Start-App
}

49
restart_app.sh Normal file
View file

@ -0,0 +1,49 @@
#!/bin/bash
# Restart the PureStream dev stack: free the known ports, then relaunch
# backend (uvicorn) and frontend (vite) in the background with logs on disk.

echo "🔄 Restarting PureStream WebApp..."

# Free a TCP port by force-killing its owner, if any.
kill_port() {
    PORT=$1
    if lsof -i:$PORT -t >/dev/null; then
        PID=$(lsof -ti:$PORT)
        echo "Killing process on port $PORT (PID: $PID)..."
        kill -9 $PID
    else
        echo "Port $PORT is free."
    fi
}

start_backend() {
    echo "🚀 Starting Backend (Port 8000)..."
    cd backend
    # NOTE(review): always uses python3; a project venv is not auto-detected.
    PYTHON_CMD="python3"
    nohup $PYTHON_CMD -m uvicorn main:app --reload --host 0.0.0.0 --port 8000 > ../backend.log 2>&1 &
    BACKEND_PID=$!
    echo "Backend started with PID $BACKEND_PID"
    cd ..
}

start_frontend() {
    echo "🎨 Starting Frontend (Port 8002)..."
    cd frontend
    nohup npm run dev -- --port 8002 --host > ../frontend.log 2>&1 &
    FRONTEND_PID=$!
    echo "Frontend started with PID $FRONTEND_PID"
    cd ..
}

# 1. Stop anything already bound to the ports this stack uses
#    (8000 backend, 8002/8003 frontend targets, 5173 vite default).
echo "🛑 Stopping services..."
for p in 8000 8002 8003 5173; do
    kill_port $p
done

# 2-3. Bring both services back up.
start_backend
start_frontend

echo "✅ App restarted successfully!"
echo "--------------------------------"
echo "Backend: http://localhost:8000"
echo "Frontend: http://localhost:8002"
echo "--------------------------------"
echo "Logs are being written to backend.log and frontend.log"
2
run_debug_search.ps1 Normal file
View file

@ -0,0 +1,2 @@
# Run the search-debug helper with the backend package importable.
# NOTE(review): interpreter and PYTHONPATH are hard-coded to one machine,
# and tests/debug_search.py is resolved relative to the current directory —
# run from the repo root, or parameterize these paths.
$env:PYTHONPATH = "c:\Users\Admin\Downloads\kv-tiktok\backend"
& "c:\Users\Admin\Downloads\kv-tiktok\.venv\Scripts\python.exe" tests/debug_search.py

3
start_app.bat Normal file
View file

@ -0,0 +1,3 @@
@echo off
REM Launch PureStream (backend + frontend) via the PowerShell manager.
REM %~dp0 = this script's directory, so double-clicking works from anywhere.
cd /d "%~dp0"
powershell -ExecutionPolicy Bypass -File manage_app.ps1 start

4
stop_app.bat Normal file
View file

@ -0,0 +1,4 @@
@echo off
REM Stop PureStream (kills processes on the backend/frontend ports).
REM %~dp0 = this script's directory, so double-clicking works from anywhere.
cd /d "%~dp0"
powershell -ExecutionPolicy Bypass -File manage_app.ps1 stop
REM Keep the window open so the stop output can be read.
pause

12
test_stealth.py Normal file
View file

@ -0,0 +1,12 @@
"""Smoke-check that playwright_stealth is importable in this interpreter.

Prints the interpreter path and sys.path first so a failed import can be
traced to the wrong virtualenv, then attempts both the package import and
the `stealth_async` symbol import. Output order is the diagnostic, so the
prints are deliberate.
"""
import sys
print(f"Python: {sys.executable}")
print(f"Path: {sys.path}")
try:
    import playwright_stealth
    print(f"Module: {playwright_stealth}")
    from playwright_stealth import stealth_async
    print("Import successful!")
except ImportError as e:
    # Package (or the expected symbol) is missing from this environment.
    print(f"Import failed: {e}")
except Exception as e:
    # Anything other than a missing module, e.g. a broken install.
    print(f"Error: {e}")

41
tests/debug_search.py Normal file
View file

@ -0,0 +1,41 @@
import json
import urllib.request
import urllib.parse
import os
import sys


def debug_search():
    """Hit the local /api/user/search endpoint and print what comes back.

    Pure console diagnostic: reports status, result source/count, and the
    first video (or an explicit error when the list is empty).
    """
    query = "hot trend"
    url = "http://localhost:8002/api/user/search?" + urllib.parse.urlencode(
        {"query": query, "limit": 10}
    )
    print(f"Testing search for: '{query}'")
    print(f"URL: {url}")
    try:
        with urllib.request.urlopen(urllib.request.Request(url), timeout=60) as response:
            status_code = response.getcode()
            print(f"Status Code: {status_code}")
            if status_code != 200:
                print(f"Error: Status {status_code}")
                return
            data = json.loads(response.read().decode('utf-8'))
        print(f"Source: {data.get('source')}")
        print(f"Count: {data.get('count')}")
        videos = data.get("videos", [])
        if videos:
            print(f"First video: {videos[0].get('id')} - {videos[0].get('desc', 'No desc')}")
        else:
            print("ERROR: No videos returned!")
    except urllib.error.HTTPError as e:
        # Non-2xx raised by urllib: show the status and the response body.
        print(f"HTTP Error: {e.code} - {e.reason}")
        print(e.read().decode('utf-8'))
    except Exception as e:
        # Connection refused, timeout, bad JSON, etc.
        print(f"Request failed: {e}")


if __name__ == "__main__":
    debug_search()

29
tests/inspect_html.py Normal file
View file

@ -0,0 +1,29 @@
from bs4 import BeautifulSoup
import re

# Offline inspection of a saved TikTok search-results page: locate every
# occurrence of the word "trend" in the DOM and print the (deduplicated)
# chains of enclosing tags, to discover which selectors hold the results.
with open("debug_search_page.html", "r", encoding="utf-8") as f:
    html = f.read()

soup = BeautifulSoup(html, "html.parser")

print("\n--- Searching for 'trend' text ---")
text_matches = soup.find_all(string=re.compile("trend", re.IGNORECASE))
print(f"Found {len(text_matches)} text matches.")


def describe_ancestry(node):
    """Render up to three ancestors of *node* as "<tag class='...'> -> ..."."""
    chain = []
    curr = node
    for _ in range(3):
        if curr:
            chain.append(f"<{curr.name} class='{'.'.join(curr.get('class', []))}'>")
            curr = curr.parent
    return " -> ".join(chain)


unique_parents = set()
for text in text_matches:
    parent = text.parent
    # Skip matches inside <script>/<style>; those are code, not content.
    if parent and parent.name != "script" and parent.name != "style":
        unique_parents.add(describe_ancestry(parent))

# Sample output only; set order is arbitrary.
for p in list(unique_parents)[:10]:
    print(p)

45
tests/parse_ssr_data.py Normal file
View file

@ -0,0 +1,45 @@
from bs4 import BeautifulSoup
import json


def find_keys(obj, target_key, path=""):
    """Walk *obj* recursively and print every dict key containing *target_key*.

    Comparison is case-insensitive; *path* accumulates a dotted/indexed
    breadcrumb so hits can be located inside the nested SSR payload.
    """
    if isinstance(obj, dict):
        for k, v in obj.items():
            current_path = f"{path}.{k}"
            if target_key.lower() in k.lower():
                print(f"Found key '{k}' at {current_path}")
            find_keys(v, target_key, current_path)
    elif isinstance(obj, list):
        for i, item in enumerate(obj):
            find_keys(item, target_key, f"{path}[{i}]")


# Pull TikTok's server-side-rendered JSON blob out of a saved search page
# and probe it for the keys that hold the video list.
with open("debug_search_page.html", "r", encoding="utf-8") as f:
    html = f.read()

soup = BeautifulSoup(html, "html.parser")
script = soup.find("script", id="__UNIVERSAL_DATA_FOR_REHYDRATION__")

if script:
    try:
        data = json.loads(script.string)
        print("Found SSR Data!")
        # Keep a pretty-printed copy on disk for manual digging.
        with open("ssr_data.json", "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2)
        # Guess at the video-list location by key-name probing.
        print("\nSearching for 'item' or 'list' keys...")
        find_keys(data, "item")
        find_keys(data, "list")
        # Known top-level container in TikTok's SSR payload.
        default_scope = data.get("__DEFAULT_SCOPE__", {})
        print(f"\nTop level keys: {list(default_scope.keys())}")
    except json.JSONDecodeError as e:
        print(f"JSON Error: {e}")
else:
    print("Script tag not found.")

View file

@ -1,30 +1,30 @@
import urllib.request import urllib.request
import json import json
try: try:
print("Testing /health...") print("Testing /health...")
with urllib.request.urlopen("http://localhost:8002/health", timeout=5) as r: with urllib.request.urlopen("http://localhost:8002/health", timeout=5) as r:
print(f"Health: {r.status}") print(f"Health: {r.status}")
print("Testing /api/feed...") print("Testing /api/feed...")
with open("temp_cookies.json", "r") as f: with open("temp_cookies.json", "r") as f:
data = json.load(f) data = json.load(f)
# Ensure list format # Ensure list format
if isinstance(data, dict) and "credentials" in data: if isinstance(data, dict) and "credentials" in data:
data = data["credentials"] data = data["credentials"]
# Prepare body as dict for safety with new Union type # Prepare body as dict for safety with new Union type
body = {"credentials": data} body = {"credentials": data}
req = urllib.request.Request( req = urllib.request.Request(
"http://localhost:8002/api/feed", "http://localhost:8002/api/feed",
data=json.dumps(body).encode('utf-8'), data=json.dumps(body).encode('utf-8'),
headers={'Content-Type': 'application/json'} headers={'Content-Type': 'application/json'}
) )
with urllib.request.urlopen(req, timeout=30) as r: with urllib.request.urlopen(req, timeout=30) as r:
print(f"Feed: {r.status}") print(f"Feed: {r.status}")
print(r.read().decode('utf-8')[:100]) print(r.read().decode('utf-8')[:100])
except Exception as e: except Exception as e:
print(f"Error: {e}") print(f"Error: {e}")

42
tests/test_crawl.py Normal file
View file

@ -0,0 +1,42 @@
import asyncio
import base64
from crawl4ai import AsyncWebCrawler


async def main():
    """Drive one Crawl4AI run against a TikTok search page and dump artifacts.

    On success writes crawl_screenshot.png (decoded from base64),
    crawl_debug.html and crawl_debug.md to the current directory.
    """
    print("Starting Crawl4AI test...")
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.tiktok.com/search?q=hot+trend"
        print(f"Crawling: {url}")
        # Wait for at least one search-result card before capturing; "magic"
        # enables Crawl4AI's anti-bot evasion heuristics.
        run_conf = {
            "url": url,
            "wait_for": "css:[data-e2e='search_video_item']",
            "css_selector": "[data-e2e='search_video_item']",
            "screenshot": True,
            "magic": True
        }
        print(f"Crawling with config: {run_conf}")
        result = await crawler.arun(**run_conf)

        if not result.success:
            print(f"Crawl failed: {result.error_message}")
            return

        print("Crawl successful!")
        print(f"HTML length: {len(result.html)}")
        if result.screenshot:
            with open("crawl_screenshot.png", "wb") as f:
                f.write(base64.b64decode(result.screenshot))
            print("Saved screenshot to crawl_screenshot.png")
        # Keep the raw HTML and the markdown rendering for offline inspection.
        with open("crawl_debug.html", "w", encoding="utf-8") as f:
            f.write(result.html)
        with open("crawl_debug.md", "w", encoding="utf-8") as f:
            f.write(result.markdown)


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -1,16 +1,16 @@
import requests import requests
import time import time
URL = "http://localhost:8002/api/auth/admin-login" URL = "http://localhost:8002/api/auth/admin-login"
def test_login(): def test_login():
print("Testing Admin Login...") print("Testing Admin Login...")
try: try:
res = requests.post(URL, json={"password": "admin123"}) res = requests.post(URL, json={"password": "admin123"})
print(f"Status: {res.status_code}") print(f"Status: {res.status_code}")
print(f"Response: {res.text}") print(f"Response: {res.text}")
except Exception as e: except Exception as e:
print(f"Error: {e}") print(f"Error: {e}")
if __name__ == "__main__": if __name__ == "__main__":
test_login() test_login()

View file

@ -1,30 +1,30 @@
import urllib.request import urllib.request
import json import json
import os import os
with open("temp_cookies.json", "r") as f: with open("temp_cookies.json", "r") as f:
data = json.load(f) data = json.load(f)
# Ensure data is in the expected dict format for the request body # Ensure data is in the expected dict format for the request body
if isinstance(data, list): if isinstance(data, list):
# If temp_cookies is just the list, wrap it # If temp_cookies is just the list, wrap it
body = {"credentials": data} body = {"credentials": data}
elif "credentials" not in data: elif "credentials" not in data:
body = {"credentials": data} body = {"credentials": data}
else: else:
body = data body = data
req = urllib.request.Request( req = urllib.request.Request(
"http://localhost:8002/api/feed", "http://localhost:8002/api/feed",
data=json.dumps(body).encode('utf-8'), data=json.dumps(body).encode('utf-8'),
headers={'Content-Type': 'application/json'} headers={'Content-Type': 'application/json'}
) )
try: try:
with urllib.request.urlopen(req) as response: with urllib.request.urlopen(req) as response:
print(response.read().decode('utf-8')) print(response.read().decode('utf-8'))
except urllib.error.HTTPError as e: except urllib.error.HTTPError as e:
print(f"HTTP Error: {e.code}") print(f"HTTP Error: {e.code}")
print(e.read().decode('utf-8')) print(e.read().decode('utf-8'))
except Exception as e: except Exception as e:
print(f"Error: {e}") print(f"Error: {e}")

View file

@ -1,35 +1,35 @@
import requests import requests
import json import json
import time import time
BASE_URL = "http://localhost:8002/api/user/search" BASE_URL = "http://localhost:8002/api/user/search"
def test_search(): def test_search():
print("Testing Search API...") print("Testing Search API...")
try: try:
# Simple query # Simple query
params = { params = {
"query": "dance", "query": "dance",
"limit": 50, "limit": 50,
"cursor": 0 "cursor": 0
} }
start = time.time() start = time.time()
res = requests.get(BASE_URL, params=params) res = requests.get(BASE_URL, params=params)
duration = time.time() - start duration = time.time() - start
print(f"Status Code: {res.status_code}") print(f"Status Code: {res.status_code}")
print(f"Duration: {duration:.2f}s") print(f"Duration: {duration:.2f}s")
if res.status_code == 200: if res.status_code == 200:
data = res.json() data = res.json()
print(f"Videos Found: {len(data.get('videos', []))}") print(f"Videos Found: {len(data.get('videos', []))}")
# print(json.dumps(data, indent=2)) # print(json.dumps(data, indent=2))
else: else:
print("Error Response:") print("Error Response:")
print(res.text) print(res.text)
except Exception as e: except Exception as e:
print(f"Request Failed: {e}") print(f"Request Failed: {e}")
if __name__ == "__main__": if __name__ == "__main__":
test_search() test_search()