feat: sync latest local changes (backend, frontend, scripts)

This commit is contained in:
Khoa Vo 2026-01-25 18:49:15 +07:00
parent 437f694cfb
commit 05beadb055
43 changed files with 3460 additions and 1877 deletions

View file

@ -7,14 +7,7 @@ COPY frontend/ ./
RUN npm run build
# Runtime Stage for Backend
FROM python:3.11-slim
# Install system dependencies required for Playwright and compiled extensions
RUN apt-get update && apt-get install -y \
curl \
git \
build-essential \
&& rm -rf /var/lib/apt/lists/*
FROM mcr.microsoft.com/playwright/python:v1.49.1-jammy
WORKDIR /app
@ -22,10 +15,6 @@ WORKDIR /app
COPY backend/requirements.txt backend/
RUN pip install --no-cache-dir -r backend/requirements.txt
# Install Playwright browsers (Chromium only to save space)
RUN playwright install chromium
RUN playwright install-deps chromium
# Copy Backend Code
COPY backend/ backend/

32
Dockerfile.dev Normal file
View file

@ -0,0 +1,32 @@
# PureStream Development Dockerfile
# Copies all files to avoid Synology Drive volume mount issues
# Playwright base image: ships Python 3 plus all browser system libraries,
# so no extra apt-get layer is needed.
FROM mcr.microsoft.com/playwright/python:v1.49.1-jammy
WORKDIR /app
# Copy backend files
COPY backend/ /app/backend/
# Copy pre-built frontend
COPY frontend/dist/ /app/frontend/dist/
# Create directories for cache and session
RUN mkdir -p /app/cache /app/session
# Install Python dependencies
WORKDIR /app/backend
RUN pip install --no-cache-dir -r requirements.txt && \
    pip install playwright-stealth && \
    playwright install chromium
# Environment variables
ENV PYTHONUNBUFFERED=1
ENV CACHE_DIR=/app/cache
ENV MAX_CACHE_SIZE_MB=500
ENV CACHE_TTL_HOURS=24
# NOTE(review): hard-coded default credential -- acceptable for local dev
# only; override ADMIN_PASSWORD at deploy time.
ENV ADMIN_PASSWORD=admin123
EXPOSE 8002
# Run the ASGI app via uvicorn, bound to all interfaces on port 8002.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8002"]

View file

@ -134,6 +134,77 @@ init_cache()
from typing import Optional, Any, Union, List, Dict
# ========== FEED METADATA CACHE ==========
# ========== FEED METADATA CACHE ==========
FEED_METADATA_CACHE = os.path.join(tempfile.gettempdir(), "feed_metadata.json")
METADATA_TTL_HOURS = 24  # Keep feed order for 24 hours (for instant load)

def load_cached_feed() -> Optional[List[dict]]:
    """Load cached feed metadata for instant startup.

    Returns:
        The cached list of video dicts, or None when the cache file is
        missing, older than METADATA_TTL_HOURS, or unreadable/corrupt.
    """
    if not os.path.exists(FEED_METADATA_CACHE):
        return None
    try:
        # Expire stale entries so a day-old feed is never served.
        if (time.time() - os.path.getmtime(FEED_METADATA_CACHE)) > (METADATA_TTL_HOURS * 3600):
            return None
        with open(FEED_METADATA_CACHE, "r") as f:
            return json.load(f)
    except (OSError, ValueError) as e:
        # Narrowed from a bare `except:` -- only I/O errors and JSON decode
        # errors (JSONDecodeError subclasses ValueError) are expected here;
        # anything else should surface instead of being silently swallowed.
        print(f"DEBUG: Failed to load feed metadata: {e}")
        return None
def save_cached_feed(videos: List[dict]):
    """Persist the feed metadata list to the on-disk cache (best effort).

    Failures are logged and ignored: a missing cache only costs the next
    startup a slower load, never a crash.
    """
    try:
        serialized = json.dumps(videos)
        with open(FEED_METADATA_CACHE, "w") as fh:
            fh.write(serialized)
    except Exception as e:
        print(f"DEBUG: Failed to save feed metadata: {e}")
# Import services for fallback aggregation
from core.tiktok_api_service import TikTokAPIService
from api.routes.user import get_fallback_accounts
async def generate_fallback_feed(limit: int = 5) -> List[dict]:
    """
    Build a feed by aggregating the latest video from several verified creators.
    Used when cache is empty and we want to avoid Playwright startup headers.
    """
    import random

    print("DEBUG: Generating fallback feed from verified users...")
    cookies, user_agent = PlaywrightManager.load_stored_credentials()

    # Randomize the verified-account pool so consecutive calls vary.
    pool = get_fallback_accounts()
    random.shuffle(pool)

    # Query up to 8 creators in parallel, one recent video each -- that is
    # enough to assemble a small "feed" without a browser.
    fetches = [
        TikTokAPIService.get_user_videos(
            account['username'],
            cookies=cookies,
            user_agent=user_agent,
            limit=1,
        )
        for account in pool[:8]
    ]
    outcomes = await asyncio.gather(*fetches, return_exceptions=True)

    # Keep only successful, non-empty results; exceptions are dropped.
    feed: List[dict] = []
    for outcome in outcomes:
        if isinstance(outcome, list) and outcome:
            feed.extend(outcome)

    # Shuffle so the feed does not always start with the same creator.
    random.shuffle(feed)
    print(f"DEBUG: Generated fallback feed with {len(feed)} videos")
    return feed[:limit]
class FeedRequest(BaseModel):
    """Request body for feed endpoint with optional JSON credentials."""
    # Cookie payload accepted in either dict or list form; None means the
    # client sent no credentials (presumably the server then falls back to
    # stored cookies -- confirm against the feed handler).
    credentials: Optional[Union[Dict, List]] = None
@ -171,12 +242,39 @@ async def get_feed_simple(fast: bool = False, skip_cache: bool = False):
# When skipping cache for infinite scroll, do more scrolling to get different videos
if skip_cache:
scroll_count = 8 # More scrolling to get fresh content
scroll_count = 8
# [OPTIMIZATION] Fast Load Strategy
if fast and not skip_cache:
# 1. Try Memory/Disk Cache first (Instant)
cached_feed = load_cached_feed()
if cached_feed:
print(f"DEBUG: Returning cached feed ({len(cached_feed)} videos)")
return cached_feed
# 2. Try Fallback Aggregation (Fast HTTP, no browser)
# This fetches real latest videos from top creators via direct API
try:
aggregated = await generate_fallback_feed(limit=5)
if aggregated:
save_cached_feed(aggregated)
return aggregated
except Exception as agg_err:
print(f"DEBUG: Aggregation fallback failed: {agg_err}")
# 3. Playwright Interception (Slowest, but guaranteed 'For You' algorithm)
videos = await PlaywrightManager.intercept_feed(scroll_count=scroll_count)
# Save successful result to cache for next time
if videos and len(videos) > 0 and not skip_cache:
save_cached_feed(videos)
return videos
except Exception as e:
print(f"DEBUG: Feed error: {e}")
# 4. Ultimate Fallback if everything fails (Verified users static list?)
# For now just re-raise, as UI handles empty state
raise HTTPException(status_code=500, detail=str(e))
@ -281,21 +379,29 @@ async def proxy_video(
if not os.path.exists(video_path):
raise Exception("Video file not created")
print(f"Downloaded codec: {video_codec} (no transcoding - client will decode)")
print(f"Downloaded codec: {video_codec}")
# Save to cache directly - NO TRANSCODING
# Save to cache directly - client-side player handles all formats
cached_path = save_to_cache(url, video_path)
stats = get_cache_stats()
print(f"CACHED: {url[:50]}... ({stats['files']} files, {stats['size_mb']}MB total)")
except Exception as e:
print(f"DEBUG: yt-dlp download failed: {e}")
print(f"DEBUG: yt-dlp download failed: {str(e)}")
# Cleanup
if cookie_file_path and os.path.exists(cookie_file_path):
os.unlink(cookie_file_path)
try:
os.unlink(cookie_file_path)
except:
pass
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir, ignore_errors=True)
raise HTTPException(status_code=500, detail=f"Could not download video: {e}")
try:
shutil.rmtree(temp_dir, ignore_errors=True)
except:
pass
# Return 422 for processing failure instead of 500 (server crash)
raise HTTPException(status_code=422, detail=f"Video processing failed: {str(e)}")
# Cleanup temp (cached file is separate)
if cookie_file_path and os.path.exists(cookie_file_path):
@ -393,4 +499,9 @@ async def thin_proxy_video(
except Exception as e:
print(f"Thin proxy error: {e}")
# Ensure cleanup if possible
raise HTTPException(status_code=500, detail=str(e))
if 'r' in locals():
await r.aclose()
if 'client' in locals():
await client.aclose()
raise HTTPException(status_code=502, detail=f"Upstream Proxy Error: {str(e)}")

View file

@ -7,8 +7,12 @@ from pydantic import BaseModel
from typing import Optional, List
import httpx
import asyncio
import time
import re
from typing import Optional, List
from core.playwright_manager import PlaywrightManager
from core.tiktok_api_service import TikTokAPIService
router = APIRouter()
@ -112,7 +116,7 @@ async def get_user_videos(
):
"""
Fetch videos from a TikTok user's profile.
Uses Playwright to crawl the user's page for reliable results.
Uses direct API calls for speed (~100-500ms), with Playwright fallback.
"""
username = username.replace("@", "")
@ -123,10 +127,25 @@ async def get_user_videos(
raise HTTPException(status_code=401, detail="Not authenticated")
print(f"Fetching videos for @{username}...")
start_time = time.time()
# Try fast API first
try:
videos = await TikTokAPIService.get_user_videos(username, cookies, user_agent, limit)
if videos:
duration = time.time() - start_time
print(f"[API] Got {len(videos)} videos in {duration:.2f}s")
return {"username": username, "videos": videos, "count": len(videos), "source": "api", "duration_ms": int(duration * 1000)}
except Exception as e:
print(f"[API] Failed for {username}: {e}")
# Fallback to Playwright if API fails or returns empty
print(f"[Fallback] Using Playwright for @{username}...")
try:
videos = await PlaywrightManager.fetch_user_videos(username, cookies, user_agent, limit)
return {"username": username, "videos": videos, "count": len(videos)}
duration = time.time() - start_time
print(f"[Playwright] Got {len(videos)} videos in {duration:.2f}s")
return {"username": username, "videos": videos, "count": len(videos), "source": "playwright", "duration_ms": int(duration * 1000)}
except Exception as e:
print(f"Error fetching videos for {username}: {e}")
raise HTTPException(status_code=500, detail=str(e))
@ -140,7 +159,7 @@ async def search_videos(
):
"""
Search for videos by keyword or hashtag.
Uses Playwright to crawl TikTok search results for reliable data.
Uses direct API calls for speed (~200-800ms), with Playwright fallback.
"""
# Load stored credentials
cookies, user_agent = PlaywrightManager.load_stored_credentials()
@ -149,13 +168,157 @@ async def search_videos(
raise HTTPException(status_code=401, detail="Not authenticated")
print(f"Searching for: {query} (limit={limit}, cursor={cursor})...")
start_time = time.time()
# [Smart Search] Username Detection Priority
# If query looks like a username (contains dots, underscores, or starts with @),
# try fetching that specific user's videos FIRST.
# This solves issues where searching for exact username returns unrelated content.
clean_query = query.strip()
# Handle @ prefix commonly used by users - STRICT MODE
# If user explicitly types "@", they want a user lookup, NOT a keyword search.
strict_user_lookup = False
if clean_query.startswith("@"):
clean_query = clean_query[1:]
strict_user_lookup = True
# Also treat dots/underscores as likely usernames
is_username_format = bool(re.match(r"^[a-zA-Z0-9_\.]+$", clean_query)) and len(clean_query) > 2
# DEBUG LOGGING TO FILE
try:
with open("search_debug.log", "a", encoding="utf-8") as f:
f.write(f"\n--- Search: {query} ---\n")
f.write(f"Strict: {strict_user_lookup}, Format: {is_username_format}, Clean: {clean_query}\n")
except: pass
if is_username_format or strict_user_lookup:
print(f"[Smart Search] Query '{query}' identified as username. Strict: {strict_user_lookup}")
try:
# Try direct profile fetch via API
videos = await TikTokAPIService.get_user_videos(clean_query, cookies, user_agent, limit)
if videos:
duration = time.time() - start_time
print(f"[API-Profile-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_priority", "duration_ms": int(duration * 1000)}
# Try Playwright fallback BEFORE yt-dlp
# Playwright scraping provides thumbnails and correct metadata, while yt-dlp flat-playlist does not.
print(f"[Smart Search] API failed, trying Playwright for user '{query}'...")
try:
videos = await PlaywrightManager.fetch_user_videos(clean_query, cookies, user_agent, limit)
if videos:
duration = time.time() - start_time
print(f"[Playwright-Profile-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_playwright_priority", "duration_ms": int(duration * 1000)}
except Exception as pw_err:
print(f"[Smart Search] Playwright profile fetch failed: {pw_err}")
# Try yt-dlp fallback if Playwright also fails
print(f"[Smart Search] Playwright failed, trying yt-dlp for user '{query}'...")
# Log we are trying ytdlp
try:
with open("search_debug.log", "a", encoding="utf-8") as f:
f.write(f"Attempting yt-dlp for {clean_query}...\n")
except: pass
videos = await TikTokAPIService.get_user_videos_via_ytdlp(clean_query, limit)
try:
with open("search_debug.log", "a", encoding="utf-8") as f:
f.write(f"yt-dlp Result: {len(videos)} videos\n")
except: pass
if videos:
duration = time.time() - start_time
print(f"[yt-dlp-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_ytdlp", "duration_ms": int(duration * 1000)}
# If strict usage of "@" was used and we found nothing, DO NOT fallback to generic search.
# It's better to show "No videos found" than random unrelated results.
if strict_user_lookup:
print(f"[Smart Search] Strict lookup for '{query}' found no results. Returning empty.")
return {"query": query, "videos": [], "count": 0, "cursor": 0, "source": "user_not_found_strict", "duration_ms": int((time.time() - start_time) * 1000)}
except Exception as e:
print(f"[Smart Search] Priority profile fetch failed: {e}")
if strict_user_lookup:
return {"query": query, "videos": [], "count": 0, "cursor": 0, "source": "error_strict", "duration_ms": int((time.time() - start_time) * 1000)}
# Fall through to normal search only if NOT strict
# Try fast API search
try:
videos = await TikTokAPIService.search_videos(query, cookies, user_agent, limit, cursor)
if videos:
duration = time.time() - start_time
print(f"[API] Found {len(videos)} videos in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "api", "duration_ms": int(duration * 1000)}
except Exception as e:
print(f"[API] Search failed for {query}: {e}")
# Fallback Phase 1: Check if query is a "trending" misspelling and retry API if so
# Regex for: hot, trend, trens, hor, hott, trand, etc.
trend_pattern = r"(hot|hor|hott)\s*(trend|trens|trand|tred)|(trend|trens|trand)"
is_trend_query = bool(re.search(trend_pattern, query.lower()))
if is_trend_query and (not videos):
print(f"[Smart Fallback] Query '{query}' detected as trending request. Retrying with 'hot trend'...")
try:
# Try normalized query on API
videos = await TikTokAPIService.search_videos("hot trend", cookies, user_agent, limit, cursor)
if videos:
duration = time.time() - start_time
print(f"[API-Fallback] Found {len(videos)} videos for 'hot trend' in {duration:.2f}s")
return {"query": "hot trend", "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "api_fallback", "duration_ms": int(duration * 1000)}
except Exception:
pass # Continue to Playwright if this fails
# Fallback Phase 2: Playwright
# Fallback to Playwright if API fails or returns empty
print(f"[Fallback] Using Playwright for search '{query}'...")
try:
videos = await PlaywrightManager.search_videos(query, cookies, user_agent, limit, cursor)
return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos)}
# Smart Fallback Phase 3: If Playwright also fails for trending query, try normalized query
if not videos and is_trend_query:
print(f"[Playwright-Fallback] No results for '{query}'. Retrying with 'hot trend'...")
videos = await PlaywrightManager.search_videos("hot trend", cookies, user_agent, limit, cursor)
except Exception as e:
print(f"Error searching for {query}: {e}")
raise HTTPException(status_code=500, detail=str(e))
# Don't raise yet, try user fallback
pass
# Fallback Phase 4: Exact Username Match (Secondary Fallback)
# If generic search failed, and query looks like a username, try fetching their profile directly (if not tried already)
# Note: We already tried this at the top, but we try again here with Playwright as a backup if the API profile fetch failed earlier.
if (not videos) and is_username_format:
print(f"[Smart Fallback] Query '{query}' yielded no search results. Attempting secondary profile fetch (Playwright)...")
try:
# We already tried API profile fetch at start, so try Playwright now
print(f"[Smart Fallback] API failed, trying Playwright for user '{query}'...")
videos = await PlaywrightManager.fetch_user_videos(query, cookies, user_agent, limit)
if videos:
duration = time.time() - start_time
print(f"[Playwright-Profile] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_playwright", "duration_ms": int(duration * 1000)}
except Exception as e:
print(f"[Smart Fallback] Profile fetch failed: {e}")
pass
if not videos:
# Only raise error if we truly found nothing after all attempts
# or return empty list instead of 500?
# A 500 implies server broken. Empty list implies no results.
# Let's return empty structure to be safe for frontend
return {"query": query, "videos": [], "count": 0, "cursor": cursor, "source": "empty"}
return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "playwright", "duration_ms": int((time.time() - start_time) * 1000)}
# Cache for suggested accounts
@ -178,7 +341,6 @@ async def get_suggested_accounts(
# Check cache
if _suggested_cache["accounts"] and (time.time() - _suggested_cache["updated_at"]) < CACHE_TTL:
print("Returning cached suggested accounts")
return {"accounts": _suggested_cache["accounts"][:limit], "cached": True}
# Load stored credentials
@ -191,17 +353,24 @@ async def get_suggested_accounts(
print("Fetching fresh suggested accounts from TikTok...")
try:
accounts = await PlaywrightManager.fetch_suggested_accounts(cookies, user_agent, limit)
# Enforce a strict timeout to prevent hanging or heavy resource usage blocking the server
# If Playwright takes > 15 seconds, we default to fallback.
try:
accounts = await asyncio.wait_for(
PlaywrightManager.fetch_suggested_accounts(cookies, user_agent, limit),
timeout=15.0
)
except asyncio.TimeoutError:
print("Suggest fetch timed out, using fallback.")
accounts = []
if accounts and len(accounts) >= 5: # Need at least 5 accounts from dynamic fetch
_suggested_cache["accounts"] = accounts
_suggested_cache["updated_at"] = time.time()
return {"accounts": accounts[:limit], "cached": False}
else:
# Fallback: fetch actual profile data with avatars for static list
print("Dynamic fetch failed, fetching profile data for static accounts...")
fallback_list = get_fallback_accounts()[:min(limit, 20)] # Limit to 20 for speed
return await fetch_profiles_with_avatars(fallback_list, cookies, user_agent)
# Just return static accounts directly without API calls - TikTok API is unreliable
return {"accounts": get_fallback_accounts()[:limit], "cached": False, "fallback": True}
except Exception as e:
print(f"Error fetching suggested accounts: {e}")

View file

@ -7,6 +7,23 @@ class DownloadService:
self.download_dir = "downloads"
if not os.path.exists(self.download_dir):
os.makedirs(self.download_dir)
# Auto-update yt-dlp on startup (Disabled for stability/speed)
# self.update_ytdlp()
def update_ytdlp(self):
    """
    Auto-update yt-dlp to the latest nightly/pre-release version.

    Best effort: any failure (network, pip, permissions) is logged and
    swallowed so startup is never blocked by an update problem.
    """
    try:
        print("Checking for yt-dlp updates (nightly)...")
        import subprocess
        import sys

        # Invoke pip through the current interpreter so the correct
        # environment is upgraded.
        cmd = [
            sys.executable, "-m", "pip", "install",
            "-U", "--pre", "yt-dlp", "--break-system-packages",
        ]
        subprocess.check_call(cmd)
        print("yt-dlp updated successfully.")
    except Exception as e:
        print(f"Failed to update yt-dlp: {e}")
async def download_video(self, url: str) -> dict:
"""

View file

@ -17,9 +17,14 @@ from playwright.async_api import async_playwright, Response, Browser, BrowserCon
try:
from playwright_stealth import stealth_async
except ImportError:
print("WARNING: playwright_stealth not found, disabling stealth mode.")
async def stealth_async(page):
pass
try:
from playwright_stealth import Stealth
async def stealth_async(page):
await Stealth().apply_stealth_async(page)
except ImportError:
print("WARNING: playwright_stealth not found, disabling stealth mode.")
async def stealth_async(page):
pass
COOKIES_FILE = "cookies.json"
@ -43,10 +48,18 @@ class PlaywrightManager:
"--start-maximized"
]
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
DEFAULT_USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
# Use installed Chrome instead of Playwright's Chromium (avoids slow download)
CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
import platform
import os
# Check if running on macOS
if platform.system() == "Darwin" and os.path.exists("/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"):
CHROME_PATH = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
else:
# On Linux/Docker, use Playwright's bundled Chromium (None lets Playwright decide)
CHROME_PATH = None
# VNC login state (class-level to persist across requests)
_vnc_playwright = None
@ -515,8 +528,26 @@ class PlaywrightManager:
try:
data = await response.json()
# TikTok returns videos in "itemList" or "aweme_list"
items = data.get("itemList", []) or data.get("aweme_list", [])
# TikTok returns videos in various nested formats
items = []
# Try direct itemList first
if data.get("itemList") and isinstance(data["itemList"], list):
items = data["itemList"]
elif data.get("aweme_list") and isinstance(data["aweme_list"], list):
items = data["aweme_list"]
# Try nested data structure
elif data.get("data"):
nested_data = data["data"]
if isinstance(nested_data, list):
for item in nested_data:
if isinstance(item, dict):
if "item" in item and isinstance(item["item"], dict):
items.append(item["item"])
else:
items.append(item)
elif isinstance(nested_data, dict):
items = nested_data.get("itemList", []) or nested_data.get("aweme_list", [])
for item in items:
video_data = PlaywrightManager._extract_video_data(item)
@ -744,10 +775,68 @@ class PlaywrightManager:
# Wait for videos to load
await asyncio.sleep(2)
# Scroll a bit to trigger more video loading
await page.evaluate("window.scrollBy(0, 500)")
await asyncio.sleep(1)
# Scroll loop to ensure we get enough videos
scroll_attempts = 0
last_count = 0
max_scrolls = 20 # Prevent infinite loops
while len(captured_videos) < limit and scroll_attempts < max_scrolls:
print(f"DEBUG: Scrolling profile (Current: {len(captured_videos)}/{limit})...")
await page.evaluate("window.scrollBy(0, 800)")
await asyncio.sleep(1.5) # Wait for network/DOM
# DOM Fallback check inside loop (for hybrid loading)
if len(captured_videos) == last_count:
# If count didn't increase via network, try scraping DOM again
# This handles cases where TikTok renders new items in DOM without standard API
# (Unlikely for infinite scroll, but good safety)
pass
last_count = len(captured_videos)
scroll_attempts += 1
# DOM Fallback: If no API captured (SSR case), scrape from DOM
if len(captured_videos) == 0:
print("DEBUG: No API response for user videos, trying DOM scrape (SSR)...")
video_elements = await page.locator('div[data-e2e="user-post-item"]').all()
for el in video_elements:
if len(captured_videos) >= limit:
break
try:
# Extract data from DOM attributes/links
url = await el.locator("a").get_attribute("href")
desc = await el.locator("img").get_attribute("alt")
# Try to find specific img for cover
# Often the img alt is the description
if url:
# Parse video ID and author from URL
# Format: https://www.tiktok.com/@user/video/123456...
if "/video/" in url:
parts = url.split("/video/")
vid_id = parts[1].split("?")[0] if len(parts) > 1 else ""
# We already know the author from the function arg, but can verify
# Construct basic video object
dom_video = {
"id": vid_id,
"url": url,
"author": username,
"description": desc or f"Video by @{username}",
"views": 0, # Cannot easily get from list view DOM
"likes": 0
}
# Try to get thumbnail info
thumb = await el.locator("img").get_attribute("src")
if thumb:
dom_video["thumbnail"] = thumb
captured_videos.append(dom_video)
except Exception as el_err:
print(f"DEBUG: Error extracting DOM item: {el_err}")
except Exception as e:
print(f"DEBUG: Error navigating to profile: {e}")
@ -760,59 +849,19 @@ class PlaywrightManager:
async def search_videos(query: str, cookies: list, user_agent: str = None, limit: int = 20, cursor: int = 0) -> list:
"""
Search for videos by keyword or hashtag.
Uses Playwright to intercept TikTok search results API.
Args:
query: Search query
cookies: Auth cookies
user_agent: Browser user agent
limit: Max videos to capture in this batch
cursor: Starting offset for pagination
Optimized: Uses page.evaluate to fetch specific offsets via internal API.
"""
from playwright.async_api import async_playwright, Response
from playwright.async_api import async_playwright
from urllib.parse import quote
import json
if not user_agent:
user_agent = PlaywrightManager.DEFAULT_USER_AGENT
if not cookies:
print("DEBUG: No cookies available for search")
return []
print(f"DEBUG: Searching for '{query}' (limit={limit}, cursor={cursor})...")
captured_videos = []
async def handle_response(response: Response):
"""Capture search results API responses."""
nonlocal captured_videos
url = response.url
# Look for search results API
if "search" in url and ("item_list" in url or "video" in url or "general" in url):
try:
data = await response.json()
# Try different response formats
items = data.get("itemList", []) or data.get("data", []) or data.get("item_list", [])
for item in items:
# If we have enough for this specific batch, we don't need more
if len(captured_videos) >= limit:
break
video_data = PlaywrightManager._extract_video_data(item)
if video_data:
# Avoid duplicates within the same capture session
if not any(v['id'] == video_data['id'] for v in captured_videos):
captured_videos.append(video_data)
print(f"DEBUG: Captured {len(items)} videos from search API (Total batch: {len(captured_videos)})")
except Exception as e:
print(f"DEBUG: Error parsing search API response: {e}")
async with async_playwright() as p:
browser = await p.chromium.launch(
headless=True,
@ -820,51 +869,69 @@ class PlaywrightManager:
args=PlaywrightManager.BROWSER_ARGS
)
context = await browser.new_context(user_agent=user_agent)
await context.add_cookies(cookies)
context = await browser.new_context(
user_agent=user_agent,
viewport={"width": 1280, "height": 720}
)
if cookies:
await context.add_cookies(cookies)
page = await context.new_page()
await stealth_async(page)
page.on("response", handle_response)
try:
# Navigate to TikTok search page
search_url = f"https://www.tiktok.com/search/video?q={quote(query)}"
try:
await page.goto(search_url, wait_until="domcontentloaded", timeout=15000)
except:
print("DEBUG: Navigation timeout, proceeding anyway")
# 1. Navigate to Search Page to initialize session/state
# We don't need to wait for full load if we are just going to fetch
search_url = f"https://www.tiktok.com/search?q={quote(query)}"
await page.goto(search_url, wait_until="domcontentloaded", timeout=20000)
# Wait for initial results
await asyncio.sleep(3)
# 2. If cursor > 0 (or always), Try to fetch API directly from browser context
# This leverages the browser's valid session/signature generation
print(f"DEBUG: Executing internal API fetch for offset {cursor}...")
# Scroll based on cursor to reach previous results and then capture new ones
# Each scroll typically loads 12-20 items
# We scroll more as the cursor increases
scroll_count = (cursor // 10) + 1
# Limit total scrolls to avoid hanging
scroll_count = min(scroll_count, 10)
api_script = f"""
async () => {{
const url = "https://www.tiktok.com/api/search/general/full/?keyword={quote(query)}&offset={cursor}&count={limit}&search_source=normal_search&is_filter_search=0";
try {{
const res = await fetch(url);
return await res.json();
}} catch (e) {{
return {{ error: e.toString() }};
}}
}}
"""
for i in range(scroll_count):
await page.evaluate("window.scrollBy(0, 1500)")
await asyncio.sleep(1.5)
# After reaching the offset, scroll a bit more to trigger the specific batch capture
batch_scrolls = (limit // 10) + 2 # Add extra scrolls to be safe
for _ in range(batch_scrolls):
await page.evaluate("window.scrollBy(0, 2000)") # Larger scroll
await asyncio.sleep(1.0) # Faster scroll cadence
# Wait a bit after scrolling for all responses to settle
await asyncio.sleep(2.5)
data = await page.evaluate(api_script)
# 3. Parse Results
if data and "error" not in data:
items = []
# Try data list directly (general search)
if data.get("data") and isinstance(data["data"], list):
for item in data["data"]:
if isinstance(item, dict):
if "item" in item:
items.append(item["item"])
elif "aweme" in item:
items.append(item["aweme"])
elif "type" in item and item["type"] == 1: # Video type
items.append(item)
# Try itemList (item search)
elif data.get("itemList"):
items = data["itemList"]
elif data.get("item_list"):
items = data["item_list"]
except Exception as e:
print(f"DEBUG: Error during search: {e}")
print(f"DEBUG: Search navigation error: {e}")
await browser.close()
print(f"DEBUG: Total captured search videos in this batch: {len(captured_videos)}")
print(f"DEBUG: Total captured search videos: {len(captured_videos)}")
return captured_videos
@staticmethod
async def fetch_suggested_accounts(cookies: list, user_agent: str = None, limit: int = 50) -> list:

View file

@ -0,0 +1,450 @@
"""
TikTok Direct API Service - Fast API calls without browser automation.
Replaces Playwright crawling with direct HTTP requests to TikTok's internal APIs.
Expected performance: ~100-500ms vs 5-15 seconds with Playwright.
"""
import httpx
import asyncio
from typing import List, Optional, Dict, Any
from urllib.parse import quote
from core.playwright_manager import PlaywrightManager
class TikTokAPIService:
"""
Direct TikTok API calls for instant data retrieval.
Key endpoints used:
- /api/user/detail/?uniqueId={username} - Get user profile and secUid
- /api/post/item_list/?secUid={secUid}&count={count} - Get user's videos
- /api/search/general/full/?keyword={query} - Search videos
"""
BASE_URL = "https://www.tiktok.com"
DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
@staticmethod
def _build_headers(cookies: List[dict], user_agent: str = None) -> dict:
    """Assemble browser-like request headers carrying the auth cookies.

    Args:
        cookies: List of {"name": ..., "value": ...} cookie dicts.
        user_agent: Optional UA override; defaults to DEFAULT_USER_AGENT.
    """
    # Serialize cookies into a single Cookie header value.
    cookie_str = "; ".join(f"{c['name']}={c['value']}" for c in cookies)
    headers = {
        "User-Agent": user_agent or TikTokAPIService.DEFAULT_USER_AGENT,
        "Referer": "https://www.tiktok.com/",
        "Cookie": cookie_str,
        "Accept": "application/json, text/plain, */*",
        "Accept-Language": "en-US,en;q=0.9",
        "sec-ch-ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
    }
    return headers
@staticmethod
def _extract_video_data(item: dict) -> Optional[dict]:
    """
    Extract video data from TikTok API response item.
    Matches the format used by PlaywrightManager._extract_video_data().

    Returns a dict with at least id/url/author/description, plus optional
    thumbnail, cdn_url, stats and a shop flag; None when the item cannot
    be parsed.
    """
    try:
        if not isinstance(item, dict):
            return None
        # Fall back across both key spellings seen in responses
        # ("id"/"aweme_id", camelCase/snake_case).
        video_id = item.get("id") or item.get("aweme_id")
        # Get author info
        author_data = item.get("author", {})
        author = author_data.get("uniqueId") or author_data.get("unique_id") or "unknown"
        # Get description
        desc = item.get("desc") or item.get("description") or ""
        # Check if this is a product/shop video
        is_shop_video = bool(item.get("products") or item.get("commerce_info") or item.get("poi_info"))
        # Get thumbnail/cover image -- first non-empty source wins.
        thumbnail = None
        video_data = item.get("video", {})
        thumbnail_sources = [
            video_data.get("cover"),
            video_data.get("dynamicCover"),
            video_data.get("originCover"),
        ]
        for src in thumbnail_sources:
            if src:
                thumbnail = src
                break
        # Get direct CDN URL (playAddr preferred over downloadAddr).
        cdn_url = None
        cdn_sources = [
            video_data.get("playAddr"),
            video_data.get("downloadAddr"),
        ]
        for src in cdn_sources:
            if src:
                cdn_url = src
                break
        # Video page URL
        video_url = f"https://www.tiktok.com/@{author}/video/{video_id}"
        # Get stats (both camelCase and snake_case variants, defaulting to 0)
        stats = item.get("stats", {}) or item.get("statistics", {})
        views = stats.get("playCount") or stats.get("play_count") or 0
        likes = stats.get("diggCount") or stats.get("digg_count") or 0
        comments = stats.get("commentCount") or stats.get("comment_count") or 0
        shares = stats.get("shareCount") or stats.get("share_count") or 0
        if video_id and author:
            # Description is truncated to 200 chars to keep payloads small.
            result = {
                "id": str(video_id),
                "url": video_url,
                "author": author,
                "description": desc[:200] if desc else f"Video by @{author}"
            }
            # Optional fields are only attached when truthy, so zero stats
            # and missing URLs are omitted from the payload entirely.
            if thumbnail:
                result["thumbnail"] = thumbnail
            if cdn_url:
                result["cdn_url"] = cdn_url
            if views:
                result["views"] = views
            if likes:
                result["likes"] = likes
            if comments:
                result["comments"] = comments
            if shares:
                result["shares"] = shares
            if is_shop_video:
                result["has_product"] = True
            return result
    except Exception as e:
        # Malformed items are logged and dropped rather than failing the batch.
        print(f"DEBUG: Error extracting video data: {e}")
    # Reached when parsing failed or id/author were missing.
    return None
@staticmethod
async def get_user_sec_uid(username: str, cookies: List[dict], user_agent: str = None) -> Optional[str]:
    """
    Resolve a username to its secUid via the profile-detail endpoint.

    secUid is the opaque user key required by the video-list API.
    Returns None on HTTP failure, a missing secUid, or any exception.
    """
    profile_url = f"{TikTokAPIService.BASE_URL}/api/user/detail/?uniqueId={username}"
    request_headers = TikTokAPIService._build_headers(cookies, user_agent)
    try:
        async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
            resp = await client.get(profile_url, headers=request_headers)
            if resp.status_code != 200:
                print(f"DEBUG: Failed to get user profile, status: {resp.status_code}")
                return None
            payload = resp.json()
            sec_uid = payload.get("userInfo", {}).get("user", {}).get("secUid")
            if sec_uid:
                # Truncate in the log: secUids are long opaque tokens.
                print(f"DEBUG: Got secUid for @{username}: {sec_uid[:20]}...")
                return sec_uid
    except Exception as e:
        print(f"DEBUG: Error getting secUid for {username}: {e}")
    return None
@staticmethod
async def get_user_videos(
    username: str,
    cookies: List[dict],
    user_agent: str = None,
    limit: int = 20,
    cursor: int = 0
) -> List[dict]:
    """
    Fetch videos from a user's profile using a direct API call.

    Resolves the user's secUid first, then queries the item_list endpoint.

    Args:
        username: TikTok username (without @)
        cookies: Auth cookies list
        user_agent: Browser user agent
        limit: Max videos to return
        cursor: Pagination cursor for more videos

    Returns:
        List of video dictionaries (empty on any failure).
    """
    print(f"DEBUG: [API] Fetching videos for @{username} (limit={limit})...")

    # The item_list endpoint is keyed by secUid, not by username.
    sec_uid = await TikTokAPIService.get_user_sec_uid(username, cookies, user_agent)
    if not sec_uid:
        print(f"DEBUG: [API] Could not get secUid for @{username}")
        return []

    request_headers = TikTokAPIService._build_headers(cookies, user_agent)
    list_url = (
        f"{TikTokAPIService.BASE_URL}/api/post/item_list/?"
        f"secUid={quote(sec_uid)}&"
        f"count={min(limit, 35)}&"  # TikTok max per request is ~35
        f"cursor={cursor}"
    )

    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            resp = await client.get(list_url, headers=request_headers)
            if resp.status_code != 200:
                print(f"DEBUG: [API] Video list failed, status: {resp.status_code}")
                return []

            payload = resp.json()
            # Web API returns "itemList"; some variants use "aweme_list".
            raw_items = payload.get("itemList", []) or payload.get("aweme_list", [])
            parsed = [
                video for video in (
                    TikTokAPIService._extract_video_data(entry)
                    for entry in raw_items[:limit]
                )
                if video
            ]
            print(f"DEBUG: [API] Successfully fetched {len(parsed)} videos for @{username}")
            return parsed
    except Exception as e:
        print(f"DEBUG: [API] Error fetching videos for {username}: {e}")
        return []
@staticmethod
async def search_videos(
    query: str,
    cookies: List[dict],
    user_agent: str = None,
    limit: int = 20,
    cursor: int = 0
) -> List[dict]:
    """
    Search for videos using a direct API call.

    Tries the "general" search endpoint first; on HTTP failure, empty
    results, or any exception it falls back to the video-specific
    endpoint via _search_videos_alt().

    Args:
        query: Search keyword or hashtag
        cookies: Auth cookies list
        user_agent: Browser user agent
        limit: Max videos to return
        cursor: Pagination offset

    Returns:
        List of video dictionaries (possibly empty).
    """
    print(f"DEBUG: [API] Searching for '{query}' (limit={limit}, cursor={cursor})...")
    request_headers = TikTokAPIService._build_headers(cookies, user_agent)

    # The web_search_code blob mirrors what the official web client sends.
    search_url = (
        f"{TikTokAPIService.BASE_URL}/api/search/general/full/?"
        f"keyword={quote(query)}&"
        f"offset={cursor}&"
        f"search_source=normal_search&"
        f"is_filter_search=0&"
        f"web_search_code=%7B%22tiktok%22%3A%7B%22client_params_x%22%3A%7B%22search_engine%22%3A%7B%22ies_mt_user_live_video_card_use_498%22%3A1%7D%7D%7D%7D"
    )

    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            resp = await client.get(search_url, headers=request_headers)
            if resp.status_code != 200:
                print(f"DEBUG: [API] Search failed, status: {resp.status_code}")
                # Try alternative search endpoint
                return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)

            payload = resp.json()
            # Response shape varies; probe the known container keys in order.
            entries = (
                payload.get("data", [])
                or payload.get("itemList", [])
                or payload.get("item_list", [])
            )

            found = []
            for entry in entries[:limit]:
                # General-search results may wrap the video under "item".
                candidate = entry.get("item", entry)
                parsed = TikTokAPIService._extract_video_data(candidate)
                if parsed:
                    found.append(parsed)

            if found:
                print(f"DEBUG: [API] Successfully found {len(found)} videos for '{query}'")
                return found
            # Fallback to alternative endpoint
            return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)
    except Exception as e:
        print(f"DEBUG: [API] Error searching for {query}: {e}")
        return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)
@staticmethod
async def _search_videos_alt(
    query: str,
    cookies: List[dict],
    user_agent: str = None,
    limit: int = 20,
    cursor: int = 0
) -> List[dict]:
    """
    Alternative search using the video-specific endpoint.

    Last-resort fallback for search_videos(); returns [] on any failure.
    """
    print("DEBUG: [API] Trying alternative search endpoint...")
    request_headers = TikTokAPIService._build_headers(cookies, user_agent)
    alt_url = (
        f"{TikTokAPIService.BASE_URL}/api/search/item/full/?"
        f"keyword={quote(query)}&"
        f"offset={cursor}&"
        f"count={min(limit, 30)}"
    )
    try:
        async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
            resp = await client.get(alt_url, headers=request_headers)
            if resp.status_code != 200:
                print(f"DEBUG: [API] Alt search also failed, status: {resp.status_code}")
                return []
            payload = resp.json()
            # Same container-key probing as the primary endpoint.
            entries = (
                payload.get("itemList", [])
                or payload.get("item_list", [])
                or payload.get("data", [])
            )
            found = [
                parsed for parsed in (
                    TikTokAPIService._extract_video_data(entry)
                    for entry in entries[:limit]
                )
                if parsed
            ]
            print(f"DEBUG: [API] Alt search found {len(found)} videos")
            return found
    except Exception as e:
        print(f"DEBUG: [API] Alt search error: {e}")
        return []
@staticmethod
async def get_user_videos_via_ytdlp(username: str, limit: int = 20) -> List[dict]:
    """
    Fetch user videos using yt-dlp (robust fallback).

    Spawns `yt-dlp --dump-json` on the user's profile page and maps each
    JSON line on stdout to the internal video dict format.

    Args:
        username: TikTok username (without @).
        limit: Max videos to return (passed as --playlist-end).

    Returns:
        List of video dictionaries (empty on failure).
    """
    print(f"DEBUG: [yt-dlp] Fetching videos for @{username}...")
    import json
    import os
    import sys

    def get_yt_dlp_path():
        # Prefer the executable that lives next to the current Python
        # interpreter (virtualenv install); check both the Windows and
        # POSIX binary names instead of only 'yt-dlp.exe'.
        bin_dir = os.path.dirname(sys.executable)
        for exe_name in ("yt-dlp.exe", "yt-dlp"):
            candidate = os.path.join(bin_dir, exe_name)
            if os.path.exists(candidate):
                return candidate
        # Fall back to whatever is on PATH.
        return 'yt-dlp'

    # FIX: the original code was missing the `cmd = [...]` assignment,
    # leaving the argument expressions dangling; rebuilt as an argv list.
    cmd = [
        get_yt_dlp_path(),
        f"https://www.tiktok.com/@{username}",
        # "--flat-playlist" disabled to get full metadata (thumbnails)
        "--skip-download",  # Don't download video files
        "--dump-json",      # One JSON object per line on stdout
        "--playlist-end", str(limit),
        "--no-warnings",
        "--ignore-errors",  # Skip private/removed videos
    ]

    try:
        # Run async subprocess
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE
        )
        stdout, stderr = await process.communicate()

        output = stdout.decode('utf-8')
        # With --ignore-errors yt-dlp may exit non-zero even when some
        # videos were dumped; only give up when there is no output at all.
        if process.returncode != 0 and not output.strip():
            print(f"DEBUG: [yt-dlp] Failed: {stderr.decode()}")
            return []

        videos = []
        for line in output.splitlines():
            try:
                if not line.strip():
                    continue
                data = json.loads(line)
                video_id = data.get('id')
                # yt-dlp sometimes returns a numeric ID as uploader_id for
                # profiles; prefer the searched username in that case so
                # constructed URLs stay handle-based.
                raw_uploader_id = data.get('uploader_id')
                if raw_uploader_id and raw_uploader_id.isdigit():
                    unique_id = username
                else:
                    unique_id = raw_uploader_id or username
                videos.append({
                    "id": video_id,
                    "url": data.get('url') or f"https://www.tiktok.com/@{unique_id}/video/{video_id}",
                    "author": unique_id,
                    "description": data.get('title') or "",
                    "thumbnail": data.get('thumbnail'),  # may be None in flat modes
                    "views": data.get('view_count', 0),
                    "likes": data.get('like_count', 0)
                })
            except Exception:
                # Skip malformed lines; --ignore-errors can interleave noise.
                continue

        print(f"DEBUG: [yt-dlp] Found {len(videos)} videos")
        return videos
    except Exception as e:
        print(f"DEBUG: [yt-dlp] Execution error: {e}")
        return []
# Singleton instance
# Module-level shared service object; all methods are @staticmethod, so the
# instance carries no state and is safe to import from multiple modules.
tiktok_api = TikTokAPIService()

44
backend/debug_api.py Normal file
View file

@ -0,0 +1,44 @@
import asyncio
import httpx
import sys
from core.playwright_manager import PlaywrightManager
async def test_api():
    """
    Manually exercise the TikTok user-detail API with stored credentials.

    Debug helper: prints the HTTP status and the resolved secUid, or a
    preview of the response body when the lookup fails.
    """
    print("Loading credentials...")
    cookies, user_agent = PlaywrightManager.load_stored_credentials()
    headers = {
        "User-Agent": user_agent or "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Referer": "https://www.tiktok.com/",
        # Serialize stored cookies into a single Cookie header.
        "Cookie": "; ".join([f"{c['name']}={c['value']}" for c in cookies]),
    }
    username = "x.ka.baongoc"
    url = f"https://www.tiktok.com/api/user/detail/?uniqueId={username}"
    print(f"Fetching {url}...")
    async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
        res = await client.get(url, headers=headers)
        print(f"Status: {res.status_code}")
        if res.status_code == 200:
            try:
                data = res.json()
                user = data.get("userInfo", {}).get("user", {})
                sec_uid = user.get("secUid")
                print(f"SecUid: {sec_uid}")
                if not sec_uid:
                    print("Response body preview:", str(data)[:500])
            except ValueError:
                # FIX: was a bare `except:`, which also swallows
                # KeyboardInterrupt/SystemExit. res.json() raises a
                # ValueError subclass (json.JSONDecodeError) on bad JSON.
                print("JSON Decode Failed. Content preview:")
                print(res.text[:500])
        else:
            print("Response:", res.text[:500])
if __name__ == "__main__":
    try:
        # FIX: the old code instantiated asyncio.ProactorEventLoop()
        # directly (deprecated) and only created/ran a loop inside the
        # win32 branch, so nothing ran on other platforms. Set the policy
        # (Proactor is required for Playwright subprocess support on
        # Windows) and let asyncio.run manage the loop lifecycle.
        if sys.platform == "win32":
            asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
        asyncio.run(test_api())
    except Exception as e:
        print(e)

View file

@ -1,5 +1,8 @@
[
"nhythanh_04",
"po.trann77",
"tieu_hy26"
"tieu_hy26",
"phamthuy9722r",
"tlin99",
"mjxdj9"
]

View file

@ -1,3 +1,11 @@
import sys
import asyncio
# CRITICAL: Set Windows event loop policy BEFORE any other imports
# Playwright requires ProactorEventLoop for subprocess support on Windows
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from fastapi.staticfiles import StaticFiles
@ -5,12 +13,7 @@ from fastapi.responses import FileResponse
from contextlib import asynccontextmanager
from pathlib import Path
from api.routes import auth, feed, download, following, config, user
import sys
import asyncio
# Force Proactor on Windows for Playwright
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
@asynccontextmanager
async def lifespan(app: FastAPI):

View file

@ -6,5 +6,6 @@ python-multipart
websockets
python-dotenv
crawl4ai
playwright
playwright==1.49.1
playwright-stealth
httpx

26
backend/run_windows.py Normal file
View file

@ -0,0 +1,26 @@
"""
Windows-compatible startup script for PureStream.
Sets ProactorEventLoop policy BEFORE uvicorn imports anything.
"""
import sys
import asyncio
# CRITICAL: Must be set before importing uvicorn or any async code
if sys.platform == "win32":
asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
# Also create the loop early
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
print(f"DEBUG: Forced ProactorEventLoop: {type(loop)}")
# Now import and run uvicorn
import uvicorn
if __name__ == "__main__":
uvicorn.run(
"main:app",
host="0.0.0.0",
port=8002,
reload=False, # Disabled: reload subprocess loses ProactorEventLoop on Windows
loop="asyncio" # Use asyncio, which should now use our ProactorEventLoop
)

View file

@ -0,0 +1,17 @@
import asyncio
from backend.core.playwright_manager import PlaywrightManager
async def test_search():
    """Smoke-test PlaywrightManager.search_videos using stored credentials.

    Prints how many results came back and each result's play address so a
    developer can eyeball whether the authenticated search path works.
    """
    print("Testing search_videos with STORED COOKIES...")
    stored_cookies, agent = PlaywrightManager.load_stored_credentials()
    print(f"Loaded {len(stored_cookies)} cookies. UA: {agent[:50]}...")
    results = await PlaywrightManager.search_videos(
        "gai xinh nhay", cookies=stored_cookies, user_agent=agent, limit=5
    )
    print(f"Found {len(results)} videos.")
    for idx, entry in enumerate(results):
        addr = entry.get("video", {}).get("play_addr")
        print(f"Video {idx} Play Addr: {addr}")


if __name__ == "__main__":
    asyncio.run(test_search())

66
backup_project.ps1 Normal file
View file

@ -0,0 +1,66 @@
# Create a timestamped zip backup of the project, excluding generated and
# heavy directories (.git, venv, node_modules, caches, previous backups).
$ErrorActionPreference = "Stop"

$ProjectDir = Get-Location
$Timestamp = Get-Date -Format "yyyyMMdd_HHmmss"
$BackupDir = Join-Path $ProjectDir "backups"
$BackupFile = Join-Path $BackupDir "kv_tiktok_backup_$Timestamp.zip"

# Create backup directory if it doesn't exist
if (-not (Test-Path $BackupDir)) {
    New-Item -ItemType Directory -Path $BackupDir | Out-Null
    Write-Host "Created backup directory: $BackupDir" -ForegroundColor Cyan
}

Write-Host "Starting backup of $ProjectDir..." -ForegroundColor Cyan
Write-Host "Target file: $BackupFile" -ForegroundColor Cyan

# Exclude list (Patterns to ignore)
# Each entry is matched as a regex (-match) against the path relative to
# the project root, so "^" anchors to the repo root.
$ExcludePatterns = @(
    "^\.git",
    "^\.venv",
    "^node_modules",
    "__pycache__",
    "^backups",
    "\.log$",
    "backend\\downloads",
    "backend\\cache",
    "backend\\session",
    "frontend\\dist"
)

# Get files to zip
$FilesToZip = Get-ChildItem -Path $ProjectDir -Recurse | Where-Object {
    # Strip the project-root prefix (+1 drops the path separator).
    $relativePath = $_.FullName.Substring($ProjectDir.Path.Length + 1)
    $shouldExclude = $false
    foreach ($pattern in $ExcludePatterns) {
        if ($relativePath -match $pattern) {
            $shouldExclude = $true
            break
        }
    }
    # Also exclude the backup directory itself and any zip files inside root (if active)
    if ($relativePath -like "backups\*") { $shouldExclude = $true }
    return -not $shouldExclude
}

if ($FilesToZip.Count -eq 0) {
    Write-Error "No files found to backup!"
}

# Compress
# NOTE(review): piping FileInfo objects into Compress-Archive stores entries
# by leaf name, which can flatten the directory structure inside the zip —
# verify the archive layout on restore before relying on this backup.
Write-Host "Compressing $($FilesToZip.Count) files..." -ForegroundColor Yellow
$FilesToZip | Compress-Archive -DestinationPath $BackupFile -Force

if (Test-Path $BackupFile) {
    $Item = Get-Item $BackupFile
    $SizeMB = [math]::Round($Item.Length / 1MB, 2)
    Write-Host "Backup created successfully!" -ForegroundColor Green
    Write-Host "Location: $BackupFile"
    Write-Host "Size: $SizeMB MB"
}
else {
    Write-Error "Backup failed!"
}

49
backup_project.sh Normal file
View file

@ -0,0 +1,49 @@
#!/bin/bash
# Create a timestamped zip backup of the project, excluding generated and
# heavy directories. Requires `zip` and `bc` to be installed.

# Configuration
PROJECT_DIR="$(pwd)"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
BACKUP_DIR="${PROJECT_DIR}/backups"
BACKUP_FILE="${BACKUP_DIR}/kv_tiktok_backup_${TIMESTAMP}.zip"

# Create backup directory if it doesn't exist
if [ ! -d "$BACKUP_DIR" ]; then
    echo "Creating backup directory: $BACKUP_DIR"
    mkdir -p "$BACKUP_DIR"
fi

echo "Starting backup of ${PROJECT_DIR}..."
echo "Target file: ${BACKUP_FILE}"

# Zip the project contents, excluding heavy/generated folders
# Using -r for recursive, -q for quiet (optional), -x to exclude patterns
zip -r "$BACKUP_FILE" . \
    -x "*.git*" \
    -x "*.venv*" \
    -x "*node_modules*" \
    -x "*__pycache__*" \
    -x "*.DS_Store" \
    -x "*backend/downloads*" \
    -x "*backend/cache*" \
    -x "*backend/session*" \
    -x "*frontend/dist*" \
    -x "*backups*" \
    -x "*.log"

# $? holds the exit status of the zip command above — nothing may run
# between zip and this check.
if [ $? -eq 0 ]; then
    echo "✅ Backup created successfully!"
    echo "📂 Location: ${BACKUP_FILE}"
    # Show file size
    # BSD stat (macOS) and GNU stat take different flags for "size in bytes".
    if [[ "$OSTYPE" == "darwin"* ]]; then
        SIZE=$(stat -f%z "$BACKUP_FILE")
    else
        SIZE=$(stat -c%s "$BACKUP_FILE")
    fi
    # Convert to MB
    SIZE_MB=$(echo "scale=2; $SIZE / 1024 / 1024" | bc)
    echo "📦 Size: ${SIZE_MB} MB"
else
    echo "❌ Backup failed!"
    exit 1
fi

24
docker-compose.dev.yml Normal file
View file

@ -0,0 +1,24 @@
# PureStream Development Docker Compose
# Uses Dockerfile.dev which COPIES files to avoid Synology Drive filesystem issues
services:
backend:
container_name: purestream_dev
build:
context: .
dockerfile: Dockerfile.dev
ports:
- "8002:8002"
volumes:
# Only mount data directories (not code)
- purestream_cache:/app/cache
- purestream_session:/app/session
shm_size: '2gb'
volumes:
purestream_cache:
purestream_session:
# NOTE:
# - Frontend is served by backend at http://localhost:8002
# - Code changes require rebuild: docker-compose -f docker-compose.dev.yml up --build

File diff suppressed because it is too large Load diff

View file

@ -13,11 +13,14 @@
"artplayer": "^5.3.0",
"axios": "^1.13.2",
"clsx": "^2.1.1",
"esbuild": "^0.27.2",
"framer-motion": "^12.23.26",
"lucide-react": "^0.561.0",
"hls.js": "^1.6.15",
"lucide-react": "^0.563.0",
"react": "^18.3.1",
"react-dom": "^18.3.1",
"react-router-dom": "^6.30.2",
"rollup": "^4.56.0",
"tailwind-merge": "^3.4.0",
"zustand": "^5.0.9"
},
@ -36,6 +39,9 @@
"tailwindcss": "^3.4.10",
"typescript": "^5.5.3",
"typescript-eslint": "^8.0.1",
"vite": "^5.4.1"
"vite": "^5.4.21"
},
"optionalDependencies": {
"@rollup/rollup-win32-x64-msvc": "^4.56.0"
}
}

View file

@ -23,7 +23,9 @@ import { Feed } from './components/Feed';
const Dashboard = () => {
return (
<div className="h-screen bg-black">
<Feed />
<Routes>
<Route path="/" element={<Feed />} />
</Routes>
</div>
)
}
@ -44,7 +46,7 @@ function App() {
<Route path="/login" element={<Login />} />
<Route path="/admin" element={<Admin />} />
<Route
path="/"
path="/*"
element={
<ProtectedRoute>
<Dashboard />

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,83 @@
import React from 'react';
import { Home, Search, Heart, LogOut } from 'lucide-react';
// Props for the desktop navigation sidebar.
interface SidebarProps {
    activeTab: 'foryou' | 'search' | 'profile';
    onTabChange: (tab: 'foryou' | 'search' | 'profile') => void;
    onLogout?: () => void;
}

// Desktop-only (hidden below md) vertical navigation panel: logo, tab
// items, and a logout action. Collapses to icon-only below the lg
// breakpoint; tab state is owned by the parent via activeTab/onTabChange.
export const Sidebar: React.FC<SidebarProps> = ({ activeTab, onTabChange, onLogout }) => {
    return (
        <div className="hidden md:flex flex-col w-20 lg:w-64 h-full glass-panel border-r-0 border-r-white/10 z-50 transition-all duration-300">
            {/* Logo */}
            <div className="p-6 flex items-center gap-3">
                <div className="w-8 h-8 rounded-xl bg-gradient-to-tr from-violet-500 to-fuchsia-500 flex-shrink-0" />
                <h1 className="text-xl font-bold bg-clip-text text-transparent bg-gradient-to-r from-white to-gray-400 hidden lg:block">
                    PureStream
                </h1>
            </div>

            {/* Nav Items */}
            <div className="flex-1 flex flex-col gap-2 px-3 py-4">
                <NavItem
                    icon={<Home size={24} />}
                    label="For You"
                    isActive={activeTab === 'foryou'}
                    onClick={() => onTabChange('foryou')}
                />
                <NavItem
                    icon={<Search size={24} />}
                    label="Search"
                    isActive={activeTab === 'search'}
                    onClick={() => onTabChange('search')}
                />
                {/* Placeholder for future features */}
                <NavItem
                    icon={<Heart size={24} />}
                    label="Likes"
                    isActive={false}
                    onClick={() => { }}
                />
            </div>

            {/* Bottom Actions */}
            <div className="p-4 border-t border-white/10 space-y-2">
                <button
                    onClick={onLogout}
                    className="flex items-center gap-4 w-full p-3 rounded-xl text-gray-400 hover:bg-white/5 hover:text-red-400 transition-all group"
                >
                    <LogOut size={22} className="group-hover:scale-110 transition-transform" />
                    <span className="hidden lg:block font-medium">Log Out</span>
                </button>
            </div>
        </div>
    );
};
// Props for a single sidebar navigation entry.
interface NavItemProps {
    icon: React.ReactNode;
    label: string;
    isActive: boolean;
    onClick: () => void;
}

// One sidebar tab button: highlighted when active, icon scales on
// hover/active, and the text label is hidden below the lg breakpoint.
const NavItem: React.FC<NavItemProps> = ({ icon, label, isActive, onClick }) => {
    return (
        <button
            onClick={onClick}
            className={`flex items-center gap-4 w-full p-3 rounded-xl transition-all duration-200 group
        ${isActive
                    ? 'bg-white/10 text-white shadow-lg shadow-black/20'
                    : 'text-gray-400 hover:bg-white/5 hover:text-white'
                }`}
        >
            <div className={`${isActive ? 'scale-110' : 'group-hover:scale-110'} transition-transform duration-200`}>
                {icon}
            </div>
            <span className={`hidden lg:block font-medium ${isActive ? 'text-white' : ''}`}>
                {label}
            </span>
        </button>
    );
};

View file

@ -0,0 +1,38 @@
import React from 'react';
// Full-screen loading placeholder shown while the video feed is fetched.
// Mirrors the real feed layout (action rail, caption area, music row) using
// shimmering blocks (.skeleton-pulse) so the transition to content is smooth.
export const SkeletonFeed: React.FC = () => {
    return (
        <div className="h-full w-full bg-[#0f0f15] relative overflow-hidden flex items-center justify-center">
            {/* Main Video Area Skeleton */}
            <div className="absolute inset-0 flex items-center justify-center">
                <div className="w-16 h-16 rounded-full skeleton-pulse opacity-50"></div>
            </div>

            {/* Right Sidebar Action Buttons */}
            <div className="absolute right-4 bottom-24 flex flex-col items-center gap-6 z-10">
                {[1, 2, 3, 4].map((_, i) => (
                    <div key={i} className="flex flex-col items-center gap-1">
                        <div className="w-12 h-12 rounded-full skeleton-pulse bg-white/10" />
                        <div className="w-8 h-3 rounded-md skeleton-pulse bg-white/10" />
                    </div>
                ))}
            </div>

            {/* Bottom Info Area */}
            <div className="absolute bottom-6 left-4 right-20 z-10 space-y-3">
                <div className="w-32 h-5 rounded-md skeleton-pulse bg-white/10" />
                <div className="w-64 h-4 rounded-md skeleton-pulse bg-white/10" />
                <div className="w-48 h-4 rounded-md skeleton-pulse bg-white/10 opacity-70" />

                {/* Music Skeleton */}
                <div className="flex items-center gap-2 mt-2">
                    <div className="w-6 h-6 rounded-full skeleton-pulse bg-white/10" />
                    <div className="w-40 h-4 rounded-md skeleton-pulse bg-white/10" />
                </div>
            </div>

            {/* Overlay Gradient */}
            <div className="absolute inset-0 bg-gradient-to-b from-transparent via-transparent to-black/60 pointer-events-none" />
        </div>
    );
};

View file

@ -0,0 +1,61 @@
import React, { useState } from 'react';
import type { UserProfile } from '../types';
// Props for the profile summary card.
interface UserCardProps {
    user: UserProfile;
}

// Profile card with avatar, nickname and @username, plus a collapsible
// stats section (followers / following / likes and optional bio) toggled
// by local isExpanded state.
const UserCard: React.FC<UserCardProps> = ({ user }) => {
    const [isExpanded, setIsExpanded] = useState(false);

    return (
        <div className="bg-white dark:bg-gray-800 shadow-md rounded-lg p-4 mb-4">
            <div className="flex items-center justify-between">
                <div className="flex items-center">
                    <img
                        src={user.avatar}
                        alt={user.username}
                        className="w-16 h-16 rounded-full mr-4"
                    />
                    <div>
                        <h2 className="text-xl font-bold">{user.nickname}</h2>
                        <p className="text-gray-500 dark:text-gray-400">@{user.username}</p>
                    </div>
                </div>
                <button
                    onClick={() => setIsExpanded(!isExpanded)}
                    className="bg-blue-500 hover:bg-blue-700 text-white font-bold py-2 px-4 rounded"
                >
                    {isExpanded ? 'Hide Stats' : 'Show Stats'}
                </button>
            </div>
            {isExpanded && (
                <div className="mt-4">
                    {/* Counts use optional chaining: stats may be absent on the profile. */}
                    <div className="grid grid-cols-3 gap-4 text-center">
                        <div>
                            <p className="font-bold text-lg">{user.followers?.toLocaleString()}</p>
                            <p className="text-gray-500 dark:text-gray-400">Followers</p>
                        </div>
                        <div>
                            <p className="font-bold text-lg">{user.following?.toLocaleString()}</p>
                            <p className="text-gray-500 dark:text-gray-400">Following</p>
                        </div>
                        <div>
                            <p className="font-bold text-lg">{user.likes?.toLocaleString()}</p>
                            <p className="text-gray-500 dark:text-gray-400">Likes</p>
                        </div>
                    </div>
                    {user.bio && (
                        <div className="mt-4">
                            <h3 className="font-bold">Bio</h3>
                            <p className="text-gray-600 dark:text-gray-300">{user.bio}</p>
                        </div>
                    )}
                </div>
            )}
        </div>
    );
};

export default UserCard;

View file

@ -4,12 +4,7 @@ import type { Video } from '../types';
import { API_BASE_URL } from '../config';
import { videoCache } from '../utils/videoCache';
// Check if browser supports HEVC codec (Safari, Chrome 107+, Edge)
const supportsHEVC = (): boolean => {
if (typeof MediaSource === 'undefined') return false;
return MediaSource.isTypeSupported('video/mp4; codecs="hvc1"') ||
MediaSource.isTypeSupported('video/mp4; codecs="hev1"');
};
interface HeartParticle {
id: number;
@ -43,7 +38,7 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
const progressBarRef = useRef<HTMLDivElement>(null);
const [isPaused, setIsPaused] = useState(false);
const [showControls, setShowControls] = useState(false);
const [objectFit] = useState<'cover' | 'contain'>('cover');
const [progress, setProgress] = useState(0);
const [duration, setDuration] = useState(0);
const [isSeeking, setIsSeeking] = useState(false);
@ -55,7 +50,7 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
const [cachedUrl, setCachedUrl] = useState<string | null>(null);
const [codecError, setCodecError] = useState(false); // True if video codec not supported
const lastTapRef = useRef<number>(0);
const browserSupportsHEVC = useRef(supportsHEVC());
const fullProxyUrl = `${API_BASE_URL}/feed/proxy?url=${encodeURIComponent(video.url)}`;
const thinProxyUrl = video.cdn_url ? `${API_BASE_URL}/feed/thin-proxy?cdn_url=${encodeURIComponent(video.cdn_url)}` : null;
@ -121,18 +116,16 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
setUseFallback(false);
setIsLoading(true); // Show loading for new video
setCodecError(false); // Reset codec error for new video
setCachedUrl(null);
setShowSidebar(false); // Reset sidebar for new video
// Disable client-side caching for now as it causes partial content issues with Range requests
// The backend has its own LRU cache which is sufficient
const checkCache = async () => {
const cached = await videoCache.get(video.url);
if (cached) {
const blob_url = URL.createObjectURL(cached);
setCachedUrl(blob_url);
}
// Force clear any existing cache for this video to ensure we don't serve bad blobs
await videoCache.delete(video.url);
setCachedUrl(null);
};
checkCache();
}, [video.id]);
// Progress tracking
@ -154,15 +147,21 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
const videoEl = e.target as HTMLVideoElement;
const error = videoEl?.error;
// Check if this is a codec/decode error (MEDIA_ERR_DECODE = 3)
// Check if this is a codec/decode error (MEDIA_ERR_DECODE = 3, MEDIA_ERR_SRC_NOT_SUPPORTED = 4)
if (error?.code === 3 || error?.code === 4) {
console.log(`Codec error detected (code ${error.code}):`, error.message);
// Only show codec error if browser doesn't support HEVC
if (!browserSupportsHEVC.current) {
setCodecError(true);
setIsLoading(false);
// Always fall back to full proxy which will transcode to H.264
if (!useFallback) {
console.log('Codec not supported, falling back to full proxy (will transcode to H.264)...');
setUseFallback(true);
return;
}
// If even full proxy failed, show error
setCodecError(true);
setIsLoading(false);
return;
}
if (thinProxyUrl && !useFallback) {
@ -182,25 +181,26 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
};
}, [thinProxyUrl, useFallback, cachedUrl]);
useEffect(() => {
const cacheVideo = async () => {
if (!cachedUrl || !proxyUrl || proxyUrl === cachedUrl) return;
try {
const response = await fetch(proxyUrl);
if (response.ok) {
const blob = await response.blob();
await videoCache.set(video.url, blob);
}
} catch (error) {
console.debug('Failed to cache video:', error);
}
};
if (isActive && !isLoading) {
cacheVideo();
}
}, [isActive, isLoading, proxyUrl, cachedUrl, video.url]);
// Disable active caching
// useEffect(() => {
// const cacheVideo = async () => {
// if (!cachedUrl || !proxyUrl || proxyUrl === cachedUrl) return;
//
// try {
// const response = await fetch(proxyUrl);
// if (response.ok) {
// const blob = await response.blob();
// await videoCache.set(video.url, blob);
// }
// } catch (error) {
// console.debug('Failed to cache video:', error);
// }
// };
//
// if (isActive && !isLoading) {
// cacheVideo();
// }
// }, [isActive, isLoading, proxyUrl, cachedUrl, video.url]);
const togglePlayPause = () => {
if (!videoRef.current) return;
@ -376,20 +376,32 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
onClick={handleVideoClick}
onTouchStart={handleTouchStart}
>
{/* Video Element - preload="metadata" for instant player readiness */}
<video
ref={videoRef}
src={proxyUrl}
loop
playsInline
preload="metadata"
muted={isMuted}
className="w-full h-full"
style={{ objectFit }}
onCanPlay={() => setIsLoading(false)}
onWaiting={() => setIsLoading(true)}
onPlaying={() => setIsLoading(false)}
/>
{/* Ambient Background (Blurred) */}
<div className="absolute inset-0 z-0 overflow-hidden">
<video
src={proxyUrl}
muted
loop
className="w-full h-full object-cover blur-2xl opacity-50 scale-110"
/>
</div>
{/* Video Element - Main Content */}
<div className="relative z-10 w-full h-full flex items-center justify-center">
<video
ref={videoRef}
src={proxyUrl}
loop
playsInline
preload="metadata"
muted={isMuted}
className="max-h-full max-w-full"
style={{ objectFit: 'contain' }}
onCanPlay={() => setIsLoading(false)}
onWaiting={() => setIsLoading(true)}
onPlaying={() => setIsLoading(false)}
/>
</div>
{/* Loading Overlay - Subtle pulsing logo */}
{isLoading && !codecError && (
@ -482,9 +494,9 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
)}
</div>
{/* Side Controls - Only show when video is paused */}
{/* Side Controls - Always visible on hover or when paused */}
<div
className={`absolute bottom-36 right-4 flex flex-col gap-3 transition-all duration-300 transform ${isPaused && showSidebar ? 'translate-x-0 opacity-100' : 'translate-x-[200%] opacity-0'
className={`absolute bottom-36 right-4 flex flex-col gap-3 transition-all duration-300 transform ${showControls || isPaused ? 'translate-x-0 opacity-100' : 'translate-x-2 opacity-0'
}`}
>
{/* Follow Button */}
@ -505,6 +517,7 @@ export const VideoPlayer: React.FC<VideoPlayerProps> = ({
<a
href={downloadUrl}
download
onClick={(e) => e.stopPropagation()}
className="w-12 h-12 flex items-center justify-center bg-white/10 hover:bg-white/20 backdrop-blur-xl border border-white/10 rounded-full text-white transition-all"
title="Download"
>

View file

@ -1,3 +1 @@
export const API_BASE_URL = import.meta.env.PROD
? '/api'
: (import.meta.env.VITE_API_URL || 'http://localhost:8002/api');
export const API_BASE_URL = '/api';

View file

@ -15,6 +15,30 @@
height: 100vh;
height: 100dvh;
}
body {
@apply bg-[#0f0f15] text-white antialiased;
color-scheme: dark;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, Helvetica, Arial, sans-serif;
}
}
@layer components {
.glass-panel {
@apply bg-white/5 backdrop-blur-xl border border-white/10;
}
.glass-panel-hover {
@apply hover:bg-white/10 transition-colors duration-200;
}
.btn-primary {
@apply bg-gradient-to-r from-violet-600 to-indigo-600 hover:from-violet-500 hover:to-indigo-500 text-white font-medium px-4 py-2 rounded-xl transition-all active:scale-95 shadow-lg shadow-indigo-500/20;
}
.btn-ghost {
@apply hover:bg-white/10 text-gray-300 hover:text-white px-4 py-2 rounded-xl transition-all active:scale-95;
}
}
@layer utilities {
@ -33,33 +57,17 @@
display: none;
/* Chrome, Safari and Opera */
}
}
@layer utilities {
.scrollbar-hide::-webkit-scrollbar {
display: none;
}
.scrollbar-hide {
-ms-overflow-style: none;
scrollbar-width: none;
.text-shadow {
text-shadow: 0 2px 4px rgba(0,0,0,0.5);
}
}
/* Animations */
@keyframes shake {
0%,
100% {
transform: translateX(0);
}
25% {
transform: translateX(-4px);
}
75% {
transform: translateX(4px);
}
0%, 100% { transform: translateX(0); }
25% { transform: translateX(-4px); }
75% { transform: translateX(4px); }
}
.animate-shake {
@ -67,34 +75,25 @@
}
@keyframes heart-float {
0% {
opacity: 1;
transform: scale(0) rotate(-15deg);
}
25% {
opacity: 1;
transform: scale(1.2) rotate(10deg);
}
50% {
opacity: 0.8;
transform: scale(1) translateY(-30px) rotate(-5deg);
}
100% {
opacity: 0;
transform: scale(0.6) translateY(-80px) rotate(15deg);
}
0% { opacity: 1; transform: scale(0) rotate(-15deg); }
25% { opacity: 1; transform: scale(1.2) rotate(10deg); }
50% { opacity: 0.8; transform: scale(1) translateY(-30px) rotate(-5deg); }
100% { opacity: 0; transform: scale(0.6) translateY(-80px) rotate(15deg); }
}
.animate-heart-float {
animation: heart-float 1s ease-out forwards;
}
body {
@apply bg-black antialiased;
color-scheme: dark;
@keyframes shimmer {
0% { background-position: -200% 0; }
100% { background-position: 200% 0; }
}
.skeleton-pulse {
background: linear-gradient(90deg, rgba(255,255,255,0.03) 25%, rgba(255,255,255,0.08) 50%, rgba(255,255,255,0.03) 75%);
background-size: 200% 100%;
animation: shimmer 1.5s infinite;
}
.artplayer-app {

View file

@ -33,7 +33,7 @@ class FeedLoader {
}
const cacheKey = 'feed-full';
// Skip cache check when explicitly requested (for infinite scroll)
if (!skipCache) {
const cached = this.getCached(cacheKey);
@ -43,8 +43,8 @@ class FeedLoader {
}
}
const videos = await this.fetchFeed(skipCache);
const videos = await this.fetchFeed(skipCache, fast);
// Only cache if not skipping (initial load)
if (!skipCache) {
this.setCached(cacheKey, videos);
@ -62,11 +62,15 @@ class FeedLoader {
}
}
private async fetchFeed(skipCache: boolean = false): Promise<Video[]> {
private async fetchFeed(skipCache: boolean = false, fast: boolean = false): Promise<Video[]> {
// Add skip_cache parameter to force backend to fetch fresh videos
const url = skipCache
? `${API_BASE_URL}/feed?skip_cache=true`
: `${API_BASE_URL}/feed`;
let url = `${API_BASE_URL}/feed?`;
if (skipCache) url += 'skip_cache=true&';
if (fast) url += 'fast=true&';
// Clean trailing & or ?
url = url.replace(/[?&]$/, '');
const response = await axios.get(url);
if (!Array.isArray(response.data)) {

View file

@ -85,7 +85,8 @@ class VideoPrefetcher {
return;
}
const API_BASE_URL = 'http://localhost:8002/api'; // Hardcoded or imported from config
const API_BASE_URL_CONFIG = (await import('../config')).API_BASE_URL;
const API_BASE_URL = API_BASE_URL_CONFIG || 'http://localhost:8002/api'; // Fallback if import fails
const fullProxyUrl = `${API_BASE_URL}/feed/proxy?url=${encodeURIComponent(video.url)}`;
// Use thin proxy if available for better performance
const thinProxyUrl = video.cdn_url ? `${API_BASE_URL}/feed/thin-proxy?cdn_url=${encodeURIComponent(video.cdn_url)}` : null;

View file

@ -1,7 +1,18 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'

// https://vitejs.dev/config/
export default defineConfig({
  plugins: [react()],
  server: {
    host: '0.0.0.0', // Allow access from outside the container
    port: 5173,
    proxy: {
      // Forward /api requests to the backend dev server to avoid CORS.
      '/api': {
        target: 'http://localhost:8002',
        changeOrigin: true,
        timeout: 60000, // ms — feed/scraping endpoints can be slow
      },
    },
  },
})

60
manage_app.ps1 Normal file
View file

@ -0,0 +1,60 @@
# PureStream process manager for Windows: start/stop/restart the backend
# (python run_windows.py on :8002) and the frontend (npm run dev on :5173),
# each in its own console window.
param (
    [string]$Action = "start"
)

$BackendPort = 8002
$FrontendPort = 5173

$RootPath = Get-Location
$BackendDir = Join-Path $RootPath "backend"
$FrontendDir = Join-Path $RootPath "frontend"

function Stop-App {
    # Kill whatever currently owns the backend/frontend TCP ports.
    Write-Host "Stopping PureStream..." -ForegroundColor Yellow
    $ports = @($BackendPort, $FrontendPort)
    foreach ($port in $ports) {
        # A port can be owned by several PIDs (parent + children), so collect all of them.
        $processes = Get-NetTCPConnection -LocalPort $port -ErrorAction SilentlyContinue | Select-Object -ExpandProperty OwningProcess -Unique
        if ($processes) {
            foreach ($pidVal in $processes) {
                Write-Host "Killing process on port $port (PID: $pidVal)" -ForegroundColor Red
                Stop-Process -Id $pidVal -Force -ErrorAction SilentlyContinue
            }
        } else {
            Write-Host "No process found on port $port" -ForegroundColor Gray
        }
    }
    Write-Host "Stopped." -ForegroundColor Green
}

function Start-App {
    # Check if ports are already in use
    $backendActive = Get-NetTCPConnection -LocalPort $BackendPort -ErrorAction SilentlyContinue
    $frontendActive = Get-NetTCPConnection -LocalPort $FrontendPort -ErrorAction SilentlyContinue

    if ($backendActive -or $frontendActive) {
        Write-Host "Ports are already in use. Stopping existing instances..." -ForegroundColor Yellow
        Stop-App
    }

    Write-Host "Starting PureStream Backend..." -ForegroundColor Cyan
    # Launch in a new CMD window so user can see logs and it stays open (/k)
    Start-Process "cmd.exe" -ArgumentList "/k title PureStream Backend & cd /d `"$BackendDir`" & `"$RootPath\.venv\Scripts\python.exe`" run_windows.py" -WindowStyle Normal

    Write-Host "Starting PureStream Frontend..." -ForegroundColor Cyan
    # Launch in a new CMD window
    Start-Process "cmd.exe" -ArgumentList "/k title PureStream Frontend & cd /d `"$FrontendDir`" & npm run dev" -WindowStyle Normal

    Write-Host "PureStream is starting!" -ForegroundColor Green
    Write-Host "Backend API: http://localhost:$BackendPort"
    Write-Host "Frontend UI: http://localhost:$FrontendPort"
}

# Dispatch on the requested action; unrecognised values warn, then start.
switch ($Action.ToLower()) {
    "stop" { Stop-App }
    "start" { Start-App }
    "restart" { Stop-App; Start-App }
    default {
        Write-Host "Usage: .\manage_app.ps1 [start|stop|restart]" -ForegroundColor Red
        Write-Host "Defaulting to 'start'..." -ForegroundColor Yellow
        Start-App
    }
}

49
restart_app.sh Normal file
View file

@ -0,0 +1,49 @@
#!/bin/bash
# Restart both PureStream services: free the known ports, then relaunch the
# backend (uvicorn :8000) and frontend (vite :8002) in the background,
# logging to backend.log / frontend.log.

echo "🔄 Restarting PureStream WebApp..."

# Function to kill process on port
kill_port() {
    local port=$1
    # Probe the port once (-t prints PIDs only) instead of calling lsof twice,
    # which was racy between the check and the kill.
    local pids
    pids=$(lsof -ti:"$port")
    if [ -n "$pids" ]; then
        echo "Killing process on port $port (PID: $pids)..."
        # Intentionally unquoted: $pids may hold several PIDs.
        kill -9 $pids
    else
        echo "Port $port is free."
    fi
}

# 1. Stop existing processes
echo "🛑 Stopping services..."
kill_port 8000 # Backend
kill_port 8002 # Frontend (Target)
kill_port 8003 # Frontend (Alt)
kill_port 5173 # Frontend (Default)

# 2. Start Backend
echo "🚀 Starting Backend (Port 8000)..."
# Bail out if the directory is missing rather than launching from the wrong cwd.
cd backend || exit 1
# Check if venv exists matching user env, else use python3
PYTHON_CMD="python3"
# Start uvicorn in background
nohup $PYTHON_CMD -m uvicorn main:app --reload --host 0.0.0.0 --port 8000 > ../backend.log 2>&1 &
BACKEND_PID=$!
echo "Backend started with PID $BACKEND_PID"
cd ..

# 3. Start Frontend
echo "🎨 Starting Frontend (Port 8002)..."
cd frontend || exit 1
# Start vite in background
nohup npm run dev -- --port 8002 --host > ../frontend.log 2>&1 &
FRONTEND_PID=$!
echo "Frontend started with PID $FRONTEND_PID"
cd ..

echo "✅ App restarted successfully!"
echo "--------------------------------"
echo "Backend: http://localhost:8000"
echo "Frontend: http://localhost:8002"
echo "--------------------------------"
echo "Logs are being written to backend.log and frontend.log"

2
run_debug_search.ps1 Normal file
View file

@ -0,0 +1,2 @@
# Run the search-debug helper with the repo's venv interpreter, with
# PYTHONPATH pointed at the backend package so its imports resolve.
# NOTE(review): paths are machine-specific (hard-coded user profile).
$env:PYTHONPATH = "c:\Users\Admin\Downloads\kv-tiktok\backend"
& "c:\Users\Admin\Downloads\kv-tiktok\.venv\Scripts\python.exe" tests/debug_search.py

3
start_app.bat Normal file
View file

@ -0,0 +1,3 @@
@echo off
:: Start PureStream: jump to the repo root (%~dp0) and delegate to the
:: PowerShell process manager.
cd /d "%~dp0"
powershell -ExecutionPolicy Bypass -File manage_app.ps1 start

4
stop_app.bat Normal file
View file

@ -0,0 +1,4 @@
@echo off
:: Stop PureStream via the PowerShell process manager; pause so the user can
:: read the output before the window closes.
cd /d "%~dp0"
powershell -ExecutionPolicy Bypass -File manage_app.ps1 stop
pause

12
test_stealth.py Normal file
View file

@ -0,0 +1,12 @@
"""Diagnostic: confirm playwright_stealth is importable from this interpreter."""
import sys

print(f"Python: {sys.executable}")
print(f"Path: {sys.path}")

try:
    import playwright_stealth
    print(f"Module: {playwright_stealth}")
    # Importing the coroutine is the actual check; it is not called here.
    from playwright_stealth import stealth_async  # noqa: F401
    print("Import successful!")
except ImportError as err:
    print(f"Import failed: {err}")
except Exception as err:  # any non-import failure raised during module init
    print(f"Error: {err}")

41
tests/debug_search.py Normal file
View file

@ -0,0 +1,41 @@
import json
import urllib.request
import urllib.parse
import os
import sys


def debug_search():
    """Call the local /api/user/search endpoint and print a summary of the reply."""
    base_url = "http://localhost:8002/api/user/search"
    query = "hot trend"
    qs = urllib.parse.urlencode({"query": query, "limit": 10})
    url = f"{base_url}?{qs}"

    print(f"Testing search for: '{query}'")
    print(f"URL: {url}")

    try:
        request = urllib.request.Request(url)
        with urllib.request.urlopen(request, timeout=60) as response:
            status_code = response.getcode()
            print(f"Status Code: {status_code}")
            if status_code != 200:
                print(f"Error: Status {status_code}")
                return
            payload = json.loads(response.read().decode('utf-8'))
            print(f"Source: {payload.get('source')}")
            print(f"Count: {payload.get('count')}")
            videos = payload.get("videos", [])
            if videos:
                first = videos[0]
                print(f"First video: {first.get('id')} - {first.get('desc', 'No desc')}")
            else:
                print("ERROR: No videos returned!")
    except urllib.error.HTTPError as err:
        print(f"HTTP Error: {err.code} - {err.reason}")
        print(err.read().decode('utf-8'))
    except Exception as err:
        print(f"Request failed: {err}")


if __name__ == "__main__":
    debug_search()

29
tests/inspect_html.py Normal file
View file

@ -0,0 +1,29 @@
"""Offline inspection of a previously captured search results page:
find where the text 'trend' occurs and print the wrapping tag chains."""
from bs4 import BeautifulSoup
import re

with open("debug_search_page.html", "r", encoding="utf-8") as fh:
    html = fh.read()

soup = BeautifulSoup(html, "html.parser")

# Inspect text occurrences
print("\n--- Searching for 'trend' text ---")
text_matches = soup.find_all(string=re.compile("trend", re.IGNORECASE))
print(f"Found {len(text_matches)} text matches.")

unique_parents = set()
for match in text_matches:
    parent = match.parent
    if parent is None or parent.name in ("script", "style"):
        continue
    # Record up to 3 ancestor tags so the wrapping component can be identified.
    chain = []
    node = parent
    for _ in range(3):
        if node:
            chain.append(f"<{node.name} class='{'.'.join(node.get('class', []))}'>")
            node = node.parent
    unique_parents.add(" -> ".join(chain))

for entry in list(unique_parents)[:10]:
    print(entry)

45
tests/parse_ssr_data.py Normal file
View file

@ -0,0 +1,45 @@
"""Extract TikTok's SSR hydration payload from a saved search page, dump it to
ssr_data.json, and probe it for keys that look like video lists."""
from bs4 import BeautifulSoup
import json

with open("debug_search_page.html", "r", encoding="utf-8") as fh:
    html = fh.read()

soup = BeautifulSoup(html, "html.parser")
script = soup.find("script", id="__UNIVERSAL_DATA_FOR_REHYDRATION__")

if not script:
    print("Script tag not found.")
else:
    try:
        data = json.loads(script.string)
        print("Found SSR Data!")
        # Save pretty printed
        with open("ssr_data.json", "w", encoding="utf-8") as out:
            json.dump(data, out, indent=2)

        def find_keys(obj, target_key, path=""):
            # Depth-first walk; print every dict key containing target_key.
            if isinstance(obj, dict):
                for key, value in obj.items():
                    current_path = f"{path}.{key}"
                    if target_key.lower() in key.lower():
                        print(f"Found key '{key}' at {current_path}")
                    find_keys(value, target_key, current_path)
            elif isinstance(obj, list):
                for idx, item in enumerate(obj):
                    find_keys(item, target_key, f"{path}[{idx}]")

        print("\nSearching for 'item' or 'list' keys...")
        find_keys(data, "item")
        find_keys(data, "list")

        # Check a known top-level container for orientation.
        default_scope = data.get("__DEFAULT_SCOPE__", {})
        print(f"\nTop level keys: {list(default_scope.keys())}")
    except json.JSONDecodeError as err:
        print(f"JSON Error: {err}")

View file

@ -1,30 +1,30 @@
import urllib.request
import json
try:
print("Testing /health...")
with urllib.request.urlopen("http://localhost:8002/health", timeout=5) as r:
print(f"Health: {r.status}")
print("Testing /api/feed...")
with open("temp_cookies.json", "r") as f:
data = json.load(f)
# Ensure list format
if isinstance(data, dict) and "credentials" in data:
data = data["credentials"]
# Prepare body as dict for safety with new Union type
body = {"credentials": data}
req = urllib.request.Request(
"http://localhost:8002/api/feed",
data=json.dumps(body).encode('utf-8'),
headers={'Content-Type': 'application/json'}
)
with urllib.request.urlopen(req, timeout=30) as r:
print(f"Feed: {r.status}")
print(r.read().decode('utf-8')[:100])
except Exception as e:
print(f"Error: {e}")
"""Smoke-test the backend: hit /health, then POST cached cookies to /api/feed."""
import urllib.request
import json

try:
    print("Testing /health...")
    with urllib.request.urlopen("http://localhost:8002/health", timeout=5) as resp:
        print(f"Health: {resp.status}")

    print("Testing /api/feed...")
    with open("temp_cookies.json", "r") as fh:
        data = json.load(fh)

    # Ensure list format
    if isinstance(data, dict) and "credentials" in data:
        data = data["credentials"]

    # Prepare body as dict for safety with new Union type
    body = {"credentials": data}
    request = urllib.request.Request(
        "http://localhost:8002/api/feed",
        data=json.dumps(body).encode('utf-8'),
        headers={'Content-Type': 'application/json'},
    )
    with urllib.request.urlopen(request, timeout=30) as resp:
        print(f"Feed: {resp.status}")
        print(resp.read().decode('utf-8')[:100])
except Exception as err:
    print(f"Error: {err}")

42
tests/test_crawl.py Normal file
View file

@ -0,0 +1,42 @@
import asyncio
import base64

from crawl4ai import AsyncWebCrawler


async def main():
    """Crawl a TikTok search page with Crawl4AI; save HTML, markdown and screenshot."""
    print("Starting Crawl4AI test...")
    async with AsyncWebCrawler(verbose=True) as crawler:
        url = "https://www.tiktok.com/search?q=hot+trend"
        print(f"Crawling: {url}")

        # Browser config
        run_conf = {
            "url": url,
            "wait_for": "css:[data-e2e='search_video_item']",
            "css_selector": "[data-e2e='search_video_item']",
            "screenshot": True,
            "magic": True,
        }
        print(f"Crawling with config: {run_conf}")
        result = await crawler.arun(**run_conf)

        if not result.success:
            print(f"Crawl failed: {result.error_message}")
            return

        print("Crawl successful!")
        print(f"HTML length: {len(result.html)}")
        if result.screenshot:
            with open("crawl_screenshot.png", "wb") as shot:
                shot.write(base64.b64decode(result.screenshot))
            print("Saved screenshot to crawl_screenshot.png")
        # Save for inspection
        with open("crawl_debug.html", "w", encoding="utf-8") as fh:
            fh.write(result.html)
        with open("crawl_debug.md", "w", encoding="utf-8") as fh:
            fh.write(result.markdown)


if __name__ == "__main__":
    asyncio.run(main())

View file

@ -1,16 +1,16 @@
import requests
import time
URL = "http://localhost:8002/api/auth/admin-login"
def test_login():
print("Testing Admin Login...")
try:
res = requests.post(URL, json={"password": "admin123"})
print(f"Status: {res.status_code}")
print(f"Response: {res.text}")
except Exception as e:
print(f"Error: {e}")
if __name__ == "__main__":
test_login()
import requests
import time

URL = "http://localhost:8002/api/auth/admin-login"


def test_login():
    """POST the default admin password to the login endpoint and print the reply."""
    print("Testing Admin Login...")
    try:
        response = requests.post(URL, json={"password": "admin123"})
        print(f"Status: {response.status_code}")
        print(f"Response: {response.text}")
    except Exception as err:
        print(f"Error: {err}")


if __name__ == "__main__":
    test_login()

View file

@ -1,30 +1,30 @@
import urllib.request
import json
import os
with open("temp_cookies.json", "r") as f:
data = json.load(f)
# Ensure data is in the expected dict format for the request body
if isinstance(data, list):
# If temp_cookies is just the list, wrap it
body = {"credentials": data}
elif "credentials" not in data:
body = {"credentials": data}
else:
body = data
req = urllib.request.Request(
"http://localhost:8002/api/feed",
data=json.dumps(body).encode('utf-8'),
headers={'Content-Type': 'application/json'}
)
try:
with urllib.request.urlopen(req) as response:
print(response.read().decode('utf-8'))
except urllib.error.HTTPError as e:
print(f"HTTP Error: {e.code}")
print(e.read().decode('utf-8'))
except Exception as e:
print(f"Error: {e}")
"""Replay previously exported cookies against /api/feed and print the raw reply."""
import urllib.request
import json
import os

with open("temp_cookies.json", "r") as fh:
    data = json.load(fh)

# Ensure data is in the expected dict format for the request body.
# A bare list, or a dict without a "credentials" key, gets wrapped;
# note the list case must be tested first (no membership check on a list).
if isinstance(data, list):
    body = {"credentials": data}
elif "credentials" not in data:
    body = {"credentials": data}
else:
    body = data

request = urllib.request.Request(
    "http://localhost:8002/api/feed",
    data=json.dumps(body).encode('utf-8'),
    headers={'Content-Type': 'application/json'},
)

try:
    with urllib.request.urlopen(request) as response:
        print(response.read().decode('utf-8'))
except urllib.error.HTTPError as err:
    print(f"HTTP Error: {err.code}")
    print(err.read().decode('utf-8'))
except Exception as err:
    print(f"Error: {err}")

View file

@ -1,35 +1,35 @@
import requests
import json
import time
BASE_URL = "http://localhost:8002/api/user/search"
def test_search():
print("Testing Search API...")
try:
# Simple query
params = {
"query": "dance",
"limit": 50,
"cursor": 0
}
start = time.time()
res = requests.get(BASE_URL, params=params)
duration = time.time() - start
print(f"Status Code: {res.status_code}")
print(f"Duration: {duration:.2f}s")
if res.status_code == 200:
data = res.json()
print(f"Videos Found: {len(data.get('videos', []))}")
# print(json.dumps(data, indent=2))
else:
print("Error Response:")
print(res.text)
except Exception as e:
print(f"Request Failed: {e}")
if __name__ == "__main__":
test_search()
import requests
import json
import time

BASE_URL = "http://localhost:8002/api/user/search"


def test_search():
    """Time a basic /api/user/search call and summarise the response."""
    print("Testing Search API...")
    try:
        # Simple query
        params = {"query": "dance", "limit": 50, "cursor": 0}

        started = time.time()
        response = requests.get(BASE_URL, params=params)
        elapsed = time.time() - started

        print(f"Status Code: {response.status_code}")
        print(f"Duration: {elapsed:.2f}s")
        if response.status_code == 200:
            payload = response.json()
            print(f"Videos Found: {len(payload.get('videos', []))}")
            # print(json.dumps(payload, indent=2))
        else:
            print("Error Response:")
            print(response.text)
    except Exception as err:
        print(f"Request Failed: {err}")


if __name__ == "__main__":
    test_search()