kv-tiktok/backend/api/routes/user.py

449 lines
21 KiB
Python

"""
User API routes - fetch real TikTok profiles, user videos, search results and suggested accounts.
"""
from fastapi import APIRouter, Query, HTTPException
from pydantic import BaseModel
from typing import Optional, List
import httpx
import asyncio
import time
import re
from typing import Optional, List
from core.playwright_manager import PlaywrightManager
from core.tiktok_api_service import TikTokAPIService
# Router that the endpoint decorators in this module (`@router.get(...)`) register on.
router = APIRouter()
class UserProfile(BaseModel):
    """TikTok user profile data.

    Only ``username`` is required; every other field keeps its default when
    the profile lookup in this module cannot supply a value.
    """

    # TikTok handle; the endpoints in this module strip "@" before building the model.
    username: str
    # Display name (filled from the "nickname" field of the TikTok user payload).
    nickname: Optional[str] = None
    # Avatar URL (filled from "avatarLarger", falling back to "avatarMedium").
    avatar: Optional[str] = None
    # Profile bio text (filled from the "signature" field).
    bio: Optional[str] = None
    # Counters filled from the stats payload: "followerCount", "followingCount", "heartCount".
    followers: Optional[int] = None
    following: Optional[int] = None
    likes: Optional[int] = None
    # Verified flag; defaults to False when the payload does not provide one.
    verified: bool = False
@router.get("/profile")
async def get_user_profile(username: str = Query(..., description="TikTok username (without @)")):
    """
    Fetch real TikTok user profile data.

    The stored session cookies are replayed against TikTok's
    ``/api/user/detail/`` endpoint and the response is mapped onto
    ``UserProfile``.

    Args:
        username: TikTok handle; any "@" characters are removed.

    Returns:
        A populated ``UserProfile``, or one carrying only ``username`` when
        TikTok answers with a non-200 status or the request/parsing fails.

    Raises:
        HTTPException: 401 when no stored credentials are available.
    """
    username = username.replace("@", "")

    # Load stored credentials
    cookies, user_agent = PlaywrightManager.load_stored_credentials()
    if not cookies:
        raise HTTPException(status_code=401, detail="Not authenticated")

    # Build cookie header
    cookie_str = "; ".join(f"{c['name']}={c['value']}" for c in cookies)
    headers = {
        "User-Agent": user_agent or PlaywrightManager.DEFAULT_USER_AGENT,
        "Referer": "https://www.tiktok.com/",
        "Cookie": cookie_str,
        "Accept": "application/json",
    }

    # Try to fetch user data from TikTok's internal API.
    # The username comes straight from the request, so it is passed through
    # `params=` (URL-encoded by httpx) instead of being pasted into the URL,
    # where "&", "#" or spaces would corrupt or inject query parameters.
    profile_url = "https://www.tiktok.com/api/user/detail/"
    try:
        async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
            response = await client.get(
                profile_url,
                params={"uniqueId": username},
                headers=headers,
            )
            if response.status_code != 200:
                # Fallback - return basic info
                return UserProfile(username=username)

            data = response.json()
            # `or {}` also covers keys that are present but null in the JSON.
            user_info = data.get("userInfo") or {}
            user = user_info.get("user") or {}
            stats = user_info.get("stats") or {}
            return UserProfile(
                username=username,
                nickname=user.get("nickname"),
                avatar=user.get("avatarLarger") or user.get("avatarMedium"),
                bio=user.get("signature"),
                followers=stats.get("followerCount"),
                following=stats.get("followingCount"),
                likes=stats.get("heartCount"),
                verified=user.get("verified") or False,
            )
    except Exception as e:
        # Best-effort endpoint: any network/parsing problem degrades to the
        # basic profile instead of failing the request.
        print(f"Error fetching profile for {username}: {e}")
        return UserProfile(username=username)
@router.get("/profiles")
async def get_multiple_profiles(usernames: str = Query(..., description="Comma-separated usernames")):
    """
    Fetch multiple TikTok user profiles at once.

    ``usernames`` is split on commas; blank entries are dropped and "@" is
    removed from each remaining name. At most 20 names are accepted
    (HTTP 400 otherwise). Lookups run concurrently, and a lookup that raised
    is replaced by a ``UserProfile`` carrying only the username, so the
    result list lines up one-to-one with the requested names.
    """
    requested = []
    for raw_name in usernames.split(","):
        if raw_name.strip():
            requested.append(raw_name.strip().replace("@", ""))

    if len(requested) > 20:
        raise HTTPException(status_code=400, detail="Max 20 usernames at once")

    # Fetch all profiles concurrently; exceptions come back as result values.
    outcomes = await asyncio.gather(
        *(get_user_profile(name) for name in requested),
        return_exceptions=True,
    )

    return [
        UserProfile(username=name) if isinstance(outcome, Exception) else outcome
        for name, outcome in zip(requested, outcomes)
    ]
@router.get("/videos")
async def get_user_videos(
    username: str = Query(..., description="TikTok username (without @)"),
    limit: int = Query(10, description="Max videos to fetch", ge=1, le=60)
):
    """
    Fetch videos from a TikTok user's profile.

    The direct API service is tried first; when it raises or returns nothing,
    the Playwright scraper is used instead. The response carries the
    username, the videos, their count, which source produced them ("api" or
    "playwright") and the elapsed time in milliseconds.

    Raises:
        HTTPException: 401 without stored credentials, 500 when the
            Playwright fallback fails.
    """
    username = username.replace("@", "")

    cookies, user_agent = PlaywrightManager.load_stored_credentials()
    if not cookies:
        raise HTTPException(status_code=401, detail="Not authenticated")

    print(f"Fetching videos for @{username}...")
    start_time = time.time()

    def build_payload(found, source, elapsed):
        # Common response shape for both sources.
        return {
            "username": username,
            "videos": found,
            "count": len(found),
            "source": source,
            "duration_ms": int(elapsed * 1000),
        }

    # Fast path: direct API call.
    try:
        api_videos = await TikTokAPIService.get_user_videos(username, cookies, user_agent, limit)
        if api_videos:
            elapsed = time.time() - start_time
            print(f"[API] Got {len(api_videos)} videos in {elapsed:.2f}s")
            return build_payload(api_videos, "api", elapsed)
    except Exception as api_error:
        print(f"[API] Failed for {username}: {api_error}")

    # Slow path: Playwright, used when the API failed or returned nothing.
    print(f"[Fallback] Using Playwright for @{username}...")
    try:
        scraped_videos = await PlaywrightManager.fetch_user_videos(username, cookies, user_agent, limit)
        elapsed = time.time() - start_time
        print(f"[Playwright] Got {len(scraped_videos)} videos in {elapsed:.2f}s")
        return build_payload(scraped_videos, "playwright", elapsed)
    except Exception as scrape_error:
        print(f"Error fetching videos for {username}: {scrape_error}")
        raise HTTPException(status_code=500, detail=str(scrape_error))
# Query made only of letters, digits, "_" and "." -> treated as a possible username.
_USERNAME_QUERY_RE = re.compile(r"^[a-zA-Z0-9_\.]+$")
# Matches "trending" requests including common misspellings: hot, trend, trens, hor, hott, trand, etc.
_TREND_QUERY_RE = re.compile(r"(hot|hor|hott)\s*(trend|trens|trand|tred)|(trend|trens|trand)")


def _append_search_debug(message: str) -> None:
    """Append *message* to ``search_debug.log``; failures to write are ignored."""
    try:
        with open("search_debug.log", "a", encoding="utf-8") as f:
            f.write(message)
    except (OSError, ValueError):
        # Debug logging is best-effort and must never break a search request
        # (unwritable directory, unencodable characters, ...).
        pass


@router.get("/search")
async def search_videos(
    query: str = Query(..., description="Search keyword or hashtag"),
    limit: int = Query(20, description="Max videos to fetch", ge=1, le=60),
    cursor: int = Query(0, description="Pagination cursor (offset)")
):
    """
    Search for videos by keyword or hashtag.
    Uses direct API calls for speed (~200-800ms), with Playwright fallback.

    Lookup order:
      1. If the query looks like a username (or starts with "@"), fetch that
         user's videos via API, then Playwright, then yt-dlp. A query that
         started with "@" is a strict user lookup and never falls through to
         keyword search.
      2. Keyword search through the API.
      3. For "trending"-like queries, retry the API with "hot trend".
      4. Keyword search through Playwright (with the same "hot trend" retry).
      5. For username-like queries, a last Playwright profile fetch.

    Returns:
        A dict with "query", "videos", "count", "cursor", "source" and
        "duration_ms". When nothing was found, "videos" is empty and
        "source" is "empty" (or one of the strict-lookup sources).

    Raises:
        HTTPException: 401 when no stored credentials are available.
    """
    # Load stored credentials
    cookies, user_agent = PlaywrightManager.load_stored_credentials()
    if not cookies:
        raise HTTPException(status_code=401, detail="Not authenticated")

    print(f"Searching for: {query} (limit={limit}, cursor={cursor})...")
    start_time = time.time()

    # Always bound: every stage below may raise before assigning, and the
    # later `not videos` checks must then see "no results", not a NameError.
    videos = []

    # [Smart Search] Username Detection Priority
    # If query looks like a username (contains dots, underscores, or starts with @),
    # try fetching that specific user's videos FIRST.
    # This solves issues where searching for exact username returns unrelated content.
    clean_query = query.strip()

    # Handle @ prefix commonly used by users - STRICT MODE
    # If user explicitly types "@", they want a user lookup, NOT a keyword search.
    strict_user_lookup = False
    if clean_query.startswith("@"):
        clean_query = clean_query[1:]
        strict_user_lookup = True

    # Also treat dots/underscores as likely usernames
    is_username_format = bool(_USERNAME_QUERY_RE.match(clean_query)) and len(clean_query) > 2

    _append_search_debug(
        f"\n--- Search: {query} ---\n"
        f"Strict: {strict_user_lookup}, Format: {is_username_format}, Clean: {clean_query}\n"
    )

    if is_username_format or strict_user_lookup:
        print(f"[Smart Search] Query '{query}' identified as username. Strict: {strict_user_lookup}")
        try:
            # Try direct profile fetch via API
            videos = await TikTokAPIService.get_user_videos(clean_query, cookies, user_agent, limit)
            if videos:
                duration = time.time() - start_time
                print(f"[API-Profile-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
                return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_priority", "duration_ms": int(duration * 1000)}

            # Try Playwright fallback BEFORE yt-dlp
            # Playwright scraping provides thumbnails and correct metadata, while yt-dlp flat-playlist does not.
            print(f"[Smart Search] API failed, trying Playwright for user '{query}'...")
            try:
                videos = await PlaywrightManager.fetch_user_videos(clean_query, cookies, user_agent, limit)
                if videos:
                    duration = time.time() - start_time
                    print(f"[Playwright-Profile-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
                    return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_playwright_priority", "duration_ms": int(duration * 1000)}
            except Exception as pw_err:
                print(f"[Smart Search] Playwright profile fetch failed: {pw_err}")

            # Try yt-dlp fallback if Playwright also fails
            print(f"[Smart Search] Playwright failed, trying yt-dlp for user '{query}'...")
            _append_search_debug(f"Attempting yt-dlp for {clean_query}...\n")
            videos = await TikTokAPIService.get_user_videos_via_ytdlp(clean_query, limit)
            _append_search_debug(f"yt-dlp Result: {len(videos) if videos else 0} videos\n")

            if videos:
                duration = time.time() - start_time
                print(f"[yt-dlp-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
                return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_ytdlp", "duration_ms": int(duration * 1000)}

            # If strict usage of "@" was used and we found nothing, DO NOT fallback to generic search.
            # It's better to show "No videos found" than random unrelated results.
            if strict_user_lookup:
                print(f"[Smart Search] Strict lookup for '{query}' found no results. Returning empty.")
                return {"query": query, "videos": [], "count": 0, "cursor": 0, "source": "user_not_found_strict", "duration_ms": int((time.time() - start_time) * 1000)}
        except Exception as e:
            print(f"[Smart Search] Priority profile fetch failed: {e}")
            if strict_user_lookup:
                return {"query": query, "videos": [], "count": 0, "cursor": 0, "source": "error_strict", "duration_ms": int((time.time() - start_time) * 1000)}
            # Fall through to normal search only if NOT strict

    # Try fast API search
    try:
        videos = await TikTokAPIService.search_videos(query, cookies, user_agent, limit, cursor)
        if videos:
            duration = time.time() - start_time
            print(f"[API] Found {len(videos)} videos in {duration:.2f}s")
            return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "api", "duration_ms": int(duration * 1000)}
    except Exception as e:
        print(f"[API] Search failed for {query}: {e}")

    # Fallback Phase 1: Check if query is a "trending" misspelling and retry API if so
    is_trend_query = bool(_TREND_QUERY_RE.search(query.lower()))
    if is_trend_query and (not videos):
        print(f"[Smart Fallback] Query '{query}' detected as trending request. Retrying with 'hot trend'...")
        try:
            # Try normalized query on API
            videos = await TikTokAPIService.search_videos("hot trend", cookies, user_agent, limit, cursor)
            if videos:
                duration = time.time() - start_time
                print(f"[API-Fallback] Found {len(videos)} videos for 'hot trend' in {duration:.2f}s")
                return {"query": "hot trend", "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "api_fallback", "duration_ms": int(duration * 1000)}
        except Exception:
            pass  # Continue to Playwright if this fails

    # Fallback Phase 2: Playwright
    # Fallback to Playwright if API fails or returns empty
    print(f"[Fallback] Using Playwright for search '{query}'...")
    try:
        videos = await PlaywrightManager.search_videos(query, cookies, user_agent, limit, cursor)
        # Smart Fallback Phase 3: If Playwright also fails for trending query, try normalized query
        if not videos and is_trend_query:
            print(f"[Playwright-Fallback] No results for '{query}'. Retrying with 'hot trend'...")
            videos = await PlaywrightManager.search_videos("hot trend", cookies, user_agent, limit, cursor)
    except Exception as e:
        # Don't raise yet, try user fallback
        print(f"Error searching for {query}: {e}")

    # Fallback Phase 4: Exact Username Match (Secondary Fallback)
    # If generic search failed, and query looks like a username, try fetching their profile directly.
    # The profile was already tried at the top; this retries with Playwright in case
    # the earlier attempt aborted on an API error before reaching Playwright.
    if (not videos) and is_username_format:
        print(f"[Smart Fallback] Query '{query}' yielded no search results. Attempting secondary profile fetch (Playwright)...")
        try:
            print(f"[Smart Fallback] API failed, trying Playwright for user '{query}'...")
            # Use the normalized username (the same value the format check ran on),
            # not the raw query which may still carry surrounding whitespace.
            videos = await PlaywrightManager.fetch_user_videos(clean_query, cookies, user_agent, limit)
            if videos:
                duration = time.time() - start_time
                print(f"[Playwright-Profile] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
                return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_playwright", "duration_ms": int(duration * 1000)}
        except Exception as e:
            print(f"[Smart Fallback] Profile fetch failed: {e}")

    if not videos:
        # A 500 implies the server is broken; an empty list implies no results.
        # Return the empty structure (with the same keys as every other
        # response) so the frontend can handle it uniformly.
        return {"query": query, "videos": [], "count": 0, "cursor": cursor, "source": "empty", "duration_ms": int((time.time() - start_time) * 1000)}

    return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "playwright", "duration_ms": int((time.time() - start_time) * 1000)}
# Cache for suggested accounts
# Module-level dict read and written by the /suggested endpoint:
#   "accounts"   - account list from the last successful dynamic fetch
#   "updated_at" - time.time() of that fetch (0 until the first one)
_suggested_cache = {
    "accounts": [],
    "updated_at": 0
}
# Maximum age, in seconds, for which a cached account list is served.
CACHE_TTL = 3600  # 1 hour cache
@router.get("/suggested")
async def get_suggested_accounts(
    limit: int = Query(50, description="Max accounts to return", ge=10, le=100)
):
    """
    Fetch trending/suggested Vietnamese TikTok creators.

    Results of a successful dynamic fetch are cached in ``_suggested_cache``
    for ``CACHE_TTL`` seconds together with the ``limit`` they were fetched
    with. A cached list is reused only for requests asking for no more than
    that limit; a larger request triggers a new fetch, so a cache filled by
    a small request cannot short-change a later, larger one.

    Returns:
        ``{"accounts": [...], "cached": bool}``; the static fallback list
        additionally carries ``"fallback": True``. When a fetch is not
        possible or fails (no credentials, timeout, error, fewer than 5
        accounts), a still-fresh cache is served if present, otherwise the
        static fallback list.
    """
    cached_accounts = _suggested_cache["accounts"]
    cache_is_fresh = bool(cached_accounts) and (time.time() - _suggested_cache["updated_at"]) < CACHE_TTL
    # Entries written without a recorded limit are assumed to satisfy only
    # as many accounts as they actually hold.
    cached_limit = _suggested_cache.get("limit", len(cached_accounts))

    def degraded_response():
        # Best answer available without a successful fetch.
        if cache_is_fresh:
            return {"accounts": cached_accounts[:limit], "cached": True}
        return {"accounts": get_fallback_accounts()[:limit], "cached": False, "fallback": True}

    # Check cache
    if cache_is_fresh and limit <= cached_limit:
        return {"accounts": cached_accounts[:limit], "cached": True}

    # Load stored credentials
    cookies, user_agent = PlaywrightManager.load_stored_credentials()
    if not cookies:
        # Cannot fetch when not authenticated
        return degraded_response()

    print("Fetching fresh suggested accounts from TikTok...")
    try:
        # Enforce a strict timeout to prevent hanging or heavy resource usage blocking the server
        # If Playwright takes > 15 seconds, we default to fallback.
        try:
            accounts = await asyncio.wait_for(
                PlaywrightManager.fetch_suggested_accounts(cookies, user_agent, limit),
                timeout=15.0
            )
        except asyncio.TimeoutError:
            print("Suggest fetch timed out, using fallback.")
            accounts = []

        if accounts and len(accounts) >= 5:  # Need at least 5 accounts from dynamic fetch
            _suggested_cache["accounts"] = accounts
            _suggested_cache["updated_at"] = time.time()
            _suggested_cache["limit"] = limit
            return {"accounts": accounts[:limit], "cached": False}

        # Too few dynamic results - TikTok API is unreliable
        return degraded_response()
    except Exception as e:
        print(f"Error fetching suggested accounts: {e}")
        return degraded_response()
async def fetch_profiles_with_avatars(accounts: list, cookies: list, user_agent: str) -> dict:
    """Fetch actual profile data with avatars for a list of accounts.

    Args:
        accounts: Account dicts; each is looked up by its "username" key.
        cookies: Cookie dicts with "name" and "value" keys, sent as the
            ``Cookie`` header.
        user_agent: User-Agent to send; the PlaywrightManager default is
            used when this is empty.

    Returns:
        ``{"accounts": [...], "cached": False, "enriched": True}`` with one
        entry per input account, in input order. An account whose lookup
        fails or returns no user data is passed through unchanged.
    """
    cookie_str = "; ".join(f"{c['name']}={c['value']}" for c in cookies)
    headers = {
        "User-Agent": user_agent or PlaywrightManager.DEFAULT_USER_AGENT,
        "Referer": "https://www.tiktok.com/",
        "Cookie": cookie_str,
        "Accept": "application/json",
    }
    detail_url = "https://www.tiktok.com/api/user/detail/"

    enriched = []
    async with httpx.AsyncClient(timeout=10.0) as client:
        for acc in accounts:
            try:
                # `params=` URL-encodes the username instead of pasting it
                # into the query string verbatim.
                res = await client.get(
                    detail_url,
                    params={"uniqueId": acc["username"]},
                    headers=headers,
                )
                if res.status_code == 200:
                    # `or {}` also covers keys that are present but null.
                    user_info = res.json().get("userInfo") or {}
                    user = user_info.get("user") or {}
                    stats = user_info.get("stats") or {}
                    if user:
                        enriched.append({
                            "username": acc["username"],
                            "nickname": user.get("nickname") or acc.get("nickname", acc["username"]),
                            "avatar": user.get("avatarThumb") or user.get("avatarMedium"),
                            "followers": stats.get("followerCount", 0),
                            "verified": user.get("verified", False),
                            "region": "VN"
                        })
                        continue
            except Exception as e:
                # `.get` so that an account without "username" cannot raise
                # again inside the handler and abort the whole batch.
                print(f"Error fetching profile for {acc.get('username')}: {e}")

            # Fallback: use original data without avatar
            enriched.append(acc)

    return {"accounts": enriched, "cached": False, "enriched": True}
def get_fallback_accounts():
    """Return the static list of popular Vietnamese TikTokers (verified usernames).

    Every call builds a new list of new dicts, each with the keys
    "username", "nickname" and "region" (always "VN"), in a fixed order.
    """
    # (username, nickname) pairs of verified Vietnamese TikTok accounts
    known_creators = (
        ("cciinnn", "👑 CiiN (Bùi Thảo Ly)"),
        ("hoaa.hanassii", "💃 Hoa Hanassii"),
        ("lebong95", "💪 Lê Bống"),
        ("tieu_hy26", "👰 Tiểu Hý"),
        ("hieuthuhai2222", "🎧 HIEUTHUHAI"),
        ("mtp.fan", "🎤 Sơn Tùng M-TP"),
        ("changmakeup", "💄 Changmakeup"),
        ("theanh28entertainment", "🎬 Theanh28"),
        ("linhbarbie", "👗 Linh Barbie"),
        ("phuonglykchau", "✨ Phương Ly"),
        ("phimtieutrang", "📺 Tiểu Trang"),
        ("nhunguyendy", "💕 Như Nguyễn"),
        ("trucnhantv", "🎤 Trúc Nhân"),
        ("justvietanh", "😄 Just Việt Anh"),
        ("minngu.official", "🌸 Min NGU"),
        ("quangdangofficial", "🕺 Quang Đăng"),
        ("minhhangofficial", "👑 Minh Hằng"),
        ("dungntt", "🎭 Dũng NTT"),
        ("chipu88", "🎤 Chi Pu"),
        ("kaydinh", "🎵 Kay Dinh"),
    )
    return [
        {"username": handle, "nickname": display_name, "region": "VN"}
        for handle, display_name in known_creators
    ]