449 lines
21 KiB
Python
449 lines
21 KiB
Python
"""
|
|
User profile API - fetch real TikTok user data.
|
|
"""
|
|
|
|
from fastapi import APIRouter, Query, HTTPException
|
|
from pydantic import BaseModel
|
|
from typing import Optional, List
|
|
import httpx
|
|
import asyncio
|
|
import time
|
|
import re
|
|
from typing import Optional, List
|
|
|
|
from core.playwright_manager import PlaywrightManager
|
|
from core.tiktok_api_service import TikTokAPIService
|
|
|
|
# Router on which the endpoint functions in this module register themselves
# through their @router.get(...) decorators.
router = APIRouter()
|
|
|
|
|
|
class UserProfile(BaseModel):
    """TikTok user profile data.

    Only ``username`` is required. Every other field has a default so that a
    bare ``UserProfile(username=...)`` can be returned as a fallback when the
    upstream lookup fails or yields no data.
    """

    # Handle the profile was requested for (callers strip any "@" first).
    username: str
    # Display name; filled from the upstream "nickname" field.
    nickname: Optional[str] = None
    # Avatar URL; filled from "avatarLarger", falling back to "avatarMedium".
    avatar: Optional[str] = None
    # Profile bio; filled from the upstream "signature" field.
    bio: Optional[str] = None
    # Follower count; filled from stats "followerCount".
    followers: Optional[int] = None
    # Following count; filled from stats "followingCount".
    following: Optional[int] = None
    # Total likes; filled from stats "heartCount".
    likes: Optional[int] = None
    # Verified flag; False when the upstream payload does not provide it.
    verified: bool = False
|
|
|
|
|
|
@router.get("/profile")
|
|
async def get_user_profile(username: str = Query(..., description="TikTok username (without @)")):
|
|
"""
|
|
Fetch real TikTok user profile data.
|
|
"""
|
|
username = username.replace("@", "")
|
|
|
|
# Load stored credentials
|
|
cookies, user_agent = PlaywrightManager.load_stored_credentials()
|
|
|
|
if not cookies:
|
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
|
|
|
# Build cookie header
|
|
cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])
|
|
|
|
headers = {
|
|
"User-Agent": user_agent or PlaywrightManager.DEFAULT_USER_AGENT,
|
|
"Referer": "https://www.tiktok.com/",
|
|
"Cookie": cookie_str,
|
|
"Accept": "application/json",
|
|
}
|
|
|
|
# Try to fetch user data from TikTok's internal API
|
|
profile_url = f"https://www.tiktok.com/api/user/detail/?uniqueId={username}"
|
|
|
|
try:
|
|
async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
|
|
response = await client.get(profile_url, headers=headers)
|
|
|
|
if response.status_code != 200:
|
|
# Fallback - return basic info
|
|
return UserProfile(username=username)
|
|
|
|
data = response.json()
|
|
user_info = data.get("userInfo", {})
|
|
user = user_info.get("user", {})
|
|
stats = user_info.get("stats", {})
|
|
|
|
return UserProfile(
|
|
username=username,
|
|
nickname=user.get("nickname"),
|
|
avatar=user.get("avatarLarger") or user.get("avatarMedium"),
|
|
bio=user.get("signature"),
|
|
followers=stats.get("followerCount"),
|
|
following=stats.get("followingCount"),
|
|
likes=stats.get("heartCount"),
|
|
verified=user.get("verified", False)
|
|
)
|
|
|
|
except Exception as e:
|
|
print(f"Error fetching profile for {username}: {e}")
|
|
# Return basic fallback
|
|
return UserProfile(username=username)
|
|
|
|
|
|
@router.get("/profiles")
|
|
async def get_multiple_profiles(usernames: str = Query(..., description="Comma-separated usernames")):
|
|
"""
|
|
Fetch multiple TikTok user profiles at once.
|
|
"""
|
|
username_list = [u.strip().replace("@", "") for u in usernames.split(",") if u.strip()]
|
|
|
|
if len(username_list) > 20:
|
|
raise HTTPException(status_code=400, detail="Max 20 usernames at once")
|
|
|
|
# Fetch all profiles concurrently
|
|
tasks = [get_user_profile(u) for u in username_list]
|
|
results = await asyncio.gather(*tasks, return_exceptions=True)
|
|
|
|
profiles = []
|
|
for i, result in enumerate(results):
|
|
if isinstance(result, Exception):
|
|
profiles.append(UserProfile(username=username_list[i]))
|
|
else:
|
|
profiles.append(result)
|
|
|
|
return profiles
|
|
|
|
|
|
@router.get("/videos")
|
|
async def get_user_videos(
|
|
username: str = Query(..., description="TikTok username (without @)"),
|
|
limit: int = Query(10, description="Max videos to fetch", ge=1, le=60)
|
|
):
|
|
"""
|
|
Fetch videos from a TikTok user's profile.
|
|
Uses direct API calls for speed (~100-500ms), with Playwright fallback.
|
|
"""
|
|
username = username.replace("@", "")
|
|
|
|
# Load stored credentials
|
|
cookies, user_agent = PlaywrightManager.load_stored_credentials()
|
|
|
|
if not cookies:
|
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
|
|
|
print(f"Fetching videos for @{username}...")
|
|
start_time = time.time()
|
|
|
|
# Try fast API first
|
|
try:
|
|
videos = await TikTokAPIService.get_user_videos(username, cookies, user_agent, limit)
|
|
if videos:
|
|
duration = time.time() - start_time
|
|
print(f"[API] Got {len(videos)} videos in {duration:.2f}s")
|
|
return {"username": username, "videos": videos, "count": len(videos), "source": "api", "duration_ms": int(duration * 1000)}
|
|
except Exception as e:
|
|
print(f"[API] Failed for {username}: {e}")
|
|
|
|
# Fallback to Playwright if API fails or returns empty
|
|
print(f"[Fallback] Using Playwright for @{username}...")
|
|
try:
|
|
videos = await PlaywrightManager.fetch_user_videos(username, cookies, user_agent, limit)
|
|
duration = time.time() - start_time
|
|
print(f"[Playwright] Got {len(videos)} videos in {duration:.2f}s")
|
|
return {"username": username, "videos": videos, "count": len(videos), "source": "playwright", "duration_ms": int(duration * 1000)}
|
|
except Exception as e:
|
|
print(f"Error fetching videos for {username}: {e}")
|
|
raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
@router.get("/search")
|
|
async def search_videos(
|
|
query: str = Query(..., description="Search keyword or hashtag"),
|
|
limit: int = Query(20, description="Max videos to fetch", ge=1, le=60),
|
|
cursor: int = Query(0, description="Pagination cursor (offset)")
|
|
):
|
|
"""
|
|
Search for videos by keyword or hashtag.
|
|
Uses direct API calls for speed (~200-800ms), with Playwright fallback.
|
|
"""
|
|
# Load stored credentials
|
|
cookies, user_agent = PlaywrightManager.load_stored_credentials()
|
|
|
|
if not cookies:
|
|
raise HTTPException(status_code=401, detail="Not authenticated")
|
|
|
|
print(f"Searching for: {query} (limit={limit}, cursor={cursor})...")
|
|
start_time = time.time()
|
|
|
|
# [Smart Search] Username Detection Priority
|
|
# If query looks like a username (contains dots, underscores, or starts with @),
|
|
# try fetching that specific user's videos FIRST.
|
|
# This solves issues where searching for exact username returns unrelated content.
|
|
clean_query = query.strip()
|
|
|
|
# Handle @ prefix commonly used by users - STRICT MODE
|
|
# If user explicitly types "@", they want a user lookup, NOT a keyword search.
|
|
strict_user_lookup = False
|
|
if clean_query.startswith("@"):
|
|
clean_query = clean_query[1:]
|
|
strict_user_lookup = True
|
|
|
|
# Also treat dots/underscores as likely usernames
|
|
is_username_format = bool(re.match(r"^[a-zA-Z0-9_\.]+$", clean_query)) and len(clean_query) > 2
|
|
|
|
# DEBUG LOGGING TO FILE
|
|
try:
|
|
with open("search_debug.log", "a", encoding="utf-8") as f:
|
|
f.write(f"\n--- Search: {query} ---\n")
|
|
f.write(f"Strict: {strict_user_lookup}, Format: {is_username_format}, Clean: {clean_query}\n")
|
|
except: pass
|
|
|
|
if is_username_format or strict_user_lookup:
|
|
print(f"[Smart Search] Query '{query}' identified as username. Strict: {strict_user_lookup}")
|
|
try:
|
|
# Try direct profile fetch via API
|
|
videos = await TikTokAPIService.get_user_videos(clean_query, cookies, user_agent, limit)
|
|
if videos:
|
|
duration = time.time() - start_time
|
|
print(f"[API-Profile-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
|
|
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_priority", "duration_ms": int(duration * 1000)}
|
|
|
|
# Try Playwright fallback BEFORE yt-dlp
|
|
# Playwright scraping provides thumbnails and correct metadata, while yt-dlp flat-playlist does not.
|
|
print(f"[Smart Search] API failed, trying Playwright for user '{query}'...")
|
|
try:
|
|
videos = await PlaywrightManager.fetch_user_videos(clean_query, cookies, user_agent, limit)
|
|
if videos:
|
|
duration = time.time() - start_time
|
|
print(f"[Playwright-Profile-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
|
|
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_playwright_priority", "duration_ms": int(duration * 1000)}
|
|
except Exception as pw_err:
|
|
print(f"[Smart Search] Playwright profile fetch failed: {pw_err}")
|
|
|
|
# Try yt-dlp fallback if Playwright also fails
|
|
print(f"[Smart Search] Playwright failed, trying yt-dlp for user '{query}'...")
|
|
|
|
# Log we are trying ytdlp
|
|
try:
|
|
with open("search_debug.log", "a", encoding="utf-8") as f:
|
|
f.write(f"Attempting yt-dlp for {clean_query}...\n")
|
|
except: pass
|
|
|
|
videos = await TikTokAPIService.get_user_videos_via_ytdlp(clean_query, limit)
|
|
|
|
try:
|
|
with open("search_debug.log", "a", encoding="utf-8") as f:
|
|
f.write(f"yt-dlp Result: {len(videos)} videos\n")
|
|
except: pass
|
|
|
|
if videos:
|
|
duration = time.time() - start_time
|
|
print(f"[yt-dlp-Priority] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
|
|
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_ytdlp", "duration_ms": int(duration * 1000)}
|
|
|
|
# If strict usage of "@" was used and we found nothing, DO NOT fallback to generic search.
|
|
# It's better to show "No videos found" than random unrelated results.
|
|
if strict_user_lookup:
|
|
print(f"[Smart Search] Strict lookup for '{query}' found no results. Returning empty.")
|
|
return {"query": query, "videos": [], "count": 0, "cursor": 0, "source": "user_not_found_strict", "duration_ms": int((time.time() - start_time) * 1000)}
|
|
|
|
|
|
except Exception as e:
|
|
print(f"[Smart Search] Priority profile fetch failed: {e}")
|
|
if strict_user_lookup:
|
|
return {"query": query, "videos": [], "count": 0, "cursor": 0, "source": "error_strict", "duration_ms": int((time.time() - start_time) * 1000)}
|
|
# Fall through to normal search only if NOT strict
|
|
|
|
# Try fast API search
|
|
try:
|
|
videos = await TikTokAPIService.search_videos(query, cookies, user_agent, limit, cursor)
|
|
if videos:
|
|
duration = time.time() - start_time
|
|
print(f"[API] Found {len(videos)} videos in {duration:.2f}s")
|
|
return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "api", "duration_ms": int(duration * 1000)}
|
|
except Exception as e:
|
|
print(f"[API] Search failed for {query}: {e}")
|
|
|
|
# Fallback Phase 1: Check if query is a "trending" misspelling and retry API if so
|
|
# Regex for: hot, trend, trens, hor, hott, trand, etc.
|
|
trend_pattern = r"(hot|hor|hott)\s*(trend|trens|trand|tred)|(trend|trens|trand)"
|
|
is_trend_query = bool(re.search(trend_pattern, query.lower()))
|
|
|
|
if is_trend_query and (not videos):
|
|
print(f"[Smart Fallback] Query '{query}' detected as trending request. Retrying with 'hot trend'...")
|
|
try:
|
|
# Try normalized query on API
|
|
videos = await TikTokAPIService.search_videos("hot trend", cookies, user_agent, limit, cursor)
|
|
if videos:
|
|
duration = time.time() - start_time
|
|
print(f"[API-Fallback] Found {len(videos)} videos for 'hot trend' in {duration:.2f}s")
|
|
return {"query": "hot trend", "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "api_fallback", "duration_ms": int(duration * 1000)}
|
|
except Exception:
|
|
pass # Continue to Playwright if this fails
|
|
|
|
# Fallback Phase 2: Playwright
|
|
# Fallback to Playwright if API fails or returns empty
|
|
print(f"[Fallback] Using Playwright for search '{query}'...")
|
|
try:
|
|
videos = await PlaywrightManager.search_videos(query, cookies, user_agent, limit, cursor)
|
|
|
|
# Smart Fallback Phase 3: If Playwright also fails for trending query, try normalized query
|
|
if not videos and is_trend_query:
|
|
print(f"[Playwright-Fallback] No results for '{query}'. Retrying with 'hot trend'...")
|
|
videos = await PlaywrightManager.search_videos("hot trend", cookies, user_agent, limit, cursor)
|
|
|
|
except Exception as e:
|
|
print(f"Error searching for {query}: {e}")
|
|
# Don't raise yet, try user fallback
|
|
pass
|
|
|
|
# Fallback Phase 4: Exact Username Match (Secondary Fallback)
|
|
# If generic search failed, and query looks like a username, try fetching their profile directly (if not tried already)
|
|
# Note: We already tried this at the top, but we try again here with Playwright as a backup if the API profile fetch failed earlier.
|
|
|
|
if (not videos) and is_username_format:
|
|
print(f"[Smart Fallback] Query '{query}' yielded no search results. Attempting secondary profile fetch (Playwright)...")
|
|
try:
|
|
# We already tried API profile fetch at start, so try Playwright now
|
|
print(f"[Smart Fallback] API failed, trying Playwright for user '{query}'...")
|
|
videos = await PlaywrightManager.fetch_user_videos(query, cookies, user_agent, limit)
|
|
if videos:
|
|
duration = time.time() - start_time
|
|
print(f"[Playwright-Profile] Found {len(videos)} videos for user '{query}' in {duration:.2f}s")
|
|
return {"query": query, "videos": videos, "count": len(videos), "cursor": 0, "source": "user_profile_playwright", "duration_ms": int(duration * 1000)}
|
|
|
|
except Exception as e:
|
|
print(f"[Smart Fallback] Profile fetch failed: {e}")
|
|
pass
|
|
|
|
if not videos:
|
|
# Only raise error if we truly found nothing after all attempts
|
|
# or return empty list instead of 500?
|
|
# A 500 implies server broken. Empty list implies no results.
|
|
# Let's return empty structure to be safe for frontend
|
|
return {"query": query, "videos": [], "count": 0, "cursor": cursor, "source": "empty"}
|
|
|
|
return {"query": query, "videos": videos, "count": len(videos), "cursor": cursor + len(videos), "source": "playwright", "duration_ms": int((time.time() - start_time) * 1000)}
|
|
|
|
|
|
# Cache for suggested accounts
|
|
_suggested_cache = {
|
|
"accounts": [],
|
|
"updated_at": 0
|
|
}
|
|
CACHE_TTL = 3600 # 1 hour cache
|
|
|
|
|
|
@router.get("/suggested")
|
|
async def get_suggested_accounts(
|
|
limit: int = Query(50, description="Max accounts to return", ge=10, le=100)
|
|
):
|
|
"""
|
|
Fetch trending/suggested Vietnamese TikTok creators.
|
|
Uses TikTok's discover API and caches results for 1 hour.
|
|
"""
|
|
import time
|
|
|
|
# Check cache
|
|
if _suggested_cache["accounts"] and (time.time() - _suggested_cache["updated_at"]) < CACHE_TTL:
|
|
return {"accounts": _suggested_cache["accounts"][:limit], "cached": True}
|
|
|
|
# Load stored credentials
|
|
cookies, user_agent = PlaywrightManager.load_stored_credentials()
|
|
|
|
if not cookies:
|
|
# Return fallback static list if not authenticated
|
|
return {"accounts": get_fallback_accounts()[:limit], "cached": False, "fallback": True}
|
|
|
|
print("Fetching fresh suggested accounts from TikTok...")
|
|
|
|
try:
|
|
# Enforce a strict timeout to prevent hanging or heavy resource usage blocking the server
|
|
# If Playwright takes > 15 seconds, we default to fallback.
|
|
try:
|
|
accounts = await asyncio.wait_for(
|
|
PlaywrightManager.fetch_suggested_accounts(cookies, user_agent, limit),
|
|
timeout=15.0
|
|
)
|
|
except asyncio.TimeoutError:
|
|
print("Suggest fetch timed out, using fallback.")
|
|
accounts = []
|
|
|
|
if accounts and len(accounts) >= 5: # Need at least 5 accounts from dynamic fetch
|
|
_suggested_cache["accounts"] = accounts
|
|
_suggested_cache["updated_at"] = time.time()
|
|
return {"accounts": accounts[:limit], "cached": False}
|
|
else:
|
|
# Just return static accounts directly without API calls - TikTok API is unreliable
|
|
return {"accounts": get_fallback_accounts()[:limit], "cached": False, "fallback": True}
|
|
|
|
except Exception as e:
|
|
print(f"Error fetching suggested accounts: {e}")
|
|
return {"accounts": get_fallback_accounts()[:limit], "cached": False, "fallback": True}
|
|
|
|
|
|
async def fetch_profiles_with_avatars(accounts: list, cookies: list, user_agent: str) -> dict:
    """Fetch actual profile data with avatars for a list of accounts.

    Args:
        accounts: Dicts that each carry at least a ``"username"`` key
            (``"nickname"`` is used as a fallback display name when present).
        cookies: Stored cookie dicts with ``"name"`` and ``"value"`` keys.
        user_agent: User-Agent to send; falls back to
            ``PlaywrightManager.DEFAULT_USER_AGENT`` when empty.

    Returns:
        ``{"accounts": [...], "cached": False, "enriched": True}``. Accounts
        are looked up one at a time; an account whose lookup fails or returns
        no user data is passed through unchanged.
    """
    cookie_str = "; ".join(f"{c['name']}={c['value']}" for c in cookies)

    headers = {
        "User-Agent": user_agent or PlaywrightManager.DEFAULT_USER_AGENT,
        "Referer": "https://www.tiktok.com/",
        "Cookie": cookie_str,
        "Accept": "application/json",
    }

    # The username goes through `params` so httpx percent-encodes it instead
    # of it being spliced verbatim into the query string.
    detail_url = "https://www.tiktok.com/api/user/detail/"

    enriched = []

    async with httpx.AsyncClient(timeout=10.0) as client:
        for acc in accounts:
            try:
                res = await client.get(
                    detail_url,
                    params={"uniqueId": acc["username"]},
                    headers=headers,
                )

                if res.status_code == 200:
                    data = res.json()
                    # `or {}` also covers keys that are present but null.
                    user_info = data.get("userInfo") or {}
                    user = user_info.get("user") or {}
                    stats = user_info.get("stats") or {}

                    if user:
                        enriched.append({
                            "username": acc["username"],
                            "nickname": user.get("nickname") or acc.get("nickname", acc["username"]),
                            "avatar": user.get("avatarThumb") or user.get("avatarMedium"),
                            "followers": stats.get("followerCount", 0),
                            "verified": user.get("verified", False),
                            "region": "VN"
                        })
                        continue

            except Exception as e:
                # `.get` keeps the handler itself from raising KeyError when
                # the failure was a missing "username" key.
                print(f"Error fetching profile for {acc.get('username')}: {e}")

            # Fallback: use original data without avatar
            enriched.append(acc)

    return {"accounts": enriched, "cached": False, "enriched": True}
|
|
|
|
|
|
def get_fallback_accounts():
    """Return the static fallback list of popular Vietnamese TikTok creators.

    A new list of new dicts is built on every call. Each entry has the keys
    ``username``, ``nickname`` and ``region`` (always ``"VN"``). The usernames
    were marked as verified by the original author.
    """
    # (username, nickname) pairs, in the order they are returned.
    creators = (
        ("cciinnn", "👑 CiiN (Bùi Thảo Ly)"),
        ("hoaa.hanassii", "💃 Hoa Hanassii"),
        ("lebong95", "💪 Lê Bống"),
        ("tieu_hy26", "👰 Tiểu Hý"),
        ("hieuthuhai2222", "🎧 HIEUTHUHAI"),
        ("mtp.fan", "🎤 Sơn Tùng M-TP"),
        ("changmakeup", "💄 Changmakeup"),
        ("theanh28entertainment", "🎬 Theanh28"),
        ("linhbarbie", "👗 Linh Barbie"),
        ("phuonglykchau", "✨ Phương Ly"),
        ("phimtieutrang", "📺 Tiểu Trang"),
        ("nhunguyendy", "💕 Như Nguyễn"),
        ("trucnhantv", "🎤 Trúc Nhân"),
        ("justvietanh", "😄 Just Việt Anh"),
        ("minngu.official", "🌸 Min NGU"),
        ("quangdangofficial", "🕺 Quang Đăng"),
        ("minhhangofficial", "👑 Minh Hằng"),
        ("dungntt", "🎭 Dũng NTT"),
        ("chipu88", "🎤 Chi Pu"),
        ("kaydinh", "🎵 Kay Dinh"),
    )
    return [
        {"username": handle, "nickname": display_name, "region": "VN"}
        for handle, display_name in creators
    ]
|