# kv-tiktok/backend/core/tiktok_api_service.py

"""
TikTok Direct API Service - Fast API calls without browser automation.

Replaces Playwright crawling with direct HTTP requests to TikTok's internal APIs.
Expected performance: ~100-500ms vs 5-15 seconds with Playwright.
"""

import httpx
import asyncio
from typing import List, Optional, Dict, Any
from urllib.parse import quote

from core.playwright_manager import PlaywrightManager


class TikTokAPIService:
    """
    Direct TikTok API calls for instant data retrieval.

    Key endpoints used:
    - /api/user/detail/?uniqueId={username} - Get user profile and secUid
    - /api/post/item_list/?secUid={secUid}&count={count} - Get user's videos
    - /api/search/general/full/?keyword={query} - Search videos
    """

    BASE_URL = "https://www.tiktok.com"
    DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"

    @staticmethod
    def _build_headers(cookies: List[dict], user_agent: str = None) -> dict:
        """Build request headers with cookies and user agent."""
        cookie_str = "; ".join([f"{c['name']}={c['value']}" for c in cookies])

        return {
            "User-Agent": user_agent or TikTokAPIService.DEFAULT_USER_AGENT,
            "Referer": "https://www.tiktok.com/",
            "Cookie": cookie_str,
            "Accept": "application/json, text/plain, */*",
            "Accept-Language": "en-US,en;q=0.9",
            "sec-ch-ua": '"Not_A Brand";v="8", "Chromium";v="120", "Google Chrome";v="120"',
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": '"Windows"',
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-origin",
        }

    @staticmethod
    def _extract_video_data(item: dict) -> Optional[dict]:
        """
        Extract video data from TikTok API response item.
        Matches the format used by PlaywrightManager._extract_video_data().
        """
        try:
            if not isinstance(item, dict):
                return None

            video_id = item.get("id") or item.get("aweme_id")

            # Get author info
            author_data = item.get("author", {})
            author = author_data.get("uniqueId") or author_data.get("unique_id") or "unknown"

            # Get description
            desc = item.get("desc") or item.get("description") or ""

            # Check if this is a product/shop video
            is_shop_video = bool(item.get("products") or item.get("commerce_info") or item.get("poi_info"))

            # Get thumbnail/cover image
            thumbnail = None
            video_data = item.get("video", {})

            thumbnail_sources = [
                video_data.get("cover"),
                video_data.get("dynamicCover"),
                video_data.get("originCover"),
            ]
            for src in thumbnail_sources:
                if src:
                    thumbnail = src
                    break

            # Get direct CDN URL
            cdn_url = None
            cdn_sources = [
                video_data.get("playAddr"),
                video_data.get("downloadAddr"),
            ]
            for src in cdn_sources:
                if src:
                    cdn_url = src
                    break

            # Video page URL
            video_url = f"https://www.tiktok.com/@{author}/video/{video_id}"

            # Get stats
            stats = item.get("stats", {}) or item.get("statistics", {})
            views = stats.get("playCount") or stats.get("play_count") or 0
            likes = stats.get("diggCount") or stats.get("digg_count") or 0
            comments = stats.get("commentCount") or stats.get("comment_count") or 0
            shares = stats.get("shareCount") or stats.get("share_count") or 0

            if video_id and author:
                result = {
                    "id": str(video_id),
                    "url": video_url,
                    "author": author,
                    "description": desc[:200] if desc else f"Video by @{author}"
                }
                if thumbnail:
                    result["thumbnail"] = thumbnail
                if cdn_url:
                    result["cdn_url"] = cdn_url
                if views:
                    result["views"] = views
                if likes:
                    result["likes"] = likes
                if comments:
                    result["comments"] = comments
                if shares:
                    result["shares"] = shares
                if is_shop_video:
                    result["has_product"] = True
                return result

        except Exception as e:
            print(f"DEBUG: Error extracting video data: {e}")

        return None

    @staticmethod
    async def get_user_sec_uid(username: str, cookies: List[dict], user_agent: str = None) -> Optional[str]:
        """
        Get user's secUid from their profile.
        secUid is required for the video list API.
        """
        headers = TikTokAPIService._build_headers(cookies, user_agent)
        profile_url = f"{TikTokAPIService.BASE_URL}/api/user/detail/?uniqueId={username}"

        try:
            async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
                response = await client.get(profile_url, headers=headers)

                if response.status_code != 200:
                    print(f"DEBUG: Failed to get user profile, status: {response.status_code}")
                    return None

                data = response.json()
                user_info = data.get("userInfo", {})
                user = user_info.get("user", {})
                sec_uid = user.get("secUid")

                if sec_uid:
                    print(f"DEBUG: Got secUid for @{username}: {sec_uid[:20]}...")
                    return sec_uid

        except Exception as e:
            print(f"DEBUG: Error getting secUid for {username}: {e}")

        return None

    @staticmethod
    async def get_user_videos(
        username: str,
        cookies: List[dict],
        user_agent: str = None,
        limit: int = 20,
        cursor: int = 0
    ) -> List[dict]:
        """
        Fetch videos from a user's profile using direct API call.

        Args:
            username: TikTok username (without @)
            cookies: Auth cookies list
            user_agent: Browser user agent
            limit: Max videos to return
            cursor: Pagination cursor for more videos

        Returns:
            List of video dictionaries
        """
        print(f"DEBUG: [API] Fetching videos for @{username} (limit={limit})...")

        # Step 1: Get secUid
        sec_uid = await TikTokAPIService.get_user_sec_uid(username, cookies, user_agent)

        if not sec_uid:
            print(f"DEBUG: [API] Could not get secUid for @{username}")
            return []

        # Step 2: Fetch video list
        headers = TikTokAPIService._build_headers(cookies, user_agent)

        # Build video list API URL
        video_list_url = (
            f"{TikTokAPIService.BASE_URL}/api/post/item_list/?"
            f"secUid={quote(sec_uid)}&"
            f"count={min(limit, 35)}&"  # TikTok max per request is ~35
            f"cursor={cursor}"
        )

        try:
            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
                response = await client.get(video_list_url, headers=headers)

                if response.status_code != 200:
                    print(f"DEBUG: [API] Video list failed, status: {response.status_code}")
                    return []

                data = response.json()

                # Extract videos from response
                items = data.get("itemList", []) or data.get("aweme_list", [])

                videos = []
                for item in items[:limit]:
                    video_data = TikTokAPIService._extract_video_data(item)
                    if video_data:
                        videos.append(video_data)

                print(f"DEBUG: [API] Successfully fetched {len(videos)} videos for @{username}")
                return videos

        except Exception as e:
            print(f"DEBUG: [API] Error fetching videos for {username}: {e}")
            return []

    @staticmethod
    async def search_videos(
        query: str,
        cookies: List[dict],
        user_agent: str = None,
        limit: int = 20,
        cursor: int = 0
    ) -> List[dict]:
        """
        Search for videos using direct API call.

        Args:
            query: Search keyword or hashtag
            cookies: Auth cookies list
            user_agent: Browser user agent
            limit: Max videos to return
            cursor: Pagination offset

        Returns:
            List of video dictionaries
        """
        print(f"DEBUG: [API] Searching for '{query}' (limit={limit}, cursor={cursor})...")

        headers = TikTokAPIService._build_headers(cookies, user_agent)

        # Build search API URL
        # TikTok uses different search endpoints, try the main one
        search_url = (
            f"{TikTokAPIService.BASE_URL}/api/search/general/full/?"
            f"keyword={quote(query)}&"
            f"offset={cursor}&"
            f"search_source=normal_search&"
            f"is_filter_search=0&"
            f"web_search_code=%7B%22tiktok%22%3A%7B%22client_params_x%22%3A%7B%22search_engine%22%3A%7B%22ies_mt_user_live_video_card_use_498%22%3A1%7D%7D%7D%7D"
        )

        try:
            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
                response = await client.get(search_url, headers=headers)

                if response.status_code != 200:
                    print(f"DEBUG: [API] Search failed, status: {response.status_code}")
                    # Try alternative search endpoint
                    return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)

                data = response.json()

                # Search results structure
                videos = []

                # Try different response formats
                item_list = data.get("data", [])
                if not item_list:
                    item_list = data.get("itemList", [])
                if not item_list:
                    item_list = data.get("item_list", [])

                for item in item_list[:limit]:
                    # Search results may have nested structure
                    video_item = item.get("item", item)
                    video_data = TikTokAPIService._extract_video_data(video_item)
                    if video_data:
                        videos.append(video_data)

                if videos:
                    print(f"DEBUG: [API] Successfully found {len(videos)} videos for '{query}'")
                    return videos
                else:
                    # Fallback to alternative endpoint
                    return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)

        except Exception as e:
            print(f"DEBUG: [API] Error searching for {query}: {e}")
            return await TikTokAPIService._search_videos_alt(query, cookies, user_agent, limit, cursor)

    @staticmethod
    async def _search_videos_alt(
        query: str,
        cookies: List[dict],
        user_agent: str = None,
        limit: int = 20,
        cursor: int = 0
    ) -> List[dict]:
        """
        Alternative search using video-specific endpoint.
        """
        print(f"DEBUG: [API] Trying alternative search endpoint...")

        headers = TikTokAPIService._build_headers(cookies, user_agent)

        # Try video-specific search endpoint
        search_url = (
            f"{TikTokAPIService.BASE_URL}/api/search/item/full/?"
            f"keyword={quote(query)}&"
            f"offset={cursor}&"
            f"count={min(limit, 30)}"
        )

        try:
            async with httpx.AsyncClient(timeout=15.0, follow_redirects=True) as client:
                response = await client.get(search_url, headers=headers)

                if response.status_code != 200:
                    print(f"DEBUG: [API] Alt search also failed, status: {response.status_code}")
                    return []

                data = response.json()

                videos = []
                item_list = data.get("itemList", []) or data.get("item_list", []) or data.get("data", [])

                for item in item_list[:limit]:
                    video_data = TikTokAPIService._extract_video_data(item)
                    if video_data:
                        videos.append(video_data)

                print(f"DEBUG: [API] Alt search found {len(videos)} videos")
                return videos

        except Exception as e:
            print(f"DEBUG: [API] Alt search error: {e}")
            return []


    @staticmethod
    async def get_user_videos_via_ytdlp(username: str, limit: int = 20) -> List[dict]:
        """
        Fetch user videos using yt-dlp (Robust fallback).
        """
        print(f"DEBUG: [yt-dlp] Fetching videos for @{username}...")
        import subprocess
        import json

        # Determine yt-dlp path (assume it's in the same python environment)
        import sys
        import os

        # Helper to find executable
        def get_yt_dlp_path():
            # Try same dir as python executable
            path = os.path.join(os.path.dirname(sys.executable), 'yt-dlp.exe')
            if os.path.exists(path): return path
            # Try global
            return 'yt-dlp'

            get_yt_dlp_path(),
            f"https://www.tiktok.com/@{username}",
            # "--flat-playlist",  # Disabled to get full metadata (thumbnails)
            "--skip-download",    # Don't download video files
            "--dump-json",
            "--playlist-end", str(limit),
            "--no-warnings",
            "--ignore-errors"     # Skip private/removed videos

        try:
            # Run async subprocess
            process = await asyncio.create_subprocess_exec(
                *cmd,
                stdout=asyncio.subprocess.PIPE,
                stderr=asyncio.subprocess.PIPE
            )

            stdout, stderr = await process.communicate()

            if process.returncode != 0:
                print(f"DEBUG: [yt-dlp] Failed: {stderr.decode()}")
                return []

            videos = []
            output = stdout.decode('utf-8')

            for line in output.splitlines():
                try:
                    if not line.strip(): continue
                    data = json.loads(line)

                    # Map yt-dlp format to our format
                    video_id = data.get('id')

                    # Handle Author Name logic
                    # yt-dlp sometimes returns numeric ID as uploader_id for profiles.
                    # We prefer the 'uploader' (nickname) if it looks handle-like, or the original search username.
                    raw_uploader_id = data.get('uploader_id')

                    # Heuristic: If uploader_id is numeric, prefer the search username
                    if raw_uploader_id and raw_uploader_id.isdigit():
                         unique_id = username
                    else:
                         unique_id = raw_uploader_id or username

                    # yt-dlp might not give full details in flat-playlist mode,
                    # but usually gives enough for a list
                    # Construct basic object
                    video = {
                        "id": video_id,
                        "url": data.get('url') or f"https://www.tiktok.com/@{unique_id}/video/{video_id}",
                        "author": unique_id,
                        "description": data.get('title') or "",
                        "thumbnail": data.get('thumbnail'), # Flat playlist might not have this?
                        "views": data.get('view_count', 0),
                        "likes": data.get('like_count', 0)
                    }

                    # If thumbnail is missing, we might need to assume or use a placeholder
                    # or yt-dlp flat playlist sometimes misses it.
                    # But verifying the 'dump-json' output above, it usually has metadata.

                    videos.append(video)
                except Exception as parse_err:
                    continue

            print(f"DEBUG: [yt-dlp] Found {len(videos)} videos")
            return videos

        except Exception as e:
            print(f"DEBUG: [yt-dlp] Execution error: {e}")
            return []

# Shared module-level instance. Every method on TikTokAPIService is a
# @staticmethod, so this object carries no state of its own; it is simply a
# convenient handle to import.
tiktok_api = TikTokAPIService()