Enhance video extraction for TikTok Shop product videos

This commit is contained in:
Khoa.vo 2025-12-19 20:47:59 +07:00
parent 168ae19018
commit 587a83fe0d

View file

@ -534,7 +534,7 @@ class PlaywrightManager:
@staticmethod @staticmethod
def _extract_video_data(item: dict) -> Optional[dict]: def _extract_video_data(item: dict) -> Optional[dict]:
"""Extract video data from TikTok API item.""" """Extract video data from TikTok API item, including product/shop videos."""
try: try:
# Handle different API response formats # Handle different API response formats
video_id = item.get("id") or item.get("aweme_id") video_id = item.get("id") or item.get("aweme_id")
@ -546,26 +546,43 @@ class PlaywrightManager:
# Get description # Get description
desc = item.get("desc") or item.get("description") or "" desc = item.get("desc") or item.get("description") or ""
# Check if this is a product/shop video
is_shop_video = bool(item.get("products") or item.get("commerce_info") or item.get("poi_info"))
# Get thumbnail/cover image # Get thumbnail/cover image
thumbnail = None thumbnail = None
video_data = item.get("video", {}) video_data = item.get("video", {})
# Try different thumbnail sources # Try different thumbnail sources
if video_data.get("cover"): thumbnail_sources = [
thumbnail = video_data["cover"] video_data.get("cover"),
elif video_data.get("dynamicCover"): video_data.get("dynamicCover"),
thumbnail = video_data["dynamicCover"] video_data.get("originCover"),
elif video_data.get("originCover"): video_data.get("ai_dynamic_cover", {}).get("url_list", [None])[0] if isinstance(video_data.get("ai_dynamic_cover"), dict) else None,
thumbnail = video_data["originCover"] ]
for src in thumbnail_sources:
if src:
thumbnail = src
break
# Get direct CDN URL (for thin proxy mode) # Get direct CDN URL - try multiple sources (including for shop videos)
cdn_url = None cdn_url = None
if video_data.get("playAddr"): cdn_sources = [
cdn_url = video_data["playAddr"] # Standard sources
elif video_data.get("downloadAddr"): video_data.get("playAddr"),
cdn_url = video_data["downloadAddr"] video_data.get("downloadAddr"),
elif video_data.get("play_addr", {}).get("url_list"): # Bit rate sources (often works for shop videos)
cdn_url = video_data["play_addr"]["url_list"][0] video_data.get("bitrateInfo", [{}])[0].get("PlayAddr", {}).get("UrlList", [None])[0] if video_data.get("bitrateInfo") else None,
# Play URL list
video_data.get("play_addr", {}).get("url_list", [None])[0] if isinstance(video_data.get("play_addr"), dict) else None,
# Download URL list
video_data.get("download_addr", {}).get("url_list", [None])[0] if isinstance(video_data.get("download_addr"), dict) else None,
]
for src in cdn_sources:
if src:
cdn_url = src
break
# Use TikTok page URL as fallback (yt-dlp resolves this) # Use TikTok page URL as fallback (yt-dlp resolves this)
video_url = f"https://www.tiktok.com/@{author}/video/{video_id}" video_url = f"https://www.tiktok.com/@{author}/video/{video_id}"
@ -590,6 +607,8 @@ class PlaywrightManager:
result["views"] = views result["views"] = views
if likes: if likes:
result["likes"] = likes result["likes"] = likes
if is_shop_video:
result["has_product"] = True # Flag for product videos
return result return result
except Exception as e: except Exception as e: