"""
Category Scraper for PhimMoiChill
Orchestrates category-based crawling to build themed sections
"""

import asyncio
from typing import Dict, List, Any

from rophim_scraper import RophimScraper
from category_discovery import get_categories


class PhimMoiChillCategoryScraper:
    """
    Advanced scraper that looks for categories first, then crawls them.

    All public coroutines return plain dictionaries (copies of each movie
    object's attribute dict), so callers may mutate the results without
    touching the objects held by the underlying scraper.
    """

    # Homepage sections as (section_key, category_slug) pairs.
    # 'hot', 'new_releases' and 'top10' all read the same category; they
    # differ only in page number, limit and badge (see get_all_sections).
    _HOME_SECTIONS = (
        ('hot', 'danh-sach/phim-le'),
        ('new_releases', 'danh-sach/phim-le'),
        ('series', 'danh-sach/phim-bo'),
        ('animated', 'danh-sach/hoat-hinh'),
        ('cinema', 'the-loai/phim-chieu-rap'),
        ('top10', 'danh-sach/phim-le'),
        ('vietnamese', 'quoc-gia/viet-nam'),
    )

    # Fixed badge text per homepage section ('top10' is computed per item).
    _STATIC_BADGES = {
        'hot': 'HOT',
        'new_releases': 'NEW',
        'cinema': 'CINEMA',
    }

    # Sections repeated on every page of the infinite-scroll feed.
    _MAIN_CATEGORIES = (
        {'title': 'Phim Hot (Movies)', 'slug': 'danh-sach/phim-le'},
        {'title': 'Phim Bộ Mới (Series)', 'slug': 'danh-sach/phim-bo'},
        {'title': 'Hoạt Hình & Anime', 'slug': 'danh-sach/hoat-hinh'},
        {'title': 'Phim Chiếu Rạp', 'slug': 'the-loai/phim-chieu-rap'},
        {'title': 'Phim Việt Nam', 'slug': 'quoc-gia/viet-nam'},
    )

    # Sub-sections shown for each dedicated view.
    _VIEW_SECTIONS = {
        'movies': (
            {'title': 'Phim Lẻ Mới', 'slug': 'danh-sach/phim-le'},
            {'title': 'Hành Động', 'slug': 'the-loai/hanh-dong'},
            {'title': 'Tình Cảm', 'slug': 'the-loai/tinh-cam'},
            {'title': 'Kinh Dị', 'slug': 'the-loai/kinh-di'},
            {'title': 'Viễn Tưởng', 'slug': 'the-loai/vien-tuong'},
            {'title': 'Hài Hước', 'slug': 'the-loai/hai-huoc'},
        ),
        'series': (
            {'title': 'Phim Bộ Mới', 'slug': 'danh-sach/phim-bo'},
            {'title': 'Hàn Quốc', 'slug': 'quoc-gia/han-quoc'},
            {'title': 'Trung Quốc', 'slug': 'quoc-gia/trung-quoc'},
            {'title': 'Âu Mỹ', 'slug': 'quoc-gia/au-my'},
            {'title': 'Thái Lan', 'slug': 'quoc-gia/thai-lan'},
        ),
        'animation': (
            {'title': 'Anime Mới', 'slug': 'danh-sach/hoat-hinh'},
            {'title': 'Học Đường', 'slug': 'the-loai/hoc-duong'},
            {'title': 'Nhật Bản', 'slug': 'quoc-gia/nhat-ban'},
        ),
        'cinema': (
            {'title': 'Phim Chiếu Rạp Hot', 'slug': 'the-loai/phim-chieu-rap'},
            {'title': 'Hành Động', 'slug': 'the-loai/hanh-dong'},
            {'title': 'Hài Hước', 'slug': 'the-loai/hai-huoc'},
        ),
    }

    def __init__(self):
        self.scraper = RophimScraper()

    async def close(self):
        """Close the underlying scraper."""
        await self.scraper.close()

    @staticmethod
    def _movie_to_dict(movie) -> Dict[str, Any]:
        """Return a shallow copy of a movie object's attribute dict."""
        # Copy instead of handing out movie.__dict__ itself: callers (and
        # get_all_sections) add keys such as 'badge', which must not leak
        # back into the scraper's objects or into other sections.
        return dict(vars(movie))

    async def _fetch_sections(self, categories, page: int,
                              limit: int = 84) -> List[Dict[str, Any]]:
        """
        Fetch every category in ``categories`` concurrently.

        Each category is a dict with 'title' and 'slug'. Categories that
        fail or come back empty are dropped from the result; the order of
        the remaining sections follows ``categories``.
        """
        async def fetch_one(cat):
            try:
                movies = await self.scraper.get_category(cat['slug'], page, limit)
                if not movies:
                    return None
                return {
                    'title': cat['title'],
                    'key': cat['slug'],
                    'movies': [self._movie_to_dict(m) for m in movies],
                }
            except Exception as e:
                # Best effort: one broken category must not sink the page.
                # `except Exception` (not a bare except) lets task
                # cancellation and KeyboardInterrupt propagate.
                print(f"Error fetching section {cat['slug']} (page {page}): {e}")
                return None

        fetched = await asyncio.gather(*(fetch_one(cat) for cat in categories))
        return [section for section in fetched if section is not None]

    async def get_all_sections(self) -> Dict[str, List[Dict]]:
        """
        Build complete homepage structure by crawling key categories

        Returns a dict mapping each section key ('hot', 'new_releases',
        'series', 'animated', 'cinema', 'top10', 'vietnamese') to a list of
        movie dicts. A section whose fetch fails maps to an empty list.
        """
        # Discover categories first. The result is not consulted below (the
        # slugs are fixed); the call is retained from the original flow.
        # NOTE(review): presumably this warms a discovery cache - confirm.
        await get_categories()

        async def fetch_section(key, slug):
            try:
                # Top 10 needs exactly ten entries; other sections take
                # enough for 2-3 rows.
                limit = 10 if key == 'top10' else 42
                # New Releases reads page 2 so it differs from Hot (page 1).
                page = 2 if key == 'new_releases' else 1
                movies = await self.scraper.get_category(slug, page=page, limit=limit)

                # Fallback for cinema if empty - try action genre
                if key == 'cinema' and not movies:
                    movies = await self.scraper.get_category(
                        'the-loai/hanh-dong', page=1, limit=limit)

                movie_dicts = []
                for idx, m in enumerate(movies, 1):
                    d = self._movie_to_dict(m)
                    # Add metadata badges
                    if key == 'top10':
                        d['ranking'] = idx
                        d['badge'] = f'TOP {idx}'
                    elif key in self._STATIC_BADGES:
                        d['badge'] = self._STATIC_BADGES[key]
                    movie_dicts.append(d)
                return key, movie_dicts
            except Exception as e:
                print(f"Error fetching section {key} ({slug}): {e}")
                return key, []

        fetched_results = await asyncio.gather(
            *(fetch_section(key, slug) for key, slug in self._HOME_SECTIONS))
        return dict(fetched_results)

    # Individual fetchers for specific endpoints
    async def get_hot_movies(self, limit=24):
        """Return up to ``limit`` movie dicts from page 1 of 'danh-sach/phim-le'."""
        movies = await self.scraper.get_category('danh-sach/phim-le', 1, limit)
        return [self._movie_to_dict(m) for m in movies]

    async def get_new_releases(self, limit=24):
        """Return up to ``limit`` movie dicts from page 1 of 'danh-sach/phim-le'."""
        # NOTE(review): get_all_sections reads page 2 for 'new_releases' to
        # differ from 'hot'; this endpoint still reads page 1, so it returns
        # the same listing as get_hot_movies. Confirm which is intended.
        movies = await self.scraper.get_category('danh-sach/phim-le', 1, limit)
        return [self._movie_to_dict(m) for m in movies]

    async def get_cinema_releases(self, limit=24):
        """
        Return up to ``limit`` movie dicts from 'the-loai/phim-chieu-rap',
        falling back to 'danh-sach/phim-le' when that category is empty.
        """
        movies = await self.scraper.get_category('the-loai/phim-chieu-rap', 1, limit)
        if not movies:
            # Fallback: Phim Le
            movies = await self.scraper.get_category('danh-sach/phim-le', 1, limit)
        return [self._movie_to_dict(m) for m in movies]

    async def get_top_10(self):
        """Return the first 10 movie dicts from page 1 of 'danh-sach/phim-le'."""
        movies = await self.scraper.get_category('danh-sach/phim-le', 1, 10)
        return [self._movie_to_dict(m) for m in movies]

    async def get_mixed_sections(self, page: int) -> List[Dict[str, Any]]:
        """
        Fetch subsequent pages of Main Categories for infinite scroll.
        Strategy: Keep the same structure (Hot, Series, etc.) but load Page N.

        Returns a list of {'title', 'key', 'movies'} dicts; categories that
        fail or have no movies on this page are omitted. Titles are kept
        identical across pages.
        """
        return await self._fetch_sections(self._MAIN_CATEGORIES, page)

    async def get_view_sections(self, view: str, page: int) -> List[Dict[str, Any]]:
        """
        Fetch structured sections for specific views (Movies, Series, etc.)
        mimicking the Main Page design with sliders.

        ``view`` is one of 'movies', 'series', 'animation' or 'cinema'; any
        other value yields an empty list. Returns the same section shape as
        get_mixed_sections.
        """
        sub_sections = self._VIEW_SECTIONS.get(view)
        if not sub_sections:
            return []
        return await self._fetch_sections(sub_sections, page)


# Wrapper function for main.py (Sync compatibility)
def get_categories_sync() -> Dict[str, List[Dict]]:
    """
    Synchronous wrapper to get all category sections

    Runs get_all_sections() via asyncio.run and always closes the scraper
    afterwards. On any error, prints a message and returns every section
    key mapped to an empty list.
    """
    async def _run():
        scraper = PhimMoiChillCategoryScraper()
        try:
            return await scraper.get_all_sections()
        finally:
            await scraper.close()

    try:
        return asyncio.run(_run())
    except Exception as e:
        print(f"Sync Category Crawl Error: {e}")
        return {
            'hot': [],
            'new_releases': [],
            'top10': [],
            'cinema': [],
            'vietnamese': [],
            'animated': [],
            'series': [],
        }