from flask import Flask, render_template, request, redirect, url_for, jsonify, send_file, Response, stream_with_context, session, flash
import os
import sys
import subprocess
import json
import requests
import sqlite3
from werkzeug.security import generate_password_hash, check_password_hash
import yt_dlp
from functools import wraps
import yt_dlp
from functools import wraps
from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled
import re
import heapq
import threading
import uuid
import datetime
import time



# Fix for OMP: Error #15
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"

app = Flask(__name__)
app.secret_key = 'super_secret_key_change_this'  # Required for sessions

# Ensure data directory exists for persistence
DATA_DIR = "data"
if not os.path.exists(DATA_DIR):
    os.makedirs(DATA_DIR)

DB_NAME = os.path.join(DATA_DIR, "kvtube.db")

# --- Database Setup ---
def init_db():
    conn = sqlite3.connect(DB_NAME)
    c = conn.cursor()
    # Users Table
    c.execute('''CREATE TABLE IF NOT EXISTS users (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    username TEXT UNIQUE NOT NULL,
                    password TEXT NOT NULL
                )''')
    # Saved/History Table
    # type: 'history' or 'saved'
    c.execute('''CREATE TABLE IF NOT EXISTS user_videos (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    user_id INTEGER,
                    video_id TEXT,
                    title TEXT,
                    thumbnail TEXT,
                    type TEXT,
                    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP,
                    FOREIGN KEY(user_id) REFERENCES users(id)
                )''')
    # Cache Table for video metadata/streams
    c.execute('''CREATE TABLE IF NOT EXISTS video_cache (
                    video_id TEXT PRIMARY KEY,
                    data TEXT,
                    expires_at DATETIME
                )''')
    conn.commit()
    conn.close()

# Run init
init_db()

# Transcription Task Status
transcription_tasks = {}

def get_db_connection():

    conn = sqlite3.connect(DB_NAME)
    conn.row_factory = sqlite3.Row
    return conn

# --- Auth Helpers Removed ---
# Use client-side storage for all user data

# --- Auth Routes Removed ---

@app.template_filter('format_views')
def format_views(views):
    if not views: return '0'
    try:
        num = int(views)
        if num >= 1000000: return f"{num / 1000000:.1f}M"
        if num >= 1000: return f"{num / 1000:.0f}K"
        return f"{num:,}"
    except:
        return str(views)

@app.template_filter('format_date')
def format_date(value):
    if not value: return 'Recently'
    from datetime import datetime, timedelta
    try:
        # Handle YYYYMMDD
        if len(str(value)) == 8 and str(value).isdigit():
            dt = datetime.strptime(str(value), '%Y%m%d')
        # Handle Timestamp
        elif isinstance(value, (int, float)):
            dt = datetime.fromtimestamp(value)
        # Handle already formatted (YYYY-MM-DD)
        else:
             # Try common formats
             try: dt = datetime.strptime(str(value), '%Y-%m-%d')
             except: return str(value)
             
        now = datetime.now()
        diff = now - dt
        
        if diff.days > 365:
            return f"{diff.days // 365} years ago"
        if diff.days > 30:
            return f"{diff.days // 30} months ago"
        if diff.days > 0:
            return f"{diff.days} days ago"
        if diff.seconds > 3600:
            return f"{diff.seconds // 3600} hours ago"
        return "Just now"
    except:
        return str(value)

# Configuration for local video path - configurable via env var
VIDEO_DIR = os.environ.get('KVTUBE_VIDEO_DIR', './videos')

@app.route('/')
def index():
    return render_template('index.html', page='home')

@app.route('/results')
def results():
    query = request.args.get('search_query', '')
    return render_template('index.html', page='results', query=query)

@app.route('/my-videos')
def my_videos():
    # Purely client-side rendering now
    return render_template('my_videos.html')

@app.route('/api/save_video', methods=['POST'])
def save_video():
    # Deprecated endpoint - client-side handled
    return jsonify({'success': True, 'message': 'Use local storage'})
def save_video():
    data = request.json
    video_id = data.get('id')
    title = data.get('title')
    thumbnail = data.get('thumbnail')
    action_type = data.get('type', 'history') # 'history' or 'saved'
    
    conn = get_db_connection()
    
    # Check if already exists to prevent duplicates (optional, strictly for 'saved')
    if action_type == 'saved':
        exists = conn.execute('SELECT id FROM user_videos WHERE user_id = ? AND video_id = ? AND type = ?', 
                             (session['user_id'], video_id, 'saved')).fetchone()
        if exists:
            conn.close()
            return jsonify({'status': 'already_saved'})

    conn.execute('INSERT INTO user_videos (user_id, video_id, title, thumbnail, type) VALUES (?, ?, ?, ?, ?)',
                 (1, video_id, title, thumbnail, action_type)) # Default user_id 1
    conn.commit()
    conn.close()
    return jsonify({'status': 'success'})

@app.route('/api/history')
def get_history():
    conn = get_db_connection()
    rows = conn.execute('SELECT video_id as id, title, thumbnail FROM user_videos WHERE type = "history" ORDER BY timestamp DESC LIMIT 50').fetchall()
    conn.close()
    return jsonify([dict(row) for row in rows])

@app.route('/api/suggested')
def get_suggested():
    # Simple recommendation based on history: search for "trending" related to the last 3 viewed channels/titles
    conn = get_db_connection()
    history = conn.execute('SELECT title FROM user_videos WHERE type = "history" ORDER BY timestamp DESC LIMIT 3').fetchall()
    conn.close()
    
    if not history:
        return jsonify(fetch_videos("trending", limit=20))
        
    all_suggestions = []
    with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
        queries = [f"{row['title']} related" for row in history]
        results = list(executor.map(lambda q: fetch_videos(q, limit=10), queries))
        for res in results:
            all_suggestions.extend(res)
            
    # Remove duplicates and shuffle
    unique_vids = {v['id']: v for v in all_suggestions}.values()
    import random
    final_list = list(unique_vids)
    random.shuffle(final_list)
    
    return jsonify(final_list[:30])


@app.route('/stream/<path:filename>')
def stream_local(filename):
    return send_from_directory(VIDEO_DIR, filename)

@app.route('/settings')
def settings():
    return render_template('settings.html', page='settings')

@app.route('/video_proxy')
def video_proxy():
    url = request.args.get('url')
    if not url:
        return "No URL provided", 400
    
    # Forward headers to mimic browser and support seeking
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    }
    
    # Support Range requests (scrubbing)
    range_header = request.headers.get('Range')
    if range_header:
        headers['Range'] = range_header
    
    try:
        req = requests.get(url, headers=headers, stream=True, timeout=30)
        
        # Handle HLS (M3U8) Rewriting - CRITICAL for 1080p+ and proper sync
        content_type = req.headers.get('content-type', '').lower()
        # Extract URL path without query params for checking extension
        url_path = url.split('?')[0]
        is_manifest = (url_path.endswith('.m3u8') or 
                       'application/x-mpegurl' in content_type or
                       'application/vnd.apple.mpegurl' in content_type)
        
        if is_manifest:
            content = req.text
            base_url = url.rsplit('/', 1)[0]
            new_lines = []
            
            for line in content.splitlines():
                if line.strip() and not line.startswith('#'):
                    # It's a segment or sub-playlist
                    # If relative, make absolute
                    if not line.startswith('http'):
                        full_url = f"{base_url}/{line}"
                    else:
                        full_url = line
                    
                    # Proxy it - use urllib.parse.quote with safe parameter
                    from urllib.parse import quote
                    quoted_url = quote(full_url, safe='')
                    new_lines.append(f"/video_proxy?url={quoted_url}")
                else:
                    new_lines.append(line)
            
            return Response('\n'.join(new_lines), content_type='application/vnd.apple.mpegurl')

        # Standard Stream Proxy (Binary)
        # We exclude headers that might confuse the browser/flask
        excluded_headers = ['content-encoding', 'content-length', 'transfer-encoding', 'connection']
        response_headers = [(name, value) for (name, value) in req.headers.items()
                           if name.lower() not in excluded_headers]
        
        return Response(stream_with_context(req.iter_content(chunk_size=8192)), 
                        status=req.status_code,
                        headers=response_headers,
                        content_type=req.headers.get('content-type'))
    except Exception as e:
        print(f"Proxy Error: {e}")
        return str(e), 500

@app.route('/watch')
def watch():
    video_id = request.args.get('v')
    local_file = request.args.get('local')
    
    if local_file:
        return render_template('watch.html', video_type='local', src=url_for('stream_local', filename=local_file), title=local_file)
    
    if not video_id:
        return "No video ID provided", 400
    return render_template('watch.html', video_type='youtube', video_id=video_id)

@app.route('/channel/<channel_id>')
def channel(channel_id):
    if not channel_id:
        return redirect(url_for('index'))
    
    try:
        # Robustness: Resolve name to ID if needed (Metadata only fetch)
        real_id_or_url = channel_id
        is_search_fallback = False
        
        if not channel_id.startswith('UC') and not channel_id.startswith('@'):
            # Simple resolve logic - reusing similar block from before but optimized for metadata
             search_cmd = [
                sys.executable, '-m', 'yt_dlp',
                f'ytsearch1:{channel_id}',
                '--dump-json',
                '--default-search', 'ytsearch',
                '--no-playlist'
            ]
             try:
                 proc_search = subprocess.run(search_cmd, capture_output=True, text=True)
                 if proc_search.returncode == 0:
                     first_result = json.loads(proc_search.stdout.splitlines()[0])
                     if first_result.get('channel_id'):
                         real_id_or_url = first_result.get('channel_id')
                         is_search_fallback = True
             except: pass

        # Fetch basic channel info (Avatar/Banner)
        # We use a very short playlist fetch just to get the channel dict
        channel_info = {
            'id': real_id_or_url, # Use resolved ID for API calls
            'title': channel_id if not is_search_fallback else 'Loading...',
            'avatar': None,
            'banner': None,
            'subscribers': None
        }
        
        # Determine target URL for metadata fetch
        target_url = real_id_or_url
        if target_url.startswith('UC'): target_url = f'https://www.youtube.com/channel/{target_url}'
        elif target_url.startswith('@'): target_url = f'https://www.youtube.com/{target_url}'
            
        cmd = [
            sys.executable, '-m', 'yt_dlp',
            target_url,
            '--dump-json',
            '--flat-playlist',
            '--playlist-end', '1', # Fetch just 1 to get metadata
            '--no-warnings'
        ]
        
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        stdout, stderr = proc.communicate()
        
        if stdout:
            try:
                first = json.loads(stdout.splitlines()[0])
                channel_info['title'] = first.get('channel') or first.get('uploader') or channel_info['title']
                channel_info['id'] = first.get('channel_id') or channel_info['id']
                # Try to get avatar/banner if available in flat dump (often NOT, but title/id are key)
            except: pass

        # Render shell - videos fetched via JS
        return render_template('channel.html', channel=channel_info)
        
    except Exception as e:
        return f"Error loading channel: {str(e)}", 500

@app.route('/api/related')
def get_related_videos():
    video_id = request.args.get('v')
    title = request.args.get('title')
    page = int(request.args.get('page', 1))
    limit = int(request.args.get('limit', 10))
    
    if not title and not video_id:
        return jsonify({'error': 'Video ID or Title required'}), 400
        
    try:
        query = f"{title} related" if title else f"{video_id} related"
        
        # Calculate pagination
        # Page 1: 0-10 (but usually fetched by get_stream_info)
        # Page 2: 10-20
        start = (page - 1) * limit
        end = start + limit
        
        videos = fetch_videos(query, limit=limit, playlist_start=start+1, playlist_end=end)
        return jsonify(videos)
    except Exception as e:
        print(f"Error fetching related: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/api/download')
def get_download_url():
    """Get a direct MP4 download URL for a video"""
    video_id = request.args.get('v')
    if not video_id:
        return jsonify({'error': 'No video ID'}), 400
    
    try:
        url = f"https://www.youtube.com/watch?v={video_id}"
        
        # Use format that avoids HLS/DASH manifests (m3u8)
        # Prefer progressive download formats
        ydl_opts = {
            'format': 'bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best[protocol!*=m3u8]/best',
            'noplaylist': True,
            'quiet': True,
            'no_warnings': True,
            'skip_download': True,
            'youtube_include_dash_manifest': False,  # Avoid DASH
            'youtube_include_hls_manifest': False,   # Avoid HLS
        }
        
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(url, download=False)
            
            # Try to get URL that's NOT an m3u8
            download_url = info.get('url', '')
            
            # If still m3u8, try getting from formats directly
            if '.m3u8' in download_url or not download_url:
                formats = info.get('formats', [])
                # Find best non-HLS format
                for f in reversed(formats):
                    f_url = f.get('url', '')
                    f_ext = f.get('ext', '')
                    f_protocol = f.get('protocol', '')
                    if f_url and 'm3u8' not in f_url and f_ext == 'mp4':
                        download_url = f_url
                        break
            
            title = info.get('title', 'video')
            
            if download_url and '.m3u8' not in download_url:
                return jsonify({
                    'url': download_url,
                    'title': title,
                    'ext': 'mp4'
                })
            else:
                # Fallback: return YouTube link for manual download
                return jsonify({
                    'error': 'Direct download not available. Try a video downloader site.',
                    'fallback_url': url
                }), 200
                
    except Exception as e:
        print(f"Download URL error: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/api/channel/videos')
def get_channel_videos():
    channel_id = request.args.get('id')
    page = int(request.args.get('page', 1))
    limit = int(request.args.get('limit', 20))
    sort_mode = request.args.get('sort', 'latest')
    filter_type = request.args.get('filter_type', 'video') # 'video' or 'shorts'
    
    if not channel_id: return jsonify([])
    
    try:
        # Calculate playlist range
        start = (page - 1) * limit + 1
        end = start + limit - 1
        
        # Resolve channel_id if it's not a proper YouTube ID
        resolved_id = channel_id
        if not channel_id.startswith('UC') and not channel_id.startswith('@'):
            # Try to resolve by searching
            search_cmd = [
                sys.executable, '-m', 'yt_dlp',
                f'ytsearch1:{channel_id}',
                '--dump-json',
                '--default-search', 'ytsearch',
                '--no-playlist'
            ]
            try:
                proc_search = subprocess.run(search_cmd, capture_output=True, text=True, timeout=15)
                if proc_search.returncode == 0:
                    first_result = json.loads(proc_search.stdout.splitlines()[0])
                    if first_result.get('channel_id'):
                        resolved_id = first_result.get('channel_id')
            except: pass
        
        # Construct URL based on ID type AND Filter Type
        if resolved_id.startswith('UC'): 
            base_url = f'https://www.youtube.com/channel/{resolved_id}'
        elif resolved_id.startswith('@'): 
            base_url = f'https://www.youtube.com/{resolved_id}'
        else: 
            base_url = f'https://www.youtube.com/channel/{resolved_id}'
        
        target_url = base_url
        if filter_type == 'shorts':
            target_url += '/shorts'
        elif filter_type == 'video':
            target_url += '/videos'
        
        playlist_args = ['--playlist-start', str(start), '--playlist-end', str(end)]
        
        if sort_mode == 'oldest':
             playlist_args = ['--playlist-reverse', '--playlist-start', str(start), '--playlist-end', str(end)]

        cmd = [
            sys.executable, '-m', 'yt_dlp',
            target_url,
            '--dump-json',
            '--flat-playlist',
            '--no-warnings'
        ] + playlist_args
        
        proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        stdout, stderr = proc.communicate()
        
        videos = []
        for line in stdout.splitlines():
            try:
                v = json.loads(line)
                dur_str = None
                if v.get('duration'):
                    m, s = divmod(int(v['duration']), 60)
                    h, m = divmod(m, 60)
                    dur_str = f"{h}:{m:02d}:{s:02d}" if h else f"{m}:{s:02d}"
                    
                videos.append({
                    'id': v.get('id'),
                    'title': v.get('title'),
                    'thumbnail': f"https://i.ytimg.com/vi/{v.get('id')}/mqdefault.jpg",
                    'view_count': v.get('view_count') or 0,
                    'duration': dur_str,
                    'upload_date': v.get('upload_date'),
                    'uploader': v.get('uploader') or v.get('channel') or v.get('uploader_id') or '',
                    'channel': v.get('channel') or v.get('uploader') or '',
                    'channel_id': v.get('channel_id') or resolved_id
                })
            except: continue
            
        return jsonify(videos)
    except Exception as e:
        print(f"API Error: {e}")
        return jsonify([])

@app.route('/api/get_stream_info')
def get_stream_info():
    video_id = request.args.get('v')
    if not video_id:
        return jsonify({'error': 'No video ID'}), 400
    
    try:
        # 1. Check Cache
        import time
        conn = get_db_connection()
        cached = conn.execute('SELECT data, expires_at FROM video_cache WHERE video_id = ?', (video_id,)).fetchone()
        
        current_time = time.time()
        if cached:
            # Check expiry (stored as unix timestamp or datetime string, we'll use timestamp for simplicity)
            try:
                expires_at = float(cached['expires_at'])
                if current_time < expires_at:
                    data = json.loads(cached['data'])
                    conn.close()
                    # Re-proxy the URL just in case, or use cached if valid. 
                    # Actually proxy url requires encoding, let's reconstruct it to be safe.
                    from urllib.parse import quote
                    proxied_url = f"/video_proxy?url={quote(data['original_url'], safe='')}"
                    data['stream_url'] = proxied_url
                    
                    # Add cache hit header for debug
                    response = jsonify(data)
                    response.headers['X-Cache'] = 'HIT'
                    return response
            except:
                pass # Invalid cache, fall through
        
        # 2. Fetch from YouTube (Library Optimization)
        url = f"https://www.youtube.com/watch?v={video_id}"
        
        ydl_opts = {
            'format': 'best[ext=mp4]/best',
            'noplaylist': True,
            'quiet': True,
            'no_warnings': True,
            'skip_download': True,
            'force_ipv4': True,
            'socket_timeout': 10,
        }

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            try:
                info = ydl.extract_info(url, download=False)
            except Exception as e:
                print(f"❌ yt-dlp error for {video_id}: {str(e)}")
                return jsonify({'error': f'Stream extraction failed: {str(e)}'}), 500

        stream_url = info.get('url')
        if not stream_url:
             return jsonify({'error': 'No stream URL found in metadata'}), 500

        # Fetch Related Videos (Fallback to search if not provided)
        # We use the title + " related" to find relevant content
        related_videos = []
        try:
            search_query = f"{info.get('title', '')} related"
            related_videos = fetch_videos(search_query, limit=20)
        except:
            pass

        # Extract Subtitles (English preferred)
        subtitle_url = None
        start_lang = 'en'
        
        subs = info.get('subtitles') or {}
        auto_subs = info.get('automatic_captions') or {}
        
        # DEBUG: Print subtitle info
        print(f"Checking subtitles for {video_id}")
        print(f"Manual Subs keys: {list(subs.keys())}")
        print(f"Auto Subs keys: {list(auto_subs.keys())}")

        # Check manual subs first
        if 'en' in subs:
            subtitle_url = subs['en'][0]['url']
        elif 'vi' in subs:  # Vietnamese fallback
            subtitle_url = subs['vi'][0]['url']
        # Check auto subs (usually available)
        elif 'en' in auto_subs:
            subtitle_url = auto_subs['en'][0]['url']
        elif 'vi' in auto_subs:
            subtitle_url = auto_subs['vi'][0]['url']
        
        # If still none, just pick the first one from manual then auto
        if not subtitle_url:
            if subs:
                first_key = list(subs.keys())[0]
                subtitle_url = subs[first_key][0]['url']
            elif auto_subs:
                first_key = list(auto_subs.keys())[0]
                subtitle_url = auto_subs[first_key][0]['url']
        
        print(f"Selected Subtitle URL: {subtitle_url}")

        # 3. Construct Response Data
        response_data = {
            'original_url': stream_url,
            'title': info.get('title', 'Unknown Title'),
            'description': info.get('description', ''),
            'uploader': info.get('uploader', ''),
            'uploader_id': info.get('uploader_id', ''),
            'channel_id': info.get('channel_id', ''),
            'upload_date': info.get('upload_date', ''),
            'view_count': info.get('view_count', 0),
            'related': related_videos,
            'subtitle_url': subtitle_url
        }
        
        # 4. Cache It (valid for 1 hour = 3600s)
        # YouTube URLs expire in ~6 hours usually.
        expiry = current_time + 3600 
        conn.execute('INSERT OR REPLACE INTO video_cache (video_id, data, expires_at) VALUES (?, ?, ?)',
                     (video_id, json.dumps(response_data), expiry))
        conn.commit()
        conn.close()
        
        # 5. Return Response
        from urllib.parse import quote
        proxied_url = f"/video_proxy?url={quote(stream_url, safe='')}"
        response_data['stream_url'] = proxied_url
        
        response = jsonify(response_data)
        response.headers['X-Cache'] = 'MISS'
        return response
    
    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/search')
def search():
    query = request.args.get('q')
    if not query:
        return jsonify({'error': 'No query provided'}), 400
    
    try:
        # Check if query is a YouTube URL
        import re
        # Regex to catch youtube.com/watch?v=, youtu.be/, shorts/, etc.
        youtube_regex = r'(https?://)?(www\.)?(youtube\.com/watch\?v=|youtu\.be/|youtube\.com/shorts/)([\w-]+)'
        match = re.search(youtube_regex, query)
        
        if match:
            video_id = match.group(4)
            # Fetch direct metadata
            meta_cmd = [sys.executable, '-m', 'yt_dlp', '--dump-json', '--no-playlist', f'https://www.youtube.com/watch?v={video_id}']
            meta_proc = subprocess.run(meta_cmd, capture_output=True, text=True)
            
            results = []
            search_title = ""
            
            if meta_proc.returncode == 0:
                data = json.loads(meta_proc.stdout)
                search_title = data.get('title', '')
                
                # Format duration
                duration_secs = data.get('duration')
                if duration_secs:
                    mins, secs = divmod(int(duration_secs), 60)
                    hours, mins = divmod(mins, 60)
                    duration = f"{hours}:{mins:02d}:{secs:02d}" if hours else f"{mins}:{secs:02d}"
                else:
                    duration = None
                
                results.append({
                    'id': video_id,
                    'title': search_title,
                    'uploader': data.get('uploader') or data.get('channel') or 'Unknown',
                    'thumbnail': f"https://i.ytimg.com/vi/{video_id}/mqdefault.jpg",
                    'view_count': data.get('view_count', 0),
                    'upload_date': data.get('upload_date', ''),
                    'duration': duration,
                    'description': data.get('description', ''),
                    'is_exact_match': True
                })

            # Now fetch related/similar videos using title
            if search_title:
                rel_cmd = [
                    sys.executable, '-m', 'yt_dlp',
                    f'ytsearch19:{search_title}', 
                    '--dump-json',
                    '--default-search', 'ytsearch',
                    '--no-playlist',
                    '--flat-playlist' 
                ]
                rel_proc = subprocess.Popen(rel_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
                stdout, _ = rel_proc.communicate()
                
                for line in stdout.splitlines():
                    try:
                        r_data = json.loads(line)
                        r_id = r_data.get('id')
                        if r_id != video_id:
                            r_dur = r_data.get('duration')
                            if r_dur:
                                m, s = divmod(int(r_dur), 60)
                                h, m = divmod(m, 60)
                                dur_str = f"{h}:{m:02d}:{s:02d}" if h else f"{m}:{s:02d}"
                            else:
                                dur_str = None
                                
                            results.append({
                                'id': r_id,
                                'title': r_data.get('title', 'Unknown'),
                                'uploader': r_data.get('uploader') or r_data.get('channel') or 'Unknown',
                                'thumbnail': f"https://i.ytimg.com/vi/{r_id}/hqdefault.jpg",
                                'view_count': r_data.get('view_count', 0),
                                'upload_date': r_data.get('upload_date', ''),
                                'duration': dur_str
                            })
                    except:
                        continue

            return jsonify(results)

        else:
            # Standard Text Search
            cmd = [
                sys.executable, '-m', 'yt_dlp',
                f'ytsearch20:{query}',
                '--dump-json',
                '--default-search', 'ytsearch',
                '--no-playlist',
                '--flat-playlist' 
            ]
            
            process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
            stdout, stderr = process.communicate()
            
            results = []
            for line in stdout.splitlines():
                try:
                    data = json.loads(line)
                    video_id = data.get('id')
                    if video_id:
                        duration_secs = data.get('duration')
                        if duration_secs:
                            mins, secs = divmod(int(duration_secs), 60)
                            hours, mins = divmod(mins, 60)
                            duration = f"{hours}:{mins:02d}:{secs:02d}" if hours else f"{mins}:{secs:02d}"
                        else:
                            duration = None
                        
                        results.append({
                            'id': video_id,
                            'title': data.get('title', 'Unknown'),
                            'uploader': data.get('uploader') or data.get('channel') or 'Unknown',
                            'thumbnail': f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg",
                            'view_count': data.get('view_count', 0),
                            'upload_date': data.get('upload_date', ''),
                            'duration': duration
                        })
                except:
                    continue
                    
            return jsonify(results)
            
    except Exception as e:
        print(f"Search Error: {e}")
        return jsonify({'error': str(e)}), 500

@app.route('/api/channel')
def get_channel_videos_simple():
    channel_id = request.args.get('id')
    if not channel_id:
        return jsonify({'error': 'No channel ID provided'}), 400
        
    try:
        # Construct Channel URL
        if channel_id.startswith('http'):
            url = channel_id
        elif channel_id.startswith('@'):
            url = f"https://www.youtube.com/{channel_id}"
        elif len(channel_id) == 24 and channel_id.startswith('UC'): # Standard Channel ID
            url = f"https://www.youtube.com/channel/{channel_id}"
        else:
             url = f"https://www.youtube.com/{channel_id}"

        # Fetch videos (flat playlist to be fast)
        cmd = [sys.executable, '-m', 'yt_dlp', '--dump-json', '--flat-playlist', '--playlist-end', '20', url]
        proc = subprocess.run(cmd, capture_output=True, text=True)
        
        if proc.returncode != 0:
            return jsonify({'error': 'Failed to fetch channel videos', 'details': proc.stderr}), 500
            
        videos = []
        for line in proc.stdout.splitlines():
            try:
                v = json.loads(line)
                if v.get('id') and v.get('title'):
                    videos.append(sanitize_video_data(v))
            except json.JSONDecodeError:
                continue
                
        return jsonify(videos)

    except Exception as e:
        print(f"Channel Fetch Error: {e}")
        return jsonify({'error': str(e)}), 500



# --- Helper: Extractive Summarization ---
def extractive_summary(text, num_sentences=5):
    # 1. Clean and parse text
    # Remove metadata like [Music] (common in auto-caps)
    clean_text = re.sub(r'\[.*?\]', '', text)
    clean_text = clean_text.replace('\n', ' ')
    
    # 2. Split into sentences (simple punctuation split)
    sentences = re.split(r'(?<!\w\.\w.)(?<![A-Z][a-z]\.)(?<=\.|\?)\s', clean_text)
    
    # 3. Tokenize and Calculate Word Frequencies
    word_frequencies = {}
    stop_words = set(['the', 'a', 'an', 'and', 'or', 'but', 'is', 'are', 'was', 'were', 'to', 'of', 'in', 'on', 'at', 'for', 'width', 'that', 'this', 'it', 'you', 'i', 'we', 'they', 'he', 'she'])
    
    for word in re.findall(r'\w+', clean_text.lower()):
        if word not in stop_words:
            if word not in word_frequencies:
                word_frequencies[word] = 1
            else:
                word_frequencies[word] += 1
                
    if not word_frequencies:
        return "Not enough content to summarize."

    # Normalize frequencies
    max_freq = max(word_frequencies.values())
    for word in word_frequencies:
        word_frequencies[word] = word_frequencies[word] / max_freq

    # 4. Score Sentences
    sentence_scores = {}
    for sent in sentences:
        for word in re.findall(r'\w+', sent.lower()):
            if word in word_frequencies:
                if sent not in sentence_scores:
                    sentence_scores[sent] = word_frequencies[word]
                else:
                    sentence_scores[sent] += word_frequencies[word]
    
    # 5. Extract Top N Sentences
    summary_sentences = heapq.nlargest(num_sentences, sentence_scores, key=sentence_scores.get)
    return ' '.join(summary_sentences)

@app.route('/api/summarize')
def summarize_video():
    video_id = request.args.get('v')
    if not video_id:
        return jsonify({'error': 'No video ID'}), 400
        
    try:
        # Fetch Transcript
        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        
        # Try to find english or manually created first, then auto
        try:
           transcript = transcript_list.find_transcript(['en', 'vi']) 
        except:
           # Fallback to whatever is available (likely auto-generated)
           transcript = transcript_list.find_generated_transcript(['en', 'vi'])
           
        transcript_data = transcript.fetch()
        
        # Combine text
        full_text = " ".join([entry['text'] for entry in transcript_data])
        
        # Summarize
        summary = extractive_summary(full_text, num_sentences=7)
        
        return jsonify({'success': True, 'summary': summary})
        
    except TranscriptsDisabled:
        return jsonify({'success': False, 'message': 'Subtitles are disabled for this video.'})
    except Exception as e:
        return jsonify({'success': False, 'message': f'Could not summarize: {str(e)}'})

# Helper function to fetch videos (not a route)
def fetch_videos(query, limit=20, filter_type=None, playlist_start=1, playlist_end=None):
    try:
        # Source-Level Filter: Exclude Shorts for standard video requests
        # REMOVED: Causing 0 results with complex queries. Rely on Python filtering.
        # if filter_type == 'video':
        #    query = f"{query} -shorts -#shorts"

        # If no end specified, default to start + limit - 1
        if not playlist_end:
            playlist_end = playlist_start + limit - 1
            
        cmd = [
            sys.executable, '-m', 'yt_dlp',
            f'ytsearch{playlist_end}:{query}', # Explicitly request enough items to populate the list up to 'end'
            '--dump-json',
            '--default-search', 'ytsearch',
            '--no-playlist',
            '--flat-playlist',
            '--playlist-start', str(playlist_start),
            '--playlist-end', str(playlist_end)
        ]
        
        process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        stdout, stderr = process.communicate()
        
        results = []
        for line in stdout.splitlines():
            try:
                data = json.loads(line)
                video_id = data.get('id')
                if video_id:
                    # Format duration
                    duration_secs = data.get('duration')
                    
                    # Filter Logic
                    title_lower = data.get('title', '').lower()
                    if filter_type == 'video':
                         # STRICT: If duration is missing, DO NOT SKIP. Just trust the query exclusion.
                         # if not duration_secs:
                         #    continue
                         
                         # Exclude explicit Shorts
                         if '#shorts' in title_lower:
                             continue
                         # Exclude short duration (buffer to 70s to avoid vertical clutter) ONLY IF WE KNOW IT
                         if duration_secs and int(duration_secs) <= 70:
                             continue
                             
                    if filter_type == 'short' and duration_secs and int(duration_secs) > 60:
                        continue
                        
                    if duration_secs:
                        mins, secs = divmod(int(duration_secs), 60)
                        hours, mins = divmod(mins, 60)
                        duration = f"{hours}:{mins:02d}:{secs:02d}" if hours else f"{mins}:{secs:02d}"
                    else:
                        duration = None
                    
                    results.append({
                        'id': video_id,
                        'title': data.get('title', 'Unknown'),
                        'uploader': data.get('uploader') or data.get('channel') or 'Unknown',
                        'channel_id': data.get('channel_id'),
                        'uploader_id': data.get('uploader_id'),
                        'thumbnail': f"https://i.ytimg.com/vi/{video_id}/hqdefault.jpg",
                        'view_count': data.get('view_count', 0),
                        'upload_date': data.get('upload_date', ''),
                        'duration': duration
                    })
            except:
                continue
        return results
    except Exception as e:
        print(f"Error fetching videos: {e}")
        return []

import concurrent.futures

@app.route('/api/trending')
def trending():
    try:
        category = request.args.get('category', 'all') # Default to 'all' for home
        page = int(request.args.get('page', 1))
        sort = request.args.get('sort', 'month')
        region = request.args.get('region', 'vietnam')
        limit = 120 if category != 'all' else 20 # 120 for grid, 20 for sections
        
        def get_query(cat, reg, s_sort):
            if reg == 'vietnam':
                queries = {
                    'general': 'trending vietnam -shorts',
                    'tech': 'review công nghệ điện thoại laptop',
                    'all': 'trending vietnam -shorts',
                    'music': 'nhạc việt trending -shorts',
                    'gaming': 'gaming việt nam -shorts',
                    'movies': 'phim việt nam -shorts',
                    'news': 'tin tức việt nam hôm nay -shorts',
                    'sports': 'thể thao việt nam -shorts',
                    'shorts': 'trending việt nam', 
                    'trending': 'trending việt nam -shorts',
                    'podcasts': 'podcast việt nam -shorts',
                    'live': 'live stream việt nam -shorts'
                }
            else:
                queries = {
                    'general': 'trending -shorts',
                    'tech': 'tech gadget review smartphone',
                    'all': 'trending -shorts',
                    'music': 'music trending -shorts',
                    'gaming': 'gaming trending -shorts',
                    'movies': 'movies trending -shorts',
                    'news': 'news today -shorts',
                    'sports': 'sports highlights -shorts',
                    'shorts': 'trending',
                    'trending': 'trending now -shorts',
                    'podcasts': 'podcast trending -shorts',
                    'live': 'live stream -shorts'
                }
            
            base = queries.get(cat, 'trending')
            
            if s_sort == 'newest':
                return base + ', today' # Or use explicit date filter
            
            from datetime import datetime, timedelta
            three_months_ago = (datetime.now() - timedelta(days=90)).strftime('%Y-%m-%d')
            
            sort_filters = {
                'day': ', today',
                'week': ', this week',
                'month': ', this month',
                '3months': f" after:{three_months_ago}",
                'year': ', this year'
            }
            return base + sort_filters.get(s_sort, f" after:{three_months_ago}")

        sort = request.args.get('sort', 'newest') # Ensure newest is default
        
        # === Parallel Fetching for Home Feed ===
        if category == 'all':
            # === 1. Suggested For You (History Based) ===
            suggested_videos = []
            try:
                conn = get_db_connection()
                # Get last 5 videos for context
                history = conn.execute('SELECT title, video_id, type FROM user_videos WHERE type = "history" ORDER BY timestamp DESC LIMIT 5').fetchall()
                conn.close()

                if history:
                    # Create a composite query from history
                    import random
                    # Pick 1-2 random items from recent history to diversify
                    bases = random.sample(history, min(len(history), 2))
                    query_parts = [row['title'] for row in bases]
                    # Add "related" to find similar content, not exact same
                    suggestion_query = " ".join(query_parts) + " related"
                    suggested_videos = fetch_videos(suggestion_query, limit=16, filter_type='video')
            except Exception as e:
                print(f"Suggestion Error: {e}")

            # === 2. You Might Like (Discovery) ===
            discovery_videos = []
            try:
                # curated list of interesting topics to rotate
                topics = ['amazing inventions', 'primitive technology', 'street food around the world', 
                          'documentary 2024', 'space exploration', 'wildlife 4k', 'satisfying restoration',
                          'travel vlog 4k', 'tech gadgets review', 'coding tutorial']
                import random
                topic = random.choice(topics)
                discovery_videos = fetch_videos(f"{topic} best", limit=16, filter_type='video')
            except: pass

            # === Define Standard Sections ===
            sections_to_fetch = [
                {'id': 'trending', 'title': 'Trending', 'icon': 'fire'},
                {'id': 'music', 'title': 'Music', 'icon': 'music'},
                {'id': 'tech', 'title': 'Tech & AI', 'icon': 'microchip'},
                {'id': 'movies', 'title': 'Movies', 'icon': 'film'},
                {'id': 'gaming', 'title': 'Gaming', 'icon': 'gamepad'},
                {'id': 'news', 'title': 'News', 'icon': 'newspaper'},
                {'id': 'sports', 'title': 'Sports', 'icon': 'football-ball'}
            ]
            
            def fetch_section(section):
                target_sort = 'newest' if section['id'] != 'trending' else 'relevance'
                q = get_query(section['id'], region, target_sort)
                # Add a unique component to query for freshness
                q_fresh = f"{q} {int(time.time())}" if section['id'] == 'all' else q
                
                # Increase fetch limit to 150 (was 100) to compensate for strict filtering (dropping shorts/no-duration)
                vids = fetch_videos(q_fresh, limit=150, filter_type='video', playlist_start=1) 
                return {
                    'id': section['id'],
                    'title': section['title'],
                    'icon': section['icon'],
                    'videos': vids[:16] 
                }

            with concurrent.futures.ThreadPoolExecutor(max_workers=8) as executor:
                standard_results = list(executor.map(fetch_section, sections_to_fetch))
            
            # === Assemble Final Feed ===
            final_sections = []
            
            # Add Suggested if we have them
            if suggested_videos:
                final_sections.append({
                    'id': 'suggested',
                    'title': 'Suggested for You',
                    'icon': 'sparkles',
                    'videos': suggested_videos
                })
            
            # Add Discovery
            if discovery_videos:
                final_sections.append({
                    'id': 'discovery',
                    'title': 'You Might Like',
                    'icon': 'compass',
                    'videos': discovery_videos
                })
                
            # Add Standard Sections
            final_sections.extend(standard_results)
            
            return jsonify({'mode': 'sections', 'data': final_sections})

        # === Standard Single Category Fetch ===
        query = get_query(category, region, sort)
        
        # Calculate offset
        start = (page - 1) * limit + 1
        
        # Determine filter type
        is_shorts_req = request.args.get('shorts')
        if is_shorts_req:
            filter_mode = 'short'
        else:
            filter_mode = 'short' if category == 'shorts' else 'video'

        results = fetch_videos(query, limit=limit, filter_type=filter_mode, playlist_start=start)
        # Randomize a bit for "freshness" if it's the first page
        if page == 1:
            import random
            random.shuffle(results)
            
        return jsonify(results)


    except Exception as e:
        return jsonify({'error': str(e)}), 500

@app.route('/api/update_ytdlp', methods=['POST'])
def update_ytdlp():
    try:
        # Run pip install -U yt-dlp
        cmd = [sys.executable, '-m', 'pip', 'install', '-U', 'yt-dlp']
        result = subprocess.run(cmd, capture_output=True, text=True)
        
        if result.returncode == 0:
            # Check new version
            ver_cmd = [sys.executable, '-m', 'yt_dlp', '--version']
            ver_result = subprocess.run(ver_cmd, capture_output=True, text=True)
            version = ver_result.stdout.strip()
            return jsonify({'success': True, 'message': f'Updated successfully to {version}'})
        else:
            return jsonify({'success': False, 'message': f'Update failed: {result.stderr}'}), 500
    except Exception as e:
        return jsonify({'success': False, 'message': str(e)}), 500

@app.route('/api/comments')
def get_comments():
    """Get comments for a YouTube video"""
    video_id = request.args.get('v')
    if not video_id:
        return jsonify({'error': 'No video ID'}), 400
    
    try:
        url = f"https://www.youtube.com/watch?v={video_id}"
        cmd = [
            sys.executable, '-m', 'yt_dlp',
            url,
            '--write-comments',
            '--skip-download',
            '--dump-json'
        ]
        
        result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
        
        if result.returncode == 0:
            data = json.loads(result.stdout)
            comments_data = data.get('comments', [])
            
            # Format comments for frontend
            comments = []
            for c in comments_data[:50]:  # Limit to 50 comments
                comments.append({
                    'author': c.get('author', 'Unknown'),
                    'author_thumbnail': c.get('author_thumbnail', ''),
                    'text': c.get('text', ''),
                    'likes': c.get('like_count', 0),
                    'time': c.get('time_text', ''),
                    'is_pinned': c.get('is_pinned', False)
                })
            
            return jsonify({
                'comments': comments,
                'count': data.get('comment_count', len(comments))
            })
        else:
            return jsonify({'comments': [], 'count': 0, 'error': 'Could not load comments'})
            
    except subprocess.TimeoutExpired:
        return jsonify({'comments': [], 'count': 0, 'error': 'Comments loading timed out'})
    except Exception as e:
        return jsonify({'comments': [], 'count': 0, 'error': str(e)})


# --- AI Transcription REMOVED ---

if __name__ == '__main__':
    print("Starting KV-Tube Server on port 5002 (Reloader Disabled)")
    app.run(debug=True, host='0.0.0.0', port=5002, use_reloader=False)