diff --git a/benchmark_ytdlp.py b/benchmark_ytdlp.py deleted file mode 100644 index a762ecf..0000000 --- a/benchmark_ytdlp.py +++ /dev/null @@ -1,55 +0,0 @@ -import time -import sys -import subprocess -import json -import yt_dlp - -QUERY = "latest smart technology gadgets reviews" -LIMIT = 20 - -def test_subprocess(): - start = time.time() - cmd = [ - sys.executable, "-m", "yt_dlp", - f"ytsearch{LIMIT}:{QUERY}", - "--dump-json", - "--flat-playlist", - "--no-playlist", - "--no-warnings" - ] - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True) - out, err = proc.communicate() - end = time.time() - count = len(out.splitlines()) - return end - start, count - -def test_library(): - start = time.time() - ydl_opts = { - 'headers': {'User-Agent': 'Mozilla/5.0'}, - 'skip_download': True, - 'extract_flat': True, - 'noplaylist': True, - 'quiet': True, - 'no_warnings': True, - } - with yt_dlp.YoutubeDL(ydl_opts) as ydl: - res = ydl.extract_info(f"ytsearch{LIMIT}:{QUERY}", download=False) - count = len(res.get('entries', [])) - end = time.time() - return end - start, count - -if __name__ == "__main__": - print("Benchmarking Subprocess...") - try: - sub_time, sub_count = test_subprocess() - print(f"Subprocess: {sub_time:.4f}s (Fetched {sub_count} items)") - except Exception as e: - print(f"Subprocess Failed: {e}") - - print("\nBenchmarking Library...") - try: - lib_time, lib_count = test_library() - print(f"Library: {lib_time:.4f}s (Fetched {lib_count} items)") - except Exception as e: - print(f"Library Failed: {e}") diff --git a/debug_api.log b/debug_api.log deleted file mode 100644 index ccce502..0000000 --- a/debug_api.log +++ /dev/null @@ -1,8 +0,0 @@ -Batch fetching for: ['IpqiLXy4im8', 'o_rtfAazE5s', 'VexXHSzibxY', 'U2oEJKsPdHo', 'dQw4w9WgXcQ', 'h22z894ThnQ', 'Fp7FcfGNpWg', 'z_G_8i95SMA'] -Using python: C:\Program Files\Python312\python.exe -Command prepared: ['C:\\Program Files\\Python312\\python.exe', '-m', 'yt_dlp'] ... [len:16] -Result lines: 8 -Batch fetching for: ['IpqiLXy4im8', 'o_rtfAazE5s', 'VexXHSzibxY', 'U2oEJKsPdHo', 'dQw4w9WgXcQ', 'h22z894ThnQ', 'Fp7FcfGNpWg', 'z_G_8i95SMA'] -Using python: C:\Program Files\Python312\python.exe -Command prepared: ['C:\\Program Files\\Python312\\python.exe', '-m', 'yt_dlp'] ... [len:16] -Result lines: 8 diff --git a/debug_fetch.log b/debug_fetch.log deleted file mode 100644 index 488a885..0000000 --- a/debug_fetch.log +++ /dev/null @@ -1,3 +0,0 @@ -Fetching for qZvqydUEzqA... -Results type: -Results is empty/None. diff --git a/debug_paths.txt b/debug_paths.txt deleted file mode 100644 index 01bbe8e..0000000 --- a/debug_paths.txt +++ /dev/null @@ -1,2 +0,0 @@ -APP: C:\Users\Admin\Documents\Projects-Khoa.vo\kv-tube\current\kv-tube\app\__init__.py -API: C:\Users\Admin\Documents\Projects-Khoa.vo\kv-tube\current\kv-tube\app\routes\api.py diff --git a/debug_ytfetcher_standalone.py b/debug_ytfetcher_standalone.py deleted file mode 100644 index d5fec5a..0000000 --- a/debug_ytfetcher_standalone.py +++ /dev/null @@ -1,81 +0,0 @@ -from ytfetcher import YTFetcher -from ytfetcher.config import HTTPConfig -import random -import os -import http.cookiejar -import logging - -# Configure logging -logging.basicConfig(level=logging.INFO) -logger = logging.getLogger(__name__) - -def debug_fetch(video_id): - try: - # 1. Prepare Cookies if available - cookie_header = "" - cookies_path = 'cookies.txt' - - if os.path.exists(cookies_path): - try: - cj = http.cookiejar.MozillaCookieJar(cookies_path) - cj.load() - cookies_list = [] - for cookie in cj: - cookies_list.append(f"{cookie.name}={cookie.value}") - cookie_header = "; ".join(cookies_list) - logger.info(f"Loaded {len(cookies_list)} cookies for YTFetcher") - except Exception as e: - logger.warning(f"Failed to process cookies: {e}") - - # 2. Configuration to look like a real browser - user_agents = [ - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", - "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0" - ] - - headers = { - "User-Agent": random.choice(user_agents), - "Accept-Language": "en-US,en;q=0.9", - } - - # Inject cookie header if we have it - if cookie_header: - headers["Cookie"] = cookie_header - - config = HTTPConfig(headers=headers) - - print(f"Initializing YTFetcher for {video_id}...") - # Initialize Fetcher - fetcher = YTFetcher.from_video_ids( - video_ids=[video_id], - http_config=config, - languages=['en', 'en-US', 'vi'] - ) - - # Fetch - print(f"Fetching transcripts...") - results = fetcher.fetch_transcripts() - - print(f"Results type: {type(results)}") - print(f"Results length: {len(results) if results else 0}") - - if results: - data = results[0] - if data.transcripts: - print("Transcript found!") - text_lines = [t.text.strip() for t in data.transcripts if t.text.strip()] - print(f"First 3 lines: {text_lines[:3]}") - else: - print("No transcripts inside data object.") - # Maybe print available tracks if possible? - else: - print("Results is empty.") - - except Exception as e: - import traceback - print(f"CRITICAL ERROR: {e}") - print(traceback.format_exc()) - -if __name__ == "__main__": - debug_fetch("qZvqydUEzqA") diff --git a/execution_trace.txt b/execution_trace.txt deleted file mode 100644 index d690daf..0000000 --- a/execution_trace.txt +++ /dev/null @@ -1,3 +0,0 @@ -Unique videos count: 5 -IDs to hydrate: ['AlGfj9JBdAI', 'MhQGDAGVIa8', 'Fp7FcfGNpWg', 'Z2KlYnsPaIk', 'f_4uUX9n538'] -Metadata map keys: ['MhQGDAGVIa8', 'Fp7FcfGNpWg', 'AlGfj9JBdAI', 'Z2KlYnsPaIk', 'f_4uUX9n538'] diff --git a/restore_cookies.py b/restore_cookies.py deleted file mode 100644 index 01fa870..0000000 --- a/restore_cookies.py +++ /dev/null @@ -1,185 +0,0 @@ -import json -import time - -cookies_data = [ - { - "domain": ".youtube.com", - "expirationDate": 1802692356.635205, - "hostOnly": False, - "httpOnly": True, - "name": "__Secure-3PSID", - "path": "/", - "sameSite": "no_restriction", - "secure": True, - "session": False, - "storeId": None, - "value": "g.a0005gie1lAkmYZc-EPeGx77pCrXo_Cz5eAi-e9aryb9Qoz967v4Caiou6Tt5ZyLR4iMp5I51wACgYKASISARESFQHGX2MiopTeGBKXybppZWNr7JzmKhoVAUF8yKrgfPx-gEb02gGAV3ZaVOGr0076" - }, - { - "domain": ".youtube.com", - "expirationDate": 1800281710.070798, - "hostOnly": False, - "httpOnly": True, - "name": "__Secure-1PSIDTS", - "path": "/", - "sameSite": None, - "secure": True, - "session": False, - "storeId": None, - "value": "sidts-CjQB7I_69DRJdiQQGddE6tt-GHilv2IjDZd8S6FlWCjx2iReOoNtQMUkb55vaBdl8vBK7J_DEAA" - }, - { - "domain": ".youtube.com", - "expirationDate": 1802692356.635439, - "hostOnly": False, - "httpOnly": False, - "name": "SAPISID", - "path": "/", - "sameSite": None, - "secure": True, - "session": False, - "storeId": None, - "value": "DP6iRyLCM_cFV1Gw/AN2nemkVrvJ2p8MWb" - }, - { - "domain": ".youtube.com", - "expirationDate": 1800281710.070999, - "hostOnly": False, - "httpOnly": True, - "name": "__Secure-1PSIDCC", - "path": "/", - "sameSite": None, - "secure": True, - "session": False, - "storeId": None, - "value": "AKEyXzU66C7YKqYKgxpR8BbWvDlICFaXQCERc_NLnU_QLkcHrmR0aPQJTFLW1WesYcSYtIJYW3o" - }, - { - "domain": ".youtube.com", - "expirationDate": 1802692356.635327, - "hostOnly": False, - "httpOnly": True, - "name": "SSID", - "path": "/", - "sameSite": None, - "secure": True, - "session": False, - "storeId": None, - "value": "A4isk9AE9xActvzYy" - }, - { - "domain": ".youtube.com", - "expirationDate": 1802692356.635505, - "hostOnly": False, - "httpOnly": False, - "name": "__Secure-1PAPISID", - "path": "/", - "sameSite": None, - "secure": True, - "session": False, - "storeId": None, - "value": "DP6iRyLCM_cFV1Gw/AN2nemkVrvJ2p8MWb" - }, - { - "domain": ".youtube.com", - "expirationDate": 1802692356.635139, - "hostOnly": False, - "httpOnly": True, - "name": "__Secure-1PSID", - "path": "/", - "sameSite": None, - "secure": True, - "session": False, - "storeId": None, - "value": "g.a0005gie1lAkmYZc-EPeGx77pCrXo_Cz5eAi-e9aryb9Qoz967v4-rF3xTavVHrJoyJAqShH6gACgYKAX0SARESFQHGX2MiOdAbUPmCj4MueYyh-2km5RoVAUF8yKp2ehWQC6tX8n-9UNg11RV60076" - }, - { - "domain": ".youtube.com", - "expirationDate": 1802692356.635559, - "hostOnly": False, - "httpOnly": False, - "name": "__Secure-3PAPISID", - "path": "/", - "sameSite": "no_restriction", - "secure": True, - "session": False, - "storeId": None, - "value": "DP6iRyLCM_cFV1Gw/AN2nemkVrvJ2p8MWb" - }, - { - "domain": ".youtube.com", - "expirationDate": 1800281710.071036, - "hostOnly": False, - "httpOnly": True, - "name": "__Secure-3PSIDCC", - "path": "/", - "sameSite": "no_restriction", - "secure": True, - "session": False, - "storeId": None, - "value": "AKEyXzUv06PBPrBxCnsrFCJPVRWYCKjXadcrSQPokD-DHGumtiOBRC96ipf2COBQcX_7RjiO8g" - }, - { - "domain": ".youtube.com", - "expirationDate": 1800281710.070914, - "hostOnly": False, - "httpOnly": True, - "name": "__Secure-3PSIDTS", - "path": "/", - "sameSite": "no_restriction", - "secure": True, - "session": False, - "storeId": None, - "value": "sidts-CjQB7I_69DRJdiQQGddE6tt-GHilv2IjDZd8S6FlWCjx2iReOoNtQMUkb55vaBdl8vBK7J_DEAA" - }, - { - "domain": ".youtube.com", - "expirationDate": 1792154873.499957, - "hostOnly": False, - "httpOnly": True, - "name": "LOGIN_INFO", - "path": "/", - "sameSite": "no_restriction", - "secure": True, - "session": False, - "storeId": None, - "value": "AFmmF2swRQIgVjJk8Mho4_JuKr6SZzrhBdlL1LdxWxcwDMu4cjaRRgcCIQCTtJpmYKJH54Tiei3at3f4YT3US7gSL0lW_TZ04guKjQ:QUQ3MjNmeWlwRDJSNDl2NE9uX2JWWG5tWllHN0RsNUVZVUhsLVp4N2dWbldaeC14SnNybWVERnNoaXFpanFJczhKTjJSRGN6MEs3c1VkLTE1TGJVeFBPT05BY29NMFh0Q1VPdFU3dUdvSUpET3lQbU1ZMUlHUGltajlXNDllNUQxZHdzZko1WXF1UUJWclNxQVJ0TXVEYnF2bXJRY2V6Vl9n" - }, - { - "domain": ".youtube.com", - "expirationDate": 1803304605.839449, - "hostOnly": False, - "httpOnly": False, - "name": "PREF", - "path": "/", - "sameSite": None, - "secure": True, - "session": False, - "storeId": None, - "value": "tz=Etc.GMT-7&f7=150" - } -] - -def json_to_netscape(json_cookies, output_file): - with open(output_file, 'w') as f: - f.write("# Netscape HTTP Cookie File\n") - f.write("# This file is generated by a script.\n\n") - - for cookie in json_cookies: - domain = cookie.get('domain', '') - # Netscape format requires domain to start with . for subdomains usually, - # flag TRUE/FALSE depends on if it's a domain cookie. - # Simplified: - flag = "TRUE" if domain.startswith('.') else "FALSE" - path = cookie.get('path', '/') - secure = "TRUE" if cookie.get('secure', False) else "FALSE" - expiration = str(int(cookie.get('expirationDate', 0))) - name = cookie.get('name', '') - value = cookie.get('value', '') - - f.write(f"{domain}\t{flag}\t{path}\t{secure}\t{expiration}\t{name}\t{value}\n") - - print(f"Successfully converted {len(json_cookies)} cookies to {output_file}") - -if __name__ == "__main__": - json_to_netscape(cookies_data, 'cookies.txt') diff --git a/server_debug.log b/server_debug.log deleted file mode 100644 index f7229bb..0000000 Binary files a/server_debug.log and /dev/null differ diff --git a/server_log.txt b/server_log.txt deleted file mode 100644 index 7064839..0000000 Binary files a/server_log.txt and /dev/null differ diff --git a/verify_fix.py b/verify_fix.py deleted file mode 100644 index 6696e1a..0000000 --- a/verify_fix.py +++ /dev/null @@ -1,20 +0,0 @@ -import requests -import time - -print("Waiting for server to be ready...") -for i in range(10): - try: - requests.get("http://127.0.0.1:5002/health") # Assuming health check or just root - break - except: - time.sleep(2) - -print("Invoking Summarize API...") -try: - # Use a longer timeout for the genai processing - r = requests.get("http://127.0.0.1:5002/api/summarize?v=qZvqydUEzqA&title=Disturbed%20-%20The%20Sound%20Of%20Silence&lang=vi", timeout=60) - print(f"Status Code: {r.status_code}") - print("Response Body Snippet:") - print(r.text[:500]) -except Exception as e: - print(f"Request Failed: {e}")