kv-tube/debug_ytfetcher_standalone.py

81 lines
2.8 KiB
Python

from ytfetcher import YTFetcher
from ytfetcher.config import HTTPConfig
import random
import os
import http.cookiejar
import logging
# Configure logging: set the root logger level to INFO so the info/warning
# records emitted by this script are shown. basicConfig also installs a default
# stream handler when the root logger has none configured yet.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module; used by debug_fetch to report
# cookie loading results.
logger = logging.getLogger(__name__)
# Desktop browser User-Agent strings; one is picked at random per run so the
# request headers resemble ordinary browser traffic.
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
)


def _load_cookie_header(cookies_path):
    """Read a Mozilla/Netscape cookies file and build a ``Cookie`` header.

    Args:
        cookies_path: Path of the cookies file to load.

    Returns:
        A ``(header, count)`` tuple: the ``name=value`` pairs joined with
        ``"; "`` and the number of cookies that were loaded.

    Raises:
        Whatever ``MozillaCookieJar.load`` raises for an unreadable or
        malformed file; the caller decides how to handle it.
    """
    jar = http.cookiejar.MozillaCookieJar(cookies_path)
    # NOTE(review): load() is called with its defaults, which skip session
    # (discard) cookies and already-expired cookies -- confirm that is intended.
    jar.load()
    pairs = [f"{cookie.name}={cookie.value}" for cookie in jar]
    return "; ".join(pairs), len(pairs)


def _build_headers(cookie_header):
    """Return browser-like request headers, adding ``Cookie`` when non-empty."""
    headers = {
        "User-Agent": random.choice(_USER_AGENTS),
        "Accept-Language": "en-US,en;q=0.9",
    }
    # Inject the cookie header only if we actually have one.
    if cookie_header:
        headers["Cookie"] = cookie_header
    return headers


def debug_fetch(video_id, cookies_path='cookies.txt', languages=('en', 'en-US', 'vi')):
    """Fetch the transcript of one video through YTFetcher and print diagnostics.

    The function loads cookies from ``cookies_path`` when that file exists,
    builds browser-like headers, creates a fetcher for ``video_id`` and prints
    the type and length of the result plus the first three non-empty
    transcript lines.

    Any exception raised along the way is caught and printed together with its
    traceback, so this function does not raise to the caller.

    Args:
        video_id: ID of the video whose transcript should be fetched.
        cookies_path: Cookies file in Mozilla/Netscape format. Defaults to
            ``'cookies.txt'`` relative to the current working directory.
        languages: Language codes handed to ``YTFetcher.from_video_ids`` as its
            ``languages`` argument (presumably in order of preference).

    Returns:
        None. All output goes to stdout and to the module logger.
    """
    try:
        # 1. Prepare cookies if available. This step is deliberately
        # best-effort: a broken cookie file must not prevent the fetch.
        cookie_header = ""
        if os.path.exists(cookies_path):
            try:
                cookie_header, cookie_count = _load_cookie_header(cookies_path)
                logger.info("Loaded %d cookies for YTFetcher", cookie_count)
            except Exception as e:
                logger.warning("Failed to process cookies: %s", e)

        # 2. Configuration to look like a real browser.
        config = HTTPConfig(headers=_build_headers(cookie_header))

        print(f"Initializing YTFetcher for {video_id}...")
        fetcher = YTFetcher.from_video_ids(
            video_ids=[video_id],
            http_config=config,
            languages=list(languages),
        )

        print("Fetching transcripts...")
        results = fetcher.fetch_transcripts()
        print(f"Results type: {type(results)}")
        print(f"Results length: {len(results) if results else 0}")

        if not results:
            print("Results is empty.")
            return

        data = results[0]
        if not data.transcripts:
            print("No transcripts inside data object.")
            return

        print("Transcript found!")
        # Strip each entry once and keep only the non-empty lines.
        stripped = (t.text.strip() for t in data.transcripts)
        text_lines = [line for line in stripped if line]
        print(f"First 3 lines: {text_lines[:3]}")
    except Exception as e:
        # Top-level boundary of a debug helper: report everything, raise nothing.
        import traceback
        print(f"CRITICAL ERROR: {e}")
        print(traceback.format_exc())
# Script entry point: when executed directly, run one debug fetch against a
# fixed sample video ID.
if __name__ == "__main__":
    debug_fetch(video_id="qZvqydUEzqA")