# File-viewer listing metadata for the original file: 81 lines, 2.8 KiB, Python.
import http.cookiejar
import logging
import os
import random
import sys
import traceback

from ytfetcher import YTFetcher
from ytfetcher.config import HTTPConfig

# Configure logging: attach a default handler to the root logger and let
# INFO-and-above records through. basicConfig() is a no-op when the root
# logger already has handlers.
logging.basicConfig(level=logging.INFO)

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Browser-like User-Agent strings; one is chosen at random for each run.
_USER_AGENTS = (
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
)

# Transcript languages requested when the caller does not pass any.
_DEFAULT_LANGUAGES = ("en", "en-US", "vi")


def _load_cookie_header(cookies_path):
    """Build a ``Cookie`` header value from a Netscape-format cookie file.

    Best-effort: returns ``""`` when the file does not exist or cannot be
    loaded (the failure is logged as a warning, never raised).
    """
    if not os.path.exists(cookies_path):
        return ""

    jar = http.cookiejar.MozillaCookieJar(cookies_path)
    try:
        # NOTE: with its default arguments load() skips session cookies and
        # cookies that have already expired.
        jar.load()
    except Exception as exc:  # deliberately broad: cookies are optional
        logger.warning("Failed to process cookies: %s", exc)
        return ""

    # MozillaCookieJar loads a nameless cookies.txt entry as a cookie whose
    # value is None; emit just the name instead of the literal "name=None".
    pairs = [
        cookie.name if cookie.value is None else f"{cookie.name}={cookie.value}"
        for cookie in jar
    ]
    logger.info("Loaded %d cookies for YTFetcher", len(pairs))
    return "; ".join(pairs)


def debug_fetch(video_id, cookies_path="cookies.txt", languages=None):
    """Fetch the transcript of one video with YTFetcher and print diagnostics.

    Builds browser-like request headers (random User-Agent, Accept-Language,
    plus a Cookie header when a cookie file is available), runs the fetch and
    prints the result type, the result count and the first three non-empty
    transcript lines.

    Args:
        video_id: ID of the video whose transcript should be fetched.
        cookies_path: Path to a Netscape-format cookie file. A missing or
            unreadable file is tolerated; the request is then sent without
            a Cookie header.
        languages: Transcript language codes to request. ``None`` or an
            empty sequence selects ``en``, ``en-US`` and ``vi``.

    Returns:
        None. All findings are written to stdout.

    Any exception raised while configuring or fetching is caught here and
    printed together with its traceback instead of propagating.
    """
    try:
        # 1. Headers that make the request look like a real browser.
        headers = {
            "User-Agent": random.choice(_USER_AGENTS),
            "Accept-Language": "en-US,en;q=0.9",
        }

        # 2. Inject the cookie header only if we actually have cookies.
        cookie_header = _load_cookie_header(cookies_path)
        if cookie_header:
            headers["Cookie"] = cookie_header

        config = HTTPConfig(headers=headers)

        print(f"Initializing YTFetcher for {video_id}...")
        fetcher = YTFetcher.from_video_ids(
            video_ids=[video_id],
            http_config=config,
            languages=list(languages or _DEFAULT_LANGUAGES),
        )

        print("Fetching transcripts...")
        results = fetcher.fetch_transcripts()

        print(f"Results type: {type(results)}")
        print(f"Results length: {len(results) if results else 0}")

        if not results:
            print("Results is empty.")
            return

        data = results[0]
        if not data.transcripts:
            print("No transcripts inside data object.")
            return

        print("Transcript found!")
        # Strip each entry once and drop the ones that are blank.
        stripped = (entry.text.strip() for entry in data.transcripts)
        text_lines = [line for line in stripped if line]
        print(f"First 3 lines: {text_lines[:3]}")

    except Exception as e:
        # Top-level boundary of a debug script: report everything, raise nothing.
        print(f"CRITICAL ERROR: {e}")
        print(traceback.format_exc())


if __name__ == "__main__":
    # An optional first command-line argument overrides the built-in sample
    # video ID; running the script with no arguments behaves as before.
    debug_fetch(sys.argv[1] if len(sys.argv) > 1 else "qZvqydUEzqA")