apix/services/crawl4ai/app/grok_auth.py
Khoa.vo 2a4bf8b58b
Some checks are pending
CI / build (18.x) (push) Waiting to run
CI / build (20.x) (push) Waiting to run
feat: updates before deployment
2026-01-06 13:26:11 +07:00

111 lines
4.5 KiB
Python

import asyncio
import logging
from typing import Dict, Optional
from playwright.async_api import async_playwright, Browser, Page
from playwright_stealth import Stealth
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
# Progress lines are appended to this file in addition to the module logger.
_STATUS_LOG_PATH = "error.log"
_FAILURE_SCREENSHOT_PATH = "cloudflare_fail.png"
_GROK_URL = "https://grok.com"
_GROK_READY_SELECTOR = 'textarea[placeholder*="Grok"]'
_NAVIGATION_TIMEOUT_MS = 60000
_POLL_INTERVAL_SECONDS = 5
_POLL_ATTEMPTS = 24  # 24 * 5s = 120s


def _append_status(message: str) -> None:
    """Append one line to the status log file without ever raising.

    The file is written as UTF-8 so exception text containing non-ASCII
    characters cannot fail on platforms with a narrower default encoding.
    """
    try:
        with open(_STATUS_LOG_PATH, "a", encoding="utf-8") as status_file:
            status_file.write(f"{message}\n")
    except OSError as exc:
        # Status logging is best-effort; it must not abort authentication.
        logger.warning("Could not write to %s: %s", _STATUS_LOG_PATH, exc)


async def _wait_for_grok_ui(page: "Page") -> bool:
    """Poll *page* until the Grok prompt box appears or the time budget ends.

    Returns:
        True as soon as the ready selector matches, False after
        ``_POLL_ATTEMPTS`` unsuccessful probes.
    """
    for attempt in range(_POLL_ATTEMPTS):
        try:
            if await page.query_selector(_GROK_READY_SELECTOR):
                _append_status("Browser: Success! Grok UI detected.")
                return True

            content = await page.content()
            # Report the challenge only on every other probe to limit noise.
            if "Just a moment" in content and attempt % 2 == 0:
                elapsed = attempt * _POLL_INTERVAL_SECONDS
                _append_status(
                    f"Browser: Still on Cloudflare challenge... ({elapsed}s)"
                )
        except Exception as exc:
            # A failed probe is treated as "not ready yet" and retried.
            logger.debug("Grok UI probe %d failed: %s", attempt, exc)

        # Sleep outside the try block: a failing probe must still consume
        # its share of the wait budget instead of spinning through the loop.
        await asyncio.sleep(_POLL_INTERVAL_SECONDS)
    return False


async def get_grok_cookies() -> Dict[str, str]:
    """Open grok.com in a visible, stealth-patched Chromium and return its cookies.

    The browser is launched headful so a user can solve a challenge manually.
    The page is polled for up to 120 seconds for the Grok prompt box; cookies
    are collected from the browser context whether or not it appeared.

    Returns:
        A mapping of cookie name to cookie value. An empty dict is returned
        if the browser could not be launched or any unexpected error occurred.
    """
    try:
        logger.info("Launching Playwright (Stealth Mode) for authentication...")
        _append_status("Browser: Launching Playwright (Visible, Stealth v2)...")

        async with async_playwright() as p:
            # Launch chromium in HEADFUL mode
            browser: Browser = await p.chromium.launch(
                headless=False,
                args=[
                    '--no-sandbox',
                    '--disable-setuid-sandbox',
                    '--disable-blink-features=AutomationControlled',
                    '--start-maximized',
                ],
            )
            try:
                # No user agent override is applied; only locale and timezone
                # are pinned. no_viewport=True lets the (maximized) window
                # determine the page size instead of a fixed default viewport.
                context = await browser.new_context(
                    no_viewport=True,
                    locale='en-US',
                    timezone_id='America/New_York',
                )
                page: Page = await context.new_page()

                # Apply stealth patches using the class-based API.
                stealth = Stealth()
                await stealth.apply_stealth_async(page)

                logger.info("Navigating to https://grok.com...")
                _append_status("Browser: Navigating to grok.com...")
                try:
                    await page.goto(
                        _GROK_URL,
                        timeout=_NAVIGATION_TIMEOUT_MS,
                        wait_until='domcontentloaded',
                    )
                except Exception as e:
                    # Navigation errors are not fatal: the page may still be
                    # loading, and the polling below decides the outcome.
                    logger.warning("Navigation warning: %s", e)
                    _append_status(
                        f"Browser: Navigation warning (might be loading): {e}"
                    )

                _append_status(
                    "Browser: Waiting 120s for challenge "
                    "(Please solve manually if visible)..."
                )
                authenticated = await _wait_for_grok_ui(page)

                if not authenticated:
                    _append_status(
                        "Browser: Timeout. Challenge NOT solved after 120s."
                    )
                    # Capture what was on screen for debugging. A screenshot
                    # failure must not prevent cookie extraction below.
                    try:
                        await page.screenshot(path=_FAILURE_SCREENSHOT_PATH)
                    except Exception as e:
                        logger.warning("Could not save failure screenshot: %s", e)

                # Extract cookies regardless of the polling outcome.
                cookies = await context.cookies()
                cookie_dict = {c['name']: c['value'] for c in cookies}

                _append_status(f"Browser: Extracted {len(cookie_dict)} cookies.")
                if 'cf_clearance' in cookie_dict:
                    _append_status("Browser: cf_clearance found.")
                else:
                    _append_status("Browser: WARNING: cf_clearance NOT found.")

                return cookie_dict
            finally:
                # Close explicitly while the Playwright driver is still alive.
                # A failing close is logged so it cannot discard the cookies.
                try:
                    await browser.close()
                except Exception as e:
                    logger.warning("Failed to close browser cleanly: %s", e)
    except Exception as e:
        # Top-level boundary: callers receive an empty dict, not an exception.
        logger.exception("Browser authentication failed: %s", e)
        _append_status(f"Browser auth failed exception: {e}")
        return {}