apix/services/crawl4ai/app/grok/reverse/parser.py
Khoa.vo 2a4bf8b58b
Some checks are pending
CI / build (18.x) (push) Waiting to run
CI / build (20.x) (push) Waiting to run
feat: updates before deployment
2026-01-06 13:26:11 +07:00

139 lines
No EOL
5 KiB
Python

from base64 import b64decode
from json import load, dump
from os import path
from re import findall, search
from typing import Optional, Union

from curl_cffi import requests

from ..runtime import Utils
class Parser:
    """Scrape SVG path data, verification tokens and script-derived values.

    Values that require downloading a script are memoised on the class and,
    when the ``core/mappings`` directory exists (relative to the current
    working directory), persisted there as JSON so they can be reloaded.
    """

    # Script URL -> list of integers scraped from that script.
    mapping: dict = {}
    _mapping_loaded: bool = False
    # List of {"xsid_script", "action_script", "actions"} records.
    grok_mapping: list = []
    _grok_mapping_loaded: bool = False

    @classmethod
    def _load__xsid_mapping(cls):
        """Load ``core/mappings/txid.json`` into ``mapping`` once, if present."""
        if not cls._mapping_loaded and path.exists('core/mappings/txid.json'):
            with open('core/mappings/txid.json', 'r') as f:
                cls.mapping = load(f)
            cls._mapping_loaded = True

    @classmethod
    def _load_grok_mapping(cls):
        """Load ``core/mappings/grok.json`` into ``grok_mapping`` once, if present."""
        if not cls._grok_mapping_loaded and path.exists('core/mappings/grok.json'):
            with open('core/mappings/grok.json', 'r') as f:
                cls.grok_mapping = load(f)
            cls._grok_mapping_loaded = True

    @staticmethod
    def _save_mapping(file_path: str, data: object, indent: Optional[int] = None) -> None:
        """Best-effort write of *data* as JSON to *file_path*.

        Nothing is written when the target directory does not exist, and a
        failed write is reported rather than raised: the on-disk cache is an
        optimisation, so losing it must not discard an already-parsed result.
        """
        if not path.exists(path.dirname(file_path)):
            return
        try:
            with open(file_path, 'w') as f:
                dump(data, f, indent=indent)
        except (OSError, TypeError, ValueError) as e:
            print(f"Failed to save mapping: {e}")

    @staticmethod
    def parse_values(
        html: str, loading: str = "loading-x-anim-0", scriptId: str = ""
    ) -> Union[str, tuple[str, Optional[list]]]:
        """Pick one SVG path from *html* and, optionally, numbers from a script.

        Args:
            html: Page source searched for ``"d":"M..."`` values of at least
                200 characters after the leading ``M``.
            loading: Name of the form ``loading-x-anim-<n>``; ``<n>`` selects
                which path to return. A malformed or out-of-range index
                selects the first path.
            scriptId: When non-empty, identifies the script whose
                ``x[<n>], 16`` occurrences are collected.

        Returns:
            The selected path string when *scriptId* is empty, otherwise a
            ``(path, numbers)`` pair. When no path is found, ``""`` or
            ``("", None)`` respectively.
        """
        Parser._load__xsid_mapping()

        all_d_values = findall(r'"d":"(M[^"]{200,})"', html)
        if not all_d_values:
            print("Warning: No SVG paths found")
            # Parenthesised on purpose: without them the conditional binds
            # only to the second tuple element and a pair is always returned.
            return ("", None) if scriptId else ""

        try:
            anim_index = int(loading.split("loading-x-anim-")[1])
        except (IndexError, ValueError):
            anim_index = 0
        # Reject negative indices too, so they cannot wrap around the list.
        if not 0 <= anim_index < len(all_d_values):
            anim_index = 0
        svg_data = all_d_values[anim_index]

        if not scriptId:
            return svg_data

        if scriptId == "ondemand.s":
            script_link = (
                'https://abs.twimg.com/responsive-web/client-web/ondemand.s.'
                + Utils.between(html, f'"{scriptId}":"', '"')
                + 'a.js'
            )
        else:
            script_link = f'https://grok.com/_next/{scriptId}'

        if script_link in Parser.mapping:
            return svg_data, Parser.mapping[script_link]

        script_content = requests.get(script_link, impersonate="chrome136").text
        numbers = [int(x) for x in findall(r'x\[(\d+)\]\s*,\s*16', script_content)]
        Parser.mapping[script_link] = numbers
        Parser._save_mapping('core/mappings/txid.json', Parser.mapping)
        return svg_data, numbers

    @staticmethod
    def get_anim(html: str, verification: str = "grok-site-verification") -> tuple[str, str]:
        """Return the verification token from *html* and the animation name.

        The token is the text between ``"name":"<verification>","content":"``
        and the next ``"``. The animation name is ``loading-x-anim-<k>``,
        where ``k`` is the sixth byte of the base64-decoded token modulo 4,
        or ``0`` when the token cannot be decoded or is too short.
        """
        verification_token: str = Utils.between(html, f'"name":"{verification}","content":"', '"')
        try:
            decoded = b64decode(verification_token)
        except Exception:
            # Deliberately broad: any undecodable token selects the default.
            decoded = b""
        if len(decoded) > 5:
            anim = "loading-x-anim-" + str(decoded[5] % 4)
        else:
            anim = "loading-x-anim-0"
        return verification_token, anim

    @staticmethod
    def parse_grok(scripts: list) -> tuple[list, str]:
        """Return ``(actions, xsid_script)`` for the given script paths.

        A cached record whose ``action_script`` is in *scripts* is returned
        directly. Otherwise every script is downloaded from
        ``https://grok.com``; the one containing ``anonPrivateKey`` supplies
        the ``createServerReference`` ids, and the one containing
        ``880932)`` supplies the ``static/chunks/*.js`` path. If several
        scripts match a marker, the last one wins.

        Returns:
            ``(actions, xsid_script)`` on success, ``([], "")`` when the
            scripts or the expected patterns cannot be found.
        """
        Parser._load_grok_mapping()
        for entry in Parser.grok_mapping:
            if entry.get("action_script") in scripts:
                return entry["actions"], entry["xsid_script"]

        script_content1: Optional[str] = None
        script_content2: Optional[str] = None
        action_script: Optional[str] = None
        for script in scripts:
            content: str = requests.get(f'https://grok.com{script}', impersonate="chrome136").text
            if "anonPrivateKey" in content:
                script_content1 = content
                action_script = script
            elif "880932)" in content:
                # NOTE(review): 880932 looks like a bundler module id - confirm.
                script_content2 = content

        if not script_content1 or not script_content2:
            print("Failed to find required scripts")
            return [], ""

        actions: list = findall(r'createServerReference\)\("([a-f0-9]+)"', script_content1)
        # search() returns None on no match, so test it before .group().
        xsid_match = search(r'"(static/chunks/[^"]+\.js)"[^}]*?\(880932\)', script_content2)
        if not actions or xsid_match is None:
            print("Something went wrong while parsing script and actions")
            return [], ""

        xsid_script: str = xsid_match.group(1)
        Parser.grok_mapping.append({
            "xsid_script": xsid_script,
            "action_script": action_script,
            "actions": actions
        })
        Parser._save_mapping('core/mappings/grok.json', Parser.grok_mapping, indent=2)
        return actions, xsid_script