From 286418c24685aee834eaa202b8a23542260619ef Mon Sep 17 00:00:00 2001 From: Evil0ctal Date: Mon, 6 May 2024 05:55:19 -0700 Subject: [PATCH] =?UTF-8?q?=F0=9F=8E=A8:=20=E6=B7=BB=E5=8A=A0=E8=87=AA?= =?UTF-8?q?=E5=8A=A8=E5=8C=96=E6=8A=96=E9=9F=B3Cookie=E8=8E=B7=E5=8F=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crawlers/douyin/web/config.yaml | 2 +- crawlers/douyin/web/web_crawler.py | 20 ++++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/crawlers/douyin/web/config.yaml b/crawlers/douyin/web/config.yaml index 4e3dbde..08fb83f 100644 --- a/crawlers/douyin/web/config.yaml +++ b/crawlers/douyin/web/config.yaml @@ -4,7 +4,7 @@ TokenManager: Accept-Language: zh-CN,zh;q=0.8,zh-TW;q=0.7,zh-HK;q=0.5,en-US;q=0.3,en;q=0.2 User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36 Referer: https://www.douyin.com/ - Cookie: __ac_nonce=066319b1d00a2c3a5a32; __ac_signature=_02B4Z6wo00f01CAwRGAAAIDDX8H3JTg7X-ggAUDAAG5E57; ttwid=1%7CIce39UJBic2W8B8JRj9tkzb3BMl_LVmwKdahq4T_TQ4%7C1714527006%7C7831bb5a3935a2aaa8e4605d99cffb648fff799220f7c912d53d358937975aa6; douyin.com; device_web_cpu_core=16; device_web_memory_size=-1; architecture=amd64; IsDouyinActive=true; home_can_add_dy_2_desktop=%220%22; dy_swidth=1323; dy_sheight=827; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1323%2C%5C%22screen_height%5C%22%3A827%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A16%2C%5C%22device_memory%5C%22%3A0%2C%5C%22downlink%5C%22%3A%5C%22%5C%22%2C%5C%22effective_type%5C%22%3A%5C%22%5C%22%2C%5C%22round_trip_time%5C%22%3A0%7D%22; strategyABtestKey=%221714527007.257%22; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Atrue%2C%22volume%22%3A0.5%7D; stream_player_status_params=%22%7B%5C%22is_auto_play%5C%22%3A0%2C%5C%22is_full_screen%5C%22%3A0%2C%5C%22is_full_webscreen%5C%22%3A1%2C%5C%22is_mute%5C%22%3A1%2C%5C%22is_speed%5C%22%3A1%2C%5C%22is_visible%5C%22%3A0%7D%22; xgplayer_user_id=367707317542; csrf_session_id=6f34e666e71445c9d39d8d06a347a13f; xg_device_score=Infinity; FORCE_LOGIN=%7B%22videoConsumedRemainSeconds%22%3A180%7D; passport_csrf_token=92247773d074342637df81db51489969; passport_csrf_token_default=92247773d074342637df81db51489969; msToken=kyCxymhw2m4Lc4K-n-Vr-OY3Kv7R72D9GXpmxrE3Ka4kRkHy-zvLatY3pXrsaEOyxlsTw-3I5JSDRGWNAFAyydNkomXzvVwKeL_GDMQ4P-jvvWbyw1eKeJq2kG7H9-4=; bd_ticket_guard_client_web_domain=2; odin_tt=5576b2ace24aa26f57c5fcec2a416b05a1f7ba785c62b93d6757376f421d2da7cf23b79de915f1eca97affb19b7270782382245533ceeaa8cdfa7f35d5ccd4c8c5a4d1f624ad356376ce6ffda93bc741; s_v_web_id=verify_lvn54e5x_fb563a9d_f244_ce83_cc4f_45789afba4ce; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCTTlxYlFUOWlOdTRzZFg2MldFT1lnVUdrZENsaUU3WllsK1JrK3lRVCs0RDdWMEU3aGpWREtYaHplMW9aR2g5NW0wVlpncXN4MStGY3VIZjgvTnBIY3c9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoxfQ%3D%3D + Cookie: odin_tt=a5406392350bf546a7890440b69f3ae1e73e89a8722399df65ea5626eaf5084f;passport_fe_beating_status=false;sid_guard=5b3139e92b5f62521b81426c0e43b9b0%7C1715000097%7C21600%7CMon%2C+06-May-2024+18%3A54%3A57+GMT;uid_tt=9382a988f1d647cf145ed96be16eddcf;uid_tt_ss=9382a988f1d647cf145ed96be16eddcf;sid_tt=5b3139e92b5f62521b81426c0e43b9b0;sessionid=5b3139e92b5f62521b81426c0e43b9b0;sessionid_ss=5b3139e92b5f62521b81426c0e43b9b0;sid_ucp_v1=1.0.0-KDBkM2Q0ZDZiNTUyOWVlNGJjODYzZDEwNjU4ZTIyMzhjMzdjYjg0MDEKCBChpuOxBhgNGgJsZiIgNWIzMTM5ZTkyYjVmNjI1MjFiODE0MjZjMGU0M2I5YjA;ssid_ucp_v1=1.0.0-KDBkM2Q0ZDZiNTUyOWVlNGJjODYzZDEwNjU4ZTIyMzhjMzdjYjg0MDEKCBChpuOxBhgNGgJsZiIgNWIzMTM5ZTkyYjVmNjI1MjFiODE0MjZjMGU0M2I5YjA;passport_assist_user=; ttwid=1%7CgkHwc-xmrQUG6nv1gZ1YWZbzctdFs9LkB9ff80zOAaM%7C1715000096%7C6759be79ff15ef8a6d8b88d7abd843267d7f32d0af105caaea84cf9abe232c09 proxies: http: diff --git a/crawlers/douyin/web/web_crawler.py b/crawlers/douyin/web/web_crawler.py index 2a3ebab..12f86bf 100644 --- a/crawlers/douyin/web/web_crawler.py +++ b/crawlers/douyin/web/web_crawler.py @@ -35,6 +35,8 @@ import asyncio # 异步I/O import time # 时间操作 + +import httpx import yaml # 配置文件 import os # 系统操作 @@ -237,6 +239,19 @@ class DouyinWebCrawler: "-------------------------------------------------------utils接口列表-------------------------------------------------------" + # 获取抖音Web的游客Cookie + async def fetch_douyin_web_guest_cookie(self, user_agent: str): + headers = { + 'User-Agent': user_agent, + 'Cookie': '' + } + async with httpx.AsyncClient() as client: + domain = "https://beta.tikhub.io" + uri = "/api/v1/douyin/web/fetch_douyin_web_guest_cookie" + url = f"{domain}{uri}?user_agent={user_agent}" + response = await client.get(url, headers=headers) + return response.json().get("data") + # 生成真实msToken async def gen_real_msToken(self, ): result = { @@ -398,6 +413,11 @@ class DouyinWebCrawler: """-------------------------------------------------------utils接口列表-------------------------------------------------------""" + # 获取抖音Web的游客Cookie + # user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36" + # result = await self.fetch_douyin_web_guest_cookie(user_agent) + # print(result) + # 生成真实msToken # result = await self.gen_real_msToken() # print(result)