diff --git a/scraper.py b/scraper.py index a9edab5..d0e9441 100644 --- a/scraper.py +++ b/scraper.py @@ -2,7 +2,7 @@ # -*- encoding: utf-8 -*- # @Author: https://github.com/Evil0ctal/ # @Time: 2021/11/06 -# @Update: 2023/03/08 +# @Update: 2023/06/27 # @Version: 3.3.0 # @Function: # 核心代码,估值1块(๑•̀ㅂ•́)و✧ @@ -26,36 +26,6 @@ from tenacity import * class Scraper: - """ - 简介/Introduction - - Scraper.get_url(text: str) -> Union[str, None] - 用于检索出文本中的链接并返回/Used to retrieve the link in the text and return it. - - Scraper.convert_share_urls(self, url: str) -> Union[str, None]\n - 用于转换分享链接为原始链接/Convert share links to original links - - Scraper.get_douyin_video_id(self, original_url: str) -> Union[str, None]\n - 用于获取抖音视频ID/Get Douyin video ID - - Scraper.get_douyin_video_data(self, video_id: str) -> Union[dict, None]\n - 用于获取抖音视频数据/Get Douyin video data - - Scraper.get_douyin_live_video_data(self, original_url: str) -> Union[str, None]\n - 用于获取抖音直播视频数据/Get Douyin live video data - - Scraper.get_tiktok_video_id(self, original_url: str) -> Union[str, None]\n - 用于获取TikTok视频ID/Get TikTok video ID - - Scraper.get_tiktok_video_data(self, video_id: str) -> Union[dict, None]\n - 用于获取TikTok视频数据/Get TikTok video data - - Scraper.hybrid_parsing(self, video_url: str) -> dict\n - 用于混合解析/ Hybrid parsing - - Scraper.hybrid_parsing_minimal(data: dict) -> dict\n - 用于混合解析最小化/Hybrid parsing minimal - """ """__________________________________________⬇️initialization(初始化)⬇️______________________________________""" @@ -68,7 +38,7 @@ class Scraper: 'accept-encoding': 'gzip, deflate, br', 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36', 'referer': 'https://www.douyin.com/', - 'cookie': "s_v_web_id=verify_leytkxgn_kvO5kOmO_SdMs_4t1o_B5ml_BUqtWM1mP6BF;" + 'cookie': "ttwid=1%7C0YBAnAwiC5T3U5yJi8RVXEK3DOwF_2vpJ7kVJJZe8HU%7C1666668932%7C21048e6555b73e8801d3956afc6130b4a05ae73a2eefe4d3fef5ef1b61caf0e9; __live_version__=%221.1.1.2586%22; odin_tt=a77b90afad5db31e86fe004b39c5f35423292023ce7837cde82fd1f7fe54278890ce24dc89e09c8a2e55b1f4904950a7b0fca6b4fbff3b549ba6d55a335373ec; pwa2=%223%7C0%7C0%7C0%22; s_v_web_id=verify_lkagpdq1_IuHpxJyS_q6YH_4AvH_8aNH_zhvGPr95Jrc8; passport_csrf_token=301cf539fb735ab77de7e382b0dd93e5; passport_csrf_token_default=301cf539fb735ab77de7e382b0dd93e5; bd_ticket_guard_client_data=eyJiZC10aWNrZXQtZ3VhcmQtdmVyc2lvbiI6MiwiYmQtdGlja2V0LWd1YXJkLWl0ZXJhdGlvbi12ZXJzaW9uIjoxLCJiZC10aWNrZXQtZ3VhcmQtcmVlLXB1YmxpYy1rZXkiOiJCRXhuWUdqREVBa3ErdjRsT2l3anRIWi9HU2hRNXFseWdJMklLanIxM0orRHozYnA0M2pXc3M3N25CUzdnbE5tTXhHbWU3cldoSE9pdkJvVmNnT2JiWFU9IiwiYmQtdGlja2V0LWd1YXJkLXdlYi12ZXJzaW9uIjoxfQ==; passport_assist_user=CkHJzB17Xsy3FUHyNfX2Dyb8IFKKA_0pu1SKYG0OAT_av3ImQyCbEmGJV7b8MJep4l9MjeCRK1FPY9k9yAkVHbIbvhpICjzS68aPlRjIsUzHLIEM-5jMbp9awcdJnkACni5Nnc_PBm4ljAlEqChbF4nYPpn4xyh4kY2hBvRikmXs0sgQ4fq2DRiJr9ZUIgEDbm8-yw%3D%3D; n_mh=13KNPUKNEzoW3A4J-OLRxfal2zj1GbF-vJUFPs3WSIY; sso_uid_tt=2581aab41d03156c0b7fee9c7e865c6c; sso_uid_tt_ss=2581aab41d03156c0b7fee9c7e865c6c; toutiao_sso_user=b2556b53ed5cee89e947b154b17645f1; toutiao_sso_user_ss=b2556b53ed5cee89e947b154b17645f1; sid_ucp_sso_v1=1.0.0-KDhlZjRhMmJhZGU0OTVmOWM0YzBkMTY5ZGNkZmI4NTFjNTk2ODU5OTkKHwiPluCxqYzbAhC29OKmBhjvMSAMMLDIpZkGOAZA9AcaAmhsIiBiMjU1NmI1M2VkNWNlZTg5ZTk0N2IxNTRiMTc2NDVmMQ; ssid_ucp_sso_v1=1.0.0-KDhlZjRhMmJhZGU0OTVmOWM0YzBkMTY5ZGNkZmI4NTFjNTk2ODU5OTkKHwiPluCxqYzbAhC29OKmBhjvMSAMMLDIpZkGOAZA9AcaAmhsIiBiMjU1NmI1M2VkNWNlZTg5ZTk0N2IxNTRiMTc2NDVmMQ; sid_guard=c1d1ac1d22198149dfc6cac74938b14a%7C1691925046%7C5184000%7CThu%2C+12-Oct-2023+11%3A10%3A46+GMT; uid_tt=7e39a426dac7802b2448fa2266ca1b85; uid_tt_ss=7e39a426dac7802b2448fa2266ca1b85; sid_tt=c1d1ac1d22198149dfc6cac74938b14a; sessionid=c1d1ac1d22198149dfc6cac74938b14a; sessionid_ss=c1d1ac1d22198149dfc6cac74938b14a; sid_ucp_v1=1.0.0-KDc4Y2VkZjIyN2JlMDNhYmNhYTFlYTE5ODM1YzI2YjVlZDNmMGY0N2YKGwiPluCxqYzbAhC29OKmBhjvMSAMOAZA9AdIBBoCbHEiIGMxZDFhYzFkMjIxOTgxNDlkZmM2Y2FjNzQ5MzhiMTRh; ssid_ucp_v1=1.0.0-KDc4Y2VkZjIyN2JlMDNhYmNhYTFlYTE5ODM1YzI2YjVlZDNmMGY0N2YKGwiPluCxqYzbAhC29OKmBhjvMSAMOAZA9AdIBBoCbHEiIGMxZDFhYzFkMjIxOTgxNDlkZmM2Y2FjNzQ5MzhiMTRh; LOGIN_STATUS=1; _bd_ticket_crypt_cookie=861cdca903469f36dd23fc1ecfe847c1; __security_server_data_status=1; store-region=us; store-region-src=uid; d_ticket=28acd5a9c6df4227b13582669694acded6ede; __ac_nonce=064ec4f3a00901157c769; __ac_signature=_02B4Z6wo00f01ve8HKgAAIDD6.-iFWbfM-r3jRgAANkQTCm7UjsJOQlMGY7o-iPsCIAe0kuriDaQ15lHcML.nW.cGNWpSBLUJzdr6s8KHRbqh5ywvupCeAKBEHKKbji7hD1-Z0x3DI-n0KKx34; douyin.com; device_web_cpu_core=16; device_web_memory_size=-1; webcast_local_quality=null; publish_badge_show_info=%220%2C0%2C0%2C1693208382348%22; IsDouyinActive=true; home_can_add_dy_2_desktop=%220%22; strategyABtestKey=%221693208382.387%22; stream_recommend_feed_params=%22%7B%5C%22cookie_enabled%5C%22%3Atrue%2C%5C%22screen_width%5C%22%3A1344%2C%5C%22screen_height%5C%22%3A756%2C%5C%22browser_online%5C%22%3Atrue%2C%5C%22cpu_core_num%5C%22%3A16%2C%5C%22device_memory%5C%22%3A0%2C%5C%22downlink%5C%22%3A%5C%22%5C%22%2C%5C%22effective_type%5C%22%3A%5C%22%5C%22%2C%5C%22round_trip_time%5C%22%3A0%7D%22; VIDEO_FILTER_MEMO_SELECT=%7B%22expireTime%22%3A1693813183367%2C%22type%22%3A1%7D; volume_info=%7B%22isUserMute%22%3Afalse%2C%22isMute%22%3Atrue%2C%22volume%22%3A1%7D; my_rd=1; passport_fe_beating_status=true; msToken=ESPx4FwNhcdEvr36-bmhWde9xupU_c64WeeqvvzqzLCtmEsvGPXhkwsKM8miaoC2w8gWSzNAfqxPEju4w3jzopIFompVSmwemq9-z1F8V-2vLNhTxLlYCUVdXkzNj6zM; download_guide=%221%2F20230828%2F0%22; csrf_session_id=3c194edf7f2cee968b0df65f97a11648; msToken=XFIGWeX20IGrrEUGYr_4SR2DPrduwK5zxB3gOp8FfbxW_Ng-w9uNh8wQRUIoPUtkSblL6msqte55jyfcrKPb8eDZekS9Q1P9hkdkPFiV4Ni-l9Vmsr0KgFo5MOkLaBZy; tt_scid=-i-7N5fAMRj8pGg4drGXbjasutdtD4tzIeqRnm6OJ1LoXRRZGl8FNhORnEuY3id.b3b7" } self.tiktok_api_headers = { 'User-Agent': 'com.ss.android.ugc.trill/494+Mozilla/5.0+(Linux;+Android+12;+2112123G+Build/SKQ1.211006.001;+wv)+AppleWebKit/537.36+(KHTML,+like+Gecko)+Version/4.0+Chrome/107.0.5304.105+Mobile+Safari/537.36' @@ -100,7 +70,7 @@ class Scraper: """__________________________________________⬇️utils(实用程序)⬇️______________________________________""" - # 检索字符串中的链接 + # 检索字符串中的链接/Retrieve links from string @staticmethod def get_url(text: str) -> Union[str, None]: try: @@ -113,6 +83,14 @@ class Scraper: print('Error in get_url:', e) return None + # 生成X-Bogus签名/Generate X-Bogus signature + @staticmethod + def generate_x_bogus_url(url: str, headers: dict) -> str: + query = urllib.parse.urlparse(url).query + xbogus = execjs.compile(open('./X-Bogus.js').read()).call('sign', query, headers['User-Agent']) + new_url = url + "&X-Bogus=" + xbogus + return new_url + # 转换链接/convert url @retry(stop=stop_after_attempt(4), wait=wait_fixed(7)) async def convert_share_urls(self, url: str) -> Union[str, None]: @@ -668,5 +646,5 @@ if __name__ == '__main__': # params = "device_platform=webapp&aid=6383&channel=channel_pc_web&aweme_id=7153585499477757192&pc_client_type=1&version_code=190500&version_name=19.5.0&cookie_enabled=true&screen_width=1344&screen_height=756&browser_language=zh-CN&browser_platform=Win32&browser_name=Firefox&browser_version=110.0&browser_online=true&engine_name=Gecko&engine_version=109.0&os_name=Windows&os_version=10&cpu_core_num=16&device_memory=&platform=PC&webid=7158288523463362079" # api.generate_x_bogus(params) douyin_url = 'https://v.douyin.com/rLyrQxA/6.66' - tiktok_url = 'https://vt.tiktok.com/ZSRwWXtdr/' + tiktok_url = 'https://www.tiktok.com/@evil0ctal/video/7217027383390555438' asyncio.run(async_test(_douyin_url=douyin_url, _tiktok_url=tiktok_url)) diff --git a/web_api.py b/web_api.py index e274ffc..d9afe25 100644 --- a/web_api.py +++ b/web_api.py @@ -2,7 +2,7 @@ # -*- encoding: utf-8 -*- # @Author: https://github.com/Evil0ctal/ # @Time: 2021/11/06 -# @Update: 2023/03/08 +# @Update: 2023/06/27 # @Version: 3.1.5 # @Function: # 创建一个接受提交参数的FastAPi应用程序。 diff --git a/web_app.py b/web_app.py index 68a2106..0175ed3 100644 --- a/web_app.py +++ b/web_app.py @@ -2,8 +2,8 @@ # -*- encoding: utf-8 -*- # @Author: https://github.com/Evil0ctal/ # @Time: 2021/11/06 -# @Update: 2022/12/25 -# @Version: 3.0.1 +# @Update: 2023/06/27 +# @Version: 3.0.2 # @Function: # 用于在线批量解析Douyin/TikTok的无水印视频/图集。 # 基于 PyWebIO,将scraper.py返回的内容显示在网页上。