diff --git a/scraper.py b/scraper.py index f332443..cee5b5b 100644 --- a/scraper.py +++ b/scraper.py @@ -2,7 +2,7 @@ # -*- encoding: utf-8 -*- # @Author: https://github.com/Evil0ctal/ # @Time: 2021/11/06 -# @Update: 2022/07/03 +# @Update: 2022/07/29 # @Function: # 核心代码,估值1块(๑•̀ㅂ•́)و✧ # 用于爬取Douyin/TikTok数据并以字典形式返回。 @@ -334,65 +334,67 @@ class Scraper: except: video_info = None # 从TikTok官方API获取部分视频数据 - tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/multi/aweme/detail/?aweme_ids=%5B{}%5D'.format( + # 新API2022年7月29日 https://api.tiktokv.com/aweme/v1/aweme/detail/?aweme_id={} + # 旧API https://api.tiktokv.com/aweme/v1/multi/aweme/detail/?aweme_ids=%5B{}%5D + tiktok_api_link = 'https://api.tiktokv.com/aweme/v1/aweme/detail/?aweme_id={}'.format( video_id) print('正在请求API链接:{}'.format(tiktok_api_link)) response = requests.get(url=tiktok_api_link, headers=headers, proxies=self.proxies).text # 将API获取到的内容格式化为JSON result = json.loads(response) - for i in result["aweme_details"][0]: + for i in result["aweme_detail"]: if i != 'image_post_info': # 类型为视频 url_type = 'video' print('类型为视频') # 无水印视频链接 - nwm_video_url = result["aweme_details"][0]["video"]["play_addr"]["url_list"][0] + nwm_video_url = result["aweme_detail"]["video"]["play_addr"]["url_list"][0] try: # 有水印视频链接 - wm_video_url = result["aweme_details"][0]["video"]['download_addr']['url_list'][0] + wm_video_url = result["aweme_detail"]["video"]['download_addr']['url_list'][0] except Exception: # 有水印视频链接 wm_video_url = 'None' # 视频标题 - video_title = result["aweme_details"][0]["desc"] + video_title = result["aweme_detail"]["desc"] # 视频作者昵称 - video_author_nickname = result["aweme_details"][0]['author']["nickname"] + video_author_nickname = result["aweme_detail"]['author']["nickname"] # 视频作者ID - video_author_id = result["aweme_details"][0]['author']["unique_id"] + video_author_id = result["aweme_detail"]['author']["unique_id"] # 上传时间戳 - video_create_time = result["aweme_details"][0]['create_time'] + video_create_time = result["aweme_detail"]['create_time'] # 视频ID - video_aweme_id = result["aweme_details"][0]['statistics']['aweme_id'] + video_aweme_id = result["aweme_detail"]['statistics']['aweme_id'] try: # 视频BGM标题 - video_music_title = result["aweme_details"][0]['music']['title'] + video_music_title = result["aweme_detail"]['music']['title'] # 视频BGM作者 - video_music_author = result["aweme_details"][0]['music']['author'] + video_music_author = result["aweme_detail"]['music']['author'] # 视频BGM ID - video_music_id = result["aweme_details"][0]['music']['id'] + video_music_id = result["aweme_detail"]['music']['id'] # 视频BGM链接 - video_music_url = result["aweme_details"][0]['music']['play_url']['url_list'][0] + video_music_url = result["aweme_detail"]['music']['play_url']['url_list'][0] except: video_music_title, video_music_author, video_music_id, video_music_url = "None", "None", "None", "None" # 评论数量 - video_comment_count = result["aweme_details"][0]['statistics']['comment_count'] + video_comment_count = result["aweme_detail"]['statistics']['comment_count'] # 获赞数量 - video_digg_count = result["aweme_details"][0]['statistics']['digg_count'] + video_digg_count = result["aweme_detail"]['statistics']['digg_count'] # 播放次数 - video_play_count = result["aweme_details"][0]['statistics']['play_count'] + video_play_count = result["aweme_detail"]['statistics']['play_count'] # 下载次数 - video_download_count = result["aweme_details"][0]['statistics']['download_count'] + video_download_count = result["aweme_detail"]['statistics']['download_count'] # 分享次数 - video_share_count = result["aweme_details"][0]['statistics']['share_count'] + video_share_count = result["aweme_detail"]['statistics']['share_count'] # 视频封面 - video_cover = result["aweme_details"][0]['video']['cover']['url_list'][0] + video_cover = result["aweme_detail"]['video']['cover']['url_list'][0] # 视频动态封面 - video_dynamic_cover = result["aweme_details"][0]['video']['dynamic_cover']['url_list'][0] + video_dynamic_cover = result["aweme_detail"]['video']['dynamic_cover']['url_list'][0] # 视频原始封面 - video_origin_cover = result["aweme_details"][0]['video']['origin_cover']['url_list'][0] + video_origin_cover = result["aweme_detail"]['video']['origin_cover']['url_list'][0] # 将话题保存在列表中 video_hashtags = [] - for tag in result["aweme_details"][0]['text_extra']: + for tag in result["aweme_detail"]['text_extra']: if 'hashtag_name' in tag: video_hashtags.append(tag['hashtag_name']) else: @@ -474,39 +476,39 @@ class Scraper: url_type = 'album' print('类型为图集') # 视频标题 - album_title = result["aweme_details"][0]["desc"] + album_title = result["aweme_detail"]["desc"] # 视频作者昵称 - album_author_nickname = result["aweme_details"][0]['author']["nickname"] + album_author_nickname = result["aweme_detail"]['author']["nickname"] # 视频作者ID - album_author_id = result["aweme_details"][0]['author']["unique_id"] + album_author_id = result["aweme_detail"]['author']["unique_id"] # 上传时间戳 - album_create_time = result["aweme_details"][0]['create_time'] + album_create_time = result["aweme_detail"]['create_time'] # 视频ID - album_aweme_id = result["aweme_details"][0]['statistics']['aweme_id'] + album_aweme_id = result["aweme_detail"]['statistics']['aweme_id'] try: # 视频BGM标题 - album_music_title = result["aweme_details"][0]['music']['title'] + album_music_title = result["aweme_detail"]['music']['title'] # 视频BGM作者 - album_music_author = result["aweme_details"][0]['music']['author'] + album_music_author = result["aweme_detail"]['music']['author'] # 视频BGM ID - album_music_id = result["aweme_details"][0]['music']['id'] + album_music_id = result["aweme_detail"]['music']['id'] # 视频BGM链接 - album_music_url = result["aweme_details"][0]['music']['play_url']['url_list'][0] + album_music_url = result["aweme_detail"]['music']['play_url']['url_list'][0] except: album_music_title, album_music_author, album_music_id, album_music_url = "None", "None", "None", "None" # 评论数量 - album_comment_count = result["aweme_details"][0]['statistics']['comment_count'] + album_comment_count = result["aweme_detail"]['statistics']['comment_count'] # 获赞数量 - album_digg_count = result["aweme_details"][0]['statistics']['digg_count'] + album_digg_count = result["aweme_detail"]['statistics']['digg_count'] # 播放次数 - album_play_count = result["aweme_details"][0]['statistics']['play_count'] + album_play_count = result["aweme_detail"]['statistics']['play_count'] # 下载次数 - album_download_count = result["aweme_details"][0]['statistics']['download_count'] + album_download_count = result["aweme_detail"]['statistics']['download_count'] # 分享次数 - album_share_count = result["aweme_details"][0]['statistics']['share_count'] + album_share_count = result["aweme_detail"]['statistics']['share_count'] # 无水印图集 album_list = [] - for i in result["aweme_details"][0]['image_post_info']['images']: + for i in result["aweme_detail"]['image_post_info']['images']: album_list.append(i['display_image']['url_list'][0]) # 结束时间 end = time.time()