Delete API directory
This commit is contained in:
parent
9e2a16f05a
commit
8f4b2fba93
5 changed files with 0 additions and 666 deletions
|
|
@ -1,2 +0,0 @@
|
||||||
# @Author: https://github.com/Evil0ctal/
|
|
||||||
# @Time: 2021/11/06
|
|
||||||
|
|
@ -1,2 +0,0 @@
|
||||||
# @Author: https://github.com/Evil0ctal/
|
|
||||||
# @Time: 2021/11/06
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
certifi==2021.10.8
|
|
||||||
charset-normalizer==2.0.12
|
|
||||||
click==8.1.2
|
|
||||||
colorama==0.4.4
|
|
||||||
Flask==2.1.1
|
|
||||||
idna==3.3
|
|
||||||
itsdangerous==2.1.2
|
|
||||||
Jinja2==3.1.1
|
|
||||||
MarkupSafe==2.1.1
|
|
||||||
pywebio==1.6.0
|
|
||||||
requests==2.27.1
|
|
||||||
retrying==1.3.3
|
|
||||||
six==1.16.0
|
|
||||||
tornado==6.1
|
|
||||||
ua-parser==0.10.0
|
|
||||||
urllib3==1.26.5
|
|
||||||
user-agents==2.2.0
|
|
||||||
Werkzeug==2.1.1
|
|
||||||
410
API/scraper.py
410
API/scraper.py
|
|
@ -1,410 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- encoding: utf-8 -*-
|
|
||||||
# @Author: https://github.com/Evil0ctal/
|
|
||||||
# @Time: 2021/11/06
|
|
||||||
# @Update: 2022/04/06
|
|
||||||
# @Function:
|
|
||||||
# 核心代码,估值1块(๑•̀ㅂ•́)و✧
|
|
||||||
# 用于爬取Douyin/TikTok数据并以字典形式返回。
|
|
||||||
|
|
||||||
|
|
||||||
import re
|
|
||||||
import json
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
from retrying import retry
|
|
||||||
|
|
||||||
|
|
||||||
class Scraper:
|
|
||||||
"""
|
|
||||||
Scraper.douyin():抖音视频/图集解析,返回字典。
|
|
||||||
Scraper.tiktok():TikTok视频解析,返回字典。
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
self.headers = {
|
|
||||||
'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.66'
|
|
||||||
}
|
|
||||||
self.tiktok_headers = {
|
|
||||||
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
|
|
||||||
"authority": "www.tiktok.com",
|
|
||||||
"Accept-Encoding": "gzip, deflate",
|
|
||||||
"Connection": "keep-alive",
|
|
||||||
"Host": "www.tiktok.com",
|
|
||||||
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) coc_coc_browser/86.0.170 Chrome/80.0.3987.170 Safari/537.36",
|
|
||||||
}
|
|
||||||
|
|
||||||
@retry(stop_max_attempt_number=6)
|
|
||||||
def douyin(self, original_url):
|
|
||||||
"""
|
|
||||||
利用官方接口解析抖音链接信息
|
|
||||||
:param original_url: 抖音/TikTok链接(支持长/短链接)
|
|
||||||
:return:包含信息的字典
|
|
||||||
"""
|
|
||||||
headers = self.headers
|
|
||||||
try:
|
|
||||||
# 开始时间
|
|
||||||
start = time.time()
|
|
||||||
# 原视频链接
|
|
||||||
r = requests.get(url=original_url, headers=headers, allow_redirects=False)
|
|
||||||
try:
|
|
||||||
# 2021/12/11 发现抖音做了限制,会自动重定向网址,但是可以从回执头中获取
|
|
||||||
long_url = r.headers['Location']
|
|
||||||
except:
|
|
||||||
# 报错后判断为长链接,直接截取视频id
|
|
||||||
long_url = original_url
|
|
||||||
# 正则匹配出视频ID
|
|
||||||
key = re.findall('video/(\d+)?', long_url)[0]
|
|
||||||
# 构造抖音API链接
|
|
||||||
api_url = f'https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={key}'
|
|
||||||
print("正在请求抖音API链接: " + '\n' + api_url)
|
|
||||||
# 将回执以JSON格式处理
|
|
||||||
js = json.loads(requests.get(url=api_url, headers=headers).text)
|
|
||||||
# 判断是否为图集
|
|
||||||
if js['item_list'][0]['images'] is not None:
|
|
||||||
print("类型 = 图集")
|
|
||||||
# 类型为图集
|
|
||||||
url_type = 'album'
|
|
||||||
# 图集标题
|
|
||||||
album_title = str(js['item_list'][0]['desc'])
|
|
||||||
# 图集作者昵称
|
|
||||||
album_author = str(js['item_list'][0]['author']['nickname'])
|
|
||||||
# 图集作者签名
|
|
||||||
album_author_signature = str(js['item_list'][0]['author']['signature'])
|
|
||||||
# 图集作者UID
|
|
||||||
album_author_uid = str(js['item_list'][0]['author']['uid'])
|
|
||||||
# 图集作者抖音号
|
|
||||||
album_author_id = str(js['item_list'][0]['author']['unique_id'])
|
|
||||||
if album_author_id == "":
|
|
||||||
# 如果作者未修改过抖音号,应使用此值以避免无法获取其抖音ID
|
|
||||||
album_author_id = str(js['item_list'][0]['author']['short_id'])
|
|
||||||
# 尝试获取图集BGM信息
|
|
||||||
try:
|
|
||||||
# 图集BGM链接
|
|
||||||
album_music = str(js['item_list'][0]['music']['play_url']['url_list'][0])
|
|
||||||
# 图集BGM标题
|
|
||||||
album_music_title = str(js['item_list'][0]['music']['title'])
|
|
||||||
# 图集BGM作者
|
|
||||||
album_music_author = str(js['item_list'][0]['music']['author'])
|
|
||||||
# 图集BGM ID
|
|
||||||
album_music_id = str(js['item_list'][0]['music']['id'])
|
|
||||||
# 图集BGM MID
|
|
||||||
album_music_mid = str(js['item_list'][0]['music']['mid'])
|
|
||||||
except:
|
|
||||||
# 报错后代表无背景音乐
|
|
||||||
# 图集BGM链接
|
|
||||||
album_music = 'No BGM found'
|
|
||||||
# 图集BGM标题
|
|
||||||
album_music_title = 'No BGM found'
|
|
||||||
# 图集BGM作者
|
|
||||||
album_music_author = 'No BGM found'
|
|
||||||
# 图集BGM ID
|
|
||||||
album_music_id = 'No BGM found'
|
|
||||||
# 图集BGM MID
|
|
||||||
album_music_mid = 'No BGM found'
|
|
||||||
# 图集ID
|
|
||||||
album_aweme_id = str(js['item_list'][0]['statistics']['aweme_id'])
|
|
||||||
# 评论数量
|
|
||||||
album_comment_count = str(js['item_list'][0]['statistics']['comment_count'])
|
|
||||||
# 获赞数量
|
|
||||||
album_digg_count = str(js['item_list'][0]['statistics']['digg_count'])
|
|
||||||
# 播放次数
|
|
||||||
album_play_count = str(js['item_list'][0]['statistics']['play_count'])
|
|
||||||
# 分享次数
|
|
||||||
album_share_count = str(js['item_list'][0]['statistics']['share_count'])
|
|
||||||
# 上传时间戳
|
|
||||||
album_create_time = str(js['item_list'][0]['create_time'])
|
|
||||||
# 将话题保存在列表中
|
|
||||||
album_hashtags = []
|
|
||||||
for tag in js['item_list'][0]['text_extra']:
|
|
||||||
album_hashtags.append(tag['hashtag_name'])
|
|
||||||
# 将无水印图片链接保存在列表中
|
|
||||||
images_list = []
|
|
||||||
for data in js['item_list'][0]['images']:
|
|
||||||
images_list.append(data['url_list'][0])
|
|
||||||
# 结束时间
|
|
||||||
end = time.time()
|
|
||||||
# 解析时间
|
|
||||||
analyze_time = format((end - start), '.4f')
|
|
||||||
# 将信息储存在字典中
|
|
||||||
album_data = {'status': 'success',
|
|
||||||
'analyze_time': (analyze_time + 's'),
|
|
||||||
'url_type': url_type,
|
|
||||||
'platform': 'douyin',
|
|
||||||
'original_url': original_url,
|
|
||||||
'api_url': api_url,
|
|
||||||
'album_aweme_id': album_aweme_id,
|
|
||||||
'album_title': album_title,
|
|
||||||
'album_author': album_author,
|
|
||||||
'album_author_signature': album_author_signature,
|
|
||||||
'album_author_uid': album_author_uid,
|
|
||||||
'album_author_id': album_author_id,
|
|
||||||
'album_music': album_music,
|
|
||||||
'album_music_title': album_music_title,
|
|
||||||
'album_music_author': album_music_author,
|
|
||||||
'album_music_id': album_music_id,
|
|
||||||
'album_music_mid': album_music_mid,
|
|
||||||
'album_comment_count': album_comment_count,
|
|
||||||
'album_digg_count': album_digg_count,
|
|
||||||
'album_play_count': album_play_count,
|
|
||||||
'album_share_count': album_share_count,
|
|
||||||
'album_create_time': album_create_time,
|
|
||||||
'album_list': images_list,
|
|
||||||
'album_hashtags': album_hashtags}
|
|
||||||
return album_data
|
|
||||||
else:
|
|
||||||
print("类型 = 视频")
|
|
||||||
# 类型为视频
|
|
||||||
url_type = 'video'
|
|
||||||
# 视频标题
|
|
||||||
video_title = str(js['item_list'][0]['desc'])
|
|
||||||
# 视频作者昵称
|
|
||||||
video_author = str(js['item_list'][0]['author']['nickname'])
|
|
||||||
# 视频作者抖音号
|
|
||||||
video_author_id = str(js['item_list'][0]['author']['unique_id'])
|
|
||||||
if video_author_id == "":
|
|
||||||
# 如果作者未修改过抖音号,应使用此值以避免无法获取其抖音ID
|
|
||||||
video_author_id = str(js['item_list'][0]['author']['short_id'])
|
|
||||||
# 有水印视频链接
|
|
||||||
wm_video_url = str(js['item_list'][0]['video']['play_addr']['url_list'][0])
|
|
||||||
# 无水印视频链接 (在回执JSON中将关键字'playwm'替换为'play'即可获得无水印地址)
|
|
||||||
nwm_video_url = str(js['item_list'][0]['video']['play_addr']['url_list'][0]).replace('playwm', 'play')
|
|
||||||
# 去水印后视频链接(2022年1月1日抖音APi获取到的URL会进行跳转,需要在Location中获取直链)
|
|
||||||
r = requests.get(url=nwm_video_url, headers=headers, allow_redirects=False)
|
|
||||||
video_url = r.headers['Location']
|
|
||||||
# 视频作者签名
|
|
||||||
video_author_signature = str(js['item_list'][0]['author']['signature'])
|
|
||||||
# 视频作者UID
|
|
||||||
video_author_uid = str(js['item_list'][0]['author']['uid'])
|
|
||||||
# 尝试获取视频背景音乐
|
|
||||||
try:
|
|
||||||
# 视频BGM链接
|
|
||||||
video_music = str(js['item_list'][0]['music']['play_url']['url_list'][0])
|
|
||||||
# 视频BGM标题
|
|
||||||
video_music_title = str(js['item_list'][0]['music']['title'])
|
|
||||||
# 视频BGM作者
|
|
||||||
video_music_author = str(js['item_list'][0]['music']['author'])
|
|
||||||
# 视频BGM ID
|
|
||||||
video_music_id = str(js['item_list'][0]['music']['id'])
|
|
||||||
# 视频BGM MID
|
|
||||||
video_music_mid = str(js['item_list'][0]['music']['mid'])
|
|
||||||
except:
|
|
||||||
# 出错代表无背景音乐
|
|
||||||
# 视频BGM链接
|
|
||||||
video_music = 'No BGM found'
|
|
||||||
# 视频BGM标题
|
|
||||||
video_music_title = 'No BGM found'
|
|
||||||
# 视频BGM作者
|
|
||||||
video_music_author = 'No BGM found'
|
|
||||||
# 视频BGM ID
|
|
||||||
video_music_id = 'No BGM found'
|
|
||||||
# 视频BGM MID
|
|
||||||
video_music_mid = 'No BGM found'
|
|
||||||
# 视频ID
|
|
||||||
video_aweme_id = str(js['item_list'][0]['statistics']['aweme_id'])
|
|
||||||
# 评论数量
|
|
||||||
video_comment_count = str(js['item_list'][0]['statistics']['comment_count'])
|
|
||||||
# 获赞数量
|
|
||||||
video_digg_count = str(js['item_list'][0]['statistics']['digg_count'])
|
|
||||||
# 播放次数
|
|
||||||
video_play_count = str(js['item_list'][0]['statistics']['play_count'])
|
|
||||||
# 分享次数
|
|
||||||
video_share_count = str(js['item_list'][0]['statistics']['share_count'])
|
|
||||||
# 上传时间戳
|
|
||||||
video_create_time = str(js['item_list'][0]['create_time'])
|
|
||||||
# 将话题保存在列表中
|
|
||||||
video_hashtags = []
|
|
||||||
for tag in js['item_list'][0]['text_extra']:
|
|
||||||
video_hashtags.append(tag['hashtag_name'])
|
|
||||||
# 结束时间
|
|
||||||
end = time.time()
|
|
||||||
# 解析时间
|
|
||||||
analyze_time = format((end - start), '.4f')
|
|
||||||
# 返回包含数据的字典
|
|
||||||
video_data = {'status': 'success',
|
|
||||||
'analyze_time': (analyze_time + 's'),
|
|
||||||
'url_type': url_type,
|
|
||||||
'platform': 'douyin',
|
|
||||||
'original_url': original_url,
|
|
||||||
'api_url': api_url,
|
|
||||||
'video_title': video_title,
|
|
||||||
'nwm_video_url': video_url,
|
|
||||||
'wm_video_url': wm_video_url,
|
|
||||||
'video_aweme_id': video_aweme_id,
|
|
||||||
'video_author': video_author,
|
|
||||||
'video_author_signature': video_author_signature,
|
|
||||||
'video_author_uid': video_author_uid,
|
|
||||||
'video_author_id': video_author_id,
|
|
||||||
'video_music': video_music,
|
|
||||||
'video_music_title': video_music_title,
|
|
||||||
'video_music_author': video_music_author,
|
|
||||||
'video_music_id': video_music_id,
|
|
||||||
'video_music_mid': video_music_mid,
|
|
||||||
'video_comment_count': video_comment_count,
|
|
||||||
'video_digg_count': video_digg_count,
|
|
||||||
'video_play_count': video_play_count,
|
|
||||||
'video_share_count': video_share_count,
|
|
||||||
'video_create_time': video_create_time,
|
|
||||||
'video_hashtags': video_hashtags}
|
|
||||||
return video_data
|
|
||||||
except Exception as e:
|
|
||||||
# 返回异常
|
|
||||||
return {'status': 'failed', 'reason': e, 'function': 'Scraper.douyin()', 'value': original_url}
|
|
||||||
|
|
||||||
@retry(stop_max_attempt_number=6)
|
|
||||||
def tiktok(self, original_url):
|
|
||||||
"""
|
|
||||||
解析TikTok链接
|
|
||||||
:param original_url:TikTok链接
|
|
||||||
:return:包含信息的字典
|
|
||||||
"""
|
|
||||||
headers = self.headers
|
|
||||||
# 开始时间
|
|
||||||
start = time.time()
|
|
||||||
# 校验TikTok链接
|
|
||||||
if original_url[:12] == "https://www.":
|
|
||||||
original_url = original_url
|
|
||||||
print("目标链接: ", original_url)
|
|
||||||
else:
|
|
||||||
# 从请求头中获取原始链接
|
|
||||||
response = requests.get(url=original_url, headers=headers, allow_redirects=False)
|
|
||||||
true_link = response.headers['Location'].split("?")[0]
|
|
||||||
original_url = true_link
|
|
||||||
# TikTok请求头返回的第二种链接类型
|
|
||||||
if '.html' in true_link:
|
|
||||||
response = requests.get(url=true_link, headers=headers, allow_redirects=False)
|
|
||||||
original_url = response.headers['Location'].split("?")[0]
|
|
||||||
print("目标链接: ", original_url)
|
|
||||||
try:
|
|
||||||
# 开始获取TikTok数据
|
|
||||||
tiktok_headers = self.tiktok_headers
|
|
||||||
html = requests.get(url=original_url, headers=tiktok_headers)
|
|
||||||
res = re.search('<script id="sigi-persisted-data">(.*)</script><script', html.text).group(1)
|
|
||||||
resp = re.findall(r'^window\[\'SIGI_STATE\']=(.*)?;window', res)[0]
|
|
||||||
result = json.loads(resp)
|
|
||||||
author_id = result["ItemList"]["video"]["list"][0]
|
|
||||||
# 从网页中获得的视频JSON数据
|
|
||||||
video_info = result["ItemModule"][author_id]
|
|
||||||
# print(video_info)
|
|
||||||
# 使用第三方API获取无水印视频链接(不保证稳定)
|
|
||||||
s = requests.Session()
|
|
||||||
api_url = "https://ttdownloader.com/req/"
|
|
||||||
source = s.get("https://ttdownloader.com/")
|
|
||||||
token = re.findall(r'value=\"([0-9a-z]+)\"', source.text)
|
|
||||||
result = s.post(
|
|
||||||
api_url,
|
|
||||||
data={'url': original_url, 'format': '', 'token': token[0]}
|
|
||||||
)
|
|
||||||
nwm, wm, audio = re.findall(
|
|
||||||
r'(https?://.*?.php\?v\=.*?)\"', result.text
|
|
||||||
)
|
|
||||||
r = requests.get(nwm, allow_redirects=False)
|
|
||||||
# 整理数据
|
|
||||||
print("类型 = 视频")
|
|
||||||
# 无水印视频链接
|
|
||||||
nwm_video_url = r.headers['Location']
|
|
||||||
# 有水印视频链接
|
|
||||||
wm_video_url = video_info['video']['playAddr']
|
|
||||||
# 类型为视频
|
|
||||||
url_type = 'video'
|
|
||||||
# 视频标题
|
|
||||||
video_title = video_info['desc']
|
|
||||||
# 视频作者昵称
|
|
||||||
video_author = video_info['author']
|
|
||||||
# 视频作者别名
|
|
||||||
video_author_nickname = video_info['nickname']
|
|
||||||
# 视频作者抖音号
|
|
||||||
video_author_id = video_info['authorId']
|
|
||||||
# 视频作者secid
|
|
||||||
video_author_SecId = video_info['authorSecId']
|
|
||||||
# 视频背景音频
|
|
||||||
video_music = video_info['music']['playUrl']
|
|
||||||
# 上传时间戳
|
|
||||||
video_create_time = video_info['createTime']
|
|
||||||
# 视频ID
|
|
||||||
video_aweme_id = video_info['video']['id']
|
|
||||||
# 视频分辨率
|
|
||||||
video_ratio = video_info['video']['ratio']
|
|
||||||
# 视频BGM标题
|
|
||||||
video_music_title = video_info['music']['title']
|
|
||||||
# 视频BGM作者
|
|
||||||
video_music_author = video_info['music']['authorName']
|
|
||||||
# 视频BGM ID
|
|
||||||
video_music_id = video_info['music']['id']
|
|
||||||
# 视频BGM链接
|
|
||||||
video_music_url = video_info['music']['playUrl']
|
|
||||||
# 评论数量
|
|
||||||
video_comment_count = video_info['stats']['commentCount']
|
|
||||||
# 获赞数量
|
|
||||||
video_digg_count = video_info['stats']['diggCount']
|
|
||||||
# 播放次数
|
|
||||||
video_play_count = video_info['stats']['playCount']
|
|
||||||
# 分享次数
|
|
||||||
video_share_count = video_info['stats']['shareCount']
|
|
||||||
# 作者粉丝数量
|
|
||||||
video_author_followerCount = video_info['authorStats']['followerCount']
|
|
||||||
# 作者关注数量
|
|
||||||
video_author_followingCount = video_info['authorStats']['followingCount']
|
|
||||||
# 作者获赞数量
|
|
||||||
video_author_heartCount = video_info['authorStats']['heartCount']
|
|
||||||
# 作者视频数量
|
|
||||||
video_author_videoCount = video_info['authorStats']['videoCount']
|
|
||||||
# 作者已赞作品数量
|
|
||||||
video_author_diggCount = video_info['authorStats']['diggCount']
|
|
||||||
# 将话题保存在列表中
|
|
||||||
video_hashtags = []
|
|
||||||
for tag in video_info['challenges']:
|
|
||||||
video_hashtags.append(tag['title'])
|
|
||||||
# 结束时间
|
|
||||||
end = time.time()
|
|
||||||
# 解析时间
|
|
||||||
analyze_time = format((end - start), '.4f')
|
|
||||||
# 储存数据
|
|
||||||
video_date = {'status': 'success',
|
|
||||||
'analyze_time': (analyze_time + 's'),
|
|
||||||
'url_type': url_type,
|
|
||||||
'original_url': original_url,
|
|
||||||
'platform': 'tiktok',
|
|
||||||
'video_title': video_title,
|
|
||||||
'nwm_video_url': nwm_video_url,
|
|
||||||
'wm_video_url': wm_video_url,
|
|
||||||
'video_author': video_author,
|
|
||||||
'video_author_nickname': video_author_nickname,
|
|
||||||
'video_author_id': video_author_id,
|
|
||||||
'video_author_SecId': video_author_SecId,
|
|
||||||
'video_music': video_music,
|
|
||||||
'video_create_time': video_create_time,
|
|
||||||
'video_aweme_id': video_aweme_id,
|
|
||||||
'video_ratio': video_ratio,
|
|
||||||
'video_music_title': video_music_title,
|
|
||||||
'video_music_author': video_music_author,
|
|
||||||
'video_music_id': video_music_id,
|
|
||||||
'video_music_url': video_music_url,
|
|
||||||
'video_comment_count': video_comment_count,
|
|
||||||
'video_digg_count': video_digg_count,
|
|
||||||
'video_play_count': video_play_count,
|
|
||||||
'video_share_count': video_share_count,
|
|
||||||
'video_author_followerCount': video_author_followerCount,
|
|
||||||
'video_author_followingCount': video_author_followingCount,
|
|
||||||
'video_author_heartCount': video_author_heartCount,
|
|
||||||
'video_author_videoCount': video_author_videoCount,
|
|
||||||
'video_author_diggCount': video_author_diggCount,
|
|
||||||
'video_hashtags': video_hashtags
|
|
||||||
}
|
|
||||||
# 返回包含数据的字典
|
|
||||||
return video_date
|
|
||||||
except Exception as e:
|
|
||||||
# 异常捕获
|
|
||||||
return {'status': 'failed', 'reason': e, 'function': 'Scraper.tiktok()', 'value': original_url}
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
# 测试类
|
|
||||||
scraper = Scraper()
|
|
||||||
tiktok_url = "https://www.tiktok.com/@oregonzoo/video/7074995215647477034"
|
|
||||||
tiktok_date = scraper.tiktok(tiktok_url)
|
|
||||||
print(tiktok_date)
|
|
||||||
print('')
|
|
||||||
douyin_url = "https://www.douyin.com/video/7055581212840086817"
|
|
||||||
douyin_date = scraper.douyin(douyin_url)
|
|
||||||
print(douyin_date)
|
|
||||||
234
API/web_api.py
234
API/web_api.py
|
|
@ -1,234 +0,0 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- encoding: utf-8 -*-
|
|
||||||
# @Author: https://github.com/Evil0ctal/
|
|
||||||
# @Time: 2021/11/06
|
|
||||||
# @Update: 2022/04/15
|
|
||||||
# @Function:
|
|
||||||
# 创建一个接受提交参数的Flask应用程序。
|
|
||||||
# 将scraper.py返回的内容以JSON格式返回。
|
|
||||||
|
|
||||||
import os
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
import requests
|
|
||||||
import unicodedata
|
|
||||||
from scraper import Scraper
|
|
||||||
from werkzeug.urls import url_quote
|
|
||||||
from flask import Flask, request, jsonify, make_response
|
|
||||||
|
|
||||||
app = Flask(__name__)
|
|
||||||
headers = {
|
|
||||||
'user-agent': 'Mozilla/5.0 (Linux; Android 8.0; Pixel 2 Build/OPD3.170816.012) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Mobile Safari/537.36 Edg/87.0.664.66'
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
def find_url(string):
|
|
||||||
# 解析抖音分享口令中的链接并返回列表
|
|
||||||
url = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', string)
|
|
||||||
return url
|
|
||||||
|
|
||||||
|
|
||||||
def clean_filename(string, author_name):
|
|
||||||
# 替换不能用于文件名的字符('/ \ : * ? " < > |')
|
|
||||||
rstr = r"[\/\\\:\*\?\"\<\>\|]"
|
|
||||||
# 将上述字符替换为下划线
|
|
||||||
new_title = re.sub(rstr, "_", string)
|
|
||||||
# 新文件名
|
|
||||||
filename = ('douyin.wtf_' + new_title + '_' + author_name).replace('\n', '')
|
|
||||||
return filename
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/", methods=["POST", "GET"])
|
|
||||||
def index():
|
|
||||||
# 显示基础信息
|
|
||||||
index_info = {'API status': 'Running',
|
|
||||||
'GitHub': 'https://github.com/Evil0ctal/Douyin_TikTok_Download_API',
|
|
||||||
'Introduction': 'Free and open source Douyin/TikTok watermark-free video download tool, supports API calls.',
|
|
||||||
'Web interface': 'https://douyin.wtf/',
|
|
||||||
'iOS Shortcuts': 'https://api.douyin.wtf/ios',
|
|
||||||
'Parsing Douyin/TikTok videos': 'https://api.douyin.wtf/api?url=[Douyin/TikTok url]',
|
|
||||||
'Return Video MP4 File Download': 'https://api.douyin.wtf/video?url=[Douyin/TikTok url]',
|
|
||||||
'Return Video MP3 File Download': 'https://api.douyin.wtf/music?url=[Douyin/TikTok url]'}
|
|
||||||
return jsonify(index_info)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/api", methods=["POST", "GET"])
|
|
||||||
def webapi():
|
|
||||||
# 创建一个Flask应用获取POST参数并返回结果
|
|
||||||
api = Scraper()
|
|
||||||
content = request.args.get("url")
|
|
||||||
if content != '':
|
|
||||||
post_content = find_url(content)[0]
|
|
||||||
# 将API记录在API_logs.txt中
|
|
||||||
date = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
|
|
||||||
with open('API_logs.txt', 'a') as f:
|
|
||||||
f.write(date + " : " + post_content + '\n')
|
|
||||||
try:
|
|
||||||
# 开始时间
|
|
||||||
start = time.time()
|
|
||||||
# 校验是否为TikTok链接
|
|
||||||
if 'tiktok.com' in post_content:
|
|
||||||
result = api.tiktok(post_content)
|
|
||||||
# 以JSON格式返回TikTok信息
|
|
||||||
return jsonify(result)
|
|
||||||
# 如果关键字不存在则判断为抖音链接
|
|
||||||
elif 'douyin.com' in post_content:
|
|
||||||
result = api.douyin(post_content)
|
|
||||||
# 以JSON格式返回返回Douyin信息
|
|
||||||
return jsonify(result)
|
|
||||||
except Exception as e:
|
|
||||||
# 结束时间
|
|
||||||
end = time.time()
|
|
||||||
# 解析时间
|
|
||||||
analyze_time = (format((end - start), '.4f') + 's')
|
|
||||||
# 返回错误信息
|
|
||||||
return jsonify(status='failed', reason=str(e), time=analyze_time, function='webapi()', value=content)
|
|
||||||
else:
|
|
||||||
# 返回错误信息
|
|
||||||
return jsonify(status='failed', reason='url value cannot be empty', function='api()', value=content)
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/ios", methods=["POST", "GET"])
|
|
||||||
def ios_shortcut():
|
|
||||||
# 用于检查快捷指令更新
|
|
||||||
return jsonify(version='3.0',
|
|
||||||
update='2022/04/15',
|
|
||||||
link='https://www.icloud.com/shortcuts/126820d2783748d1bdec95a223a02639',
|
|
||||||
note='为快捷指令增加了检查更新的功能')
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/video", methods=["POST", "GET"])
|
|
||||||
def download_video():
|
|
||||||
# 用于返回视频下载请求(返回MP4文件下载请求,面对大量请求时非常吃服务器内存,容易崩,慎用。)
|
|
||||||
# 将api_switch的值设定为False可关闭该API
|
|
||||||
api_switch = True
|
|
||||||
if api_switch:
|
|
||||||
api = Scraper()
|
|
||||||
content = request.args.get("url")
|
|
||||||
if content == '':
|
|
||||||
return jsonify(status='failed', reason='url value cannot be empty', function='download_music()',
|
|
||||||
value=content)
|
|
||||||
else:
|
|
||||||
post_content = find_url(content)[0]
|
|
||||||
try:
|
|
||||||
if 'douyin.com' in post_content:
|
|
||||||
# 获取视频信息
|
|
||||||
result = api.douyin(post_content)
|
|
||||||
# 视频链接
|
|
||||||
video_url = result['nwm_video_url']
|
|
||||||
# 视频标题
|
|
||||||
video_title = result['video_title']
|
|
||||||
# 作者昵称
|
|
||||||
video_author = result['video_author']
|
|
||||||
# 清理文件名
|
|
||||||
file_name = clean_filename(video_title, video_author)
|
|
||||||
elif 'tiktok.com' in post_content:
|
|
||||||
# 获取视频信息
|
|
||||||
result = api.tiktok(post_content)
|
|
||||||
# 无水印地址
|
|
||||||
video_url = result['nwm_video_url']
|
|
||||||
# 视频标题
|
|
||||||
video_title = result['video_title']
|
|
||||||
# 作者昵称
|
|
||||||
video_author = result['video_author']
|
|
||||||
# 清理文件名
|
|
||||||
file_name = clean_filename(video_title, video_author)
|
|
||||||
else:
|
|
||||||
return jsonify(Status='Failed', Reason='Check submitted parameters!')
|
|
||||||
# 获取视频文件字节流
|
|
||||||
video_mp4 = requests.get(video_url, headers).content
|
|
||||||
# 将字节流封装成返回对象
|
|
||||||
response = make_response(video_mp4)
|
|
||||||
# 添加响应头部信息
|
|
||||||
response.headers['Content-Type'] = "video/mp4"
|
|
||||||
# 他妈的,费了我老大劲才解决文件中文名的问题
|
|
||||||
try:
|
|
||||||
filename = file_name.encode('latin-1')
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
filenames = {
|
|
||||||
'filename': unicodedata.normalize('NFKD', file_name).encode('latin-1', 'ignore'),
|
|
||||||
'filename*': "UTF-8''{}".format(url_quote(file_name) + '.mp4'),
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
filenames = {'filename': file_name + '.mp4'}
|
|
||||||
# attachment表示以附件形式下载
|
|
||||||
response.headers.set('Content-Disposition', 'attachment', **filenames)
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
return jsonify(status='failed', reason=str(e), function='download_video()', value=content)
|
|
||||||
else:
|
|
||||||
return jsonify(Status='Failed', Reason='This API is disabled. To enable it, set the value of "api_switch" to True.')
|
|
||||||
|
|
||||||
|
|
||||||
@app.route("/music", methods=["POST", "GET"])
|
|
||||||
def download_music():
|
|
||||||
# 用于返回视频下载请求(返回MP3文件下载请求,面对大量请求时非常吃服务器内存,容易崩,慎用。)
|
|
||||||
# 将api_switch的值设定为False可关闭该API
|
|
||||||
api_switch = True
|
|
||||||
if api_switch:
|
|
||||||
api = Scraper()
|
|
||||||
content = request.args.get("url")
|
|
||||||
if content == '':
|
|
||||||
return jsonify(status='failed', reason='url value cannot be empty', function='download_music()',
|
|
||||||
value=content)
|
|
||||||
else:
|
|
||||||
post_content = find_url(content)[0]
|
|
||||||
try:
|
|
||||||
if 'douyin.com' in post_content:
|
|
||||||
# 获取视频信息
|
|
||||||
result = api.douyin(post_content)
|
|
||||||
bgm_url = result['video_music']
|
|
||||||
if bgm_url == "None":
|
|
||||||
return jsonify(Status='Failed', Reason='This link has no music to get!')
|
|
||||||
else:
|
|
||||||
# 视频标题
|
|
||||||
bgm_title = result['video_music_title']
|
|
||||||
# 作者昵称
|
|
||||||
author_name = result['video_music_author']
|
|
||||||
# 清理文件名
|
|
||||||
file_name = clean_filename(bgm_title, author_name)
|
|
||||||
elif 'tiktok.com' in post_content:
|
|
||||||
# 获取视频信息
|
|
||||||
result = api.douyin(post_content)
|
|
||||||
# BGM链接
|
|
||||||
bgm_url = result['video_music']
|
|
||||||
# 视频标题
|
|
||||||
bgm_title = result['video_music_title']
|
|
||||||
# 作者昵称
|
|
||||||
author_name = result['video_music_author']
|
|
||||||
# 清理文件名
|
|
||||||
file_name = clean_filename(bgm_title, author_name)
|
|
||||||
else:
|
|
||||||
return jsonify(Status='Failed', Reason='This link has no music to get!')
|
|
||||||
video_bgm = requests.get(bgm_url, headers).content
|
|
||||||
# 将bgm字节流封装成response对象
|
|
||||||
response = make_response(video_bgm)
|
|
||||||
# 添加响应头部信息
|
|
||||||
response.headers['Content-Type'] = "video/mp3"
|
|
||||||
# 他妈的,费了我老大劲才解决文件中文名的问题
|
|
||||||
try:
|
|
||||||
filename = file_name.encode('latin-1')
|
|
||||||
except UnicodeEncodeError:
|
|
||||||
filenames = {
|
|
||||||
'filename': unicodedata.normalize('NFKD', file_name).encode('latin-1', 'ignore'),
|
|
||||||
'filename*': "UTF-8''{}".format(url_quote(file_name) + '.mp3'),
|
|
||||||
}
|
|
||||||
else:
|
|
||||||
filenames = {'filename': file_name + '.mp3'}
|
|
||||||
# attachment表示以附件形式下载
|
|
||||||
response.headers.set('Content-Disposition', 'attachment', **filenames)
|
|
||||||
return response
|
|
||||||
except Exception as e:
|
|
||||||
return jsonify(status='failed', reason=str(e), function='download_music()', value=content)
|
|
||||||
else:
|
|
||||||
return jsonify(Status='Failed', Reason='This API is disabled. To enable it, set the value of "api_switch" to True.')
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
# 开启WebAPI
|
|
||||||
if os.environ.get('PORT'):
|
|
||||||
port = int(os.environ.get('PORT'))
|
|
||||||
else:
|
|
||||||
# 默认端口
|
|
||||||
port = 2333
|
|
||||||
app.run(host='0.0.0.0', port=port)
|
|
||||||
Loading…
Reference in a new issue