Merge pull request #663 from JaggerH/dynamic-update-cookie
添加update-cookie接口,增加chrome扩展程序自动回调更新cookie
This commit is contained in:
commit
3b484a9d0d
9 changed files with 1148 additions and 40 deletions
|
|
@ -1,5 +1,7 @@
|
|||
import os
|
||||
import zipfile
|
||||
import subprocess
|
||||
import tempfile
|
||||
|
||||
import aiofiles
|
||||
import httpx
|
||||
|
|
@ -48,42 +50,102 @@ async def fetch_data_stream(url: str, request:Request , headers: dict = None, fi
|
|||
await out_file.write(chunk)
|
||||
return True
|
||||
|
||||
@router.get("/download", summary="在线下载抖音|TikTok视频/图片/Online download Douyin|TikTok video/image")
|
||||
async def merge_bilibili_video_audio(video_url: str, audio_url: str, request: Request, output_path: str, headers: dict) -> bool:
    """
    Download a Bilibili video stream and audio stream and mux them into one MP4.

    Both streams are fetched into temporary files with ``fetch_data_stream``
    and then combined by the ``ffmpeg`` executable using stream copy (no
    re-encoding). The temporary files are removed on every exit path,
    including a failed download.

    Args:
        video_url: URL of the video-only stream.
        audio_url: URL of the audio-only stream.
        request: Incoming request, forwarded to ``fetch_data_stream``.
        output_path: Path of the merged MP4 file; FFmpeg overwrites it (``-y``).
        headers: HTTP headers forwarded to ``fetch_data_stream``.

    Returns:
        True if both downloads succeeded and FFmpeg exited with code 0,
        otherwise False. Errors are printed instead of being raised.
    """
    # Function-scope import so this block does not depend on the module's import list.
    import asyncio

    video_temp_path = None
    audio_temp_path = None
    try:
        # delete=False keeps the (empty) files after the handles are closed so
        # they can be written by the downloader and read by FFmpeg by path.
        with tempfile.NamedTemporaryFile(suffix='.m4v', delete=False) as video_temp:
            video_temp_path = video_temp.name
        with tempfile.NamedTemporaryFile(suffix='.m4a', delete=False) as audio_temp:
            audio_temp_path = audio_temp.name

        # Download the video stream, then the audio stream.
        video_success = await fetch_data_stream(video_url, request, headers=headers, file_path=video_temp_path)
        audio_success = await fetch_data_stream(audio_url, request, headers=headers, file_path=audio_temp_path)

        if not video_success or not audio_success:
            print("Failed to download video or audio stream")
            return False

        # Mux both inputs with FFmpeg.
        ffmpeg_cmd = [
            'ffmpeg', '-y',          # -y: overwrite the output file
            '-i', video_temp_path,   # video input
            '-i', audio_temp_path,   # audio input
            '-c:v', 'copy',          # copy the video codec, no re-encoding
            '-c:a', 'copy',          # copy the audio codec, no re-encoding
            '-f', 'mp4',             # force MP4 as the output container
            output_path,
        ]

        print(f"FFmpeg command: {' '.join(ffmpeg_cmd)}")
        # subprocess.run blocks; run it in the default executor so the event
        # loop keeps serving other requests while FFmpeg works.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            lambda: subprocess.run(ffmpeg_cmd, capture_output=True, text=True),
        )
        print(f"FFmpeg return code: {result.returncode}")
        if result.stderr:
            print(f"FFmpeg stderr: {result.stderr}")
        if result.stdout:
            print(f"FFmpeg stdout: {result.stdout}")

        return result.returncode == 0

    except Exception as e:
        print(f"Error merging video and audio: {e}")
        return False

    finally:
        # Best-effort cleanup of each temp file independently, so a failure to
        # remove one does not leave the other behind.
        for temp_path in (video_temp_path, audio_temp_path):
            if temp_path is None:
                continue
            try:
                os.unlink(temp_path)
            except OSError:
                pass
|
||||
|
||||
@router.get("/download", summary="在线下载抖音|TikTok|Bilibili视频/图片/Online download Douyin|TikTok|Bilibili video/image")
|
||||
async def download_file_hybrid(request: Request,
|
||||
url: str = Query(
|
||||
example="https://www.douyin.com/video/7372484719365098803",
|
||||
description="视频或图片的URL地址,也支持抖音|TikTok的分享链接,例如:https://v.douyin.com/e4J8Q7A/"),
|
||||
description="视频或图片的URL地址,支持抖音|TikTok|Bilibili的分享链接,例如:https://v.douyin.com/e4J8Q7A/ 或 https://www.bilibili.com/video/BV1xxxxxxxxx"),
|
||||
prefix: bool = True,
|
||||
with_watermark: bool = False):
|
||||
"""
|
||||
# [中文]
|
||||
### 用途:
|
||||
- 在线下载抖音|TikTok 无水印或有水印的视频/图片
|
||||
- 在线下载抖音|TikTok|Bilibili 无水印或有水印的视频/图片
|
||||
- 通过传入的视频URL参数,获取对应的视频或图片数据,然后下载到本地。
|
||||
- 如果你在尝试直接访问TikTok单一视频接口的JSON数据中的视频播放地址时遇到HTTP403错误,那么你可以使用此接口来下载视频。
|
||||
- Bilibili视频会自动合并视频流和音频流,确保下载的视频有声音。
|
||||
- 这个接口会占用一定的服务器资源,所以在Demo站点是默认关闭的,你可以在本地部署后调用此接口。
|
||||
### 参数:
|
||||
- url: 视频或图片的URL地址,也支持抖音|TikTok的分享链接,例如:https://v.douyin.com/e4J8Q7A/。
|
||||
- url: 视频或图片的URL地址,支持抖音|TikTok|Bilibili的分享链接,例如:https://v.douyin.com/e4J8Q7A/ 或 https://www.bilibili.com/video/BV1xxxxxxxxx
|
||||
- prefix: 下载文件的前缀,默认为True,可以在配置文件中修改。
|
||||
- with_watermark: 是否下载带水印的视频或图片,默认为False。
|
||||
- with_watermark: 是否下载带水印的视频或图片,默认为False。(注意:Bilibili没有水印概念)
|
||||
### 返回:
|
||||
- 返回下载的视频或图片文件响应。
|
||||
|
||||
# [English]
|
||||
### Purpose:
|
||||
- Download Douyin|TikTok video/image with or without watermark online.
|
||||
- Download Douyin|TikTok|Bilibili video/image with or without watermark online.
|
||||
- By passing the video URL parameter, get the corresponding video or image data, and then download it to the local.
|
||||
- If you encounter an HTTP403 error when trying to access the video playback address in the JSON data of the TikTok single video interface directly, you can use this interface to download the video.
|
||||
- Bilibili videos will automatically merge video and audio streams to ensure downloaded videos have sound.
|
||||
- This interface will occupy a certain amount of server resources, so it is disabled by default on the Demo site, you can call this interface after deploying it locally.
|
||||
### Parameters:
|
||||
- url: The URL address of the video or image, also supports Douyin|TikTok sharing links, for example: https://v.douyin.com/e4J8Q7A/.
|
||||
- url: The URL address of the video or image, supports Douyin|TikTok|Bilibili sharing links, for example: https://v.douyin.com/e4J8Q7A/ or https://www.bilibili.com/video/BV1xxxxxxxxx
|
||||
- prefix: The prefix of the downloaded file, the default is True, and can be modified in the configuration file.
|
||||
- with_watermark: Whether to download videos or images with watermarks, the default is False.
|
||||
- with_watermark: Whether to download videos or images with watermarks, the default is False. (Note: Bilibili has no watermark concept)
|
||||
### Returns:
|
||||
- Return the response of the downloaded video or image file.
|
||||
|
||||
# [示例/Example]
|
||||
url: https://www.douyin.com/video/7372484719365098803
|
||||
url: https://www.bilibili.com/video/BV1U5efz2Egn
|
||||
"""
|
||||
# 是否开启此端点/Whether to enable this endpoint
|
||||
if not config["API"]["Download_Switch"]:
|
||||
|
|
@ -103,7 +165,7 @@ async def download_file_hybrid(request: Request,
|
|||
try:
|
||||
data_type = data.get('type')
|
||||
platform = data.get('platform')
|
||||
aweme_id = data.get('aweme_id')
|
||||
video_id = data.get('video_id') # 改为使用video_id
|
||||
file_prefix = config.get("API").get("Download_File_Prefix") if prefix else ''
|
||||
download_path = os.path.join(config.get("API").get("Download_Path"), f"{platform}_{data_type}")
|
||||
|
||||
|
|
@ -112,25 +174,48 @@ async def download_file_hybrid(request: Request,
|
|||
|
||||
# 下载视频文件/Download video file
|
||||
if data_type == 'video':
|
||||
file_name = f"{file_prefix}{platform}_{aweme_id}.mp4" if not with_watermark else f"{file_prefix}{platform}_{aweme_id}_watermark.mp4"
|
||||
url = data.get('video_data').get('nwm_video_url_HQ') if not with_watermark else data.get('video_data').get(
|
||||
'wm_video_url_HQ')
|
||||
file_name = f"{file_prefix}{platform}_{video_id}.mp4" if not with_watermark else f"{file_prefix}{platform}_{video_id}_watermark.mp4"
|
||||
file_path = os.path.join(download_path, file_name)
|
||||
|
||||
# 判断文件是否存在,存在就直接返回
|
||||
if os.path.exists(file_path):
|
||||
return FileResponse(path=file_path, media_type='video/mp4', filename=file_name)
|
||||
|
||||
# 获取视频文件
|
||||
__headers = await HybridCrawler.TikTokWebCrawler.get_tiktok_headers() if platform == 'tiktok' else await HybridCrawler.DouyinWebCrawler.get_douyin_headers()
|
||||
# response = await fetch_data(url, headers=__headers)
|
||||
# 获取对应平台的headers
|
||||
if platform == 'tiktok':
|
||||
__headers = await HybridCrawler.TikTokWebCrawler.get_tiktok_headers()
|
||||
elif platform == 'bilibili':
|
||||
__headers = await HybridCrawler.BilibiliWebCrawler.get_bilibili_headers()
|
||||
else: # douyin
|
||||
__headers = await HybridCrawler.DouyinWebCrawler.get_douyin_headers()
|
||||
|
||||
success = await fetch_data_stream(url, request, headers=__headers, file_path=file_path)
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="An error occurred while fetching data"
|
||||
)
|
||||
# Bilibili 特殊处理:音视频分离
|
||||
if platform == 'bilibili':
|
||||
video_data = data.get('video_data', {})
|
||||
video_url = video_data.get('nwm_video_url_HQ') if not with_watermark else video_data.get('wm_video_url_HQ')
|
||||
audio_url = video_data.get('audio_url')
|
||||
if not video_url or not audio_url:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Failed to get video or audio URL from Bilibili"
|
||||
)
|
||||
|
||||
# 使用专门的函数合并音视频
|
||||
success = await merge_bilibili_video_audio(video_url, audio_url, request, file_path, __headers.get('headers'))
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="Failed to merge Bilibili video and audio streams"
|
||||
)
|
||||
else:
|
||||
# 其他平台的常规处理
|
||||
url = data.get('video_data').get('nwm_video_url_HQ') if not with_watermark else data.get('video_data').get('wm_video_url_HQ')
|
||||
success = await fetch_data_stream(url, request, headers=__headers, file_path=file_path)
|
||||
if not success:
|
||||
raise HTTPException(
|
||||
status_code=500,
|
||||
detail="An error occurred while fetching data"
|
||||
)
|
||||
|
||||
# # 保存文件
|
||||
# async with aiofiles.open(file_path, 'wb') as out_file:
|
||||
|
|
@ -142,7 +227,7 @@ async def download_file_hybrid(request: Request,
|
|||
# 下载图片文件/Download image file
|
||||
elif data_type == 'image':
|
||||
# 压缩文件属性/Compress file properties
|
||||
zip_file_name = f"{file_prefix}{platform}_{aweme_id}_images.zip" if not with_watermark else f"{file_prefix}{platform}_{aweme_id}_images_watermark.zip"
|
||||
zip_file_name = f"{file_prefix}{platform}_{video_id}_images.zip" if not with_watermark else f"{file_prefix}{platform}_{video_id}_images_watermark.zip"
|
||||
zip_file_path = os.path.join(download_path, zip_file_name)
|
||||
|
||||
# 判断文件是否存在,存在就直接返回、
|
||||
|
|
@ -159,7 +244,7 @@ async def download_file_hybrid(request: Request,
|
|||
index = int(urls.index(url))
|
||||
content_type = response.headers.get('content-type')
|
||||
file_format = content_type.split('/')[1]
|
||||
file_name = f"{file_prefix}{platform}_{aweme_id}_{index + 1}.{file_format}" if not with_watermark else f"{file_prefix}{platform}_{aweme_id}_{index + 1}_watermark.{file_format}"
|
||||
file_name = f"{file_prefix}{platform}_{video_id}_{index + 1}.{file_format}" if not with_watermark else f"{file_prefix}{platform}_{video_id}_{index + 1}_watermark.{file_format}"
|
||||
file_path = os.path.join(download_path, file_name)
|
||||
image_file_list.append(file_path)
|
||||
|
||||
|
|
|
|||
|
|
@ -51,3 +51,67 @@ async def hybrid_parsing_single_video(request: Request,
|
|||
params=dict(request.query_params),
|
||||
)
|
||||
raise HTTPException(status_code=status_code, detail=detail.dict())
|
||||
|
||||
# 更新Cookie
|
||||
@router.post("/update_cookie",
             response_model=ResponseModel,
             summary="更新Cookie/Update Cookie")
async def update_cookie_api(request: Request,
                            service: str = Body(example="douyin", description="服务名称/Service name"),
                            cookie: str = Body(example="YOUR_NEW_COOKIE", description="新的Cookie值/New Cookie value")):
    """
    # [中文]
    ### 用途:
    - 更新指定服务的Cookie
    ### 参数:
    - service: 服务名称 (支持: douyin, tiktok, bilibili;目前仅douyin会实际更新)
    - cookie: 新的Cookie值
    ### 返回:
    - 更新结果

    # [English]
    ### Purpose:
    - Update Cookie for specified service
    ### Parameters:
    - service: Service name (supported: douyin, tiktok, bilibili; only douyin is actually updated for now)
    - cookie: New Cookie value
    ### Return:
    - Update result

    # [示例/Example]
    service = "douyin"
    cookie = "YOUR_NEW_COOKIE"
    """
    try:
        if service == "douyin":
            # Imported inside the handler rather than at module import time.
            from crawlers.douyin.web.web_crawler import DouyinWebCrawler
            douyin_crawler = DouyinWebCrawler()
            await douyin_crawler.update_cookie(cookie)
            return ResponseModel(code=200,
                                 router=request.url.path,
                                 data={"message": f"Cookie for {service} updated successfully"})
        elif service in ("tiktok", "bilibili"):
            # TODO: call the TikTok / Bilibili crawler's update_cookie once it exists.
            # Until then the request is acknowledged but nothing is stored.
            return ResponseModel(code=200,
                                 router=request.url.path,
                                 data={"message": f"Cookie for {service} will be updated (not implemented yet)"})
        else:
            raise ValueError(f"Service '{service}' is not supported. Supported services: douyin, tiktok, bilibili")
    except Exception as e:
        # Every failure is reported to the client as HTTP 400. The cause is
        # printed and chained so it is not lost; the cookie value is not logged.
        print(f"Failed to update cookie for service '{service}': {e}")
        status_code = 400
        detail = ErrorResponseModel(code=status_code,
                                    router=request.url.path,
                                    params=dict(request.query_params),
                                    )
        raise HTTPException(status_code=status_code, detail=detail.dict()) from e
|
||||
171
chrome-cookie-sniffer/README.md
Normal file
171
chrome-cookie-sniffer/README.md
Normal file
|
|
@ -0,0 +1,171 @@
|
|||
# Chrome Cookie Sniffer
|
||||
|
||||
一个用于自动嗅探和提取网站Cookie的Chrome扩展程序。支持抖音等主流平台,具备智能去重、时间控制和Webhook回调等功能。
|
||||
|
||||
## 功能特性
|
||||
|
||||
- 🎯 **智能Cookie抓取** - 自动拦截POST/GET请求中的Cookie
|
||||
- ⏱️ **防重复机制** - 5分钟内不重复抓取相同服务
|
||||
- 🔄 **内容去重** - 只有Cookie内容变化时才保存
|
||||
- 🎨 **现代化界面** - Card列表展示,状态一目了然
|
||||
- 🔗 **Webhook回调** - Cookie更新时自动推送到指定地址
|
||||
- 📋 **一键复制** - 快速复制Cookie到剪贴板
|
||||
- 🗂️ **数据管理** - 支持导出、清理和单独删除
|
||||
- 🔧 **调试友好** - 内置Webhook测试功能
|
||||
|
||||
## 支持的网站
|
||||
|
||||
- 🎵 **抖音** (douyin.com)
|
||||
- 🚀 **扩展性** - 架构支持轻松添加更多平台
|
||||
|
||||
## 安装方法
|
||||
|
||||
### 1. 下载源码
|
||||
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
# 或直接下载ZIP文件并解压
|
||||
```
|
||||
|
||||
### 2. 在Chrome中加载扩展
|
||||
|
||||
1. **打开Chrome扩展管理页面**
|
||||
- 方法一:地址栏输入 `chrome://extensions/`
|
||||
- 方法二:菜单 → 更多工具 → 扩展程序
|
||||
|
||||
2. **启用开发者模式**
|
||||
- 在扩展管理页面右上角,开启"开发者模式"开关
|
||||
|
||||
3. **加载解压的扩展程序**
|
||||
- 点击"加载已解压的扩展程序"按钮
|
||||
- 选择 `chrome-cookie-sniffer` 文件夹
|
||||
- 确认加载
|
||||
|
||||
4. **验证安装**
|
||||
- 扩展列表中出现"Cookie Sniffer"
|
||||
- 浏览器工具栏出现扩展图标
|
||||
- 状态显示为"已启用"
|
||||
|
||||
### 3. 权限确认
|
||||
|
||||
安装时Chrome会请求以下权限:
|
||||
- `webRequest` - 拦截网络请求
|
||||
- `storage` - 本地数据存储
|
||||
- `cookies` - 读取Cookie信息
|
||||
- `activeTab` - 当前标签页访问
|
||||
- `host_permissions` - 访问douyin.com域名
|
||||
|
||||
## 使用方法
|
||||
|
||||
### 基础使用
|
||||
|
||||
1. **访问目标网站** - 打开抖音等支持的网站
|
||||
2. **触发请求** - 正常浏览,触发POST/GET请求
|
||||
3. **查看结果** - 点击扩展图标查看抓取的Cookie
|
||||
|
||||
### 配置Webhook
|
||||
|
||||
1. **打开扩展弹窗**
|
||||
2. **输入Webhook地址** - 在顶部输入框填入回调URL
|
||||
3. **测试连接** - 点击"🔧 测试"按钮验证
|
||||
4. **自动回调** - Cookie更新时自动POST到指定地址
|
||||
|
||||
### Webhook数据格式
|
||||
|
||||
```json
|
||||
{
|
||||
"service": "douyin",
|
||||
"cookie": "具体的Cookie字符串",
|
||||
"timestamp": "2025-08-29T12:34:56.789Z"
|
||||
}
|
||||
```
|
||||
|
||||
测试时会额外包含:
|
||||
```json
|
||||
{
|
||||
"test": true,
|
||||
"message": "这是一个测试回调..."
|
||||
}
|
||||
```
|
||||
|
||||
### 数据管理
|
||||
|
||||
- **📋 复制Cookie** - 点击卡片中的复制按钮
|
||||
- **🗑️ 删除数据** - 删除单个服务的Cookie
|
||||
- **🔄 刷新** - 手动刷新数据显示
|
||||
- **📤 导出** - 导出所有数据为JSON文件
|
||||
- **🧹 清空** - 清空所有Cookie数据
|
||||
|
||||
## 调试指南
|
||||
|
||||
### 查看日志
|
||||
|
||||
1. **打开扩展管理页面** (`chrome://extensions/`)
|
||||
2. **找到Cookie Sniffer扩展**
|
||||
3. **点击"服务工作进程"** - 点击扩展卡片上蓝色的"服务工作进程"链接,打开开发者工具
|
||||
4. **查看控制台输出** - 所有日志都在这里
|
||||
|
||||
### 常见问题
|
||||
|
||||
**Q: 扩展不工作?**
|
||||
- 检查是否启用开发者模式
|
||||
- 确认权限已正确授予
|
||||
- 查看service worker是否正在运行
|
||||
|
||||
**Q: 没有抓取到Cookie?**
|
||||
- 确认访问的是支持的网站
|
||||
- 检查是否触发了POST/GET请求
|
||||
- 查看service worker控制台日志
|
||||
|
||||
**Q: Webhook测试失败?**
|
||||
- 检查URL格式是否正确
|
||||
- 确认服务器支持跨域请求
|
||||
- 验证服务器是否正常响应
|
||||
|
||||
### 开发者选项
|
||||
|
||||
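修改 `background.js` 中的 `SERVICES` 配置来添加新网站(同时需要在 `manifest.json` 的 `host_permissions`、`background.js` 中 `onBeforeSendHeaders` 监听器的 `urls` 过滤条件以及 `popup.js` 的 `SERVICES` 中加入对应的域名/服务,否则新网站的请求不会被拦截,抓取结果也不会显示):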
|
||||
|
||||
```javascript
|
||||
const SERVICES = {
|
||||
douyin: {
|
||||
name: 'douyin',
|
||||
displayName: '抖音',
|
||||
domains: ['douyin.com'],
|
||||
cookieDomain: '.douyin.com'
|
||||
},
|
||||
// 添加新服务
|
||||
bilibili: {
|
||||
name: 'bilibili',
|
||||
displayName: 'B站',
|
||||
domains: ['bilibili.com'],
|
||||
cookieDomain: '.bilibili.com'
|
||||
}
|
||||
};
|
||||
```
|
||||
|
||||
## 文件结构
|
||||
|
||||
```
|
||||
chrome-cookie-sniffer/
|
||||
├── manifest.json # 扩展配置文件
|
||||
├── background.js # 后台服务脚本
|
||||
├── popup.html # 弹窗界面
|
||||
├── popup.js # 弹窗逻辑
|
||||
└── README.md # 说明文档
|
||||
```
|
||||
|
||||
## 注意事项
|
||||
|
||||
- ⚠️ **仅用于合法用途** - 请遵守网站服务条款
|
||||
- 🔒 **数据安全** - Cookie数据默认仅存储在本地;如果配置了Webhook,Cookie会被POST到该地址,请只填写你信任的服务器
|
||||
- 🔄 **定期更新** - 网站更新可能影响抓取效果
|
||||
- 📱 **Chrome限制** - 部分网站可能有反爬虫机制
|
||||
|
||||
## 开源协议
|
||||
|
||||
本项目遵循 MIT 开源协议。
|
||||
|
||||
## 贡献指南
|
||||
|
||||
欢迎提交Issue和Pull Request来改进这个项目!
|
||||
177
chrome-cookie-sniffer/background.js
Normal file
177
chrome-cookie-sniffer/background.js
Normal file
|
|
@ -0,0 +1,177 @@
|
|||
// 启动时记录
|
||||
console.log('Cookie Sniffer service worker 已启动');
|
||||
|
||||
// 服务配置
|
||||
const SERVICES = {
|
||||
douyin: {
|
||||
name: 'douyin',
|
||||
displayName: '抖音',
|
||||
domains: ['douyin.com'],
|
||||
cookieDomain: '.douyin.com'
|
||||
}
|
||||
};
|
||||
|
||||
// 获取服务名称
|
||||
/**
 * Find the configured service that owns a request URL.
 *
 * The match is made against the URL's host name only (exact domain or any
 * sub-domain), so a service domain that merely appears in a path or query
 * string of another site does not match.
 *
 * @param {string} url Absolute request URL.
 * @returns {object|null} The matching entry of SERVICES, or null when the URL
 *   cannot be parsed or no service matches.
 */
function getServiceFromUrl(url) {
  let hostname;
  try {
    hostname = new URL(url).hostname;
  } catch (_) {
    // Not an absolute URL, so it cannot belong to any configured service.
    return null;
  }

  for (const service of Object.values(SERVICES)) {
    const matches = service.domains.some(
      domain => hostname === domain || hostname.endsWith(`.${domain}`)
    );
    if (matches) {
      return service;
    }
  }
  return null;
}
|
||||
|
||||
// 检查是否在5分钟内抓取过
|
||||
/**
 * Tell whether a service was already captured less than five minutes ago.
 *
 * Reads the `lastCapture_<serviceName>` timestamp from chrome.storage.local.
 *
 * @param {string} serviceName Service key used in the storage key.
 * @returns {Promise<boolean>} true when the capture should be skipped.
 */
async function shouldSkipCapture(serviceName) {
  const storageKey = `lastCapture_${serviceName}`;
  const CAPTURE_INTERVAL_MS = 5 * 60 * 1000;

  return new Promise((resolve) => {
    chrome.storage.local.get([storageKey], (stored) => {
      const previousCapture = stored[storageKey];

      // Nothing recorded yet: never skip.
      if (!previousCapture) {
        resolve(false);
        return;
      }

      const withinWindow = (Date.now() - previousCapture) < CAPTURE_INTERVAL_MS;
      if (withinWindow) {
        console.log(`${serviceName}: 5分钟内已抓取过,跳过`);
      }
      resolve(withinWindow);
    });
  });
}
|
||||
|
||||
// 检查Cookie是否有变化
|
||||
/**
 * Compare a freshly captured cookie string with the one stored for a service.
 *
 * @param {string} serviceName Service key used in the storage key.
 * @param {string} newCookie Cookie string that was just captured.
 * @returns {Promise<boolean>} true when nothing is stored yet or the stored
 *   cookie differs from newCookie.
 */
async function isCookieChanged(serviceName, newCookie) {
  const storageKey = `cookieData_${serviceName}`;

  return new Promise((resolve) => {
    chrome.storage.local.get([storageKey], (stored) => {
      const previous = stored[storageKey];
      const unchanged = Boolean(previous) && previous.cookie === newCookie;

      if (unchanged) {
        console.log(`${serviceName}: Cookie内容无变化,跳过`);
      }
      resolve(!unchanged);
    });
  });
}
|
||||
|
||||
// 保存Cookie数据
|
||||
/**
 * Persist a captured cookie for a service and trigger the webhook callback.
 *
 * Writes `cookieData_<serviceName>` and `lastCapture_<serviceName>` to
 * chrome.storage.local, waits for the write to finish, then calls sendWebhook.
 *
 * @param {string} serviceName Service key used in the storage keys.
 * @param {string} url Request URL the cookie was captured from.
 * @param {string} cookie Captured cookie string.
 * @param {string} [source='headers'] Where the cookie came from
 *   ('headers' or 'cookies_api' in this file).
 * @returns {Promise<void>}
 */
async function saveCookieData(serviceName, url, cookie, source = 'headers') {
  // One clock reading so the stored record and the throttle timestamp agree.
  const capturedAt = Date.now();

  const cookieData = {
    service: serviceName,
    url: url,
    timestamp: capturedAt,
    lastUpdate: new Date(capturedAt).toISOString(),
    cookie: cookie,
    source: source
  };

  // Without a callback, the Manifest V3 storage API returns a promise; await
  // it so the data is really stored before the webhook fires and "saved" is
  // logged.
  await chrome.storage.local.set({
    [`cookieData_${serviceName}`]: cookieData,
    [`lastCapture_${serviceName}`]: capturedAt
  });

  // Trigger the webhook callback.
  await sendWebhook(serviceName, cookie);

  console.log(`${serviceName}: Cookie已保存`);
}
|
||||
|
||||
// Webhook回调
|
||||
/**
 * POST the captured cookie to the user-configured webhook, if any.
 *
 * The returned promise settles only after the request has finished, so
 * callers that `await` it really wait for the callback. Failures are logged
 * and never thrown.
 *
 * @param {string} serviceName Service the cookie belongs to.
 * @param {string} cookie Cookie string to send.
 * @returns {Promise<void>}
 */
async function sendWebhook(serviceName, cookie) {
  try {
    const { webhookUrl } = await chrome.storage.local.get(['webhookUrl']);
    if (!webhookUrl || !webhookUrl.trim()) {
      // No webhook configured: nothing to do.
      return;
    }

    const payload = {
      service: serviceName,
      cookie: cookie,
      timestamp: new Date().toISOString()
    };

    const response = await fetch(webhookUrl, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload)
    });

    // fetch only rejects on network errors; an HTTP error status must be
    // checked explicitly or it would be reported as a success.
    if (response.ok) {
      console.log(`Webhook回调成功: ${serviceName}`, response.status);
    } else {
      console.error(`Webhook回调失败: ${serviceName}`, response.status);
    }
  } catch (error) {
    console.error(`Webhook回调失败: ${serviceName}`, error);
  }
}
|
||||
|
||||
// Capture the Cookie of outgoing GET/POST requests to the configured service
// domains. The Cookie request header is preferred; when the request carries
// none, the cookies API is queried for the service's cookie domain instead.
chrome.webRequest.onBeforeSendHeaders.addListener(
  async (details) => {
    const service = getServiceFromUrl(details.url);
    if (!service) return;

    console.log(`请求拦截: ${service.displayName}`, details.url, details.method);

    const { method, requestHeaders } = details;
    if (method !== "POST" && method !== "GET") return;

    // Five-minute throttle per service.
    if (await shouldSkipCapture(service.name)) return;

    // First choice: the Cookie header of the request itself.
    const cookieHeader = (requestHeaders || []).find(
      (header) => header.name.toLowerCase() === "cookie"
    );
    if (cookieHeader) {
      console.log(`从请求头捕获到Cookie: ${service.displayName}`);

      // Store only when the content differs from what is already saved.
      if (await isCookieChanged(service.name, cookieHeader.value)) {
        await saveCookieData(service.name, details.url, cookieHeader.value, 'headers');
      }
      return;
    }

    // Fallback: no Cookie header present, ask the cookies API.
    chrome.cookies.getAll({domain: service.cookieDomain}, async (cookies) => {
      if (!cookies || cookies.length === 0) return;

      console.log(`通过cookies API获取到: ${service.displayName}`, cookies.length, '个cookie');
      const cookieString = cookies
        .map((entry) => `${entry.name}=${entry.value}`)
        .join('; ');

      // Store only when the content differs from what is already saved.
      if (await isCookieChanged(service.name, cookieString)) {
        await saveCookieData(service.name, details.url, cookieString, 'cookies_api');
      }
    });
  },
  { urls: ["https://*.douyin.com/*", "https://douyin.com/*"] },
  ["requestHeaders", "extraHeaders"]
);
|
||||
|
||||
// 添加存储变化监听
|
||||
// Log whenever the stored cookie data of a known service is written in the
// local storage area.
chrome.storage.onChanged.addListener((changes, areaName) => {
  if (areaName !== 'local') return;

  for (const [key, change] of Object.entries(changes)) {
    if (!key.startsWith('cookieData_')) continue;

    const serviceName = key.replace('cookieData_', '');
    const serviceConfig = SERVICES[serviceName];
    // Ignore unknown services and deletions (no newValue).
    if (serviceConfig && change.newValue) {
      console.log(`${serviceConfig.displayName} Cookie数据已更新`);
    }
  }
});
|
||||
|
||||
24
chrome-cookie-sniffer/manifest.json
Normal file
24
chrome-cookie-sniffer/manifest.json
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
{
|
||||
"manifest_version": 3,
|
||||
"name": "Cookie Sniffer",
|
||||
"version": "1.0",
|
||||
"description": "监听并获取指定网站的请求 Cookie",
|
||||
"permissions": [
|
||||
"webRequest",
|
||||
"storage",
|
||||
"activeTab",
|
||||
"cookies"
|
||||
],
|
||||
"host_permissions": [
|
||||
"https://*.douyin.com/*",
|
||||
"https://douyin.com/*"
|
||||
],
|
||||
"background": {
|
||||
"service_worker": "background.js"
|
||||
},
|
||||
"action": {
|
||||
"default_popup": "popup.html",
|
||||
"default_title": "Cookie Sniffer"
|
||||
}
|
||||
}
|
||||
|
||||
178
chrome-cookie-sniffer/popup.html
Normal file
178
chrome-cookie-sniffer/popup.html
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta charset="utf-8">
|
||||
<style>
|
||||
body {
|
||||
width: 400px;
|
||||
padding: 15px;
|
||||
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
|
||||
margin: 0;
|
||||
background: #f8f9fa;
|
||||
}
|
||||
|
||||
.header {
|
||||
margin-bottom: 20px;
|
||||
}
|
||||
|
||||
.header h3 {
|
||||
margin: 0 0 15px 0;
|
||||
color: #2c3e50;
|
||||
text-align: center;
|
||||
}
|
||||
|
||||
.webhook-config {
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
padding: 12px;
|
||||
margin-bottom: 15px;
|
||||
border: 1px solid #e1e8ed;
|
||||
}
|
||||
|
||||
.webhook-config label {
|
||||
display: block;
|
||||
font-size: 12px;
|
||||
color: #666;
|
||||
margin-bottom: 5px;
|
||||
}
|
||||
|
||||
.webhook-config input {
|
||||
width: 100%;
|
||||
padding: 8px;
|
||||
border: 1px solid #ddd;
|
||||
border-radius: 4px;
|
||||
font-size: 13px;
|
||||
box-sizing: border-box;
|
||||
}
|
||||
|
||||
.controls {
|
||||
text-align: center;
|
||||
margin-bottom: 15px;
|
||||
}
|
||||
|
||||
.btn {
|
||||
padding: 6px 12px;
|
||||
margin: 0 3px;
|
||||
border: none;
|
||||
border-radius: 4px;
|
||||
cursor: pointer;
|
||||
font-size: 12px;
|
||||
transition: background-color 0.2s;
|
||||
}
|
||||
|
||||
.btn-primary { background: #007bff; color: white; }
|
||||
.btn-primary:hover { background: #0056b3; }
|
||||
.btn-danger { background: #dc3545; color: white; }
|
||||
.btn-danger:hover { background: #c82333; }
|
||||
.btn-success { background: #28a745; color: white; }
|
||||
.btn-success:hover { background: #1e7e34; }
|
||||
|
||||
.service-card {
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
margin-bottom: 12px;
|
||||
border: 1px solid #e1e8ed;
|
||||
overflow: hidden;
|
||||
transition: box-shadow 0.2s;
|
||||
}
|
||||
|
||||
.service-card:hover {
|
||||
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
|
||||
}
|
||||
|
||||
.card-header {
|
||||
padding: 12px 15px;
|
||||
border-bottom: 1px solid #e1e8ed;
|
||||
display: flex;
|
||||
justify-content: space-between;
|
||||
align-items: center;
|
||||
}
|
||||
|
||||
.service-name {
|
||||
font-weight: 600;
|
||||
color: #2c3e50;
|
||||
font-size: 14px;
|
||||
}
|
||||
|
||||
.service-status {
|
||||
font-size: 11px;
|
||||
padding: 2px 6px;
|
||||
border-radius: 10px;
|
||||
color: white;
|
||||
}
|
||||
|
||||
.status-active { background: #28a745; }
|
||||
.status-inactive { background: #6c757d; }
|
||||
|
||||
.card-body {
|
||||
padding: 12px 15px;
|
||||
}
|
||||
|
||||
.last-update {
|
||||
color: #666;
|
||||
font-size: 12px;
|
||||
margin-bottom: 8px;
|
||||
}
|
||||
|
||||
.actions {
|
||||
display: flex;
|
||||
gap: 8px;
|
||||
}
|
||||
|
||||
.btn-sm {
|
||||
padding: 4px 8px;
|
||||
font-size: 11px;
|
||||
}
|
||||
|
||||
.empty-state {
|
||||
text-align: center;
|
||||
color: #6c757d;
|
||||
font-style: italic;
|
||||
padding: 40px 20px;
|
||||
}
|
||||
|
||||
.status-info {
|
||||
background: white;
|
||||
border-radius: 8px;
|
||||
padding: 10px;
|
||||
margin-bottom: 15px;
|
||||
border-left: 4px solid #17a2b8;
|
||||
font-size: 12px;
|
||||
color: #666;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<div class="header">
  <h3>Cookie Sniffer</h3>

  <div class="webhook-config">
    <!-- "for" ties the label to the input so clicking it focuses the field
         and assistive technology announces it. -->
    <label for="webhookUrl">Webhook回调地址</label>
    <div style="display: flex; gap: 8px; align-items: center;">
      <input type="url" id="webhookUrl" placeholder="https://your-server.com/webhook" style="flex: 1;">
      <button class="btn btn-sm" id="testWebhook" style="background: #17a2b8; color: white; white-space: nowrap;" disabled>
        🔧 测试
      </button>
    </div>
    <div id="webhookStatus" style="font-size: 11px; color: #666; margin-top: 4px; min-height: 14px;"></div>
  </div>
</div>
|
||||
|
||||
<div class="controls">
|
||||
<button class="btn btn-primary" id="refresh">刷新</button>
|
||||
<button class="btn btn-danger" id="clear">清空所有</button>
|
||||
<button class="btn btn-success" id="export">导出JSON</button>
|
||||
</div>
|
||||
|
||||
<div id="statusInfo" class="status-info" style="display: none;"></div>
|
||||
|
||||
<div id="serviceCards"></div>
|
||||
|
||||
<div id="emptyState" class="empty-state" style="display: none;">
|
||||
暂未抓取到任何Cookie数据<br>
|
||||
请访问相关网站触发请求
|
||||
</div>
|
||||
|
||||
<script src="popup.js"></script>
|
||||
</body>
|
||||
</html>
|
||||
292
chrome-cookie-sniffer/popup.js
Normal file
292
chrome-cookie-sniffer/popup.js
Normal file
|
|
@ -0,0 +1,292 @@
|
|||
document.addEventListener('DOMContentLoaded', function() {
|
||||
const refreshBtn = document.getElementById('refresh');
|
||||
const clearBtn = document.getElementById('clear');
|
||||
const exportBtn = document.getElementById('export');
|
||||
const webhookInput = document.getElementById('webhookUrl');
|
||||
const testWebhookBtn = document.getElementById('testWebhook');
|
||||
const webhookStatus = document.getElementById('webhookStatus');
|
||||
const statusInfo = document.getElementById('statusInfo');
|
||||
const serviceCards = document.getElementById('serviceCards');
|
||||
const emptyState = document.getElementById('emptyState');
|
||||
|
||||
// 服务配置
|
||||
const SERVICES = {
|
||||
douyin: { name: 'douyin', displayName: '抖音', icon: '🎵' }
|
||||
};
|
||||
|
||||
// 加载Webhook配置
|
||||
/**
 * Fill the webhook input from chrome.storage.local and refresh the state of
 * the test button.
 */
function loadWebhookConfig() {
  chrome.storage.local.get(['webhookUrl'], (stored) => {
    const savedUrl = stored.webhookUrl;
    if (savedUrl) {
      webhookInput.value = savedUrl;
    }
    updateTestButtonState();
  });
}
|
||||
|
||||
// 保存Webhook配置
|
||||
/**
 * Store the trimmed webhook URL from the input field, show a confirmation
 * banner and refresh the state of the test button.
 */
function saveWebhookConfig() {
  const trimmedUrl = webhookInput.value.trim();

  chrome.storage.local.set({ webhookUrl: trimmedUrl });
  showStatusInfo('Webhook地址已保存');
  updateTestButtonState();
}
|
||||
|
||||
// 更新测试按钮状态
|
||||
/**
 * Enable the webhook test button only while the input holds a non-empty,
 * valid URL.
 */
function updateTestButtonState() {
  const candidate = webhookInput.value.trim();
  const usable = Boolean(candidate) && isValidUrl(candidate);

  testWebhookBtn.disabled = !usable;
}
|
||||
|
||||
// 验证URL格式
|
||||
/**
 * Check that a string is an absolute http(s) URL.
 *
 * The scheme is taken from the parsed URL, which the URL parser lower-cases,
 * so "HTTPS://host" is accepted like "https://host".
 *
 * @param {string} string Candidate URL.
 * @returns {boolean} true for a parseable URL whose scheme is http or https.
 */
function isValidUrl(string) {
  try {
    const { protocol } = new URL(string);
    return protocol === 'http:' || protocol === 'https:';
  } catch (_) {
    // new URL throws on anything that is not an absolute URL.
    return false;
  }
}
|
||||
|
||||
// 测试Webhook回调
|
||||
/**
 * Send a test POST to the webhook URL from the input field and show the
 * outcome next to it.
 *
 * The payload carries the stored douyin cookie when one exists, otherwise a
 * dummy cookie string. It is flagged with `test: true`.
 */
async function testWebhook() {
  const targetUrl = webhookInput.value.trim();
  if (!targetUrl) {
    webhookStatus.textContent = '请先输入Webhook地址';
    webhookStatus.style.color = '#dc3545';
    return;
  }

  // Lock the button and show progress while the request is in flight.
  testWebhookBtn.disabled = true;
  testWebhookBtn.textContent = '⏳ 测试中...';
  webhookStatus.textContent = '正在发送测试请求...';
  webhookStatus.style.color = '#17a2b8';

  const showResult = (text, color) => {
    webhookStatus.textContent = text;
    webhookStatus.style.color = color;
  };

  chrome.storage.local.get(['cookieData_douyin'], async (stored) => {
    // Use the real cookie when one is stored, otherwise fall back to mock data.
    const saved = stored.cookieData_douyin;
    const testData = {
      service: 'douyin',
      cookie: saved
        ? saved.cookie
        : 'test_cookie=test_value; another_cookie=another_value',
      timestamp: new Date().toISOString(),
      test: true,
      message: saved
        ? '这是一个测试回调,使用了真实的Cookie数据'
        : '这是一个测试回调,使用了模拟Cookie数据'
    };

    try {
      const response = await fetch(targetUrl, {
        method: 'POST',
        headers: {
          'Content-Type': 'application/json',
        },
        body: JSON.stringify(testData)
      });

      if (response.ok) {
        showResult(`✅ 测试成功 (${response.status})`, '#28a745');
      } else {
        showResult(`❌ 服务器错误 (${response.status})`, '#dc3545');
      }
    } catch (error) {
      console.error('Webhook测试失败:', error);
      const looksLikeFetchFailure =
        error.name === 'TypeError' && error.message.includes('fetch');
      showResult(
        looksLikeFetchFailure
          ? '❌ 网络错误或跨域限制'
          : `❌ 请求失败: ${error.message}`,
        '#dc3545'
      );
    } finally {
      testWebhookBtn.disabled = false;
      testWebhookBtn.textContent = '🔧 测试';
      updateTestButtonState();

      // Clear the status line after five seconds.
      setTimeout(() => {
        webhookStatus.textContent = '';
      }, 5000);
    }
  });
}
|
||||
|
||||
// 显示状态信息
|
||||
/**
 * Show a message in the status banner and hide the banner three seconds later.
 *
 * @param {string} message Text to display.
 */
function showStatusInfo(message) {
  statusInfo.textContent = message;
  statusInfo.style.display = 'block';

  // Cancel the pending hide of an earlier message; otherwise that older timer
  // would hide this newer message before its own three seconds are over.
  clearTimeout(showStatusInfo.hideTimer);
  showStatusInfo.hideTimer = setTimeout(() => {
    statusInfo.style.display = 'none';
  }, 3000);
}
|
||||
|
||||
// 加载服务数据
|
||||
/**
 * Rebuild the list of service cards from the cookie data kept in
 * chrome.storage.local, or show the empty-state hint when nothing is stored.
 */
function loadServiceData() {
  const serviceIds = Object.keys(SERVICES);
  const storageKeys = serviceIds.map((id) => `cookieData_${id}`);

  chrome.storage.local.get(storageKeys, (stored) => {
    // The card list is rebuilt from scratch on every load.
    serviceCards.innerHTML = '';

    if (Object.keys(stored).length === 0) {
      emptyState.style.display = 'block';
      return;
    }
    emptyState.style.display = 'none';

    for (const id of serviceIds) {
      const record = stored[`cookieData_${id}`];
      if (record) {
        createServiceCard(SERVICES[id], record);
      }
    }
  });
}
|
||||
|
||||
/**
 * Build the card element for one service and append it to `serviceCards`.
 *
 * A service is labelled active when `data.timestamp` lies within the last
 * five minutes; `data.lastUpdate` is rendered as a locale-formatted date.
 *
 * @param {Object} service - Entry of `SERVICES` (reads `icon`, `displayName`, `name`).
 * @param {Object} data - Stored record (reads `timestamp`, `lastUpdate`).
 */
function createServiceCard(service, data) {
    const ACTIVE_WINDOW_MS = 5 * 60 * 1000; // five minutes
    const active = Date.now() - data.timestamp < ACTIVE_WINDOW_MS;
    const statusClass = active ? 'status-active' : 'status-inactive';
    const statusLabel = active ? '活跃' : '休眠';
    const updatedAt = new Date(data.lastUpdate).toLocaleString();

    const card = document.createElement('div');
    card.className = 'service-card';
    card.innerHTML = `
        <div class="card-header">
            <div class="service-name">${service.icon} ${service.displayName}</div>
            <div class="service-status ${statusClass}">
                ${statusLabel}
            </div>
        </div>
        <div class="card-body">
            <div class="last-update">上次更新: ${updatedAt}</div>
            <div class="actions">
                <button class="btn btn-primary btn-sm copy-btn" data-service="${service.name}">
                    📋 复制Cookie
                </button>
                <button class="btn btn-danger btn-sm delete-btn" data-service="${service.name}">
                    🗑️ 删除
                </button>
            </div>
        </div>
    `;

    serviceCards.appendChild(card);
}
|
||||
|
||||
/**
 * Copy the stored cookie of one service to the clipboard.
 *
 * Reads `cookieData_<serviceName>` from `chrome.storage.local`. If a cookie
 * is present it is written with the async Clipboard API; when that call
 * rejects, a temporary <textarea> plus `document.execCommand('copy')` is used
 * instead. The status line reports whether the copy actually succeeded.
 * Nothing happens when no cookie is stored for the service.
 *
 * @param {string} serviceName - Key of the service in `SERVICES`.
 */
async function copyCookie(serviceName) {
    const storageKey = `cookieData_${serviceName}`;

    chrome.storage.local.get([storageKey], async function(result) {
        const data = result[storageKey];
        if (!data || !data.cookie) {
            return;
        }

        let copied = false;
        try {
            await navigator.clipboard.writeText(data.cookie);
            copied = true;
        } catch (err) {
            // Fallback path for when the Clipboard API call is rejected.
            const textarea = document.createElement('textarea');
            textarea.value = data.cookie;
            document.body.appendChild(textarea);
            try {
                textarea.select();
                // execCommand signals failure by returning false, so its
                // result decides which message is shown below.
                copied = document.execCommand('copy');
            } catch (fallbackErr) {
                console.error('Cookie copy failed:', fallbackErr);
            } finally {
                // Always remove the helper element, even if the copy threw.
                document.body.removeChild(textarea);
            }
        }

        const displayName = SERVICES[serviceName].displayName;
        showStatusInfo(
            copied
                ? `${displayName} Cookie已复制到剪贴板`
                : `${displayName} Cookie复制失败`
        );
    });
}
|
||||
|
||||
/**
 * Delete the stored data of a single service after user confirmation.
 *
 * Removes the `cookieData_<serviceName>` and `lastCapture_<serviceName>`
 * keys from `chrome.storage.local`, then reloads the card list and shows a
 * status message.
 *
 * @param {string} serviceName - Key of the service in `SERVICES`.
 */
function deleteService(serviceName) {
    const label = SERVICES[serviceName].displayName;
    if (!confirm(`确定要删除 ${label} 的Cookie数据吗?`)) {
        return;
    }

    const keys = [`cookieData_${serviceName}`, `lastCapture_${serviceName}`];
    chrome.storage.local.remove(keys, () => {
        loadServiceData();
        showStatusInfo(`${label} 数据已删除`);
    });
}
|
||||
|
||||
/**
 * Delete the stored data of every known service after user confirmation.
 *
 * For each key of `SERVICES` both `cookieData_<service>` and
 * `lastCapture_<service>` are removed from `chrome.storage.local`; the card
 * list is then reloaded and a status message is shown.
 */
function clearAllData() {
    if (!confirm('确定要清空所有Cookie数据吗?')) {
        return;
    }

    const keysToRemove = Object.keys(SERVICES).flatMap((name) => [
        `cookieData_${name}`,
        `lastCapture_${name}`,
    ]);

    chrome.storage.local.remove(keysToRemove, () => {
        loadServiceData();
        showStatusInfo('所有数据已清空');
    });
}
|
||||
|
||||
/**
 * Export all stored cookie records as a downloadable JSON file.
 *
 * Reads every `cookieData_<service>` key from `chrome.storage.local`, strips
 * the `cookieData_` prefix from the keys, and triggers a download named
 * `cookie-sniffer-<YYYY-MM-DD>.json` through a temporary anchor element.
 */
function exportData() {
    const serviceKeys = Object.keys(SERVICES).map(service => `cookieData_${service}`);

    chrome.storage.local.get(serviceKeys, function(result) {
        // Named `payload` so the local no longer shadows this function's name.
        const payload = {};
        Object.keys(result).forEach(key => {
            payload[key.replace('cookieData_', '')] = result[key];
        });

        const blob = new Blob([JSON.stringify(payload, null, 2)], {type: 'application/json'});
        const url = URL.createObjectURL(blob);

        const a = document.createElement('a');
        a.href = url;
        a.download = `cookie-sniffer-${new Date().toISOString().slice(0,10)}.json`;
        a.click();

        // Revoke the blob URL only after the click has been processed.
        // NOTE(review): revoking synchronously right after click() is reported
        // to abort the download in some browsers - confirm for the targets.
        setTimeout(() => URL.revokeObjectURL(url), 1000);

        showStatusInfo('数据已导出');
    });
}
|
||||
|
||||
// Toolbar buttons.
refreshBtn.addEventListener('click', loadServiceData);
clearBtn.addEventListener('click', clearAllData);
exportBtn.addEventListener('click', exportData);

// Webhook controls: persist the URL on blur, refresh the test button's
// state on every keystroke, and run the test on click.
webhookInput.addEventListener('blur', saveWebhookConfig);
webhookInput.addEventListener('input', updateTestButtonState);
testWebhookBtn.addEventListener('click', testWebhook);

// Delegated click handling for the per-card buttons. Cards are recreated on
// every reload, so the listener lives on the container. The first matching
// class wins, in the order copy-btn then delete-btn.
const cardButtonActions = {
    'copy-btn': copyCookie,
    'delete-btn': deleteService,
};
serviceCards.addEventListener('click', (event) => {
    const target = event.target;
    const matched = Object.keys(cardButtonActions)
        .find((cls) => target.classList.contains(cls));
    if (matched) {
        cardButtonActions[matched](target.getAttribute('data-service'));
    }
});

// Initial load.
loadWebhookConfig();
loadServiceData();

// Auto-refresh the card list every 30 seconds.
setInterval(loadServiceData, 30000);
|
||||
});
|
||||
|
|
@ -348,6 +348,26 @@ class DouyinWebCrawler:
|
|||
# 对于URL列表
|
||||
return await WebCastIdFetcher.get_all_webcast_id(urls)
|
||||
|
||||
async def update_cookie(self, cookie: str):
    """
    Update the Douyin Cookie in memory and persist it to ``config.yaml``.

    The new value is stored at
    ``config["TokenManager"]["douyin"]["headers"]["Cookie"]`` so it takes
    effect immediately, and the whole ``config`` mapping is then written back
    to ``{path}/config.yaml``.

    Args:
        cookie: New Cookie header value.

    Raises:
        OSError: If the configuration file cannot be written. The in-memory
            value has already been updated at that point.
    """
    import os  # local import: only needed for the atomic replace below

    def _summary(value):
        # Cookies are credentials; log their size rather than their content.
        return f"<{len(value)} chars>" if isinstance(value, str) else repr(value)

    service = "douyin"
    headers = config["TokenManager"][service]["headers"]
    print('DouyinWebCrawler before update', _summary(headers["Cookie"]))
    print('DouyinWebCrawler to update', _summary(cookie))

    # 1. Update the in-memory configuration (takes effect immediately).
    headers["Cookie"] = cookie
    print('DouyinWebCrawler cookie updated', _summary(headers["Cookie"]))

    # 2. Persist to the configuration file. Dump into a sibling temp file and
    #    swap it in, so a failed dump cannot leave config.yaml truncated.
    config_path = f"{path}/config.yaml"
    tmp_path = f"{config_path}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as file:
            yaml.dump(config, file, default_flow_style=False, allow_unicode=True, indent=2)
        os.replace(tmp_path, config_path)
    except Exception:
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise
|
||||
|
||||
async def main(self):
|
||||
"""-------------------------------------------------------handler接口列表-------------------------------------------------------"""
|
||||
|
||||
|
|
|
|||
|
|
@ -32,10 +32,13 @@
|
|||
# ==============================================================================
|
||||
|
||||
import asyncio
|
||||
import re
|
||||
import httpx
|
||||
|
||||
from crawlers.douyin.web.web_crawler import DouyinWebCrawler # 导入抖音Web爬虫
|
||||
from crawlers.tiktok.web.web_crawler import TikTokWebCrawler # 导入TikTok Web爬虫
|
||||
from crawlers.tiktok.app.app_crawler import TikTokAPPCrawler # 导入TikTok App爬虫
|
||||
from crawlers.bilibili.web.web_crawler import BilibiliWebCrawler # 导入Bilibili Web爬虫
|
||||
|
||||
|
||||
class HybridCrawler:
|
||||
|
|
@ -43,6 +46,25 @@ class HybridCrawler:
|
|||
self.DouyinWebCrawler = DouyinWebCrawler()
|
||||
self.TikTokWebCrawler = TikTokWebCrawler()
|
||||
self.TikTokAPPCrawler = TikTokAPPCrawler()
|
||||
self.BilibiliWebCrawler = BilibiliWebCrawler()
|
||||
|
||||
async def get_bilibili_bv_id(self, url: str) -> str:
    """
    Extract the BV id from a Bilibili URL.

    URLs containing ``b23.tv`` are first resolved by issuing a HEAD request
    that follows redirects; the BV id is then taken from the final URL.

    Args:
        url: A Bilibili video URL or a ``b23.tv`` short link.

    Returns:
        The BV id (the ``BV...`` path segment).

    Raises:
        ValueError: If no BV id can be found in the (resolved) URL.
    """
    resolved = url
    if "b23.tv" in resolved:
        # Short link: follow the redirect chain to obtain the real URL.
        async with httpx.AsyncClient() as session:
            head_response = await session.head(resolved, follow_redirects=True)
            resolved = str(head_response.url)

    found = re.search(r'(?:video\/|\/)(BV[A-Za-z0-9]+)', resolved)
    if found is None:
        raise ValueError(f"Cannot extract BV ID from URL: {resolved}")
    return found.group(1)
|
||||
|
||||
async def hybrid_parsing_single_video(self, url: str, minimal: bool = False):
|
||||
# 解析抖音视频/Parse Douyin video
|
||||
|
|
@ -65,6 +87,14 @@ class HybridCrawler:
|
|||
data = await self.TikTokAPPCrawler.fetch_one_video(aweme_id)
|
||||
# $.imagePost exists if aweme_type is photo
|
||||
aweme_type = data.get("aweme_type")
|
||||
# 解析Bilibili视频/Parse Bilibili video
|
||||
elif "bilibili" in url or "b23.tv" in url:
|
||||
platform = "bilibili"
|
||||
aweme_id = await self.get_bilibili_bv_id(url) # BV号作为统一的video_id
|
||||
response = await self.BilibiliWebCrawler.fetch_one_video(aweme_id)
|
||||
data = response.get('data', {}) # 提取data部分
|
||||
# Bilibili只有视频类型,aweme_type设为0(video)
|
||||
aweme_type = 0
|
||||
else:
|
||||
raise ValueError("hybrid_parsing_single_video: Cannot judge the video source from the URL.")
|
||||
|
||||
|
|
@ -103,27 +133,44 @@ class HybridCrawler:
|
|||
and then use the .update() method to update the data
|
||||
"""
|
||||
|
||||
result_data = {
|
||||
'type': url_type,
|
||||
'platform': platform,
|
||||
'aweme_id': aweme_id,
|
||||
'desc': data.get("desc"),
|
||||
'create_time': data.get("create_time"),
|
||||
'author': data.get("author"),
|
||||
'music': data.get("music"),
|
||||
'statistics': data.get("statistics"),
|
||||
'cover_data': {
|
||||
'cover': data.get("video").get("cover"),
|
||||
'origin_cover': data.get("video").get("origin_cover"),
|
||||
'dynamic_cover': data.get("video").get("dynamic_cover")
|
||||
},
|
||||
'hashtags': data.get('text_extra'),
|
||||
}
|
||||
# 根据平台适配字段映射
|
||||
if platform == 'bilibili':
|
||||
result_data = {
|
||||
'type': url_type,
|
||||
'platform': platform,
|
||||
'video_id': aweme_id,
|
||||
'desc': data.get("title"), # Bilibili使用title
|
||||
'create_time': data.get("pubdate"), # Bilibili使用pubdate
|
||||
'author': data.get("owner"), # Bilibili使用owner
|
||||
'music': None, # Bilibili没有音乐信息
|
||||
'statistics': data.get("stat"), # Bilibili使用stat
|
||||
'cover_data': {}, # 将在各平台处理中填充
|
||||
'hashtags': None, # Bilibili没有hashtags概念
|
||||
}
|
||||
else:
|
||||
result_data = {
|
||||
'type': url_type,
|
||||
'platform': platform,
|
||||
'video_id': aweme_id, # 统一使用video_id字段,内容可能是aweme_id或bv_id
|
||||
'desc': data.get("desc"),
|
||||
'create_time': data.get("create_time"),
|
||||
'author': data.get("author"),
|
||||
'music': data.get("music"),
|
||||
'statistics': data.get("statistics"),
|
||||
'cover_data': {}, # 将在各平台处理中填充
|
||||
'hashtags': data.get('text_extra'),
|
||||
}
|
||||
# 创建一个空变量,稍后使用.update()方法更新数据/Create an empty variable and use the .update() method to update the data
|
||||
api_data = None
|
||||
# 判断链接类型并处理数据/Judge link type and process data
|
||||
# 抖音数据处理/Douyin data processing
|
||||
if platform == 'douyin':
|
||||
# 填充封面数据
|
||||
result_data['cover_data'] = {
|
||||
'cover': data.get("video", {}).get("cover"),
|
||||
'origin_cover': data.get("video", {}).get("origin_cover"),
|
||||
'dynamic_cover': data.get("video", {}).get("dynamic_cover")
|
||||
}
|
||||
# 抖音视频数据处理/Douyin video data processing
|
||||
if url_type == 'video':
|
||||
# 将信息储存在字典中/Store information in a dictionary
|
||||
|
|
@ -160,6 +207,12 @@ class HybridCrawler:
|
|||
}
|
||||
# TikTok数据处理/TikTok data processing
|
||||
elif platform == 'tiktok':
|
||||
# 填充封面数据
|
||||
result_data['cover_data'] = {
|
||||
'cover': data.get("video", {}).get("cover"),
|
||||
'origin_cover': data.get("video", {}).get("origin_cover"),
|
||||
'dynamic_cover': data.get("video", {}).get("dynamic_cover")
|
||||
}
|
||||
# TikTok视频数据处理/TikTok video data processing
|
||||
if url_type == 'video':
|
||||
# 将信息储存在字典中/Store information in a dictionary
|
||||
|
|
@ -198,6 +251,50 @@ class HybridCrawler:
|
|||
'watermark_image_list': watermark_image_list
|
||||
}
|
||||
}
|
||||
# Bilibili数据处理/Bilibili data processing
|
||||
elif platform == 'bilibili':
|
||||
# 填充封面数据
|
||||
result_data['cover_data'] = {
|
||||
'cover': data.get("pic"), # Bilibili使用pic作为封面
|
||||
'origin_cover': data.get("pic"),
|
||||
'dynamic_cover': data.get("pic")
|
||||
}
|
||||
# Bilibili只有视频,直接处理视频数据
|
||||
if url_type == 'video':
|
||||
# 获取视频播放地址需要额外调用API
|
||||
cid = data.get('cid') # 获取cid
|
||||
if cid:
|
||||
# 获取播放链接,cid需要转换为字符串
|
||||
playurl_data = await self.BilibiliWebCrawler.fetch_video_playurl(aweme_id, str(cid))
|
||||
# 从播放数据中提取URL
|
||||
dash = playurl_data.get('data', {}).get('dash', {})
|
||||
video_list = dash.get('video', [])
|
||||
audio_list = dash.get('audio', [])
|
||||
|
||||
# 选择最高质量的视频流
|
||||
video_url = video_list[0].get('baseUrl') if video_list else None
|
||||
audio_url = audio_list[0].get('baseUrl') if audio_list else None
|
||||
|
||||
api_data = {
|
||||
'video_data': {
|
||||
'wm_video_url': video_url,
|
||||
'wm_video_url_HQ': video_url,
|
||||
'nwm_video_url': video_url, # Bilibili没有水印概念
|
||||
'nwm_video_url_HQ': video_url,
|
||||
'audio_url': audio_url, # Bilibili音视频分离
|
||||
'cid': cid, # 保存cid供后续使用
|
||||
}
|
||||
}
|
||||
else:
|
||||
api_data = {
|
||||
'video_data': {
|
||||
'wm_video_url': None,
|
||||
'wm_video_url_HQ': None,
|
||||
'nwm_video_url': None,
|
||||
'nwm_video_url_HQ': None,
|
||||
'error': 'Failed to get cid for video playback'
|
||||
}
|
||||
}
|
||||
# 更新数据/Update data
|
||||
result_data.update(api_data)
|
||||
return result_data
|
||||
|
|
|
|||
Loading…
Reference in a new issue