103 lines
4.2 KiB
Python
103 lines
4.2 KiB
Python
import json
import logging
import re
from typing import Optional

import requests
import syncedlyrics
import yt_dlp
from starlette.concurrency import run_in_threadpool

from backend.core.cache import CacheManager
from backend.core.config import settings
|
|
|
|
class LyricsService:
    """Fetch time-synced lyrics for a track, trying several sources in turn.

    Sources are tried in this order until one yields lines:
    1. YouTube subtitles downloaded with yt-dlp (json3 format),
    2. the LRCLIB search API,
    3. the ``syncedlyrics`` package.

    Every source produces a list of ``{"time": <seconds>, "text": <line>}``
    dicts. Non-empty results are stored through the cache manager.
    """

    _log = logging.getLogger(__name__)

    # One LRC timestamp tag: "[mm:ss.xx]" or "[mm:ss]". Metadata tags such as
    # "[ar:...]" or "[length:03:45]" do not match because the bracket must be
    # followed directly by digits.
    _LRC_TIMESTAMP = re.compile(r'\[(\d+):(\d+(?:\.\d+)?)\]')

    # Characters allowed in an id before it is used in a file path, a glob
    # pattern and a yt-dlp output template. Rejects path separators, glob
    # metacharacters and "%" (which yt-dlp would treat as a template field).
    _SAFE_VIDEO_ID = re.compile(r'[A-Za-z0-9_-]+')

    # Subtitle languages requested from yt-dlp, in order of preference.
    _SUBTITLE_LANGS = ('en', 'vi')

    _CACHE_TTL_SECONDS = 86400  # 24 hours

    def __init__(self):
        """Create the cache manager and make sure the lyrics directory exists."""
        self.cache = CacheManager(str(settings.CACHE_DIR))
        self.lyrics_cache_dir = settings.CACHE_DIR / "lyrics"
        self.lyrics_cache_dir.mkdir(parents=True, exist_ok=True)

    def _parse_lrc_string(self, lrc_string: str) -> list:
        """Parse LRC text into a time-ordered list of lyric lines.

        A line may carry several consecutive timestamp tags
        (``[00:12.00][00:45.00]text``); one entry is produced per tag. The
        fractional part of the seconds is optional. Lines without a timestamp
        tag are ignored.

        Args:
            lrc_string: Raw LRC content. ``None`` or an empty string yields
                an empty list.

        Returns:
            A list of ``{"time": float_seconds, "text": str}`` dicts sorted
            by ascending time.
        """
        parsed = []
        if not lrc_string:
            return parsed

        for line in lrc_string.split('\n'):
            tag = self._LRC_TIMESTAMP.search(line)
            if tag is None:
                continue

            # Consume the run of adjacent tags; the lyric text starts right
            # after the last one.
            times = []
            text_start = tag.end()
            while tag is not None:
                times.append(int(tag.group(1)) * 60 + float(tag.group(2)))
                text_start = tag.end()
                tag = self._LRC_TIMESTAMP.match(line, text_start)

            text = line[text_start:].strip()
            parsed.extend({"time": t, "text": text} for t in times)

        # Repeated-line tags can produce out-of-order entries. The sort is
        # stable, so lines sharing a timestamp keep their file order.
        parsed.sort(key=lambda entry: entry["time"])
        return parsed

    @staticmethod
    def _parse_json3_events(data: dict) -> list:
        """Convert a decoded json3 subtitle document into lyric lines.

        Events lacking ``segs`` or ``tStartMs`` are skipped, as are events
        whose joined text is empty or starts with ``[``.
        """
        parsed = []
        for event in data.get('events') or []:
            if 'segs' not in event or 'tStartMs' not in event:
                continue
            text = "".join(seg.get('utf8', '') for seg in event['segs']).strip()
            if text and not text.startswith('['):
                parsed.append({"time": float(event['tStartMs']) / 1000.0, "text": text})
        return parsed

    def _pick_subtitle_file(self, video_id: str):
        """Return the best downloaded subtitle file for *video_id*, or None.

        Files for the requested languages win in preference order; otherwise
        the alphabetically first match is used so the choice is deterministic.
        """
        candidates = sorted(self.lyrics_cache_dir.glob(f"{video_id}.*.json3"))
        if not candidates:
            return None
        for lang in self._SUBTITLE_LANGS:
            preferred = self.lyrics_cache_dir / f"{video_id}.{lang}.json3"
            if preferred in candidates:
                return preferred
        return candidates[0]

    def _fetch_from_youtube(self, video_id: str) -> list:
        """Strategy 1: download subtitles with yt-dlp and parse them.

        Blocking; intended to run in a worker thread. Returns an empty list
        on any failure so the caller can fall through to the next source.
        """
        video_id = str(video_id)
        if not self._SAFE_VIDEO_ID.fullmatch(video_id):
            # The id ends up in a file path and a glob pattern, so refuse
            # anything that could escape the lyrics directory.
            self._log.warning("Skipping yt-dlp subtitles for unsafe id %r", video_id)
            return []

        try:
            ydl_opts = {
                'skip_download': True,
                'writesubtitles': True,
                'writeautomaticsub': True,
                'subtitleslangs': list(self._SUBTITLE_LANGS),
                'subtitlesformat': 'json3',
                'outtmpl': str(self.lyrics_cache_dir / video_id),
                'quiet': True,
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([f"https://www.youtube.com/watch?v={video_id}"])

            subtitle_file = self._pick_subtitle_file(video_id)
            if subtitle_file is None:
                return []
            with open(subtitle_file, 'r', encoding='utf-8') as f:
                data = json.load(f)
            return self._parse_json3_events(data)
        except Exception as e:
            # Best-effort source: report and let the next strategy run.
            self._log.warning("yt-dlp sub error: %s", e)
            return []

    def _fetch_from_lrclib(self, title: str, artist: str) -> list:
        """Strategy 2: search LRCLIB and parse the first usable synced lyrics.

        Blocking; intended to run in a worker thread. Returns an empty list
        when nothing is found or the request fails.
        """
        try:
            # Drop parenthesised qualifiers such as "(Official Video)".
            cleaned_title = re.sub(r'\(.*?\)', '', title)
            clean_query = " ".join(f"{artist} {cleaned_title}".split())
            resp = requests.get(
                "https://lrclib.net/api/search",
                params={"q": clean_query},
                timeout=5,
            )
            if resp.status_code != 200:
                return []
            for item in resp.json():
                synced = item.get("syncedLyrics")
                if not synced:
                    continue
                parsed = self._parse_lrc_string(synced)
                if parsed:
                    return parsed
        except Exception as e:
            self._log.warning("LRCLIB lookup error: %s", e)
        return []

    def _fetch_from_syncedlyrics(self, title: str, artist: str) -> list:
        """Strategy 3: look the track up with the ``syncedlyrics`` package.

        Blocking; intended to run in a worker thread. Returns an empty list
        when nothing is found or the lookup fails.
        """
        try:
            clean_query = f"{title} {artist}".strip()
            lrc_str = syncedlyrics.search(clean_query)
            if lrc_str:
                return self._parse_lrc_string(lrc_str)
        except Exception as e:
            self._log.warning("syncedlyrics lookup error: %s", e)
        return []

    async def get_lyrics(self, id: str, title: Optional[str] = None, artist: Optional[str] = None):
        """Return synced lyrics for a track, using the cache when possible.

        Args:
            id: Video id; used for the cache key and the yt-dlp lookup.
            title: Track title; needed for the LRCLIB and syncedlyrics
                fallbacks.
            artist: Track artist; needed for the same fallbacks.

        Returns:
            A list of ``{"time": float_seconds, "text": str}`` dicts, or an
            empty list when *id* is empty or no source has lyrics.
        """
        if not id:
            return []

        cache_key = f"lyrics:{id}"
        cached = self.cache.get(cache_key)
        if cached:
            return cached

        # Each strategy does blocking I/O, so run it off the event loop.
        parsed_lines = await run_in_threadpool(self._fetch_from_youtube, id)

        if not parsed_lines and title and artist:
            parsed_lines = await run_in_threadpool(self._fetch_from_lrclib, title, artist)

        if not parsed_lines and title and artist:
            parsed_lines = await run_in_threadpool(self._fetch_from_syncedlyrics, title, artist)

        # Only cache hits, so a miss is retried on the next request.
        if parsed_lines:
            self.cache.set(cache_key, parsed_lines, ttl_seconds=self._CACHE_TTL_SECONDS)

        return parsed_lines
|