# spotify-clone/backend/services/lyrics.py

import json
import logging
import re
from typing import Optional

import requests
import syncedlyrics
import yt_dlp
from starlette.concurrency import run_in_threadpool

from backend.core.cache import CacheManager
from backend.core.config import settings
logger = logging.getLogger(__name__)

# One LRC time tag, "[mm:ss]" or "[mm:ss.xx]", optionally preceded by whitespace
# so that consecutive tags separated by spaces are still recognised.
_LRC_TIME_TAG = re.compile(r'\s*\[(\d+):(\d+(?:\.\d+)?)\]')

# The id is interpolated into a file name, a glob pattern and a URL, so only
# characters that are inert in all three contexts are accepted.
_SAFE_VIDEO_ID = re.compile(r'[A-Za-z0-9_-]+')

# Parenthesised fragments are removed from titles before querying LRCLIB.
_PARENTHESISED = re.compile(r'\(.*?\)')


class LyricsService:
    """Fetch time-synced lyrics for a track, trying several sources in turn.

    Sources are tried in this order until one yields at least one line:
    YouTube subtitles via yt-dlp, the LRCLIB search API, then the
    ``syncedlyrics`` package. Non-empty results are stored in the cache.

    Every result is a list of ``{"time": <seconds as float>, "text": <str>}``
    dictionaries.
    """

    # How long a successful lookup stays in the cache (one day).
    CACHE_TTL_SECONDS = 86400

    def __init__(self):
        self.cache = CacheManager(str(settings.CACHE_DIR))
        # yt-dlp writes its subtitle files into this directory.
        self.lyrics_cache_dir = settings.CACHE_DIR / "lyrics"
        self.lyrics_cache_dir.mkdir(parents=True, exist_ok=True)

    def _parse_lrc_string(self, lrc_string: str) -> list:
        """Convert LRC text into a list of timed lines sorted by time.

        Lines without a ``[mm:ss]`` / ``[mm:ss.xx]`` tag are ignored. A line
        carrying several consecutive tags (a repeated lyric) produces one
        entry per tag.

        Args:
            lrc_string: Raw LRC content; ``None`` or ``""`` yields ``[]``.

        Returns:
            A list of ``{"time": float, "text": str}`` dictionaries.
        """
        parsed = []
        for line in (lrc_string or "").split('\n'):
            tag = _LRC_TIME_TAG.search(line)
            if tag is None:
                continue
            timestamps = []
            text_start = tag.end()
            # Consume every tag that directly follows the first one.
            while tag is not None:
                timestamps.append(int(tag.group(1)) * 60 + float(tag.group(2)))
                text_start = tag.end()
                tag = _LRC_TIME_TAG.match(line, text_start)
            text = line[text_start:].strip()
            parsed.extend({"time": stamp, "text": text} for stamp in timestamps)
        # Repeated-tag lines can emit entries out of order; the sort is stable,
        # so input that is already chronological keeps its order.
        parsed.sort(key=lambda entry: entry["time"])
        return parsed

    @staticmethod
    def _parse_json3_events(data: dict) -> list:
        """Extract timed lines from a decoded ``json3`` subtitle document.

        Events lacking ``segs`` or ``tStartMs`` are skipped, as are events
        whose joined text is empty or starts with ``[``.
        """
        lines = []
        for event in data.get('events', []):
            if 'segs' not in event or 'tStartMs' not in event:
                continue
            text = "".join(seg.get('utf8', '') for seg in event['segs']).strip()
            if text and not text.startswith('['):
                lines.append({"time": float(event['tStartMs']) / 1000.0, "text": text})
        return lines

    def _fetch_ytdlp(self, video_id: str) -> list:
        """Strategy 1: download subtitles with yt-dlp and parse them.

        Blocking; intended to run in a worker thread. Returns ``[]`` on any
        failure or when no subtitle file was produced.
        """
        video_id = str(video_id)
        if not _SAFE_VIDEO_ID.fullmatch(video_id):
            # Refuse ids that could escape the cache directory or act as
            # glob metacharacters; the other strategies can still run.
            logger.warning("Skipping yt-dlp subtitles for unsafe id %r", video_id)
            return []
        try:
            ydl_opts = {
                'skip_download': True,
                'writesubtitles': True,
                'writeautomaticsub': True,
                'subtitleslangs': ['en', 'vi'],
                'subtitlesformat': 'json3',
                'outtmpl': str(self.lyrics_cache_dir / video_id),
                'quiet': True,
            }
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([f"https://www.youtube.com/watch?v={video_id}"])

            # Sorted so the fallback choice below is deterministic.
            candidates = sorted(self.lyrics_cache_dir.glob(f"{video_id}.*.json3"))
            if not candidates:
                return []
            english_name = f"{video_id}.en.json3"
            best_file = next(
                (path for path in candidates if path.name == english_name),
                candidates[0],
            )
            with open(best_file, 'r', encoding='utf-8') as handle:
                return self._parse_json3_events(json.load(handle))
        except Exception as exc:
            # Best-effort source: report the failure and let the caller fall
            # back to the next strategy.
            logger.warning("yt-dlp sub error: %s", exc)
            return []

    def _fetch_lrclib(self, title: str, artist: str) -> list:
        """Strategy 2: query the LRCLIB search API for synced lyrics.

        Blocking; intended to run in a worker thread. Returns the first
        search hit whose ``syncedLyrics`` parses to at least one line,
        otherwise ``[]``.
        """
        query = f"{artist} {_PARENTHESISED.sub('', title)}".strip()
        try:
            resp = requests.get(
                "https://lrclib.net/api/search",
                params={"q": query},
                timeout=5,
            )
            if resp.status_code != 200:
                return []
            results = resp.json()
        except (requests.RequestException, ValueError) as exc:
            logger.warning("LRCLIB lookup failed for %r: %s", query, exc)
            return []

        if not isinstance(results, list):
            return []
        for item in results:
            if isinstance(item, dict) and item.get("syncedLyrics"):
                lines = self._parse_lrc_string(item["syncedLyrics"])
                if lines:
                    return lines
        return []

    def _fetch_syncedlyrics(self, title: str, artist: str) -> list:
        """Strategy 3: search with the ``syncedlyrics`` package.

        Blocking; intended to run in a worker thread. Returns ``[]`` on any
        failure or when nothing parseable was found.
        """
        query = f"{title} {artist}".strip()
        try:
            lrc_str = syncedlyrics.search(query)
        except Exception as exc:
            # Best-effort source: any provider error just means "no lyrics".
            logger.warning("syncedlyrics lookup failed for %r: %s", query, exc)
            return []
        return self._parse_lrc_string(lrc_str) if lrc_str else []

    async def get_lyrics(self, id: str, title: Optional[str] = None, artist: Optional[str] = None):
        """Return timed lyric lines for a track, using the cache when possible.

        Args:
            id: Video id; used as the cache key and for the yt-dlp lookup.
                (The name shadows the builtin but is kept for callers.)
            title: Track title; required for the LRCLIB and syncedlyrics
                fallbacks.
            artist: Artist name; required for the same fallbacks.

        Returns:
            A list of ``{"time": float, "text": str}`` dictionaries, or ``[]``
            when ``id`` is empty or no source produced any line.
        """
        if not id:
            return []

        cache_key = f"lyrics:{id}"
        cached = self.cache.get(cache_key)
        if cached:
            return cached

        # The fetchers block on network/disk, so they run in the threadpool.
        parsed_lines = await run_in_threadpool(self._fetch_ytdlp, id)
        if not parsed_lines and title and artist:
            parsed_lines = await run_in_threadpool(self._fetch_lrclib, title, artist)
        if not parsed_lines and title and artist:
            parsed_lines = await run_in_threadpool(self._fetch_syncedlyrics, title, artist)

        # Only successful lookups are cached, so misses are retried next time.
        if parsed_lines:
            self.cache.set(cache_key, parsed_lines, ttl_seconds=self.CACHE_TTL_SECONDS)
        return parsed_lines