kv-netflix/backend/internal/scraper/phim30.go
vndangkhoa b647bc8272
Some checks failed
Release APKs / Build TV APK (push) Has been cancelled
Release APKs / Build Mobile APK (push) Has been cancelled
Release APKs / Create Release (push) Has been cancelled
v3.9: Add Next/Prev episode buttons, replace PhimMoiChill with Phim30 scraper, filter blank thumbnails
- Add Next/Previous episode navigation to Android TV ExoPlayer UI
- Implement Phim30.me scraper as replacement for unstable PhimMoiChill
- Remove all PhimMoiChill code (scraper, extractor, fallback URLs)
- Filter out movies without thumbnails from API responses
- Fix HTTP 500 error caused by dead phimmoichill.network fallback
- Include Android TV APK in webapp for download
2026-02-28 18:45:48 +07:00

191 lines
4.5 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package scraper
import (
"fmt"
"net/http"
"net/url"
"strconv"
"strings"
"time"
"streamflow-backend/internal/models"
"github.com/PuerkitoBio/goquery"
)
// parseEpisodeNumber extracts the episode number from an episode label such
// as "Tập 1", "Tập 01", "tap 7", "Tập10" or "Tập 12-End".
//
// It returns the value of the first run of ASCII digits found in title.
// Labels that contain no digits (for example "Full" or an empty string), or
// whose digit run does not fit in an int, yield 1 so that single-episode
// movies are always reported as episode 1.
func parseEpisodeNumber(title string) int {
	isDigit := func(r rune) bool { return r >= '0' && r <= '9' }

	start := strings.IndexFunc(title, isDigit)
	if start < 0 {
		return 1
	}

	// ASCII digits are single bytes in UTF-8, so plain byte indexing is safe
	// once the first digit has been located.
	end := start
	for end < len(title) && title[end] >= '0' && title[end] <= '9' {
		end++
	}

	num, err := strconv.Atoi(title[start:end])
	if err != nil {
		// Only reachable on overflow; fall back to the default episode.
		return 1
	}
	return num
}
// Phim30BaseURL is the scheme and host prepended to every phim30.me path
// requested by Phim30Scraper.
const Phim30BaseURL = "https://phim30.me"
// Phim30Scraper scrapes movie listings and movie detail pages from phim30.me.
type Phim30Scraper struct {
// client is the HTTP client used for every outgoing request.
client *http.Client
}
// NewPhim30Scraper returns a Phim30Scraper whose HTTP client gives up on any
// single request after 30 seconds.
func NewPhim30Scraper() *Phim30Scraper {
	httpClient := &http.Client{Timeout: 30 * time.Second}
	return &Phim30Scraper{client: httpClient}
}
// Search requests the phim30.me search page for query and returns the movies
// listed on the given result page. The query is URL-escaped before being
// placed in the "keyword" parameter.
func (p *Phim30Scraper) Search(query string, page int) ([]models.RophimMovie, error) {
	keyword := url.QueryEscape(query)
	target := fmt.Sprintf("%s/tim-kiem?keyword=%s&page=%d", Phim30BaseURL, keyword, page)
	return p.scrapeMovieList(target)
}
// GetMoviesByCategory returns the movies listed on the given page of a
// category, e.g. https://phim30.me/the-loai/hanh-dong?page=1 for the
// category slug "hanh-dong".
//
// The slug is path-escaped so that characters such as '?', '#', '/' or spaces
// in category cannot alter the path or query of the requested URL. Plain
// hyphenated slugs are passed through unchanged.
func (p *Phim30Scraper) GetMoviesByCategory(category string, page int) ([]models.RophimMovie, error) {
	catURL := fmt.Sprintf("%s/the-loai/%s?page=%d", Phim30BaseURL, url.PathEscape(category), page)
	return p.scrapeMovieList(catURL)
}
// scrapeMovieList downloads targetURL and returns one entry per distinct movie
// linked from the page, in order of first appearance.
//
// A movie link is an <a> element whose href starts with
// "https://phim30.me/phim/" and which carries a non-empty title attribute.
// The rest of the href is used as both ID and Slug, and the title attribute as
// both Title and OriginalTitle. The thumbnail comes from an <img> inside the
// link, preferring data-src, then lazy-src, then a non-inline src.
//
// A page may link to the same movie several times. Duplicates are collapsed
// into the first occurrence, but a thumbnail found on a later duplicate is
// kept when the first occurrence had none.
//
// It returns an error when the request cannot be built or sent, when the
// response status is not 200, or when the body cannot be parsed as HTML.
func (p *Phim30Scraper) scrapeMovieList(targetURL string) ([]models.RophimMovie, error) {
	const moviePrefix = "https://phim30.me/phim/"

	req, err := http.NewRequest(http.MethodGet, targetURL, nil)
	if err != nil {
		return nil, fmt.Errorf("phim30: building movie list request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")

	resp, err := p.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("phim30: fetching movie list: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("phim30 returned status: %d", resp.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("phim30: parsing movie list: %w", err)
	}

	var movies []models.RophimMovie
	// indexBySlug maps a slug to its position in movies so duplicates can be
	// merged in a single pass while preserving first-seen order.
	indexBySlug := make(map[string]int)

	doc.Find("a[href^='" + moviePrefix + "']").Each(func(_ int, s *goquery.Selection) {
		// A missing attribute yields "", which the checks below reject.
		href, _ := s.Attr("href")
		title, _ := s.Attr("title")
		slug := strings.TrimPrefix(href, moviePrefix)
		if title == "" || slug == "" {
			return
		}

		// Lazy-loaded images keep the real URL in data-src / lazy-src and put
		// an inline placeholder in src. When the link holds several images,
		// the last usable one wins.
		thumb := ""
		s.Find("img").Each(func(_ int, img *goquery.Selection) {
			src, _ := img.Attr("src")
			dataSrc, _ := img.Attr("data-src")
			lazySrc, _ := img.Attr("lazy-src")
			switch {
			case dataSrc != "":
				thumb = dataSrc
			case lazySrc != "":
				thumb = lazySrc
			case src != "" && !strings.Contains(src, "data:image"):
				thumb = src
			}
		})

		if idx, ok := indexBySlug[slug]; ok {
			// Duplicate link: only use it to fill in a missing thumbnail.
			if movies[idx].Thumbnail == "" {
				movies[idx].Thumbnail = thumb
			}
			return
		}

		indexBySlug[slug] = len(movies)
		movies = append(movies, models.RophimMovie{
			ID:            slug,
			Slug:          slug,
			Title:         title,
			OriginalTitle: title,
			Thumbnail:     thumb,
		})
	})

	return movies, nil
}
// GetMovieDetail downloads the detail page for slug and returns the movie
// with its title and episode list.
//
// The title is read from "h1.movie-title"; when that is empty the page's
// <title> is used instead, cut at the first site-name delimiter. Every link
// whose href contains "/xem-phim/" and has non-empty text becomes an episode
// on the "Phim30" server, with its number derived from the link text.
//
// It returns an error when the request cannot be built or sent, when the
// response status is not 200, or when the body cannot be parsed as HTML.
func (p *Phim30Scraper) GetMovieDetail(slug string) (*models.RophimMovie, error) {
	targetURL := fmt.Sprintf("%s/phim/%s", Phim30BaseURL, slug)

	req, err := http.NewRequest(http.MethodGet, targetURL, nil)
	if err != nil {
		return nil, fmt.Errorf("phim30: building movie detail request: %w", err)
	}
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")

	resp, err := p.client.Do(req)
	if err != nil {
		return nil, fmt.Errorf("phim30: fetching movie detail: %w", err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("phim30 returned status: %d", resp.StatusCode)
	}

	doc, err := goquery.NewDocumentFromReader(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("phim30: parsing movie detail: %w", err)
	}

	title := strings.TrimSpace(doc.Find("h1.movie-title").Text())
	if title == "" {
		// Fall back to the document <title>, dropping a trailing site-name
		// suffix. Splitting on an empty separator would yield only the first
		// character and panic on an empty <title>, so cut at an explicit
		// delimiter and keep the whole text when none is present.
		// NOTE(review): the delimiter list is a guess at common page-title
		// separators — confirm against the actual phim30.me <title> format.
		pageTitle := doc.Find("title").Text()
		cut := len(pageTitle)
		for _, sep := range []string{"|", " – ", " — "} {
			if i := strings.Index(pageTitle, sep); i >= 0 && i < cut {
				cut = i
			}
		}
		title = strings.TrimSpace(pageTitle[:cut])
	}

	movie := &models.RophimMovie{
		ID:            slug,
		Slug:          slug,
		Title:         title,
		OriginalTitle: title,
	}

	var eps []models.Episode
	doc.Find("a[href*='/xem-phim/']").Each(func(_ int, s *goquery.Selection) {
		// A missing href yields "", which is rejected below.
		href, _ := s.Attr("href")
		epName := strings.TrimSpace(s.Text())
		if epName == "" || href == "" {
			return
		}

		// Turn site-relative links into absolute URLs.
		switch {
		case strings.HasPrefix(href, "http"):
			// Already absolute.
		case strings.HasPrefix(href, "//"):
			// Protocol-relative: only the scheme is missing.
			href = "https:" + href
		case strings.HasPrefix(href, "/"):
			href = Phim30BaseURL + href
		default:
			href = Phim30BaseURL + "/" + href
		}

		eps = append(eps, models.Episode{
			ServerName: "Phim30",
			Title:      epName,
			Number:     parseEpisodeNumber(epName),
			URL:        href,
		})
	})
	if len(eps) > 0 {
		movie.Episodes = eps
	}

	return movie, nil
}