Fix missing Episode 1 and duplicate search results (Unicode-aware dedup)

This commit is contained in:
vndangkhoa 2026-02-20 20:15:59 +07:00
parent e788043395
commit 0230054f92
4 changed files with 43 additions and 8 deletions

View file

@ -8,6 +8,7 @@ require (
github.com/go-chi/chi/v5 v5.2.4 github.com/go-chi/chi/v5 v5.2.4
github.com/go-chi/cors v1.2.2 github.com/go-chi/cors v1.2.2
golang.org/x/image v0.35.0 golang.org/x/image v0.35.0
golang.org/x/text v0.33.0
gorm.io/gorm v1.31.1 gorm.io/gorm v1.31.1
) )
@ -22,7 +23,6 @@ require (
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
golang.org/x/net v0.49.0 // indirect golang.org/x/net v0.49.0 // indirect
golang.org/x/sys v0.40.0 // indirect golang.org/x/sys v0.40.0 // indirect
golang.org/x/text v0.33.0 // indirect
modernc.org/libc v1.22.5 // indirect modernc.org/libc v1.22.5 // indirect
modernc.org/mathutil v1.5.0 // indirect modernc.org/mathutil v1.5.0 // indirect
modernc.org/memory v1.5.0 // indirect modernc.org/memory v1.5.0 // indirect

View file

@ -11,6 +11,7 @@ import (
"strconv" "strconv"
"strings" "strings"
"sync" "sync"
"unicode"
"streamflow-backend/internal/database" "streamflow-backend/internal/database"
"streamflow-backend/internal/models" "streamflow-backend/internal/models"
@ -18,6 +19,9 @@ import (
"streamflow-backend/internal/service" "streamflow-backend/internal/service"
"github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
) )
const ( const (
@ -128,6 +132,7 @@ func (h *Handler) fetchAndMergeMovies(fetch movieFetcher) []models.RophimMovie {
func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int) []models.RophimMovie { func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int) []models.RophimMovie {
var allMovies []models.RophimMovie var allMovies []models.RophimMovie
seenID := make(map[string]int) seenID := make(map[string]int)
seenSlug := make(map[string]int)
seenTitle := make(map[string]int) seenTitle := make(map[string]int)
for i := 0; i < maxLen; i++ { for i := 0; i < maxLen; i++ {
@ -135,11 +140,22 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int
if i < len(movies) { if i < len(movies) {
movie := movies[i] movie := movies[i]
// Check 1: Exact ID match
if idx, found := seenID[movie.ID]; found { if idx, found := seenID[movie.ID]; found {
h.mergeMovieMetadata(&allMovies[idx], &movie) h.mergeMovieMetadata(&allMovies[idx], &movie)
continue continue
} }
// Check 2: Slug match (e.g. "vu-tru-cua-doi-ta" from both providers)
slugKey := normalizeKey(movie.Slug)
if slugKey != "" {
if idx, found := seenSlug[slugKey]; found {
h.mergeMovieMetadata(&allMovies[idx], &movie)
continue
}
}
// Check 3: Normalized title match
titleKey := normalizeKey(movie.OriginalTitle) titleKey := normalizeKey(movie.OriginalTitle)
if titleKey == "" { if titleKey == "" {
titleKey = normalizeKey(movie.Title) titleKey = normalizeKey(movie.Title)
@ -152,6 +168,9 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int
allMovies = append(allMovies, movie) allMovies = append(allMovies, movie)
currIdx := len(allMovies) - 1 currIdx := len(allMovies) - 1
seenID[movie.ID] = currIdx seenID[movie.ID] = currIdx
if slugKey != "" {
seenSlug[slugKey] = currIdx
}
if titleKey != "" { if titleKey != "" {
seenTitle[titleKey] = currIdx seenTitle[titleKey] = currIdx
} }
@ -418,7 +437,19 @@ func (h *Handler) mergeMovieMetadata(existing, new *models.RophimMovie) {
} }
func normalizeKey(s string) string { func normalizeKey(s string) string {
if s == "" {
return ""
}
s = strings.ToLower(s) s = strings.ToLower(s)
// Strip Vietnamese diacritics: Vũ Trụ Của Đôi Ta → vu tru cua doi ta
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
result, _, err := transform.String(t, s)
if err == nil {
s = result
}
// Replace đ/Đ which NFD doesn't decompose
s = strings.ReplaceAll(s, "đ", "d")
// Keep only alphanumeric
reg := regexp.MustCompile("[^a-z0-9]+") reg := regexp.MustCompile("[^a-z0-9]+")
return reg.ReplaceAllString(s, "") return reg.ReplaceAllString(s, "")
} }

View file

@ -303,8 +303,11 @@ func (s *OphimScraper) GetMovieDetail(slug string) (*models.RophimMovie, error)
var n int var n int
if _, err := fmt.Sscanf(ep.Name, "Tap %d", &n); err == nil { if _, err := fmt.Sscanf(ep.Name, "Tap %d", &n); err == nil {
epNum = n epNum = n
} else { }
epNum = 1 // If still 0 (e.g. "Full", "Trailer"), skip — don't default to 1
// as that would collide with real Episode 1 during dedup
if epNum == 0 {
continue
} }
} }

View file

@ -204,14 +204,15 @@ func (s *PhimMoiChillScraper) GetMovieDetail(slug string) (*models.RophimMovie,
epNum := 0 epNum := 0
if strings.EqualFold(epName, "Full") { if strings.EqualFold(epName, "Full") {
epNum = 1 // Single-movie "Full" — will be handled by the fallback below
} else { // Don't assign epNum=1 as it collides with real Episode 1 in series
// Try "Tập 1", "Tập 2" return
fmt.Sscanf(epName, "Tập %d", &epNum)
} }
// Try "Tập 1", "Tập 2"
fmt.Sscanf(epName, "Tập %d", &epNum)
if epNum == 0 { if epNum == 0 {
// Try to extract from title if current text is just "Tap X" // Try plain number
fmt.Sscanf(epName, "%d", &epNum) fmt.Sscanf(epName, "%d", &epNum)
} }