Fix missing Episode 1 and duplicate search results (Unicode-aware dedup)

This commit is contained in:
vndangkhoa 2026-02-20 20:15:59 +07:00
parent e788043395
commit 0230054f92
4 changed files with 43 additions and 8 deletions

View file

@ -8,6 +8,7 @@ require (
github.com/go-chi/chi/v5 v5.2.4
github.com/go-chi/cors v1.2.2
golang.org/x/image v0.35.0
golang.org/x/text v0.33.0
gorm.io/gorm v1.31.1
)
@ -22,7 +23,6 @@ require (
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
golang.org/x/net v0.49.0 // indirect
golang.org/x/sys v0.40.0 // indirect
golang.org/x/text v0.33.0 // indirect
modernc.org/libc v1.22.5 // indirect
modernc.org/mathutil v1.5.0 // indirect
modernc.org/memory v1.5.0 // indirect

View file

@ -11,6 +11,7 @@ import (
"strconv"
"strings"
"sync"
"unicode"
"streamflow-backend/internal/database"
"streamflow-backend/internal/models"
@ -18,6 +19,9 @@ import (
"streamflow-backend/internal/service"
"github.com/go-chi/chi/v5"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
const (
@ -128,6 +132,7 @@ func (h *Handler) fetchAndMergeMovies(fetch movieFetcher) []models.RophimMovie {
func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int) []models.RophimMovie {
var allMovies []models.RophimMovie
seenID := make(map[string]int)
seenSlug := make(map[string]int)
seenTitle := make(map[string]int)
for i := 0; i < maxLen; i++ {
@ -135,11 +140,22 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int
if i < len(movies) {
movie := movies[i]
// Check 1: Exact ID match
if idx, found := seenID[movie.ID]; found {
h.mergeMovieMetadata(&allMovies[idx], &movie)
continue
}
// Check 2: Slug match (e.g. "vu-tru-cua-doi-ta" from both providers)
slugKey := normalizeKey(movie.Slug)
if slugKey != "" {
if idx, found := seenSlug[slugKey]; found {
h.mergeMovieMetadata(&allMovies[idx], &movie)
continue
}
}
// Check 3: Normalized title match
titleKey := normalizeKey(movie.OriginalTitle)
if titleKey == "" {
titleKey = normalizeKey(movie.Title)
@ -152,6 +168,9 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int
allMovies = append(allMovies, movie)
currIdx := len(allMovies) - 1
seenID[movie.ID] = currIdx
if slugKey != "" {
seenSlug[slugKey] = currIdx
}
if titleKey != "" {
seenTitle[titleKey] = currIdx
}
@ -418,7 +437,19 @@ func (h *Handler) mergeMovieMetadata(existing, new *models.RophimMovie) {
}
// nonAlnumKeyRe matches every run of characters other than ASCII lowercase
// letters and digits. It is compiled once at package initialisation rather
// than on each normalizeKey call, which runs for every movie being merged.
var nonAlnumKeyRe = regexp.MustCompile("[^a-z0-9]+")

// normalizeKey reduces s to a comparison key made only of ASCII lowercase
// letters and digits, so that differently accented or punctuated spellings of
// the same slug/title compare equal (e.g. "Vũ Trụ Của Đôi Ta" and
// "vu-tru-cua-doi-ta" both become "vutrucuadoita").
//
// It returns "" for an empty input, and also when nothing in s survives the
// filtering (for example a string with no Latin letters or digits); callers
// should treat "" as "no usable key".
func normalizeKey(s string) string {
	if s == "" {
		return ""
	}
	s = strings.ToLower(s)

	// Decompose to NFD, drop the combining marks (category Mn) that carry the
	// diacritics, then recompose. The transformer is built per call instead of
	// being shared, since a chained transformer keeps internal buffers.
	t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
	if stripped, _, err := transform.String(t, s); err == nil {
		s = stripped
	}
	// On a transform error the lowercased input is kept deliberately: a
	// best-effort key is still useful, and the filter below removes any
	// remaining non-ASCII characters anyway.

	// "đ" has no canonical decomposition, so NFD leaves it untouched. The
	// input is already lowercased, so "Đ" is covered by this replacement too.
	s = strings.ReplaceAll(s, "đ", "d")

	// Keep only ASCII letters and digits.
	return nonAlnumKeyRe.ReplaceAllString(s, "")
}

View file

@ -303,8 +303,11 @@ func (s *OphimScraper) GetMovieDetail(slug string) (*models.RophimMovie, error)
var n int
if _, err := fmt.Sscanf(ep.Name, "Tap %d", &n); err == nil {
epNum = n
} else {
epNum = 1
}
// If still 0 (e.g. "Full", "Trailer"), skip — don't default to 1
// as that would collide with real Episode 1 during dedup
if epNum == 0 {
continue
}
}

View file

@ -204,14 +204,15 @@ func (s *PhimMoiChillScraper) GetMovieDetail(slug string) (*models.RophimMovie,
epNum := 0
if strings.EqualFold(epName, "Full") {
epNum = 1
} else {
// Try "Tập 1", "Tập 2"
fmt.Sscanf(epName, "Tập %d", &epNum)
// Single-movie "Full" — will be handled by the fallback below
// Don't assign epNum=1 as it collides with real Episode 1 in series
return
}
// Try "Tập 1", "Tập 2"
fmt.Sscanf(epName, "Tập %d", &epNum)
if epNum == 0 {
// Try to extract from title if current text is just "Tap X"
// Try plain number
fmt.Sscanf(epName, "%d", &epNum)
}