From 0230054f92fc9138b90594f558641a512c99128d Mon Sep 17 00:00:00 2001 From: vndangkhoa Date: Fri, 20 Feb 2026 20:15:59 +0700 Subject: [PATCH] Fix missing Episode 1 and duplicate search results (Unicode-aware dedup) --- backend/go.mod | 2 +- backend/internal/api/handlers.go | 31 ++++++++++++++++++++++++ backend/internal/scraper/ophim.go | 7 ++++-- backend/internal/scraper/phimmoichill.go | 11 +++++---- 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/backend/go.mod b/backend/go.mod index b48e243..fc82cc7 100644 --- a/backend/go.mod +++ b/backend/go.mod @@ -8,6 +8,7 @@ require ( github.com/go-chi/chi/v5 v5.2.4 github.com/go-chi/cors v1.2.2 golang.org/x/image v0.35.0 + golang.org/x/text v0.33.0 gorm.io/gorm v1.31.1 ) @@ -22,7 +23,6 @@ require ( github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect golang.org/x/net v0.49.0 // indirect golang.org/x/sys v0.40.0 // indirect - golang.org/x/text v0.33.0 // indirect modernc.org/libc v1.22.5 // indirect modernc.org/mathutil v1.5.0 // indirect modernc.org/memory v1.5.0 // indirect diff --git a/backend/internal/api/handlers.go b/backend/internal/api/handlers.go index b8afe24..fd43ff1 100644 --- a/backend/internal/api/handlers.go +++ b/backend/internal/api/handlers.go @@ -11,6 +11,7 @@ import ( "strconv" "strings" "sync" + "unicode" "streamflow-backend/internal/database" "streamflow-backend/internal/models" @@ -18,6 +19,9 @@ import ( "streamflow-backend/internal/service" "github.com/go-chi/chi/v5" + "golang.org/x/text/runes" + "golang.org/x/text/transform" + "golang.org/x/text/unicode/norm" ) const ( @@ -128,6 +132,7 @@ func (h *Handler) fetchAndMergeMovies(fetch movieFetcher) []models.RophimMovie { func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int) []models.RophimMovie { var allMovies []models.RophimMovie seenID := make(map[string]int) + seenSlug := make(map[string]int) seenTitle := make(map[string]int) for i := 0; i < maxLen; i++ { @@ -135,11 +140,22 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int if i < len(movies) { movie := movies[i] + // Check 1: Exact ID match if idx, found := seenID[movie.ID]; found { h.mergeMovieMetadata(&allMovies[idx], &movie) continue } + // Check 2: Slug match (e.g. "vu-tru-cua-doi-ta" from both providers) + slugKey := normalizeKey(movie.Slug) + if slugKey != "" { + if idx, found := seenSlug[slugKey]; found { + h.mergeMovieMetadata(&allMovies[idx], &movie) + continue + } + } + + // Check 3: Normalized title match titleKey := normalizeKey(movie.OriginalTitle) if titleKey == "" { titleKey = normalizeKey(movie.Title) @@ -152,6 +168,9 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int allMovies = append(allMovies, movie) currIdx := len(allMovies) - 1 seenID[movie.ID] = currIdx + if slugKey != "" { + seenSlug[slugKey] = currIdx + } if titleKey != "" { seenTitle[titleKey] = currIdx } @@ -418,7 +437,19 @@ func (h *Handler) mergeMovieMetadata(existing, new *models.RophimMovie) { } func normalizeKey(s string) string { + if s == "" { + return "" + } s = strings.ToLower(s) + // Strip Vietnamese diacritics: Vũ Trụ Của Đôi Ta → vu tru cua doi ta + t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC) + result, _, err := transform.String(t, s) + if err == nil { + s = result + } + // Replace đ/Đ which NFD doesn't decompose + s = strings.ReplaceAll(s, "đ", "d") + // Keep only alphanumeric reg := regexp.MustCompile("[^a-z0-9]+") return reg.ReplaceAllString(s, "") } diff --git a/backend/internal/scraper/ophim.go b/backend/internal/scraper/ophim.go index 4e0dcac..235f796 100644 --- a/backend/internal/scraper/ophim.go +++ b/backend/internal/scraper/ophim.go @@ -303,8 +303,11 @@ func (s *OphimScraper) GetMovieDetail(slug string) (*models.RophimMovie, error) var n int if _, err := fmt.Sscanf(ep.Name, "Tap %d", &n); err == nil { epNum = n - } else { - epNum = 1 + } + // If still 0 (e.g. "Full", "Trailer"), skip — don't default to 1 + // as that would collide with real Episode 1 during dedup + if epNum == 0 { + continue } } diff --git a/backend/internal/scraper/phimmoichill.go b/backend/internal/scraper/phimmoichill.go index a5c72b2..2626821 100644 --- a/backend/internal/scraper/phimmoichill.go +++ b/backend/internal/scraper/phimmoichill.go @@ -204,14 +204,15 @@ func (s *PhimMoiChillScraper) GetMovieDetail(slug string) (*models.RophimMovie, epNum := 0 if strings.EqualFold(epName, "Full") { - epNum = 1 - } else { - // Try "Tập 1", "Tập 2" - fmt.Sscanf(epName, "Tập %d", &epNum) + // Single-movie "Full" — will be handled by the fallback below + // Don't assign epNum=1 as it collides with real Episode 1 in series + return } + // Try "Tập 1", "Tập 2" + fmt.Sscanf(epName, "Tập %d", &epNum) if epNum == 0 { - // Try to extract from title if current text is just "Tap X" + // Try plain number fmt.Sscanf(epName, "%d", &epNum) }