Fix missing Episode 1 and duplicate search results (Unicode-aware dedup)
This commit is contained in:
parent
e788043395
commit
0230054f92
4 changed files with 43 additions and 8 deletions
|
|
@ -8,6 +8,7 @@ require (
|
|||
github.com/go-chi/chi/v5 v5.2.4
|
||||
github.com/go-chi/cors v1.2.2
|
||||
golang.org/x/image v0.35.0
|
||||
golang.org/x/text v0.33.0
|
||||
gorm.io/gorm v1.31.1
|
||||
)
|
||||
|
||||
|
|
@ -22,7 +23,6 @@ require (
|
|||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
golang.org/x/net v0.49.0 // indirect
|
||||
golang.org/x/sys v0.40.0 // indirect
|
||||
golang.org/x/text v0.33.0 // indirect
|
||||
modernc.org/libc v1.22.5 // indirect
|
||||
modernc.org/mathutil v1.5.0 // indirect
|
||||
modernc.org/memory v1.5.0 // indirect
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ import (
|
|||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"unicode"
|
||||
|
||||
"streamflow-backend/internal/database"
|
||||
"streamflow-backend/internal/models"
|
||||
|
|
@ -18,6 +19,9 @@ import (
|
|||
"streamflow-backend/internal/service"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/transform"
|
||||
"golang.org/x/text/unicode/norm"
|
||||
)
|
||||
|
||||
const (
|
||||
|
|
@ -128,6 +132,7 @@ func (h *Handler) fetchAndMergeMovies(fetch movieFetcher) []models.RophimMovie {
|
|||
func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int) []models.RophimMovie {
|
||||
var allMovies []models.RophimMovie
|
||||
seenID := make(map[string]int)
|
||||
seenSlug := make(map[string]int)
|
||||
seenTitle := make(map[string]int)
|
||||
|
||||
for i := 0; i < maxLen; i++ {
|
||||
|
|
@ -135,11 +140,22 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int
|
|||
if i < len(movies) {
|
||||
movie := movies[i]
|
||||
|
||||
// Check 1: Exact ID match
|
||||
if idx, found := seenID[movie.ID]; found {
|
||||
h.mergeMovieMetadata(&allMovies[idx], &movie)
|
||||
continue
|
||||
}
|
||||
|
||||
// Check 2: Slug match (e.g. "vu-tru-cua-doi-ta" from both providers)
|
||||
slugKey := normalizeKey(movie.Slug)
|
||||
if slugKey != "" {
|
||||
if idx, found := seenSlug[slugKey]; found {
|
||||
h.mergeMovieMetadata(&allMovies[idx], &movie)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
// Check 3: Normalized title match
|
||||
titleKey := normalizeKey(movie.OriginalTitle)
|
||||
if titleKey == "" {
|
||||
titleKey = normalizeKey(movie.Title)
|
||||
|
|
@ -152,6 +168,9 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int
|
|||
allMovies = append(allMovies, movie)
|
||||
currIdx := len(allMovies) - 1
|
||||
seenID[movie.ID] = currIdx
|
||||
if slugKey != "" {
|
||||
seenSlug[slugKey] = currIdx
|
||||
}
|
||||
if titleKey != "" {
|
||||
seenTitle[titleKey] = currIdx
|
||||
}
|
||||
|
|
@ -418,7 +437,19 @@ func (h *Handler) mergeMovieMetadata(existing, new *models.RophimMovie) {
|
|||
}
|
||||
|
||||
func normalizeKey(s string) string {
|
||||
if s == "" {
|
||||
return ""
|
||||
}
|
||||
s = strings.ToLower(s)
|
||||
// Strip Vietnamese diacritics: Vũ Trụ Của Đôi Ta → vu tru cua doi ta
|
||||
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
|
||||
result, _, err := transform.String(t, s)
|
||||
if err == nil {
|
||||
s = result
|
||||
}
|
||||
// Replace đ/Đ which NFD doesn't decompose
|
||||
s = strings.ReplaceAll(s, "đ", "d")
|
||||
// Keep only alphanumeric
|
||||
reg := regexp.MustCompile("[^a-z0-9]+")
|
||||
return reg.ReplaceAllString(s, "")
|
||||
}
|
||||
|
|
|
|||
|
|
@ -303,8 +303,11 @@ func (s *OphimScraper) GetMovieDetail(slug string) (*models.RophimMovie, error)
|
|||
var n int
|
||||
if _, err := fmt.Sscanf(ep.Name, "Tap %d", &n); err == nil {
|
||||
epNum = n
|
||||
} else {
|
||||
epNum = 1
|
||||
}
|
||||
// If still 0 (e.g. "Full", "Trailer"), skip — don't default to 1
|
||||
// as that would collide with real Episode 1 during dedup
|
||||
if epNum == 0 {
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -204,14 +204,15 @@ func (s *PhimMoiChillScraper) GetMovieDetail(slug string) (*models.RophimMovie,
|
|||
|
||||
epNum := 0
|
||||
if strings.EqualFold(epName, "Full") {
|
||||
epNum = 1
|
||||
} else {
|
||||
// Try "Tập 1", "Tập 2"
|
||||
fmt.Sscanf(epName, "Tập %d", &epNum)
|
||||
// Single-movie "Full" — will be handled by the fallback below
|
||||
// Don't assign epNum=1 as it collides with real Episode 1 in series
|
||||
return
|
||||
}
|
||||
// Try "Tập 1", "Tập 2"
|
||||
fmt.Sscanf(epName, "Tập %d", &epNum)
|
||||
|
||||
if epNum == 0 {
|
||||
// Try to extract from title if current text is just "Tap X"
|
||||
// Try plain number
|
||||
fmt.Sscanf(epName, "%d", &epNum)
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue