Fix missing Episode 1 and duplicate search results (Unicode-aware dedup)
This commit is contained in:
parent
e788043395
commit
0230054f92
4 changed files with 43 additions and 8 deletions
|
|
@ -8,6 +8,7 @@ require (
|
||||||
github.com/go-chi/chi/v5 v5.2.4
|
github.com/go-chi/chi/v5 v5.2.4
|
||||||
github.com/go-chi/cors v1.2.2
|
github.com/go-chi/cors v1.2.2
|
||||||
golang.org/x/image v0.35.0
|
golang.org/x/image v0.35.0
|
||||||
|
golang.org/x/text v0.33.0
|
||||||
gorm.io/gorm v1.31.1
|
gorm.io/gorm v1.31.1
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -22,7 +23,6 @@ require (
|
||||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||||
golang.org/x/net v0.49.0 // indirect
|
golang.org/x/net v0.49.0 // indirect
|
||||||
golang.org/x/sys v0.40.0 // indirect
|
golang.org/x/sys v0.40.0 // indirect
|
||||||
golang.org/x/text v0.33.0 // indirect
|
|
||||||
modernc.org/libc v1.22.5 // indirect
|
modernc.org/libc v1.22.5 // indirect
|
||||||
modernc.org/mathutil v1.5.0 // indirect
|
modernc.org/mathutil v1.5.0 // indirect
|
||||||
modernc.org/memory v1.5.0 // indirect
|
modernc.org/memory v1.5.0 // indirect
|
||||||
|
|
|
||||||
|
|
@ -11,6 +11,7 @@ import (
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
|
"unicode"
|
||||||
|
|
||||||
"streamflow-backend/internal/database"
|
"streamflow-backend/internal/database"
|
||||||
"streamflow-backend/internal/models"
|
"streamflow-backend/internal/models"
|
||||||
|
|
@ -18,6 +19,9 @@ import (
|
||||||
"streamflow-backend/internal/service"
|
"streamflow-backend/internal/service"
|
||||||
|
|
||||||
"github.com/go-chi/chi/v5"
|
"github.com/go-chi/chi/v5"
|
||||||
|
"golang.org/x/text/runes"
|
||||||
|
"golang.org/x/text/transform"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
|
@ -128,6 +132,7 @@ func (h *Handler) fetchAndMergeMovies(fetch movieFetcher) []models.RophimMovie {
|
||||||
func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int) []models.RophimMovie {
|
func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int) []models.RophimMovie {
|
||||||
var allMovies []models.RophimMovie
|
var allMovies []models.RophimMovie
|
||||||
seenID := make(map[string]int)
|
seenID := make(map[string]int)
|
||||||
|
seenSlug := make(map[string]int)
|
||||||
seenTitle := make(map[string]int)
|
seenTitle := make(map[string]int)
|
||||||
|
|
||||||
for i := 0; i < maxLen; i++ {
|
for i := 0; i < maxLen; i++ {
|
||||||
|
|
@ -135,11 +140,22 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int
|
||||||
if i < len(movies) {
|
if i < len(movies) {
|
||||||
movie := movies[i]
|
movie := movies[i]
|
||||||
|
|
||||||
|
// Check 1: Exact ID match
|
||||||
if idx, found := seenID[movie.ID]; found {
|
if idx, found := seenID[movie.ID]; found {
|
||||||
h.mergeMovieMetadata(&allMovies[idx], &movie)
|
h.mergeMovieMetadata(&allMovies[idx], &movie)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check 2: Slug match (e.g. "vu-tru-cua-doi-ta" from both providers)
|
||||||
|
slugKey := normalizeKey(movie.Slug)
|
||||||
|
if slugKey != "" {
|
||||||
|
if idx, found := seenSlug[slugKey]; found {
|
||||||
|
h.mergeMovieMetadata(&allMovies[idx], &movie)
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check 3: Normalized title match
|
||||||
titleKey := normalizeKey(movie.OriginalTitle)
|
titleKey := normalizeKey(movie.OriginalTitle)
|
||||||
if titleKey == "" {
|
if titleKey == "" {
|
||||||
titleKey = normalizeKey(movie.Title)
|
titleKey = normalizeKey(movie.Title)
|
||||||
|
|
@ -152,6 +168,9 @@ func (h *Handler) mergeMovies(providerResults [][]models.RophimMovie, maxLen int
|
||||||
allMovies = append(allMovies, movie)
|
allMovies = append(allMovies, movie)
|
||||||
currIdx := len(allMovies) - 1
|
currIdx := len(allMovies) - 1
|
||||||
seenID[movie.ID] = currIdx
|
seenID[movie.ID] = currIdx
|
||||||
|
if slugKey != "" {
|
||||||
|
seenSlug[slugKey] = currIdx
|
||||||
|
}
|
||||||
if titleKey != "" {
|
if titleKey != "" {
|
||||||
seenTitle[titleKey] = currIdx
|
seenTitle[titleKey] = currIdx
|
||||||
}
|
}
|
||||||
|
|
@ -418,7 +437,19 @@ func (h *Handler) mergeMovieMetadata(existing, new *models.RophimMovie) {
|
||||||
}
|
}
|
||||||
|
|
||||||
func normalizeKey(s string) string {
|
func normalizeKey(s string) string {
|
||||||
|
if s == "" {
|
||||||
|
return ""
|
||||||
|
}
|
||||||
s = strings.ToLower(s)
|
s = strings.ToLower(s)
|
||||||
|
// Strip Vietnamese diacritics: Vũ Trụ Của Đôi Ta → vu tru cua doi ta
|
||||||
|
t := transform.Chain(norm.NFD, runes.Remove(runes.In(unicode.Mn)), norm.NFC)
|
||||||
|
result, _, err := transform.String(t, s)
|
||||||
|
if err == nil {
|
||||||
|
s = result
|
||||||
|
}
|
||||||
|
// Replace đ/Đ which NFD doesn't decompose
|
||||||
|
s = strings.ReplaceAll(s, "đ", "d")
|
||||||
|
// Keep only alphanumeric
|
||||||
reg := regexp.MustCompile("[^a-z0-9]+")
|
reg := regexp.MustCompile("[^a-z0-9]+")
|
||||||
return reg.ReplaceAllString(s, "")
|
return reg.ReplaceAllString(s, "")
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -303,8 +303,11 @@ func (s *OphimScraper) GetMovieDetail(slug string) (*models.RophimMovie, error)
|
||||||
var n int
|
var n int
|
||||||
if _, err := fmt.Sscanf(ep.Name, "Tap %d", &n); err == nil {
|
if _, err := fmt.Sscanf(ep.Name, "Tap %d", &n); err == nil {
|
||||||
epNum = n
|
epNum = n
|
||||||
} else {
|
}
|
||||||
epNum = 1
|
// If still 0 (e.g. "Full", "Trailer"), skip — don't default to 1
|
||||||
|
// as that would collide with real Episode 1 during dedup
|
||||||
|
if epNum == 0 {
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -204,14 +204,15 @@ func (s *PhimMoiChillScraper) GetMovieDetail(slug string) (*models.RophimMovie,
|
||||||
|
|
||||||
epNum := 0
|
epNum := 0
|
||||||
if strings.EqualFold(epName, "Full") {
|
if strings.EqualFold(epName, "Full") {
|
||||||
epNum = 1
|
// Single-movie "Full" — will be handled by the fallback below
|
||||||
} else {
|
// Don't assign epNum=1 as it collides with real Episode 1 in series
|
||||||
|
return
|
||||||
|
}
|
||||||
// Try "Tập 1", "Tập 2"
|
// Try "Tập 1", "Tập 2"
|
||||||
fmt.Sscanf(epName, "Tập %d", &epNum)
|
fmt.Sscanf(epName, "Tập %d", &epNum)
|
||||||
}
|
|
||||||
|
|
||||||
if epNum == 0 {
|
if epNum == 0 {
|
||||||
// Try to extract from title if current text is just "Tap X"
|
// Try plain number
|
||||||
fmt.Sscanf(epName, "%d", &epNum)
|
fmt.Sscanf(epName, "%d", &epNum)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue