refactor(lists): title character filtering (#1955)

This commit is contained in:
martylukyy 2025-02-08 14:16:54 +01:00 committed by GitHub
parent 4fbaa0b72c
commit 6e77f0339b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 15 additions and 19 deletions

View file

@ -7,7 +7,6 @@ import (
"context"
"encoding/json"
"net/http"
"regexp"
"sort"
"strings"
@ -16,13 +15,6 @@ import (
"github.com/pkg/errors"
)
// Patterns used when filtering list titles. Each one is compiled a single
// time at package initialization.

// symbolsRegexp matches any rune in the Unicode Symbol category (\p{S}),
// which includes math and currency symbols such as $¤<~♡+=^.
var symbolsRegexp = regexp.MustCompile(`\p{S}`)

// latin1SupplementRegexp matches runes in the range U+0080 through U+00FF,
// i.e. the Unicode block “Latin-1 Supplement”.
var latin1SupplementRegexp = regexp.MustCompile(`[\x{0080}-\x{00FF}]`)

// latinExtendedARegexp matches runes in the range U+0100 through U+017F,
// i.e. the Unicode block “Latin Extended-A”.
var latinExtendedARegexp = regexp.MustCompile(`[\x{0100}-\x{017F}]`)
func (s *service) anilist(ctx context.Context, list *domain.List) error {
l := s.log.With().Str("type", "anilist").Str("list", list.Name).Logger()
@ -70,11 +62,7 @@ func (s *service) anilist(ctx context.Context, list *domain.List) error {
}
for title := range titlesToProcess {
// replace unicode symbols, Unicode Block “Latin-1 Supplement” and Unicode Block “Latin Extended-A” chars by "?"
clearedTitle := symbolsRegexp.ReplaceAllString(title, "?")
clearedTitle = latin1SupplementRegexp.ReplaceAllString(clearedTitle, "?")
clearedTitle = latinExtendedARegexp.ReplaceAllString(clearedTitle, "?")
for _, processedTitle := range processTitle(clearedTitle, list.MatchRelease) {
for _, processedTitle := range processTitle(title, list.MatchRelease) {
titleSet[processedTitle] = struct{}{}
}
}