mirror of
https://github.com/idanoo/autobrr
synced 2025-07-23 08:49:13 +00:00
refactor(lists): title variation processing (#1965)
refactor(lists): remove code duplication from title processing
This commit is contained in:
parent
06229edb55
commit
e581d14066
2 changed files with 58 additions and 44 deletions
|
@ -6,15 +6,16 @@ package list
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"regexp"
|
"regexp"
|
||||||
|
"slices"
|
||||||
"strings"
|
"strings"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
/*
|
/*
|
||||||
replaceRegexp replaces various character classes/categories such as
|
replaceRegexp replaces various character classes/categories such as
|
||||||
\p{P} all Unicode punctuation category characters
|
\p{P} Unicode punctuation category characters
|
||||||
\p{S} all Unicode symbol category characters
|
\p{S} Unicode symbol category characters
|
||||||
\p{Z) the Unicode seperator category characters
|
\p{Z} Unicode separator category characters
|
||||||
\x{0080}-\x{017F} Unicode block "Latin-1 Supplement" and "Latin Extended-A" characters
|
\x{0080}-\x{017F} Unicode block "Latin-1 Supplement" and "Latin Extended-A" characters
|
||||||
https://www.unicode.org/reports/tr44/#General_Category_Values
|
https://www.unicode.org/reports/tr44/#General_Category_Values
|
||||||
https://www.regular-expressions.info/unicode.html#category
|
https://www.regular-expressions.info/unicode.html#category
|
||||||
|
@ -23,10 +24,36 @@ var (
|
||||||
*/
|
*/
|
||||||
replaceRegexp = regexp.MustCompile(`[\p{P}\p{S}\p{Z}\x{0080}-\x{017F}]`)
|
replaceRegexp = regexp.MustCompile(`[\p{P}\p{S}\p{Z}\x{0080}-\x{017F}]`)
|
||||||
questionmarkRegexp = regexp.MustCompile(`[?]{2,}`)
|
questionmarkRegexp = regexp.MustCompile(`[?]{2,}`)
|
||||||
regionCodeRegexp = regexp.MustCompile(`\(\S+\)`) // also cleans titles from years like (YYYY)!
|
// cleans titles from years and region codes in parentheses, for example (2024) or (US)
|
||||||
|
parentheticalRegexp = regexp.MustCompile(`\(\S+\)`)
|
||||||
parenthesesEndRegexp = regexp.MustCompile(`\)$`)
|
parenthesesEndRegexp = regexp.MustCompile(`\)$`)
|
||||||
|
|
||||||
|
apostropheReplacer = strings.NewReplacer("'", "", "´", "", "`", "", "‘", "", "’", "")
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// generateVariations builds wildcard variations of title and returns them as a
// two-element slice: the processed pattern, followed by the same pattern with
// any trailing "?", "*" and space characters trimmed (the two may be identical).
//
// When removeParenthetical is true, every match of parentheticalRegexp is
// removed and trailing spaces are trimmed; otherwise only a closing parenthesis
// at the very end of the title is replaced by "?".
// When removeApostrophes is true, the characters handled by apostropheReplacer
// are dropped before the remaining matches of replaceRegexp are replaced by "?".
// Finally, matches of questionmarkRegexp (runs of two or more "?") are
// collapsed into a single "*".
|
||||||
|
func generateVariations(title string, removeApostrophes, removeParenthetical bool) []string {
|
||||||
|
var variation string
|
||||||
|
|
||||||
|
if removeParenthetical {
|
||||||
|
variation = parentheticalRegexp.ReplaceAllString(title, "")
|
||||||
|
variation = strings.TrimRight(variation, " ")
|
||||||
|
} else {
|
||||||
|
// NOTE(review): replaceRegexp below also matches ")" via \p{P}, so this step looks redundant — confirm.
variation = parenthesesEndRegexp.ReplaceAllString(title, "?")
|
||||||
|
}
|
||||||
|
|
||||||
|
if removeApostrophes {
|
||||||
|
// apostrophe-like characters are dropped outright instead of being turned into a "?" wildcard
variation = apostropheReplacer.Replace(variation)
|
||||||
|
}
|
||||||
|
variation = replaceRegexp.ReplaceAllString(variation, "?")
|
||||||
|
variation = questionmarkRegexp.ReplaceAllString(variation, "*")
|
||||||
|
|
||||||
|
return []string{
|
||||||
|
variation,
|
||||||
|
// same pattern without trailing wildcards or spaces
strings.TrimRight(variation, "?* "),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// yearRegexp = regexp.MustCompile(`\(\d{4}\)$`)
|
// yearRegexp = regexp.MustCompile(`\(\d{4}\)$`)
|
||||||
func processTitle(title string, matchRelease bool) []string {
|
func processTitle(title string, matchRelease bool) []string {
|
||||||
// Checking if the title is empty.
|
// Checking if the title is empty.
|
||||||
|
@ -38,46 +65,25 @@ func processTitle(title string, matchRelease bool) []string {
|
||||||
// var re = regexp.MustCompile(`(?m)\s(\(\d+\))`)
|
// var re = regexp.MustCompile(`(?m)\s(\(\d+\))`)
|
||||||
// title = re.ReplaceAllString(title, "")
|
// title = re.ReplaceAllString(title, "")
|
||||||
|
|
||||||
t := NewTitleSlice()
|
t := NewTitleSet()
|
||||||
|
|
||||||
if replaceRegexp.ReplaceAllString(title, "") == "" {
|
if replaceRegexp.ReplaceAllString(title, "") == "" {
|
||||||
t.Add(title, matchRelease)
|
t.Add(title, matchRelease)
|
||||||
} else {
|
} else {
|
||||||
// title with all non-alphanumeric characters replaced by "?"
|
titles := slices.Concat(
|
||||||
apostropheTitle := parenthesesEndRegexp.ReplaceAllString(title, "?")
|
// don't remove apostrophes and info in parentheses
|
||||||
apostropheTitle = replaceRegexp.ReplaceAllString(apostropheTitle, "?")
|
generateVariations(title, false, false),
|
||||||
apostropheTitle = questionmarkRegexp.ReplaceAllString(apostropheTitle, "*")
|
// remove apostrophes but don't remove info in parentheses
|
||||||
|
generateVariations(title, true, false),
|
||||||
|
// don't remove apostrophes but remove info in parentheses
|
||||||
|
generateVariations(title, false, true),
|
||||||
|
// remove apostrophes and info in parentheses
|
||||||
|
generateVariations(title, true, true),
|
||||||
|
)
|
||||||
|
|
||||||
t.Add(apostropheTitle, matchRelease)
|
for _, title := range titles {
|
||||||
t.Add(strings.TrimRight(apostropheTitle, "?* "), matchRelease)
|
t.Add(title, matchRelease)
|
||||||
|
}
|
||||||
// title with apostrophes removed and all non-alphanumeric characters replaced by "?"
|
|
||||||
noApostropheTitle := parenthesesEndRegexp.ReplaceAllString(title, "?")
|
|
||||||
noApostropheTitle = strings.NewReplacer("'", "", "´", "", "`", "", "‘", "", "’", "").Replace(noApostropheTitle)
|
|
||||||
noApostropheTitle = replaceRegexp.ReplaceAllString(noApostropheTitle, "?")
|
|
||||||
noApostropheTitle = questionmarkRegexp.ReplaceAllString(noApostropheTitle, "*")
|
|
||||||
|
|
||||||
t.Add(noApostropheTitle, matchRelease)
|
|
||||||
t.Add(strings.TrimRight(noApostropheTitle, "?* "), matchRelease)
|
|
||||||
|
|
||||||
// title with regions in parentheses removed and all non-alphanumeric characters replaced by "?"
|
|
||||||
removedRegionCodeApostrophe := regionCodeRegexp.ReplaceAllString(title, "")
|
|
||||||
removedRegionCodeApostrophe = strings.TrimRight(removedRegionCodeApostrophe, " ")
|
|
||||||
removedRegionCodeApostrophe = replaceRegexp.ReplaceAllString(removedRegionCodeApostrophe, "?")
|
|
||||||
removedRegionCodeApostrophe = questionmarkRegexp.ReplaceAllString(removedRegionCodeApostrophe, "*")
|
|
||||||
|
|
||||||
t.Add(removedRegionCodeApostrophe, matchRelease)
|
|
||||||
t.Add(strings.TrimRight(removedRegionCodeApostrophe, "?* "), matchRelease)
|
|
||||||
|
|
||||||
// title with regions in parentheses and apostrophes removed and all non-alphanumeric characters replaced by "?"
|
|
||||||
removedRegionCodeNoApostrophe := regionCodeRegexp.ReplaceAllString(title, "")
|
|
||||||
removedRegionCodeNoApostrophe = strings.TrimRight(removedRegionCodeNoApostrophe, " ")
|
|
||||||
removedRegionCodeNoApostrophe = strings.NewReplacer("'", "", "´", "", "`", "", "‘", "", "’", "").Replace(removedRegionCodeNoApostrophe)
|
|
||||||
removedRegionCodeNoApostrophe = replaceRegexp.ReplaceAllString(removedRegionCodeNoApostrophe, "?")
|
|
||||||
removedRegionCodeNoApostrophe = questionmarkRegexp.ReplaceAllString(removedRegionCodeNoApostrophe, "*")
|
|
||||||
|
|
||||||
t.Add(removedRegionCodeNoApostrophe, matchRelease)
|
|
||||||
t.Add(strings.TrimRight(removedRegionCodeNoApostrophe, "?* "), matchRelease)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return t.Titles()
|
return t.Titles()
|
||||||
|
@ -87,7 +93,7 @@ type Titles struct {
|
||||||
tm map[string]struct{}
|
tm map[string]struct{}
|
||||||
}
|
}
|
||||||
|
|
||||||
func NewTitleSlice() *Titles {
|
func NewTitleSet() *Titles {
|
||||||
ts := Titles{
|
ts := Titles{
|
||||||
tm: map[string]struct{}{},
|
tm: map[string]struct{}{},
|
||||||
}
|
}
|
||||||
|
@ -100,7 +106,7 @@ func (ts *Titles) Add(title string, matchRelease bool) {
|
||||||
}
|
}
|
||||||
|
|
||||||
if matchRelease {
|
if matchRelease {
|
||||||
title = strings.Trim(title, "?")
|
title = strings.Trim(title, "?* ")
|
||||||
title = fmt.Sprintf("*%v*", title)
|
title = fmt.Sprintf("*%v*", title)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -277,6 +277,14 @@ func Test_processTitle(t *testing.T) {
|
||||||
},
|
},
|
||||||
want: []string{"pok?mon"},
|
want: []string{"pok?mon"},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
name: "test_33",
|
||||||
|
args: args{
|
||||||
|
title: "What If…?",
|
||||||
|
matchRelease: true,
|
||||||
|
},
|
||||||
|
want: []string{"*What?If*"},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
for _, tt := range tests {
|
for _, tt := range tests {
|
||||||
t.Run(tt.name, func(t *testing.T) {
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue