mirror of
https://github.com/idanoo/autobrr
synced 2025-07-22 16:29:12 +00:00
refactor(lists): title variation processing (#1965)
refactor(lists): remove code duplication from title processing
This commit is contained in:
parent
06229edb55
commit
e581d14066
2 changed files with 58 additions and 44 deletions
|
@ -6,28 +6,55 @@ package list
|
|||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"slices"
|
||||
"strings"
|
||||
)
|
||||
|
||||
var (
|
||||
/*
|
||||
replaceRegexp replaces various character classes/categories such as
|
||||
\p{P} all Unicode punctuation category characters
|
||||
\p{S} all Unicode symbol category characters
|
||||
\p{Z) the Unicode seperator category characters
|
||||
\p{P} Unicode punctuation category characters
|
||||
\p{S} Unicode symbol category characters
|
||||
\p{Z) Unicode seperator category characters
|
||||
\x{0080}-\x{017F} Unicode block "Latin-1 Supplement" and "Latin Extended-A" characters
|
||||
https://www.unicode.org/reports/tr44/#General_Category_Values
|
||||
https://www.regular-expressions.info/unicode.html#category
|
||||
https://www.compart.com/en/unicode/block/U+0080
|
||||
https://www.compart.com/en/unicode/block/U+0100
|
||||
*/
|
||||
replaceRegexp = regexp.MustCompile(`[\p{P}\p{S}\p{Z}\x{0080}-\x{017F}]`)
|
||||
questionmarkRegexp = regexp.MustCompile(`[?]{2,}`)
|
||||
regionCodeRegexp = regexp.MustCompile(`\(\S+\)`) // also cleans titles from years like (YYYY)!
|
||||
replaceRegexp = regexp.MustCompile(`[\p{P}\p{S}\p{Z}\x{0080}-\x{017F}]`)
|
||||
questionmarkRegexp = regexp.MustCompile(`[?]{2,}`)
|
||||
// cleans titles from years and region codes in parentheses, for example (2024) or (US)
|
||||
parentheticalRegexp = regexp.MustCompile(`\(\S+\)`)
|
||||
parenthesesEndRegexp = regexp.MustCompile(`\)$`)
|
||||
|
||||
apostropheReplacer = strings.NewReplacer("'", "", "´", "", "`", "", "‘", "", "’", "")
|
||||
)
|
||||
|
||||
// yearRegexp = regexp.MustCompile(`\(\d{4}\)$`)
|
||||
// generateVariations returns variations of the title with optionally removing apostrophes and info in parentheses.
|
||||
func generateVariations(title string, removeApostrophes, removeParenthetical bool) []string {
|
||||
var variation string
|
||||
|
||||
if removeParenthetical {
|
||||
variation = parentheticalRegexp.ReplaceAllString(title, "")
|
||||
variation = strings.TrimRight(variation, " ")
|
||||
} else {
|
||||
variation = parenthesesEndRegexp.ReplaceAllString(title, "?")
|
||||
}
|
||||
|
||||
if removeApostrophes {
|
||||
variation = apostropheReplacer.Replace(variation)
|
||||
}
|
||||
variation = replaceRegexp.ReplaceAllString(variation, "?")
|
||||
variation = questionmarkRegexp.ReplaceAllString(variation, "*")
|
||||
|
||||
return []string{
|
||||
variation,
|
||||
strings.TrimRight(variation, "?* "),
|
||||
}
|
||||
}
|
||||
|
||||
// yearRegexp = regexp.MustCompile(`\(\d{4}\)$`)
|
||||
func processTitle(title string, matchRelease bool) []string {
|
||||
// Checking if the title is empty.
|
||||
if strings.TrimSpace(title) == "" {
|
||||
|
@ -38,46 +65,25 @@ func processTitle(title string, matchRelease bool) []string {
|
|||
// var re = regexp.MustCompile(`(?m)\s(\(\d+\))`)
|
||||
// title = re.ReplaceAllString(title, "")
|
||||
|
||||
t := NewTitleSlice()
|
||||
t := NewTitleSet()
|
||||
|
||||
if replaceRegexp.ReplaceAllString(title, "") == "" {
|
||||
t.Add(title, matchRelease)
|
||||
} else {
|
||||
// title with all non-alphanumeric characters replaced by "?"
|
||||
apostropheTitle := parenthesesEndRegexp.ReplaceAllString(title, "?")
|
||||
apostropheTitle = replaceRegexp.ReplaceAllString(apostropheTitle, "?")
|
||||
apostropheTitle = questionmarkRegexp.ReplaceAllString(apostropheTitle, "*")
|
||||
titles := slices.Concat(
|
||||
// don't remove apostrophes and info in parentheses
|
||||
generateVariations(title, false, false),
|
||||
// remove apostrophes but don't remove info in parentheses
|
||||
generateVariations(title, true, false),
|
||||
// don't remove apostrophes but remove info in parentheses
|
||||
generateVariations(title, false, true),
|
||||
// remove apostrophes and info in parentheses
|
||||
generateVariations(title, true, true),
|
||||
)
|
||||
|
||||
t.Add(apostropheTitle, matchRelease)
|
||||
t.Add(strings.TrimRight(apostropheTitle, "?* "), matchRelease)
|
||||
|
||||
// title with apostrophes removed and all non-alphanumeric characters replaced by "?"
|
||||
noApostropheTitle := parenthesesEndRegexp.ReplaceAllString(title, "?")
|
||||
noApostropheTitle = strings.NewReplacer("'", "", "´", "", "`", "", "‘", "", "’", "").Replace(noApostropheTitle)
|
||||
noApostropheTitle = replaceRegexp.ReplaceAllString(noApostropheTitle, "?")
|
||||
noApostropheTitle = questionmarkRegexp.ReplaceAllString(noApostropheTitle, "*")
|
||||
|
||||
t.Add(noApostropheTitle, matchRelease)
|
||||
t.Add(strings.TrimRight(noApostropheTitle, "?* "), matchRelease)
|
||||
|
||||
// title with regions in parentheses removed and all non-alphanumeric characters replaced by "?"
|
||||
removedRegionCodeApostrophe := regionCodeRegexp.ReplaceAllString(title, "")
|
||||
removedRegionCodeApostrophe = strings.TrimRight(removedRegionCodeApostrophe, " ")
|
||||
removedRegionCodeApostrophe = replaceRegexp.ReplaceAllString(removedRegionCodeApostrophe, "?")
|
||||
removedRegionCodeApostrophe = questionmarkRegexp.ReplaceAllString(removedRegionCodeApostrophe, "*")
|
||||
|
||||
t.Add(removedRegionCodeApostrophe, matchRelease)
|
||||
t.Add(strings.TrimRight(removedRegionCodeApostrophe, "?* "), matchRelease)
|
||||
|
||||
// title with regions in parentheses and apostrophes removed and all non-alphanumeric characters replaced by "?"
|
||||
removedRegionCodeNoApostrophe := regionCodeRegexp.ReplaceAllString(title, "")
|
||||
removedRegionCodeNoApostrophe = strings.TrimRight(removedRegionCodeNoApostrophe, " ")
|
||||
removedRegionCodeNoApostrophe = strings.NewReplacer("'", "", "´", "", "`", "", "‘", "", "’", "").Replace(removedRegionCodeNoApostrophe)
|
||||
removedRegionCodeNoApostrophe = replaceRegexp.ReplaceAllString(removedRegionCodeNoApostrophe, "?")
|
||||
removedRegionCodeNoApostrophe = questionmarkRegexp.ReplaceAllString(removedRegionCodeNoApostrophe, "*")
|
||||
|
||||
t.Add(removedRegionCodeNoApostrophe, matchRelease)
|
||||
t.Add(strings.TrimRight(removedRegionCodeNoApostrophe, "?* "), matchRelease)
|
||||
for _, title := range titles {
|
||||
t.Add(title, matchRelease)
|
||||
}
|
||||
}
|
||||
|
||||
return t.Titles()
|
||||
|
@ -87,7 +93,7 @@ type Titles struct {
|
|||
tm map[string]struct{}
|
||||
}
|
||||
|
||||
func NewTitleSlice() *Titles {
|
||||
func NewTitleSet() *Titles {
|
||||
ts := Titles{
|
||||
tm: map[string]struct{}{},
|
||||
}
|
||||
|
@ -100,7 +106,7 @@ func (ts *Titles) Add(title string, matchRelease bool) {
|
|||
}
|
||||
|
||||
if matchRelease {
|
||||
title = strings.Trim(title, "?")
|
||||
title = strings.Trim(title, "?* ")
|
||||
title = fmt.Sprintf("*%v*", title)
|
||||
}
|
||||
|
||||
|
|
|
@ -277,6 +277,14 @@ func Test_processTitle(t *testing.T) {
|
|||
},
|
||||
want: []string{"pok?mon"},
|
||||
},
|
||||
{
|
||||
name: "test_33",
|
||||
args: args{
|
||||
title: "What If…?",
|
||||
matchRelease: true,
|
||||
},
|
||||
want: []string{"*What?If*"},
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue