mirror of
https://github.com/idanoo/autobrr
synced 2025-07-22 16:29:12 +00:00
feat(filters): wildcard slice matching optimizations (#1716)
* chore(tests): add more test cases
* chore(tests): add code comments for matching patterns
* chore(tests): fix typos

---------

Co-authored-by: martylukyy <35452459+martylukyy@users.noreply.github.com>
This commit is contained in:
parent
3af06553e7
commit
e9f8730ca0
5 changed files with 279 additions and 48 deletions
|
@ -878,12 +878,14 @@ func sliceContainsSlice(tags []string, filters []string) bool {
|
|||
}
|
||||
|
||||
func containsMatchFuzzy(tags []string, filters []string) bool {
|
||||
advanced := make([]string, 0, len(filters))
|
||||
for _, tag := range tags {
|
||||
if tag == "" {
|
||||
continue
|
||||
}
|
||||
tag = strings.ToLower(tag)
|
||||
|
||||
clear(advanced)
|
||||
for _, filter := range filters {
|
||||
if filter == "" {
|
||||
continue
|
||||
|
@ -893,20 +895,22 @@ func containsMatchFuzzy(tags []string, filters []string) bool {
|
|||
// check if line contains * or ?, if so try wildcard match, otherwise try substring match
|
||||
a := strings.ContainsAny(filter, "?|*")
|
||||
if a {
|
||||
match := wildcard.Match(filter, tag)
|
||||
if match {
|
||||
return true
|
||||
}
|
||||
advanced = append(advanced, filter)
|
||||
} else if strings.Contains(tag, filter) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if wildcard.MatchSlice(advanced, tag) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func containsMatch(tags []string, filters []string) bool {
|
||||
advanced := make([]string, 0, len(filters))
|
||||
for _, tag := range tags {
|
||||
if tag == "" {
|
||||
continue
|
||||
|
@ -914,6 +918,7 @@ func containsMatch(tags []string, filters []string) bool {
|
|||
tag = strings.ToLower(tag)
|
||||
tag = strings.Trim(tag, " ")
|
||||
|
||||
clear(advanced)
|
||||
for _, filter := range filters {
|
||||
if filter == "" {
|
||||
continue
|
||||
|
@ -923,14 +928,15 @@ func containsMatch(tags []string, filters []string) bool {
|
|||
// check if line contains * or ?, if so try wildcard match, otherwise try substring match
|
||||
a := strings.ContainsAny(filter, "?|*")
|
||||
if a {
|
||||
match := wildcard.Match(filter, tag)
|
||||
if match {
|
||||
return true
|
||||
}
|
||||
advanced = append(advanced, filter)
|
||||
} else if tag == filter {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if wildcard.MatchSlice(advanced, tag) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
|
@ -945,6 +951,8 @@ func containsAllMatch(tags []string, filters []string) bool {
|
|||
filter = strings.Trim(filter, " ")
|
||||
found := false
|
||||
|
||||
wildFilter := strings.ContainsAny(filter, "?|*")
|
||||
|
||||
for _, tag := range tags {
|
||||
if tag == "" {
|
||||
continue
|
||||
|
@ -955,7 +963,7 @@ func containsAllMatch(tags []string, filters []string) bool {
|
|||
if tag == filter {
|
||||
found = true
|
||||
break
|
||||
} else if strings.ContainsAny(filter, "?|*") {
|
||||
} else if wildFilter {
|
||||
if wildcard.Match(filter, tag) {
|
||||
found = true
|
||||
break
|
||||
|
@ -994,11 +1002,13 @@ func containsMatchBasic(tags []string, filters []string) bool {
|
|||
}
|
||||
|
||||
func containsAnySlice(tags []string, filters []string) bool {
|
||||
advanced := make([]string, 0, len(filters))
|
||||
for _, tag := range tags {
|
||||
if tag == "" {
|
||||
continue
|
||||
}
|
||||
tag = strings.ToLower(tag)
|
||||
clear(advanced)
|
||||
|
||||
for _, filter := range filters {
|
||||
if filter == "" {
|
||||
|
@ -1007,16 +1017,17 @@ func containsAnySlice(tags []string, filters []string) bool {
|
|||
filter = strings.ToLower(filter)
|
||||
filter = strings.Trim(filter, " ")
|
||||
// check if line contains * or ?, if so try wildcard match, otherwise try substring match
|
||||
wild := strings.ContainsAny(filter, "?|*")
|
||||
if wild {
|
||||
match := wildcard.Match(filter, tag)
|
||||
if match {
|
||||
return true
|
||||
}
|
||||
a := strings.ContainsAny(filter, "?|*")
|
||||
if a {
|
||||
advanced = append(advanced, filter)
|
||||
} else if tag == filter {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
if wildcard.MatchSlice(advanced, tag) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
|
|
|
@ -42,7 +42,7 @@ func TestFilter_CheckFilter(t *testing.T) {
|
|||
args: args{
|
||||
filter: Filter{
|
||||
Enabled: true,
|
||||
MatchCategories: "Movies",
|
||||
MatchCategories: "TV*,Movies*",
|
||||
Freeleech: true,
|
||||
MinSize: "10 GB",
|
||||
MaxSize: "40GB",
|
||||
|
@ -1911,6 +1911,20 @@ func TestFilter_CheckFilter1(t *testing.T) {
|
|||
wantRejections: []string{"match release tags regex not matching. got: want: foreign - 17"},
|
||||
wantMatch: false,
|
||||
},
|
||||
{
|
||||
name: "test_43",
|
||||
fields: fields{
|
||||
Shows: ",Dutchess,Preacher",
|
||||
Seasons: "1",
|
||||
Episodes: "0",
|
||||
Resolutions: []string{"2160p"},
|
||||
Sources: []string{"WEB-DL"},
|
||||
Codecs: []string{"x265"},
|
||||
},
|
||||
args: args{&Release{TorrentName: "Preacher.S01.DV.2160p.ATVP.WEB-DL.DDPA5.1.x265-NOSiViD"}},
|
||||
wantRejections: nil,
|
||||
wantMatch: true,
|
||||
},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
|
|
|
@ -69,3 +69,16 @@ func Compile(pattern string) (*regexp.Regexp, error) {
|
|||
cache.Set(pattern, reg, ttlcache.DefaultTTL)
|
||||
return reg, nil
|
||||
}
|
||||
|
||||
func SubmitOriginal(plain string, reg *regexp.Regexp) {
|
||||
cache.Set(plain, reg, ttlcache.DefaultTTL)
|
||||
}
|
||||
|
||||
func FindOriginal(plain string) (*regexp.Regexp, bool) {
|
||||
item := cache.Get(plain)
|
||||
if item != nil {
|
||||
return item.Value(), true
|
||||
}
|
||||
|
||||
return nil, false
|
||||
}
|
||||
|
|
|
@ -15,14 +15,7 @@ import (
|
|||
// supports only '*' wildcard in the pattern.
|
||||
// considers a file system path as a flat name space.
|
||||
func MatchSimple(pattern, name string) bool {
|
||||
if pattern == "" {
|
||||
return name == pattern
|
||||
}
|
||||
if pattern == "*" {
|
||||
return true
|
||||
}
|
||||
// Does only wildcard '*' match.
|
||||
return deepMatchRune(name, pattern, true)
|
||||
return match(pattern, name, true)
|
||||
}
|
||||
|
||||
// Match - finds whether the text matches/satisfies the pattern string.
|
||||
|
@ -30,33 +23,111 @@ func MatchSimple(pattern, name string) bool {
|
|||
// unlike path.Match(), considers a path as a flat name space while matching the pattern.
|
||||
// The difference is illustrated in the example here https://play.golang.org/p/Ega9qgD4Qz .
|
||||
func Match(pattern, name string) (matched bool) {
|
||||
if pattern == "" {
|
||||
return name == pattern
|
||||
}
|
||||
if pattern == "*" {
|
||||
return true
|
||||
}
|
||||
// Does extended wildcard '*' and '?' match.
|
||||
return deepMatchRune(name, pattern, false)
|
||||
return match(pattern, name, false)
|
||||
}
|
||||
|
||||
var convSimple = regexp.MustCompile(regexp.QuoteMeta(`\*`))
|
||||
var convWildChar = regexp.MustCompile(regexp.QuoteMeta(`\?`))
|
||||
|
||||
func deepMatchRune(str, pattern string, simple bool) bool {
|
||||
pattern = regexp.QuoteMeta(pattern)
|
||||
if strings.Contains(pattern, "*") {
|
||||
pattern = convSimple.ReplaceAllLiteralString(pattern, ".*")
|
||||
func match(pattern, name string, simple bool) (matched bool) {
|
||||
if pattern == "" {
|
||||
return name == ""
|
||||
} else if pattern == "*" {
|
||||
return true
|
||||
}
|
||||
|
||||
if !simple && strings.Contains(pattern, "?") {
|
||||
pattern = convWildChar.ReplaceAllLiteralString(pattern, ".")
|
||||
return deepMatchRune(name, pattern, simple, pattern, false)
|
||||
}
|
||||
|
||||
func MatchSliceSimple(pattern []string, name string) (matched bool) {
|
||||
return matchSlice(pattern, name, true)
|
||||
}
|
||||
|
||||
func MatchSlice(pattern []string, name string) (matched bool) {
|
||||
return matchSlice(pattern, name, false)
|
||||
}
|
||||
|
||||
func matchSlice(pattern []string, name string, simple bool) (matched bool) {
|
||||
for i := 0; i < len(pattern); i++ {
|
||||
if match(pattern[i], name, simple) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
user, err := regexcache.Compile(pattern)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern)
|
||||
return false
|
||||
return false
|
||||
}
|
||||
|
||||
// go 1.23 seems to still be too slow for regex.
|
||||
// the single case now skips almost all allocations.
|
||||
/* func matchSlice(pattern []string, name string, simple bool) (matched bool) {
|
||||
var build strings.Builder
|
||||
{
|
||||
grow := 0
|
||||
for i := 0; i < len(pattern); i++ {
|
||||
grow += len(pattern[i]) + 6 // ^\?\*$
|
||||
}
|
||||
|
||||
build.Grow(grow)
|
||||
}
|
||||
|
||||
for i := 0; i < len(pattern); i++ {
|
||||
if pattern[i] == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
if build.Len() != 0 {
|
||||
build.WriteString("|")
|
||||
}
|
||||
|
||||
build.WriteString(prepForRegex(pattern[i]))
|
||||
}
|
||||
|
||||
if build.Len() == 0 {
|
||||
return name == ""
|
||||
}
|
||||
|
||||
return deepMatchRune(name, build.String(), simple, build.String(), true)
|
||||
} */
|
||||
|
||||
var convSimple = regexp.QuoteMeta("*")
|
||||
var convWildChar = regexp.QuoteMeta("?")
|
||||
|
||||
func cleanForRegex(pattern string, simple bool) string {
|
||||
if strings.Contains(pattern, convSimple) {
|
||||
pattern = strings.ReplaceAll(pattern, convSimple, ".*")
|
||||
}
|
||||
|
||||
if !simple && strings.Contains(pattern, convWildChar) {
|
||||
pattern = strings.ReplaceAll(pattern, convWildChar, ".")
|
||||
}
|
||||
|
||||
return pattern
|
||||
}
|
||||
|
||||
// prepForRegex escapes all regular-expression metacharacters in pattern and
// wraps the result in "^" and "$" anchors.
func prepForRegex(pattern string) string {
	quoted := regexp.QuoteMeta(pattern)
	return "^" + quoted + "$"
}
|
||||
|
||||
func deepMatchRune(str, pattern string, simple bool, original string, bulk bool) bool {
|
||||
salt := ""
|
||||
if simple {
|
||||
salt = "//" // invalid regex.
|
||||
}
|
||||
|
||||
user, ok := regexcache.FindOriginal(original + salt)
|
||||
if !ok {
|
||||
if !bulk {
|
||||
pattern = prepForRegex(pattern)
|
||||
}
|
||||
|
||||
pattern = cleanForRegex(pattern, simple)
|
||||
{
|
||||
var err error
|
||||
user, err = regexcache.Compile(pattern)
|
||||
if err != nil {
|
||||
log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern)
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
regexcache.SubmitOriginal(original+salt, user)
|
||||
}
|
||||
|
||||
idx := user.FindStringIndex(str)
|
||||
|
|
|
@ -3,10 +3,13 @@
|
|||
|
||||
package wildcard
|
||||
|
||||
import "testing"
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestMatch - Tests validate the logic of wild card matching.
|
||||
// `Match` supports '*' and '?' wildcards.
|
||||
// `Match` supports '*' (zero or more characters) and '?' (one character) wildcards in typical glob style filtering.
|
||||
// A '*' in a provided string will not result in matching the strings before and after the '*' of the string provided.
|
||||
// Sample usage: In resource matching for bucket policy validation.
|
||||
func TestMatch(t *testing.T) {
|
||||
testCases := []struct {
|
||||
|
@ -59,8 +62,33 @@ func TestMatch(t *testing.T) {
|
|||
text: "Good show shift S02 2160p ATVP WEB-DL DDP 5.1 Atmos DV HEVC-GROUP",
|
||||
matched: true,
|
||||
},
|
||||
{
|
||||
pattern: "The God of the Brr*The Power of Brr",
|
||||
text: "The Power of Brr",
|
||||
matched: false,
|
||||
},
|
||||
{
|
||||
pattern: "The God of the Brr*The Power of Brr",
|
||||
text: "The God of the Brr",
|
||||
matched: false,
|
||||
},
|
||||
{
|
||||
pattern: "The God of the Brr*The Power of Brr",
|
||||
text: "The God of the Brr The Power of Brr",
|
||||
matched: true,
|
||||
},
|
||||
{
|
||||
pattern: "The God of the Brr*The Power of Brr",
|
||||
text: "The God of the Brr - The Power of Brr",
|
||||
matched: true,
|
||||
},
|
||||
{
|
||||
pattern: "The God of the Brr*The Power of Brr",
|
||||
text: "The God of the BrrThe Power of Brr",
|
||||
matched: true,
|
||||
},
|
||||
}
|
||||
// Iterating over the test cases, call the function under test and asert the output.
|
||||
// Iterating over the test cases, call the function under test and assert the output.
|
||||
for i, testCase := range testCases {
|
||||
actualResult := Match(testCase.pattern, testCase.text)
|
||||
if testCase.matched != actualResult {
|
||||
|
@ -68,3 +96,97 @@ func TestMatch(t *testing.T) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchSimple(t *testing.T) {
|
||||
tests := []struct {
|
||||
pattern string
|
||||
name string
|
||||
want bool
|
||||
}{
|
||||
{"", "", true},
|
||||
{"*", "test", true},
|
||||
{"t*t", "test", true},
|
||||
{"t*t", "tost", true},
|
||||
{"t?st", "test", false},
|
||||
{"t?st", "tast", false},
|
||||
{"test", "test", true},
|
||||
{"test", "toast", false},
|
||||
{"", "non-empty", false},
|
||||
{"*", "", true},
|
||||
{"te*t", "test", true},
|
||||
{"te*", "te", true},
|
||||
{"te*", "ten", true},
|
||||
{"?est", "test", false},
|
||||
{"best", "best", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
if got := MatchSimple(tt.pattern, tt.name); got != tt.want {
|
||||
t.Errorf("MatchSimple(%q, %q) = %v, want %v", tt.pattern, tt.name, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchSliceSimple(t *testing.T) {
|
||||
tests := []struct {
|
||||
patterns []string
|
||||
name string
|
||||
want bool
|
||||
}{
|
||||
{[]string{"*", "test"}, "test", true},
|
||||
{[]string{"te?t", "tost", "random"}, "tost", true},
|
||||
{[]string{"*st", "n?st", "l*st"}, "list", true},
|
||||
{[]string{"?", "?*", "?**"}, "t", false},
|
||||
{[]string{"a", "b", "c"}, "d", false},
|
||||
{[]string{}, "test", false},
|
||||
{[]string{"*"}, "any", true},
|
||||
{[]string{"abc", "def", "ghi"}, "ghi", true},
|
||||
{[]string{"abc", "def", "ghi"}, "xyz", false},
|
||||
{[]string{"abc*", "def*", "ghi*"}, "ghi-test", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
if got := MatchSliceSimple(tt.patterns, tt.name); got != tt.want {
|
||||
t.Errorf("MatchSliceSimple(%v, %q) = %v, want %v", tt.patterns, tt.name, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMatchSlice(t *testing.T) {
|
||||
tests := []struct {
|
||||
patterns []string
|
||||
name string
|
||||
want bool
|
||||
}{
|
||||
{[]string{"*", "test", "t?st"}, "test", true},
|
||||
{[]string{"te?t", "t?st", "random"}, "tost", true},
|
||||
{[]string{"*st", "n?st", "l*st"}, "list", true},
|
||||
{[]string{"?", "??", "???"}, "t", true},
|
||||
{[]string{"a", "b", "c"}, "d", false},
|
||||
{[]string{}, "test", false},
|
||||
{[]string{"*"}, "any", true},
|
||||
{[]string{"abc", "def", "ghi"}, "ghi", true},
|
||||
{[]string{"abc", "def", "ghi"}, "xyz", false},
|
||||
{[]string{"abc*", "def*", "ghi*"}, "ghi-test", true},
|
||||
{[]string{"abc?", "def?", "ghi?"}, "ghiz", true},
|
||||
{[]string{"abc?", "def?", "ghi?"}, "ghizz", false},
|
||||
{[]string{"a*?", "b*?", "c*?"}, "cwhatever", true},
|
||||
{[]string{"a*?", "b*?", "c*?"}, "dwhatever", false},
|
||||
{[]string{"*"}, "", true},
|
||||
{[]string{"abc"}, "abc", true},
|
||||
{[]string{"?bc"}, "abc", true},
|
||||
{[]string{"abc*"}, "abcd", true},
|
||||
{[]string{"guacamole", "The?Simpsons*"}, "The Simpsons S12", true},
|
||||
{[]string{"guacamole*", "The?Sompsons*"}, "The Simpsons S12", false},
|
||||
{[]string{"guac?mole*", "The?S?mpson"}, "The Simpsons S12", false},
|
||||
{[]string{"guac?mole*", "The?S?mpson"}, "guacamole Tornado", true},
|
||||
{[]string{"mole*", "The?S?mpson"}, "guacamole Tornado", false},
|
||||
{[]string{"??**mole*", "The?S?mpson"}, "guacamole Tornado", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
if got := MatchSlice(tt.patterns, tt.name); got != tt.want {
|
||||
t.Errorf("MatchSlice(%v, %q) = %v, want %v", tt.patterns, tt.name, got, tt.want)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue