feat(filters): wildcard slice matching optimizations (#1716)

* chore(tests): add more test cases
* chore(tests): add code comments for matching patterns
* chore(tests): fix typos

---------

Co-authored-by: martylukyy <35452459+martylukyy@users.noreply.github.com>
This commit is contained in:
Kyle Sanderson 2024-09-14 02:31:26 -07:00 committed by GitHub
parent 3af06553e7
commit e9f8730ca0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 279 additions and 48 deletions

View file

@ -878,12 +878,14 @@ func sliceContainsSlice(tags []string, filters []string) bool {
}
func containsMatchFuzzy(tags []string, filters []string) bool {
advanced := make([]string, 0, len(filters))
for _, tag := range tags {
if tag == "" {
continue
}
tag = strings.ToLower(tag)
clear(advanced)
for _, filter := range filters {
if filter == "" {
continue
@ -893,20 +895,22 @@ func containsMatchFuzzy(tags []string, filters []string) bool {
// check if line contains * or ?, if so try wildcard match, otherwise try substring match
a := strings.ContainsAny(filter, "?|*")
if a {
match := wildcard.Match(filter, tag)
if match {
return true
}
advanced = append(advanced, filter)
} else if strings.Contains(tag, filter) {
return true
}
}
if wildcard.MatchSlice(advanced, tag) {
return true
}
}
return false
}
func containsMatch(tags []string, filters []string) bool {
advanced := make([]string, 0, len(filters))
for _, tag := range tags {
if tag == "" {
continue
@ -914,6 +918,7 @@ func containsMatch(tags []string, filters []string) bool {
tag = strings.ToLower(tag)
tag = strings.Trim(tag, " ")
clear(advanced)
for _, filter := range filters {
if filter == "" {
continue
@ -923,14 +928,15 @@ func containsMatch(tags []string, filters []string) bool {
// check if filter contains *, ? or |; if so try wildcard match, otherwise require an exact match
a := strings.ContainsAny(filter, "?|*")
if a {
match := wildcard.Match(filter, tag)
if match {
return true
}
advanced = append(advanced, filter)
} else if tag == filter {
return true
}
}
if wildcard.MatchSlice(advanced, tag) {
return true
}
}
return false
@ -945,6 +951,8 @@ func containsAllMatch(tags []string, filters []string) bool {
filter = strings.Trim(filter, " ")
found := false
wildFilter := strings.ContainsAny(filter, "?|*")
for _, tag := range tags {
if tag == "" {
continue
@ -955,7 +963,7 @@ func containsAllMatch(tags []string, filters []string) bool {
if tag == filter {
found = true
break
} else if strings.ContainsAny(filter, "?|*") {
} else if wildFilter {
if wildcard.Match(filter, tag) {
found = true
break
@ -994,11 +1002,13 @@ func containsMatchBasic(tags []string, filters []string) bool {
}
func containsAnySlice(tags []string, filters []string) bool {
advanced := make([]string, 0, len(filters))
for _, tag := range tags {
if tag == "" {
continue
}
tag = strings.ToLower(tag)
clear(advanced)
for _, filter := range filters {
if filter == "" {
@ -1007,16 +1017,17 @@ func containsAnySlice(tags []string, filters []string) bool {
filter = strings.ToLower(filter)
filter = strings.Trim(filter, " ")
// check if filter contains *, ? or |; if so try wildcard match, otherwise require an exact match
wild := strings.ContainsAny(filter, "?|*")
if wild {
match := wildcard.Match(filter, tag)
if match {
return true
}
a := strings.ContainsAny(filter, "?|*")
if a {
advanced = append(advanced, filter)
} else if tag == filter {
return true
}
}
if wildcard.MatchSlice(advanced, tag) {
return true
}
}
return false

View file

@ -42,7 +42,7 @@ func TestFilter_CheckFilter(t *testing.T) {
args: args{
filter: Filter{
Enabled: true,
MatchCategories: "Movies",
MatchCategories: "TV*,Movies*",
Freeleech: true,
MinSize: "10 GB",
MaxSize: "40GB",
@ -1911,6 +1911,20 @@ func TestFilter_CheckFilter1(t *testing.T) {
wantRejections: []string{"match release tags regex not matching. got: want: foreign - 17"},
wantMatch: false,
},
{
name: "test_43",
fields: fields{
Shows: ",Dutchess,Preacher",
Seasons: "1",
Episodes: "0",
Resolutions: []string{"2160p"},
Sources: []string{"WEB-DL"},
Codecs: []string{"x265"},
},
args: args{&Release{TorrentName: "Preacher.S01.DV.2160p.ATVP.WEB-DL.DDPA5.1.x265-NOSiViD"}},
wantRejections: nil,
wantMatch: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

View file

@ -69,3 +69,16 @@ func Compile(pattern string) (*regexp.Regexp, error) {
cache.Set(pattern, reg, ttlcache.DefaultTTL)
return reg, nil
}
// SubmitOriginal stores reg in the package-level cache under the key plain,
// passing ttlcache.DefaultTTL as the entry's TTL. Callers use it to file a
// compiled regexp under an arbitrary lookup key instead of the regexp source
// text that Compile uses as its key.
func SubmitOriginal(plain string, reg *regexp.Regexp) {
cache.Set(plain, reg, ttlcache.DefaultTTL)
}
// FindOriginal looks up plain in the package-level cache.
//
// It returns the cached regexp and true when cache.Get yields an item, and
// nil and false when it yields none.
func FindOriginal(plain string) (*regexp.Regexp, bool) {
	entry := cache.Get(plain)
	if entry == nil {
		// Nothing stored under this key.
		return nil, false
	}

	return entry.Value(), true
}

View file

@ -15,14 +15,7 @@ import (
// supports only '*' wildcard in the pattern.
// considers a file system path as a flat name space.
func MatchSimple(pattern, name string) bool {
if pattern == "" {
return name == pattern
}
if pattern == "*" {
return true
}
// Does only wildcard '*' match.
return deepMatchRune(name, pattern, true)
return match(pattern, name, true)
}
// Match - finds whether the text matches/satisfies the pattern string.
@ -30,33 +23,111 @@ func MatchSimple(pattern, name string) bool {
// unlike path.Match(), considers a path as a flat name space while matching the pattern.
// The difference is illustrated in the example here https://play.golang.org/p/Ega9qgD4Qz .
func Match(pattern, name string) (matched bool) {
if pattern == "" {
return name == pattern
}
if pattern == "*" {
return true
}
// Does extended wildcard '*' and '?' match.
return deepMatchRune(name, pattern, false)
return match(pattern, name, false)
}
var convSimple = regexp.MustCompile(regexp.QuoteMeta(`\*`))
var convWildChar = regexp.MustCompile(regexp.QuoteMeta(`\?`))
func deepMatchRune(str, pattern string, simple bool) bool {
pattern = regexp.QuoteMeta(pattern)
if strings.Contains(pattern, "*") {
pattern = convSimple.ReplaceAllLiteralString(pattern, ".*")
func match(pattern, name string, simple bool) (matched bool) {
if pattern == "" {
return name == ""
} else if pattern == "*" {
return true
}
if !simple && strings.Contains(pattern, "?") {
pattern = convWildChar.ReplaceAllLiteralString(pattern, ".")
return deepMatchRune(name, pattern, simple, pattern, false)
}
// MatchSliceSimple reports whether name matches at least one entry of pattern.
// It delegates to matchSlice with simple set to true, the same mode MatchSimple
// uses, in which only '*' is treated as a wildcard.
func MatchSliceSimple(pattern []string, name string) (matched bool) {
return matchSlice(pattern, name, true)
}
// MatchSlice reports whether name matches at least one entry of pattern.
// It delegates to matchSlice with simple set to false, the same mode Match
// uses, in which both '*' and '?' are treated as wildcards.
func MatchSlice(pattern []string, name string) (matched bool) {
return matchSlice(pattern, name, false)
}
func matchSlice(pattern []string, name string, simple bool) (matched bool) {
for i := 0; i < len(pattern); i++ {
if match(pattern[i], name, simple) {
return true
}
}
user, err := regexcache.Compile(pattern)
if err != nil {
log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern)
return false
return false
}
// Go 1.23's regexp engine still seems too slow for the combined-regex variant kept (commented out) below.
// The single-pattern case now skips almost all allocations.
/* func matchSlice(pattern []string, name string, simple bool) (matched bool) {
var build strings.Builder
{
grow := 0
for i := 0; i < len(pattern); i++ {
grow += len(pattern[i]) + 6 // ^\?\*$
}
build.Grow(grow)
}
for i := 0; i < len(pattern); i++ {
if pattern[i] == "" {
continue
}
if build.Len() != 0 {
build.WriteString("|")
}
build.WriteString(prepForRegex(pattern[i]))
}
if build.Len() == 0 {
return name == ""
}
return deepMatchRune(name, build.String(), simple, build.String(), true)
} */
// Escaped forms of the two wildcard characters, exactly as regexp.QuoteMeta
// emits them. cleanForRegex searches for these in an already-quoted pattern.
var (
	convSimple   = regexp.QuoteMeta("*")
	convWildChar = regexp.QuoteMeta("?")
)

// cleanForRegex turns the escaped wildcards of a QuoteMeta-quoted pattern back
// into regexp operators.
//
// Every escaped '*' becomes ".*". Unless simple is true, every escaped '?'
// becomes "."; in simple mode an escaped '?' is left untouched and therefore
// keeps matching a literal question mark.
func cleanForRegex(pattern string, simple bool) string {
	// ReplaceAll hands back its input unchanged when there is nothing to replace.
	expanded := strings.ReplaceAll(pattern, convSimple, ".*")
	if simple {
		return expanded
	}

	return strings.ReplaceAll(expanded, convWildChar, ".")
}
// prepForRegex escapes every regexp metacharacter in pattern and wraps the
// result in ^ and $ anchors.
func prepForRegex(pattern string) string {
	quoted := regexp.QuoteMeta(pattern)
	return "^" + quoted + "$"
}
func deepMatchRune(str, pattern string, simple bool, original string, bulk bool) bool {
salt := ""
if simple {
salt = "//" // invalid regex.
}
user, ok := regexcache.FindOriginal(original + salt)
if !ok {
if !bulk {
pattern = prepForRegex(pattern)
}
pattern = cleanForRegex(pattern, simple)
{
var err error
user, err = regexcache.Compile(pattern)
if err != nil {
log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern)
return false
}
}
regexcache.SubmitOriginal(original+salt, user)
}
idx := user.FindStringIndex(str)

View file

@ -3,10 +3,13 @@
package wildcard
import "testing"
import (
"testing"
)
// TestMatch - Tests validate the logic of wild card matching.
// `Match` supports '*' and '?' wildcards.
// `Match` supports '*' (zero or more characters) and '?' (one character) wildcards in typical glob style filtering.
// A pattern of the form "A*B" requires both A and B to be present: text that is only A or only B does not match.
// Sample usage: In resource matching for bucket policy validation.
func TestMatch(t *testing.T) {
testCases := []struct {
@ -59,8 +62,33 @@ func TestMatch(t *testing.T) {
text: "Good show shift S02 2160p ATVP WEB-DL DDP 5.1 Atmos DV HEVC-GROUP",
matched: true,
},
{
pattern: "The God of the Brr*The Power of Brr",
text: "The Power of Brr",
matched: false,
},
{
pattern: "The God of the Brr*The Power of Brr",
text: "The God of the Brr",
matched: false,
},
{
pattern: "The God of the Brr*The Power of Brr",
text: "The God of the Brr The Power of Brr",
matched: true,
},
{
pattern: "The God of the Brr*The Power of Brr",
text: "The God of the Brr - The Power of Brr",
matched: true,
},
{
pattern: "The God of the Brr*The Power of Brr",
text: "The God of the BrrThe Power of Brr",
matched: true,
},
}
// Iterating over the test cases, call the function under test and asert the output.
// Iterating over the test cases, call the function under test and assert the output.
for i, testCase := range testCases {
actualResult := Match(testCase.pattern, testCase.text)
if testCase.matched != actualResult {
@ -68,3 +96,97 @@ func TestMatch(t *testing.T) {
}
}
}
// TestMatchSimple runs MatchSimple over a table of pattern/name pairs.
// The rows containing '?' expect no match, since those names do not contain a
// literal question mark.
func TestMatchSimple(t *testing.T) {
	cases := []struct {
		pattern string
		name    string
		want    bool
	}{
		{pattern: "", name: "", want: true},
		{pattern: "*", name: "test", want: true},
		{pattern: "t*t", name: "test", want: true},
		{pattern: "t*t", name: "tost", want: true},
		{pattern: "t?st", name: "test", want: false},
		{pattern: "t?st", name: "tast", want: false},
		{pattern: "test", name: "test", want: true},
		{pattern: "test", name: "toast", want: false},
		{pattern: "", name: "non-empty", want: false},
		{pattern: "*", name: "", want: true},
		{pattern: "te*t", name: "test", want: true},
		{pattern: "te*", name: "te", want: true},
		{pattern: "te*", name: "ten", want: true},
		{pattern: "?est", name: "test", want: false},
		{pattern: "best", name: "best", want: true},
	}

	for _, tc := range cases {
		got := MatchSimple(tc.pattern, tc.name)
		if got == tc.want {
			continue
		}
		t.Errorf("MatchSimple(%q, %q) = %v, want %v", tc.pattern, tc.name, got, tc.want)
	}
}
// TestMatchSliceSimple runs MatchSliceSimple over a table of pattern lists.
// A row expects true when at least one pattern in the list matches name.
func TestMatchSliceSimple(t *testing.T) {
	cases := []struct {
		patterns []string
		name     string
		want     bool
	}{
		{patterns: []string{"*", "test"}, name: "test", want: true},
		{patterns: []string{"te?t", "tost", "random"}, name: "tost", want: true},
		{patterns: []string{"*st", "n?st", "l*st"}, name: "list", want: true},
		{patterns: []string{"?", "?*", "?**"}, name: "t", want: false},
		{patterns: []string{"a", "b", "c"}, name: "d", want: false},
		{patterns: []string{}, name: "test", want: false},
		{patterns: []string{"*"}, name: "any", want: true},
		{patterns: []string{"abc", "def", "ghi"}, name: "ghi", want: true},
		{patterns: []string{"abc", "def", "ghi"}, name: "xyz", want: false},
		{patterns: []string{"abc*", "def*", "ghi*"}, name: "ghi-test", want: true},
	}

	for _, tc := range cases {
		got := MatchSliceSimple(tc.patterns, tc.name)
		if got == tc.want {
			continue
		}
		t.Errorf("MatchSliceSimple(%v, %q) = %v, want %v", tc.patterns, tc.name, got, tc.want)
	}
}
// TestMatchSlice runs MatchSlice over a table of pattern lists that use both
// '*' and '?'. A row expects true when at least one pattern in the list
// matches name.
func TestMatchSlice(t *testing.T) {
	cases := []struct {
		patterns []string
		name     string
		want     bool
	}{
		{patterns: []string{"*", "test", "t?st"}, name: "test", want: true},
		{patterns: []string{"te?t", "t?st", "random"}, name: "tost", want: true},
		{patterns: []string{"*st", "n?st", "l*st"}, name: "list", want: true},
		{patterns: []string{"?", "??", "???"}, name: "t", want: true},
		{patterns: []string{"a", "b", "c"}, name: "d", want: false},
		{patterns: []string{}, name: "test", want: false},
		{patterns: []string{"*"}, name: "any", want: true},
		{patterns: []string{"abc", "def", "ghi"}, name: "ghi", want: true},
		{patterns: []string{"abc", "def", "ghi"}, name: "xyz", want: false},
		{patterns: []string{"abc*", "def*", "ghi*"}, name: "ghi-test", want: true},
		{patterns: []string{"abc?", "def?", "ghi?"}, name: "ghiz", want: true},
		{patterns: []string{"abc?", "def?", "ghi?"}, name: "ghizz", want: false},
		{patterns: []string{"a*?", "b*?", "c*?"}, name: "cwhatever", want: true},
		{patterns: []string{"a*?", "b*?", "c*?"}, name: "dwhatever", want: false},
		{patterns: []string{"*"}, name: "", want: true},
		{patterns: []string{"abc"}, name: "abc", want: true},
		{patterns: []string{"?bc"}, name: "abc", want: true},
		{patterns: []string{"abc*"}, name: "abcd", want: true},
		{patterns: []string{"guacamole", "The?Simpsons*"}, name: "The Simpsons S12", want: true},
		{patterns: []string{"guacamole*", "The?Sompsons*"}, name: "The Simpsons S12", want: false},
		{patterns: []string{"guac?mole*", "The?S?mpson"}, name: "The Simpsons S12", want: false},
		{patterns: []string{"guac?mole*", "The?S?mpson"}, name: "guacamole Tornado", want: true},
		{patterns: []string{"mole*", "The?S?mpson"}, name: "guacamole Tornado", want: false},
		{patterns: []string{"??**mole*", "The?S?mpson"}, name: "guacamole Tornado", want: true},
	}

	for _, tc := range cases {
		got := MatchSlice(tc.patterns, tc.name)
		if got == tc.want {
			continue
		}
		t.Errorf("MatchSlice(%v, %q) = %v, want %v", tc.patterns, tc.name, got, tc.want)
	}
}