diff --git a/internal/domain/filter.go b/internal/domain/filter.go index 073a780..f760fa9 100644 --- a/internal/domain/filter.go +++ b/internal/domain/filter.go @@ -878,12 +878,14 @@ func sliceContainsSlice(tags []string, filters []string) bool { } func containsMatchFuzzy(tags []string, filters []string) bool { + advanced := make([]string, 0, len(filters)) for _, tag := range tags { if tag == "" { continue } tag = strings.ToLower(tag) + clear(advanced) for _, filter := range filters { if filter == "" { continue @@ -893,20 +895,22 @@ func containsMatchFuzzy(tags []string, filters []string) bool { // check if line contains * or ?, if so try wildcard match, otherwise try substring match a := strings.ContainsAny(filter, "?|*") if a { - match := wildcard.Match(filter, tag) - if match { - return true - } + advanced = append(advanced, filter) } else if strings.Contains(tag, filter) { return true } } + + if wildcard.MatchSlice(advanced, tag) { + return true + } } return false } func containsMatch(tags []string, filters []string) bool { + advanced := make([]string, 0, len(filters)) for _, tag := range tags { if tag == "" { continue @@ -914,6 +918,7 @@ func containsMatch(tags []string, filters []string) bool { tag = strings.ToLower(tag) tag = strings.Trim(tag, " ") + clear(advanced) for _, filter := range filters { if filter == "" { continue @@ -923,14 +928,15 @@ func containsMatch(tags []string, filters []string) bool { // check if line contains * or ?, if so try wildcard match, otherwise try substring match a := strings.ContainsAny(filter, "?|*") if a { - match := wildcard.Match(filter, tag) - if match { - return true - } + advanced = append(advanced, filter) } else if tag == filter { return true } } + + if wildcard.MatchSlice(advanced, tag) { + return true + } } return false @@ -945,6 +951,8 @@ func containsAllMatch(tags []string, filters []string) bool { filter = strings.Trim(filter, " ") found := false + wildFilter := strings.ContainsAny(filter, "?|*") + for _, tag := range tags { if tag == "" { continue @@ -955,7 +963,7 @@ func containsAllMatch(tags []string, filters []string) bool { if tag == filter { found = true break - } else if strings.ContainsAny(filter, "?|*") { + } else if wildFilter { if wildcard.Match(filter, tag) { found = true break @@ -994,11 +1002,13 @@ func containsMatchBasic(tags []string, filters []string) bool { } func containsAnySlice(tags []string, filters []string) bool { + advanced := make([]string, 0, len(filters)) for _, tag := range tags { if tag == "" { continue } tag = strings.ToLower(tag) + clear(advanced) for _, filter := range filters { if filter == "" { @@ -1007,16 +1017,17 @@ func containsAnySlice(tags []string, filters []string) bool { filter = strings.ToLower(filter) filter = strings.Trim(filter, " ") // check if line contains * or ?, if so try wildcard match, otherwise try substring match - wild := strings.ContainsAny(filter, "?|*") - if wild { - match := wildcard.Match(filter, tag) - if match { - return true - } + a := strings.ContainsAny(filter, "?|*") + if a { + advanced = append(advanced, filter) } else if tag == filter { return true } } + + if wildcard.MatchSlice(advanced, tag) { + return true + } } return false diff --git a/internal/domain/filter_test.go b/internal/domain/filter_test.go index 8d58129..1462b74 100644 --- a/internal/domain/filter_test.go +++ b/internal/domain/filter_test.go @@ -42,7 +42,7 @@ func TestFilter_CheckFilter(t *testing.T) { args: args{ filter: Filter{ Enabled: true, - MatchCategories: "Movies", + MatchCategories: "TV*,Movies*", Freeleech: true, MinSize: "10 GB", MaxSize: "40GB", @@ -1911,6 +1911,20 @@ func TestFilter_CheckFilter1(t *testing.T) { wantRejections: []string{"match release tags regex not matching. got: want: foreign - 17"}, wantMatch: false, }, + { + name: "test_43", + fields: fields{ + Shows: ",Dutchess,Preacher", + Seasons: "1", + Episodes: "0", + Resolutions: []string{"2160p"}, + Sources: []string{"WEB-DL"}, + Codecs: []string{"x265"}, + }, + args: args{&Release{TorrentName: "Preacher.S01.DV.2160p.ATVP.WEB-DL.DDPA5.1.x265-NOSiViD"}}, + wantRejections: nil, + wantMatch: true, + }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { diff --git a/pkg/regexcache/regex.go b/pkg/regexcache/regex.go index dd56529..581d165 100644 --- a/pkg/regexcache/regex.go +++ b/pkg/regexcache/regex.go @@ -69,3 +69,16 @@ func Compile(pattern string) (*regexp.Regexp, error) { cache.Set(pattern, reg, ttlcache.DefaultTTL) return reg, nil } + +func SubmitOriginal(plain string, reg *regexp.Regexp) { + cache.Set(plain, reg, ttlcache.DefaultTTL) +} + +func FindOriginal(plain string) (*regexp.Regexp, bool) { + item := cache.Get(plain) + if item != nil { + return item.Value(), true + } + + return nil, false +} diff --git a/pkg/wildcard/match.go b/pkg/wildcard/match.go index 63b973b..a445465 100644 --- a/pkg/wildcard/match.go +++ b/pkg/wildcard/match.go @@ -15,14 +15,7 @@ import ( // supports only '*' wildcard in the pattern. // considers a file system path as a flat name space. func MatchSimple(pattern, name string) bool { - if pattern == "" { - return name == pattern - } - if pattern == "*" { - return true - } - // Does only wildcard '*' match. - return deepMatchRune(name, pattern, true) + return match(pattern, name, true) } // Match - finds whether the text matches/satisfies the pattern string. @@ -30,33 +23,111 @@ func MatchSimple(pattern, name string) bool { // unlike path.Match(), considers a path as a flat name space while matching the pattern. // The difference is illustrated in the example here https://play.golang.org/p/Ega9qgD4Qz . func Match(pattern, name string) (matched bool) { - if pattern == "" { - return name == pattern - } - if pattern == "*" { - return true - } - // Does extended wildcard '*' and '?' match. - return deepMatchRune(name, pattern, false) + return match(pattern, name, false) } -var convSimple = regexp.MustCompile(regexp.QuoteMeta(`\*`)) -var convWildChar = regexp.MustCompile(regexp.QuoteMeta(`\?`)) - -func deepMatchRune(str, pattern string, simple bool) bool { - pattern = regexp.QuoteMeta(pattern) - if strings.Contains(pattern, "*") { - pattern = convSimple.ReplaceAllLiteralString(pattern, ".*") +func match(pattern, name string, simple bool) (matched bool) { + if pattern == "" { + return name == "" + } else if pattern == "*" { + return true } - if !simple && strings.Contains(pattern, "?") { - pattern = convWildChar.ReplaceAllLiteralString(pattern, ".") + return deepMatchRune(name, pattern, simple, pattern, false) +} + +func MatchSliceSimple(pattern []string, name string) (matched bool) { + return matchSlice(pattern, name, true) +} + +func MatchSlice(pattern []string, name string) (matched bool) { + return matchSlice(pattern, name, false) +} + +func matchSlice(pattern []string, name string, simple bool) (matched bool) { + for i := 0; i < len(pattern); i++ { + if match(pattern[i], name, simple) { + return true + } } - user, err := regexcache.Compile(pattern) - if err != nil { - log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern) - return false + return false +} + +// go 1.23 seems to still be too slow for regex. +// the single case now skips almost all allocations. +/* func matchSlice(pattern []string, name string, simple bool) (matched bool) { + var build strings.Builder + { + grow := 0 + for i := 0; i < len(pattern); i++ { + grow += len(pattern[i]) + 6 // ^\?\*$ + } + + build.Grow(grow) + } + + for i := 0; i < len(pattern); i++ { + if pattern[i] == "" { + continue + } + + if build.Len() != 0 { + build.WriteString("|") + } + + build.WriteString(prepForRegex(pattern[i])) + } + + if build.Len() == 0 { + return name == "" + } + + return deepMatchRune(name, build.String(), simple, build.String(), true) +} */ + +var convSimple = regexp.QuoteMeta("*") +var convWildChar = regexp.QuoteMeta("?") + +func cleanForRegex(pattern string, simple bool) string { + if strings.Contains(pattern, convSimple) { + pattern = strings.ReplaceAll(pattern, convSimple, ".*") + } + + if !simple && strings.Contains(pattern, convWildChar) { + pattern = strings.ReplaceAll(pattern, convWildChar, ".") + } + + return pattern +} + +func prepForRegex(pattern string) string { + return `^` + regexp.QuoteMeta(pattern) + `$` +} + +func deepMatchRune(str, pattern string, simple bool, original string, bulk bool) bool { + salt := "" + if simple { + salt = "//" // invalid regex. + } + + user, ok := regexcache.FindOriginal(original + salt) + if !ok { + if !bulk { + pattern = prepForRegex(pattern) + } + + pattern = cleanForRegex(pattern, simple) + { + var err error + user, err = regexcache.Compile(pattern) + if err != nil { + log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern) + return false + } + } + + regexcache.SubmitOriginal(original+salt, user) } idx := user.FindStringIndex(str) diff --git a/pkg/wildcard/match_test.go b/pkg/wildcard/match_test.go index 08a25c0..99beaa9 100644 --- a/pkg/wildcard/match_test.go +++ b/pkg/wildcard/match_test.go @@ -3,10 +3,13 @@ package wildcard -import "testing" +import ( + "testing" +) // TestMatch - Tests validate the logic of wild card matching. -// `Match` supports '*' and '?' wildcards. +// `Match` supports '*' (zero or more characters) and '?' (one character) wildcards in typical glob style filtering. +// A '*' in a provided string will not result in matching the strings before and after the '*' of the string provided. // Sample usage: In resource matching for bucket policy validation. func TestMatch(t *testing.T) { testCases := []struct { @@ -59,8 +62,33 @@ func TestMatch(t *testing.T) { text: "Good show shift S02 2160p ATVP WEB-DL DDP 5.1 Atmos DV HEVC-GROUP", matched: true, }, + { + pattern: "The God of the Brr*The Power of Brr", + text: "The Power of Brr", + matched: false, + }, + { + pattern: "The God of the Brr*The Power of Brr", + text: "The God of the Brr", + matched: false, + }, + { + pattern: "The God of the Brr*The Power of Brr", + text: "The God of the Brr The Power of Brr", + matched: true, + }, + { + pattern: "The God of the Brr*The Power of Brr", + text: "The God of the Brr - The Power of Brr", + matched: true, + }, + { + pattern: "The God of the Brr*The Power of Brr", + text: "The God of the BrrThe Power of Brr", + matched: true, + }, } - // Iterating over the test cases, call the function under test and asert the output. + // Iterating over the test cases, call the function under test and assert the output. for i, testCase := range testCases { actualResult := Match(testCase.pattern, testCase.text) if testCase.matched != actualResult { @@ -68,3 +96,97 @@ func TestMatch(t *testing.T) { } } } + +func TestMatchSimple(t *testing.T) { + tests := []struct { + pattern string + name string + want bool + }{ + {"", "", true}, + {"*", "test", true}, + {"t*t", "test", true}, + {"t*t", "tost", true}, + {"t?st", "test", false}, + {"t?st", "tast", false}, + {"test", "test", true}, + {"test", "toast", false}, + {"", "non-empty", false}, + {"*", "", true}, + {"te*t", "test", true}, + {"te*", "te", true}, + {"te*", "ten", true}, + {"?est", "test", false}, + {"best", "best", true}, + } + + for _, tt := range tests { + if got := MatchSimple(tt.pattern, tt.name); got != tt.want { + t.Errorf("MatchSimple(%q, %q) = %v, want %v", tt.pattern, tt.name, got, tt.want) + } + } +} + +func TestMatchSliceSimple(t *testing.T) { + tests := []struct { + patterns []string + name string + want bool + }{ + {[]string{"*", "test"}, "test", true}, + {[]string{"te?t", "tost", "random"}, "tost", true}, + {[]string{"*st", "n?st", "l*st"}, "list", true}, + {[]string{"?", "?*", "?**"}, "t", false}, + {[]string{"a", "b", "c"}, "d", false}, + {[]string{}, "test", false}, + {[]string{"*"}, "any", true}, + {[]string{"abc", "def", "ghi"}, "ghi", true}, + {[]string{"abc", "def", "ghi"}, "xyz", false}, + {[]string{"abc*", "def*", "ghi*"}, "ghi-test", true}, + } + + for _, tt := range tests { + if got := MatchSliceSimple(tt.patterns, tt.name); got != tt.want { + t.Errorf("MatchSliceSimple(%v, %q) = %v, want %v", tt.patterns, tt.name, got, tt.want) + } + } +} + +func TestMatchSlice(t *testing.T) { + tests := []struct { + patterns []string + name string + want bool + }{ + {[]string{"*", "test", "t?st"}, "test", true}, + {[]string{"te?t", "t?st", "random"}, "tost", true}, + {[]string{"*st", "n?st", "l*st"}, "list", true}, + {[]string{"?", "??", "???"}, "t", true}, + {[]string{"a", "b", "c"}, "d", false}, + {[]string{}, "test", false}, + {[]string{"*"}, "any", true}, + {[]string{"abc", "def", "ghi"}, "ghi", true}, + {[]string{"abc", "def", "ghi"}, "xyz", false}, + {[]string{"abc*", "def*", "ghi*"}, "ghi-test", true}, + {[]string{"abc?", "def?", "ghi?"}, "ghiz", true}, + {[]string{"abc?", "def?", "ghi?"}, "ghizz", false}, + {[]string{"a*?", "b*?", "c*?"}, "cwhatever", true}, + {[]string{"a*?", "b*?", "c*?"}, "dwhatever", false}, + {[]string{"*"}, "", true}, + {[]string{"abc"}, "abc", true}, + {[]string{"?bc"}, "abc", true}, + {[]string{"abc*"}, "abcd", true}, + {[]string{"guacamole", "The?Simpsons*"}, "The Simpsons S12", true}, + {[]string{"guacamole*", "The?Sompsons*"}, "The Simpsons S12", false}, + {[]string{"guac?mole*", "The?S?mpson"}, "The Simpsons S12", false}, + {[]string{"guac?mole*", "The?S?mpson"}, "guacamole Tornado", true}, + {[]string{"mole*", "The?S?mpson"}, "guacamole Tornado", false}, + {[]string{"??**mole*", "The?S?mpson"}, "guacamole Tornado", true}, + } + + for _, tt := range tests { + if got := MatchSlice(tt.patterns, tt.name); got != tt.want { + t.Errorf("MatchSlice(%v, %q) = %v, want %v", tt.patterns, tt.name, got, tt.want) + } + } +}