feat(filters): wildcard slice matching optimizations (#1716)

* chore(tests): add more test cases
* chore(tests): add code comments for matching patterns
* chore(tests): fix typos

---------

Co-authored-by: martylukyy <35452459+martylukyy@users.noreply.github.com>
This commit is contained in:
Kyle Sanderson 2024-09-14 02:31:26 -07:00 committed by GitHub
parent 3af06553e7
commit e9f8730ca0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 279 additions and 48 deletions

View file

@ -15,14 +15,7 @@ import (
// supports only '*' wildcard in the pattern.
// considers a file system path as a flat name space.
func MatchSimple(pattern, name string) bool {
if pattern == "" {
return name == pattern
}
if pattern == "*" {
return true
}
// Does only wildcard '*' match.
return deepMatchRune(name, pattern, true)
return match(pattern, name, true)
}
// Match - finds whether the text matches/satisfies the pattern string.
@ -30,33 +23,111 @@ func MatchSimple(pattern, name string) bool {
// unlike path.Match(), considers a path as a flat name space while matching the pattern.
// The difference is illustrated in the example here https://play.golang.org/p/Ega9qgD4Qz .
func Match(pattern, name string) (matched bool) {
if pattern == "" {
return name == pattern
}
if pattern == "*" {
return true
}
// Does extended wildcard '*' and '?' match.
return deepMatchRune(name, pattern, false)
return match(pattern, name, false)
}
var convSimple = regexp.MustCompile(regexp.QuoteMeta(`\*`))
var convWildChar = regexp.MustCompile(regexp.QuoteMeta(`\?`))
func deepMatchRune(str, pattern string, simple bool) bool {
pattern = regexp.QuoteMeta(pattern)
if strings.Contains(pattern, "*") {
pattern = convSimple.ReplaceAllLiteralString(pattern, ".*")
func match(pattern, name string, simple bool) (matched bool) {
if pattern == "" {
return name == ""
} else if pattern == "*" {
return true
}
if !simple && strings.Contains(pattern, "?") {
pattern = convWildChar.ReplaceAllLiteralString(pattern, ".")
return deepMatchRune(name, pattern, simple, pattern, false)
}
func MatchSliceSimple(pattern []string, name string) (matched bool) {
return matchSlice(pattern, name, true)
}
func MatchSlice(pattern []string, name string) (matched bool) {
return matchSlice(pattern, name, false)
}
func matchSlice(pattern []string, name string, simple bool) (matched bool) {
for i := 0; i < len(pattern); i++ {
if match(pattern[i], name, simple) {
return true
}
}
user, err := regexcache.Compile(pattern)
if err != nil {
log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern)
return false
return false
}
// go 1.23 seems to still be too slow for regex.
// the single case now skips almost all allocations.
/* func matchSlice(pattern []string, name string, simple bool) (matched bool) {
var build strings.Builder
{
grow := 0
for i := 0; i < len(pattern); i++ {
grow += len(pattern[i]) + 6 // ^\?\*$
}
build.Grow(grow)
}
for i := 0; i < len(pattern); i++ {
if pattern[i] == "" {
continue
}
if build.Len() != 0 {
build.WriteString("|")
}
build.WriteString(prepForRegex(pattern[i]))
}
if build.Len() == 0 {
return name == ""
}
return deepMatchRune(name, build.String(), simple, build.String(), true)
} */
var convSimple = regexp.QuoteMeta("*")
var convWildChar = regexp.QuoteMeta("?")
func cleanForRegex(pattern string, simple bool) string {
if strings.Contains(pattern, convSimple) {
pattern = strings.ReplaceAll(pattern, convSimple, ".*")
}
if !simple && strings.Contains(pattern, convWildChar) {
pattern = strings.ReplaceAll(pattern, convWildChar, ".")
}
return pattern
}
func prepForRegex(pattern string) string {
return `^` + regexp.QuoteMeta(pattern) + `$`
}
func deepMatchRune(str, pattern string, simple bool, original string, bulk bool) bool {
salt := ""
if simple {
salt = "//" // invalid regex.
}
user, ok := regexcache.FindOriginal(original + salt)
if !ok {
if !bulk {
pattern = prepForRegex(pattern)
}
pattern = cleanForRegex(pattern, simple)
{
var err error
user, err = regexcache.Compile(pattern)
if err != nil {
log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern)
return false
}
}
regexcache.SubmitOriginal(original+salt, user)
}
idx := user.FindStringIndex(str)