refactor(wildcard): optimize and add caching (#1634)

* fix(wildcard): avoid excessive allocations every loop

* are you going to Scarborough Fair?

* ruby ruby ruby ruby

* ride on, little murphy

* shirley?

* to the moon

* reggie are you there?

* code 99

* my doctorate is in Art History

* helps to be consistent

* tidy

* slow and steady gets the clam

* oysters were better anyway

* DIAL TONE
This commit is contained in:
Kyle Sanderson 2024-09-02 02:18:14 -07:00 committed by GitHub
parent bc0f4cc055
commit 982f7ddf68
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 177 additions and 60 deletions

View file

@ -19,9 +19,10 @@ func (c Category) String() string {
return fmt.Sprintf("%s[%d]", c.Name, c.ID)
}
var newzCategory = regexp.MustCompile(`(?m)(.+)\[(.+)\]`)
func (c Category) FromString(str string) {
var re = regexp.MustCompile(`(?m)(.+)\[(.+)\]`)
match := re.FindAllString(str, -1)
match := newzCategory.FindAllString(str, -1)
c.Name = match[1]
c.ID, _ = strconv.Atoi(match[2])

71
pkg/regexcache/regex.go Normal file
View file

@ -0,0 +1,71 @@
// Copyright (c) 2021 - 2024, Ludvig Lundgren and the autobrr contributors.
// SPDX-License-Identifier: GPL-2.0-or-later
package regexcache
import (
"regexp"
"time"
"github.com/jellydator/ttlcache/v3"
)
// cache holds compiled regular expressions keyed by their pattern string.
// It is configured with a 5 minute TTL; entries stored with
// ttlcache.DefaultTTL use it, while entries stored with ttlcache.NoTTL
// (the Must* helpers below) are not given an expiry.
var cache = ttlcache.New[string, *regexp.Regexp](
ttlcache.WithTTL[string, *regexp.Regexp](5 * time.Minute),
)
// init launches cache.Start on its own goroutine when the package is loaded.
// NOTE(review): in ttlcache v3, Start is expected to run the expired-item
// cleanup loop and block until Stop is called — confirm against the library
// docs. Nothing visible here ever stops it.
func init() {
go cache.Start()
}
// MustCompilePOSIX returns the POSIX (leftmost-longest) compilation of
// pattern, reusing a previously cached instance when one exists. Like
// regexp.MustCompilePOSIX, it panics if pattern cannot be parsed.
//
// POSIX entries are stored under a prefixed key. The cache is shared with
// the non-POSIX helpers, and without the prefix a regexp compiled with
// Perl-like semantics for the same pattern text would be returned here
// (and a POSIX regexp would leak into Compile/MustCompile).
func MustCompilePOSIX(pattern string) *regexp.Regexp {
	// NUL-delimited prefix: practical patterns do not start with it, so the
	// key cannot be confused with a plain pattern key.
	key := "\x00posix\x00" + pattern

	if item := cache.Get(key); item != nil {
		return item.Value()
	}

	reg := regexp.MustCompilePOSIX(pattern)
	// Must* helpers are meant for static patterns, so the entry never expires.
	cache.Set(key, reg, ttlcache.NoTTL)
	return reg
}
// MustCompile is a caching front for regexp.MustCompile: a regexp already
// stored for pattern is handed back as-is; otherwise pattern is compiled
// (panicking on invalid syntax), stored without an expiry, and returned.
func MustCompile(pattern string) *regexp.Regexp {
	if cached := cache.Get(pattern); cached != nil {
		return cached.Value()
	}

	compiled := regexp.MustCompile(pattern)
	cache.Set(pattern, compiled, ttlcache.NoTTL)

	return compiled
}
// CompilePOSIX returns the POSIX (leftmost-longest) compilation of pattern,
// reusing a previously cached instance when one exists. A pattern that
// fails to compile is not cached and its error is returned to the caller.
//
// POSIX entries are stored under a prefixed key. The cache is shared with
// the non-POSIX helpers, and without the prefix a regexp compiled by
// Compile/MustCompile for the same pattern text would be returned here,
// giving the wrong match semantics and hiding POSIX syntax errors.
func CompilePOSIX(pattern string) (*regexp.Regexp, error) {
	// NUL-delimited prefix: practical patterns do not start with it, so the
	// key cannot be confused with a plain pattern key.
	key := "\x00posix\x00" + pattern

	if item := cache.Get(key); item != nil {
		return item.Value(), nil
	}

	reg, err := regexp.CompilePOSIX(pattern)
	if err != nil {
		return nil, err
	}

	// Possibly user-supplied pattern: let it expire with the cache's default TTL.
	cache.Set(key, reg, ttlcache.DefaultTTL)
	return reg, nil
}
// Compile is a caching front for regexp.Compile. A regexp already stored for
// pattern is returned directly. Otherwise pattern is compiled; failures are
// returned without being cached, and successes are stored with the cache's
// default TTL before being returned.
func Compile(pattern string) (*regexp.Regexp, error) {
	if cached := cache.Get(pattern); cached != nil {
		return cached.Value(), nil
	}

	compiled, compileErr := regexp.Compile(pattern)
	if compileErr != nil {
		return nil, compileErr
	}

	cache.Set(pattern, compiled, ttlcache.DefaultTTL)

	return compiled, nil
}

View file

@ -19,10 +19,10 @@ func (c Category) String() string {
return fmt.Sprintf("%s[%d]", c.Name, c.ID)
}
func (c Category) FromString(str string) {
var re = regexp.MustCompile(`(?m)(.+)\[(.+)\]`)
match := re.FindAllString(str, -1)
var catRegex = regexp.MustCompile(`(?m)(.+)\[(.+)\]`)
// FromString parses str in the "Name[ID]" form produced by String and assigns
// the captured name and numeric ID to c. Input that does not match the
// expected form is ignored.
//
// NOTE(review): c is a value receiver, so these assignments only change the
// method's local copy and are not visible to the caller — confirm whether a
// pointer receiver was intended.
func (c Category) FromString(str string) {
	// FindStringSubmatch yields [whole match, name, id]. FindAllString returns
	// only whole matches, so indexing [1] and [2] on its result went out of
	// range.
	match := catRegex.FindStringSubmatch(str)
	if len(match) < 3 {
		return
	}

	c.Name = match[1]
	// Best effort, as before: a non-numeric ID leaves c.ID at zero.
	c.ID, _ = strconv.Atoi(match[2])
}

View file

@ -3,6 +3,14 @@
package wildcard
import (
"regexp"
"strings"
"github.com/autobrr/autobrr/pkg/regexcache"
"github.com/rs/zerolog/log"
)
// MatchSimple - finds whether the text matches/satisfies the pattern string.
// supports only '*' wildcard in the pattern.
// considers a file system path as a flat name space.
@ -14,7 +22,7 @@ func MatchSimple(pattern, name string) bool {
return true
}
// Does only wildcard '*' match.
return deepMatchRune([]rune(name), []rune(pattern), true)
return deepMatchRune(name, pattern, true)
}
// Match - finds whether the text matches/satisfies the pattern string.
@ -29,26 +37,32 @@ func Match(pattern, name string) (matched bool) {
return true
}
// Does extended wildcard '*' and '?' match.
return deepMatchRune([]rune(name), []rune(pattern), false)
return deepMatchRune(name, pattern, false)
}
func deepMatchRune(str, pattern []rune, simple bool) bool {
for len(pattern) > 0 {
switch pattern[0] {
default:
if len(str) == 0 || str[0] != pattern[0] {
return false
}
case '?':
if len(str) == 0 && !simple {
return false
}
case '*':
return deepMatchRune(str, pattern[1:], simple) ||
(len(str) > 0 && deepMatchRune(str[1:], pattern, simple))
}
str = str[1:]
pattern = pattern[1:]
// convSimple matches the escaped asterisk (`\*`) that regexp.QuoteMeta emits
// for a '*' wildcard.
var convSimple = regexp.MustCompile(regexp.QuoteMeta(`\*`))

// convWildChar matches the escaped question mark (`\?`) that regexp.QuoteMeta
// emits for a '?' wildcard.
var convWildChar = regexp.MustCompile(regexp.QuoteMeta(`\?`))

// deepMatchRune reports whether str, in its entirety, satisfies the wildcard
// pattern. The pattern is escaped so every character is literal, then '*' is
// rewritten to ".*" and, unless simple is set, '?' is rewritten to ".". The
// resulting expression is compiled through regexcache so repeated patterns
// are not recompiled. A pattern that fails to compile is logged and treated
// as a non-match.
func deepMatchRune(str, pattern string, simple bool) bool {
	pattern = regexp.QuoteMeta(pattern)

	if strings.Contains(pattern, "*") {
		pattern = convSimple.ReplaceAllLiteralString(pattern, ".*")
	}

	if !simple && strings.Contains(pattern, "?") {
		pattern = convWildChar.ReplaceAllLiteralString(pattern, ".")
	}

	// Anchor both ends so the whole of str must match. Checking only where
	// the leftmost match ends lets a pattern match a mere suffix of str
	// (e.g. pattern "a" against "ba").
	pattern = "^" + pattern + "$"

	user, err := regexcache.Compile(pattern)
	if err != nil {
		log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern)
		return false
	}

	return user.MatchString(str)
}

View file

@ -29,6 +29,36 @@ func TestMatch(t *testing.T) {
text: "The.Simps.S12",
matched: false,
},
{
pattern: "The?Simp",
text: "The.Simps.S12",
matched: false,
},
{
pattern: "The?Simp",
text: "The.Simps.S12",
matched: false,
},
{
pattern: "The*Simp",
text: "The.Simp",
matched: true,
},
{
pattern: "*tv*",
text: "tv",
matched: true,
},
{
pattern: "*EPUB*",
text: "Translated (Group) / EPUB",
matched: true,
},
{
pattern: "*shift*",
text: "Good show shift S02 2160p ATVP WEB-DL DDP 5.1 Atmos DV HEVC-GROUP",
matched: true,
},
}
// Iterate over the test cases, call the function under test, and assert the output.
for i, testCase := range testCases {