Mirror of https://github.com/idanoo/autobrr, synced 2025-07-22 16:29:12 +00:00
refactor(wildcard): optimize and add caching (#1634)
* fix(wildcard): avoid excessive allocations every loop
* are you going to Scarborough Fair?
* ruby ruby ruby ruby
* ride on, little murphy
* shirley?
* to the moon
* reggie are you there?
* code 99
* my doctorate is in Art History
* helps to be consistent
* tidy
* slow and steady gets the clam
* oysters were better anyway
* DIAL TONE
Parent: bc0f4cc055
Commit: 982f7ddf68
13 changed files with 177 additions and 60 deletions
go.mod (1 change)

@@ -27,6 +27,7 @@ require (
 	github.com/hashicorp/go-version v1.7.0
 	github.com/hekmon/transmissionrpc/v3 v3.0.0
 	github.com/icholy/digest v0.1.23
+	github.com/jellydator/ttlcache/v3 v3.3.0
 	github.com/lib/pq v1.10.9
 	github.com/mattn/go-shellwords v1.0.12
 	github.com/mmcdole/gofeed v1.3.0
go.sum (4 changes)

@@ -207,6 +207,8 @@ github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4=
 github.com/imdario/mergo v0.3.16/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY=
 github.com/jarcoal/httpmock v1.3.0 h1:2RJ8GP0IIaWwcC9Fp2BmVi8Kog3v2Hn7VXM3fTd+nuc=
 github.com/jarcoal/httpmock v1.3.0/go.mod h1:3yb8rc4BI7TCBhFY8ng0gjuLKJNquuDNiPaZjnENuYg=
+github.com/jellydator/ttlcache/v3 v3.3.0 h1:BdoC9cE81qXfrxeb9eoJi9dWrdhSuwXMAnHTbnBm4Wc=
+github.com/jellydator/ttlcache/v3 v3.3.0/go.mod h1:bj2/e0l4jRnQdrnSTaGTsh4GSXvMjQcy41i7th0GVGw=
 github.com/json-iterator/go v1.1.6/go.mod h1:+SdeFBvtyEkXs7REEP0seUULqWtbJapLOCVDaaPEHmU=
 github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/uOdHXbAo4=
 github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM=
@@ -389,6 +391,8 @@ go.opencensus.io v0.20.2/go.mod h1:6WKK9ahsWS3RSO+PY9ZHZUfv2irvY6gN279GOPZjmmk=
 go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
 go.uber.org/automaxprocs v1.5.3 h1:kWazyxZUrS3Gs4qUpbwo5kEIMGe/DAvi5Z4tl2NW4j8=
 go.uber.org/automaxprocs v1.5.3/go.mod h1:eRbA25aqJrxAbsLO0xy5jVwPt7FQnRgjW+efnwa1WM0=
+go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
+go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
 go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
 go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
 golang.org/x/crypto v0.0.0-20180904163835-0709b304e793/go.mod h1:6SG95UA2DQfeDnfUPMdvaQW0Q7yPrPDi9nlGo2tz2b4=
@@ -112,6 +112,20 @@ func (repo *ReleaseRepo) Find(ctx context.Context, params domain.ReleaseQueryPar
 	return releases, nextCursor, total, nil
 }
 
+var reservedSearch = map[string]*regexp.Regexp{
+	"r.title":         regexp.MustCompile(`(?i)(?:` + `title` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.release_group": regexp.MustCompile(`(?i)(?:` + `release_group` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.category":      regexp.MustCompile(`(?i)(?:` + `category` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.season":        regexp.MustCompile(`(?i)(?:` + `season` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.episode":       regexp.MustCompile(`(?i)(?:` + `episode` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.year":          regexp.MustCompile(`(?i)(?:` + `year` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.resolution":    regexp.MustCompile(`(?i)(?:` + `resolution` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.source":        regexp.MustCompile(`(?i)(?:` + `source` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.codec":         regexp.MustCompile(`(?i)(?:` + `codec` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.hdr":           regexp.MustCompile(`(?i)(?:` + `hdr` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+	"r.filter":        regexp.MustCompile(`(?i)(?:` + `filter` + `:)(?P<value>'.*?'|".*?"|\S+)`),
+}
+
 func (repo *ReleaseRepo) findReleases(ctx context.Context, tx *Tx, params domain.ReleaseQueryParams) ([]*domain.Release, int64, int64, error) {
 	whereQueryBuilder := sq.And{}
 	if params.Cursor > 0 {

@@ -119,27 +133,12 @@ func (repo *ReleaseRepo) findReleases(ctx context.Context, tx *Tx, params domain
 	}
 
 	if params.Search != "" {
-		reserved := map[string]string{
-			"title":      "r.title",
-			"group":      "r.release_group",
-			"category":   "r.category",
-			"season":     "r.season",
-			"episode":    "r.episode",
-			"year":       "r.year",
-			"resolution": "r.resolution",
-			"source":     "r.source",
-			"codec":      "r.codec",
-			"hdr":        "r.hdr",
-			"filter":     "r.filter",
-		}
-
 		search := strings.TrimSpace(params.Search)
-		for k, v := range reserved {
-			r := regexp.MustCompile(fmt.Sprintf(`(?i)(?:%s:)(?P<value>'.*?'|".*?"|\S+)`, k))
-			if reskey := r.FindAllStringSubmatch(search, -1); len(reskey) != 0 {
+		for dbField, regex := range reservedSearch {
+			if reskey := regex.FindAllStringSubmatch(search, -1); len(reskey) != 0 {
 				filter := sq.Or{}
 				for _, found := range reskey {
-					filter = append(filter, repo.db.ILike(v, strings.ReplaceAll(strings.Trim(strings.Trim(found[1], `"`), `'`), ".", "_")+"%"))
+					filter = append(filter, repo.db.ILike(dbField, strings.ReplaceAll(strings.Trim(strings.Trim(found[1], `"`), `'`), ".", "_")+"%"))
 				}
 
 				if len(filter) == 0 {

@@ -147,7 +146,7 @@ func (repo *ReleaseRepo) findReleases(ctx context.Context, tx *Tx, params domain
 			}
 
 			whereQueryBuilder = append(whereQueryBuilder, filter)
-			search = strings.TrimSpace(r.ReplaceAllLiteralString(search, ""))
+			search = strings.TrimSpace(regex.ReplaceAllLiteralString(search, ""))
 		}
 	}
 
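The commit hoists these patterns into the package-level reservedSearch map, so they are compiled once instead of once per key on every search request. A minimal, self-contained sketch of how such a pattern pulls key:value tokens out of a query string and turns them into ILIKE filters; the two-entry map and the sample query are illustrative only, not taken from the commit:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// Same pattern shape as the diff: key + ':' followed by a quoted or bare value.
var reservedSearch = map[string]*regexp.Regexp{
	"r.title":      regexp.MustCompile(`(?i)(?:title:)(?P<value>'.*?'|".*?"|\S+)`),
	"r.resolution": regexp.MustCompile(`(?i)(?:resolution:)(?P<value>'.*?'|".*?"|\S+)`),
}

func main() {
	search := strings.TrimSpace(`title:"The Show" resolution:1080p leftover words`)

	for dbField, regex := range reservedSearch {
		for _, found := range regex.FindAllStringSubmatch(search, -1) {
			value := strings.Trim(strings.Trim(found[1], `"`), `'`)
			// Mirrors the repo code: dots become underscores and a trailing % makes it a prefix match.
			fmt.Printf("%s ILIKE %q\n", dbField, strings.ReplaceAll(value, ".", "_")+"%")
		}
		// Strip the consumed key:value tokens; the remainder can be used as a free-text search.
		search = strings.TrimSpace(regex.ReplaceAllLiteralString(search, ""))
	}

	fmt.Printf("free-text remainder: %q\n", search)
}
```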
@@ -7,12 +7,12 @@ import (
 	"context"
 	"fmt"
 	"os/exec"
-	"regexp"
 	"strconv"
 	"strings"
 	"time"
 
 	"github.com/autobrr/autobrr/pkg/errors"
+	"github.com/autobrr/autobrr/pkg/regexcache"
 	"github.com/autobrr/autobrr/pkg/sanitize"
 	"github.com/autobrr/autobrr/pkg/wildcard"

@@ -774,7 +774,7 @@ func matchRegex(tag string, filterList string) bool {
 		if filter == "" {
 			continue
 		}
-		re, err := regexp.Compile(`(?i)(?:` + filter + `)`)
+		re, err := regexcache.Compile(`(?i)(?:` + filter + `)`)
 		if err != nil {
 			return false
 		}
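matchRegex compiles one expression per entry of the user's filter list for every release it evaluates, which is why routing the compile through a cache pays off. A standalone sketch of that loop using the standard library directly; it assumes the list is comma-separated, and the sample tag and filters are invented:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// Rough stand-in for matchRegex; the real code compiles via regexcache.Compile.
func matchRegex(tag string, filterList string) bool {
	for _, filter := range strings.Split(filterList, ",") {
		if filter == "" {
			continue
		}
		re, err := regexp.Compile(`(?i)(?:` + filter + `)`)
		if err != nil {
			return false
		}
		if re.MatchString(tag) {
			return true
		}
	}
	return false
}

func main() {
	fmt.Println(matchRegex("Some.Show.S01E01.2160p.WEB-DL", `\.1080p\.,\.2160p\.`)) // true
}
```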
@@ -205,6 +205,8 @@ func (p IRCParserOrpheus) replaceSeparator(s string) string {
 	return strings.ReplaceAll(s, "–", "-")
 }
 
+var lastDecimalTag = regexp.MustCompile(`^\d{1,2}$|^100$`)
+
 func (p IRCParserOrpheus) Parse(rls *Release, vars map[string]string) error {
 	// OPS uses en-dashes as separators, which causes moistari/rls to not parse the torrentName properly,
 	// we replace the en-dashes with hyphens here

@@ -219,7 +221,7 @@ func (p IRCParserOrpheus) Parse(rls *Release, vars map[string]string) error {
 	// Check and replace the last tag if it's a number between 0 and 100
 	if len(splittedTags) > 0 {
 		lastTag := splittedTags[len(splittedTags)-1]
-		match, _ := regexp.MatchString(`^\d{1,2}$|^100$`, lastTag)
+		match := lastDecimalTag.MatchString(lastTag)
 		if match {
 			splittedTags[len(splittedTags)-1] = lastTag + "%"
 		}
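With lastDecimalTag hoisted to a package-level variable, the 0–100 check no longer recompiles its pattern for every announce. A small illustrative sketch of the tag rewrite; the sample tag list is made up:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// Same pattern as the diff: a one- or two-digit number, or exactly 100.
var lastDecimalTag = regexp.MustCompile(`^\d{1,2}$|^100$`)

func main() {
	splittedTags := strings.Split("FLAC / Lossless / Log / 95", " / ")

	// Check and replace the last tag if it's a number between 0 and 100.
	if len(splittedTags) > 0 {
		lastTag := splittedTags[len(splittedTags)-1]
		if lastDecimalTag.MatchString(lastTag) {
			splittedTags[len(splittedTags)-1] = lastTag + "%"
		}
	}

	fmt.Println(strings.Join(splittedTags, " / ")) // FLAC / Lossless / Log / 95%
}
```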
@@ -7,8 +7,6 @@ import (
 	"fmt"
 	"regexp"
 	"strconv"
-
-	"github.com/autobrr/autobrr/pkg/errors"
 )
 
 var types map[string][]*TagInfo

@@ -261,13 +259,9 @@ func init() {
 	// language `(?i)\b((DK|DKSUBS|DANiSH|DUTCH|NL|NLSUBBED|ENG|FI|FLEMiSH|FiNNiSH|DE|FRENCH|GERMAN|HE|HEBREW|HebSub|HiNDi|iCELANDiC|KOR|MULTi|MULTiSUBS|NORWEGiAN|NO|NORDiC|PL|PO|POLiSH|PLDUB|RO|ROMANiAN|RUS|SPANiSH|SE|SWEDiSH|SWESUB||))\b`)
 	// websites `(?i)\b((AMBC|AS|AMZN|AMC|ANPL|ATVP|iP|CORE|BCORE|CMOR|CN|CBC|CBS|CMAX|CNBC|CC|CRIT|CR|CSPN|CW|DAZN|DCU|DISC|DSCP|DSNY|DSNP|DPLY|ESPN|FOX|FUNI|PLAY|HBO|HMAX|HIST|HS|HOTSTAR|HULU|iT|MNBC|MTV|NATG|NBC|NF|NICK|NRK|PMNT|PMNP|PCOK|PBS|PBSK|PSN|QIBI|SBS|SHO|STAN|STZ|SVT|SYFY|TLC|TRVL|TUBI|TV3|TV4|TVL|VH1|VICE|VMEO|UFC|USAN|VIAP|VIAPLAY|VL|WWEN|XBOX|YHOO|YT|RED))\b`)
 
-	for s, infos := range types {
+	for _, infos := range types {
 		for _, info := range infos {
-			var err error
-			//if info.re, err = regexp.Compile(`(?i)^(?:` + info.RE() + `)$`); err != nil {
-			if info.re, err = regexp.Compile(`(?i)(?:` + info.RE() + `)`); err != nil {
-				errors.Wrap(err, "tag %q has invalid regexp %q\n", s, info.re)
-			}
+			info.re = regexp.MustCompile(`(?i)(?:` + info.RE() + `)`)
 		}
 	}
 }
@@ -2,8 +2,8 @@ package indexer
 
 import (
 	"errors"
-	"regexp"
 
+	"github.com/autobrr/autobrr/pkg/regexcache"
 	"github.com/rs/zerolog"
 )
 

@@ -12,7 +12,7 @@ type Logger interface {
 }
 
 func regExMatch(pattern string, value string) ([]string, error) {
-	rxp, err := regexp.Compile(pattern)
+	rxp, err := regexcache.Compile(pattern)
 	if err != nil {
 		return nil, err
 	}

@@ -54,7 +54,7 @@ func parseExtract(logger Logger, pattern string, vars []string, tmpVars map[stri
 }
 
 func parseMatchRegexp(pattern string, tmpVars map[string]string, line string, ignore bool) (bool, error) {
-	var re = regexp.MustCompile(`(?mi)` + pattern)
+	var re = regexcache.MustCompile(`(?mi)` + pattern)
 
 	groupNames := re.SubexpNames()
 	for _, match := range re.FindAllStringSubmatch(line, -1) {
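parseMatchRegexp keeps its named capture groups; only the compile call now goes through the cache. A rough sketch of the extraction it performs, using regexp directly so it runs standalone; the pattern and the announce line are invented examples:

```go
package main

import (
	"fmt"
	"regexp"
)

func main() {
	// In the indexer code this would be regexcache.MustCompile(`(?mi)` + pattern).
	re := regexp.MustCompile(`(?mi)` + `New Torrent: (?P<torrentName>.+) - Size: (?P<torrentSize>\d+ ?[KMGT]i?B)`)

	line := "New Torrent: Some.Show.S01E01.1080p.WEB-DL - Size: 2 GiB"

	tmpVars := map[string]string{}
	groupNames := re.SubexpNames()
	for _, match := range re.FindAllStringSubmatch(line, -1) {
		for i, name := range groupNames {
			if i == 0 || name == "" {
				continue
			}
			tmpVars[name] = match[i]
		}
	}

	fmt.Println(tmpVars["torrentName"], "|", tmpVars["torrentSize"])
}
```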
@@ -37,9 +37,10 @@ func (s *lunaSeaSender) Name() string {
 	return "lunasea"
 }
 
+var lunaWebhook = regexp.MustCompile(`/(radarr|sonarr|lidarr|tautulli|overseerr)/`)
+
 func (s *lunaSeaSender) rewriteWebhookURL(url string) string {
-	re := regexp.MustCompile(`/(radarr|sonarr|lidarr|tautulli|overseerr)/`)
-	return re.ReplaceAllString(url, "/custom/")
+	return lunaWebhook.ReplaceAllString(url, "/custom/")
 } // `custom` is not mentioned in their docs, so I thought this would be a good idea to add to avoid user errors
 
 func NewLunaSeaSender(log zerolog.Logger, settings domain.Notification) domain.NotificationSender {
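The webhook rewrite itself is unchanged; the pattern is simply compiled once at package load. For illustration, with a placeholder URL:

```go
package main

import (
	"fmt"
	"regexp"
)

var lunaWebhook = regexp.MustCompile(`/(radarr|sonarr|lidarr|tautulli|overseerr)/`)

func main() {
	// Placeholder URL, not a real LunaSea endpoint.
	url := "https://example.com/v1/webhook/radarr/abcdef123456"
	fmt.Println(lunaWebhook.ReplaceAllString(url, "/custom/"))
	// https://example.com/v1/webhook/custom/abcdef123456
}
```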
@@ -19,9 +19,10 @@ func (c Category) String() string {
 	return fmt.Sprintf("%s[%d]", c.Name, c.ID)
 }
 
+var newzCategory = regexp.MustCompile(`(?m)(.+)\[(.+)\]`)
+
 func (c Category) FromString(str string) {
-	var re = regexp.MustCompile(`(?m)(.+)\[(.+)\]`)
-	match := re.FindAllString(str, -1)
+	match := newzCategory.FindAllString(str, -1)
 
 	c.Name = match[1]
 	c.ID, _ = strconv.Atoi(match[2])
pkg/regexcache/regex.go (new file, 71 lines)

@@ -0,0 +1,71 @@
+// Copyright (c) 2021 - 2024, Ludvig Lundgren and the autobrr contributors.
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+package regexcache
+
+import (
+	"regexp"
+	"time"
+
+	"github.com/jellydator/ttlcache/v3"
+)
+
+var cache = ttlcache.New[string, *regexp.Regexp](
+	ttlcache.WithTTL[string, *regexp.Regexp](5 * time.Minute),
+)
+
+func init() {
+	go cache.Start()
+}
+
+func MustCompilePOSIX(pattern string) *regexp.Regexp {
+	item := cache.Get(pattern)
+	if item != nil {
+		return item.Value()
+	}
+
+	reg := regexp.MustCompilePOSIX(pattern)
+	cache.Set(pattern, reg, ttlcache.NoTTL)
+	return reg
+}
+
+func MustCompile(pattern string) *regexp.Regexp {
+	item := cache.Get(pattern)
+	if item != nil {
+		return item.Value()
+	}
+
+	reg := regexp.MustCompile(pattern)
+	cache.Set(pattern, reg, ttlcache.NoTTL)
+	return reg
+}
+
+func CompilePOSIX(pattern string) (*regexp.Regexp, error) {
+	item := cache.Get(pattern)
+	if item != nil {
+		return item.Value(), nil
+	}
+
+	reg, err := regexp.CompilePOSIX(pattern)
+	if err != nil {
+		return nil, err
+	}
+
+	cache.Set(pattern, reg, ttlcache.DefaultTTL)
+	return reg, nil
+}
+
+func Compile(pattern string) (*regexp.Regexp, error) {
+	item := cache.Get(pattern)
+	if item != nil {
+		return item.Value(), nil
+	}
+
+	reg, err := regexp.Compile(pattern)
+	if err != nil {
+		return nil, err
+	}
+
+	cache.Set(pattern, reg, ttlcache.DefaultTTL)
+	return reg, nil
+}
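The new pkg/regexcache package memoizes compiled patterns in a ttlcache: the Compile/CompilePOSIX variants store entries with the default 5-minute TTL, while MustCompile/MustCompilePOSIX pin theirs with NoTTL. A rough standalone sketch of the same idea, simplified to a mutex-guarded map, so it omits expiry and the ttlcache dependency:

```go
package main

import (
	"fmt"
	"regexp"
	"sync"
)

// Simplified stand-in for pkg/regexcache: memoize compiled patterns by their source text.
var (
	mu    sync.Mutex
	cache = map[string]*regexp.Regexp{}
)

func compileCached(pattern string) (*regexp.Regexp, error) {
	mu.Lock()
	defer mu.Unlock()

	if re, ok := cache[pattern]; ok {
		return re, nil // cache hit: reuse the already-compiled program
	}

	re, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err
	}
	cache[pattern] = re
	return re, nil
}

func main() {
	for i := 0; i < 3; i++ {
		re, err := compileCached(`(?i)(?:1080p|2160p)`)
		if err != nil {
			panic(err)
		}
		fmt.Println(re.MatchString("Some.Show.S01E01.2160p.WEB-DL"))
	}
	fmt.Println("compiled patterns held:", len(cache)) // 1, not 3
}
```

The real package trades the unbounded map for ttlcache so patterns that stop being used fall out of memory after the TTL.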
@@ -19,10 +19,10 @@ func (c Category) String() string {
 	return fmt.Sprintf("%s[%d]", c.Name, c.ID)
 }
 
-func (c Category) FromString(str string) {
-	var re = regexp.MustCompile(`(?m)(.+)\[(.+)\]`)
-	match := re.FindAllString(str, -1)
-
+var catRegex = regexp.MustCompile(`(?m)(.+)\[(.+)\]`)
+
+func (c Category) FromString(str string) {
+	match := catRegex.FindAllString(str, -1)
 	c.Name = match[1]
 	c.ID, _ = strconv.Atoi(match[2])
 }
@@ -3,6 +3,14 @@
 
 package wildcard
 
+import (
+	"regexp"
+	"strings"
+
+	"github.com/autobrr/autobrr/pkg/regexcache"
+	"github.com/rs/zerolog/log"
+)
+
 // MatchSimple - finds whether the text matches/satisfies the pattern string.
 // supports only '*' wildcard in the pattern.
 // considers a file system path as a flat name space.

@@ -14,7 +22,7 @@ func MatchSimple(pattern, name string) bool {
 		return true
 	}
 	// Does only wildcard '*' match.
-	return deepMatchRune([]rune(name), []rune(pattern), true)
+	return deepMatchRune(name, pattern, true)
 }
 
 // Match - finds whether the text matches/satisfies the pattern string.

@@ -29,26 +37,32 @@ func Match(pattern, name string) (matched bool) {
 		return true
 	}
 	// Does extended wildcard '*' and '?' match.
-	return deepMatchRune([]rune(name), []rune(pattern), false)
+	return deepMatchRune(name, pattern, false)
 }
 
-func deepMatchRune(str, pattern []rune, simple bool) bool {
-	for len(pattern) > 0 {
-		switch pattern[0] {
-		default:
-			if len(str) == 0 || str[0] != pattern[0] {
-				return false
-			}
-		case '?':
-			if len(str) == 0 && !simple {
-				return false
-			}
-		case '*':
-			return deepMatchRune(str, pattern[1:], simple) ||
-				(len(str) > 0 && deepMatchRune(str[1:], pattern, simple))
-		}
-		str = str[1:]
-		pattern = pattern[1:]
-	}
-	return len(str) == 0 && len(pattern) == 0
+var convSimple = regexp.MustCompile(regexp.QuoteMeta(`\*`))
+var convWildChar = regexp.MustCompile(regexp.QuoteMeta(`\?`))
+
+func deepMatchRune(str, pattern string, simple bool) bool {
+	pattern = regexp.QuoteMeta(pattern)
+	if strings.Contains(pattern, "*") {
+		pattern = convSimple.ReplaceAllLiteralString(pattern, ".*")
+	}
+
+	if !simple && strings.Contains(pattern, "?") {
+		pattern = convWildChar.ReplaceAllLiteralString(pattern, ".")
+	}
+
+	user, err := regexcache.Compile(pattern)
+	if err != nil {
+		log.Error().Err(err).Msgf("deepMatchRune: unable to parse %q", pattern)
+		return false
+	}
+
+	idx := user.FindStringIndex(str)
+	if idx == nil {
+		return false
+	}
+
+	return idx[1] == len(str)
 }
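The rewritten deepMatchRune no longer walks runes; it quotes the pattern, rewrites `*` to `.*` (and `?` to `.` on the non-simple path), and runs the cached regex against the whole string, counting it as a match only when the match ends at the end of the text. A standalone sketch of that translation using the standard library; the sample patterns mirror the new test cases below:

```go
package main

import (
	"fmt"
	"regexp"
	"strings"
)

// Translate a '*'/'?' wildcard into a regular expression, like the new deepMatchRune.
func wildcardToRegexp(pattern string, simple bool) string {
	pattern = regexp.QuoteMeta(pattern) // '*' becomes `\*`, '?' becomes `\?`
	pattern = strings.ReplaceAll(pattern, `\*`, `.*`)
	if !simple {
		pattern = strings.ReplaceAll(pattern, `\?`, `.`)
	}
	return pattern
}

func main() {
	cases := []struct{ pattern, text string }{
		{"The*Simp", "The.Simp"},
		{"*EPUB*", "Translated (Group) / EPUB"},
		{"The?Simp", "The.Simps.S12"},
	}
	for _, tc := range cases {
		re := regexp.MustCompile(wildcardToRegexp(tc.pattern, false))
		idx := re.FindStringIndex(tc.text)
		// Mirrors the diff: only a match that runs to the end of the text counts.
		fmt.Printf("%-10s vs %-30q -> %v\n", tc.pattern, tc.text, idx != nil && idx[1] == len(tc.text))
	}
}
```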
@@ -29,6 +29,36 @@ func TestMatch(t *testing.T) {
 			text:    "The.Simps.S12",
 			matched: false,
 		},
+		{
+			pattern: "The?Simp",
+			text:    "The.Simps.S12",
+			matched: false,
+		},
+		{
+			pattern: "The?Simp",
+			text:    "The.Simps.S12",
+			matched: false,
+		},
+		{
+			pattern: "The*Simp",
+			text:    "The.Simp",
+			matched: true,
+		},
+		{
+			pattern: "*tv*",
+			text:    "tv",
+			matched: true,
+		},
+		{
+			pattern: "*EPUB*",
+			text:    "Translated (Group) / EPUB",
+			matched: true,
+		},
+		{
+			pattern: "*shift*",
+			text:    "Good show shift S02 2160p ATVP WEB-DL DDP 5.1 Atmos DV HEVC-GROUP",
+			matched: true,
+		},
 	}
 	// Iterating over the test cases, call the function under test and asert the output.
 	for i, testCase := range testCases {