fix(filters): RED and OPS lossless parsing and filtering (#1373)

* fix(filters): RED and OPS lossless parsing and filtering

* fix(filters): logscore and EP parsing

* fix(filters): tests

* fix(filters): tests

* feat(definitions): RED parse title variable

* feat(indexers): setup indexer to filter tests

* feat(indexers): tests and improve parsing

* feat(indexers): improve tests
This commit is contained in:
ze0s 2024-01-28 22:03:25 +01:00 committed by GitHub
parent 9db5a8b116
commit 5328078b32
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
15 changed files with 1093 additions and 360 deletions

View file

@ -80,6 +80,7 @@ irc:
- line: 'TORRENT: Dirty Dike Bogies & Alcohol [2008] [Album] CD/MP3/320 hip.hop,uk.hip.hop,united.kingdom https://orpheus.network/torrents.php?id=0000000 https://orpheus.network/torrents.php?id=0000000&torrentid=0000000&action=download'
expect:
torrentName: Dirty Dike Bogies & Alcohol [2008] [Album] CD/MP3/320
title: Dirty Dike Bogies & Alcohol
year: "2008"
category: Album
releaseTags: CD/MP3/320
@ -89,6 +90,7 @@ irc:
- line: 'TORRENT: Various Artists Bicycle Day: 85 Yrs of LSD Special [2023] [Compilation] WEB/FLAC/Lossless ambient,electronic https://orpheus.network/torrents.php?id=0000000 https://orpheus.network/torrents.php?id=0000000&torrentid=0000000&action=download'
expect:
torrentName: 'Various Artists Bicycle Day: 85 Yrs of LSD Special [2023] [Compilation] WEB/FLAC/Lossless'
title: 'Various Artists Bicycle Day: 85 Yrs of LSD Special'
year: "2023"
category: Compilation
releaseTags: WEB/FLAC/Lossless
@ -98,15 +100,17 @@ irc:
- line: 'TORRENT: Snoop Dogg Untitled [2001] [Sampler] Vinyl/MP3/320 https://orpheus.network/torrents.php?id=0000000 https://orpheus.network/torrents.php?id=0000000&torrentid=0000000&action=download'
expect:
torrentName: Snoop Dogg Untitled [2001] [Sampler] Vinyl/MP3/320
title: Snoop Dogg Untitled
year: "2001"
category: Sampler
releaseTags: Vinyl/MP3/320
tags: ""
baseUrl: https://orpheus.network/
torrentId: "0000000"
pattern: 'TORRENT: (.* . \[(.*?)\] \[(.*?)\] (.*)) . \s*(.*) . https?:\/\/.* . (https?:\/\/.*\/).*torrentid=(\d+).*'
pattern: 'TORRENT: ((.*) . \[(.*?)\] \[(.*?)\] (.*)) . \s*(.*) . https?:\/\/.* . (https?:\/\/.*\/).*torrentid=(\d+).*'
vars:
- torrentName
- title
- year
- category
- releaseTags

View file

@ -86,6 +86,7 @@ irc:
- line: Artist - Albumname [2008] [Single] - FLAC / Lossless / Log / 100% / Cue / CD - https://redacted.ch/torrents.php?id=0000000 / https://redacted.ch/torrents.php?action=download&id=0000000 - hip.hop,rhythm.and.blues,2000s
expect:
torrentName: Artist - Albumname [2008] [Single] - FLAC / Lossless / Log / 100% / Cue / CD
title: Artist - Albumname
year: "2008"
category: Single
releaseTags: FLAC / Lossless / Log / 100% / Cue / CD
@ -96,6 +97,7 @@ irc:
- line: A really long name here - Concertos 5 and 6, Suite No 2 [1991] [Album] - FLAC / Lossless / Log / 100% / Cue / CD - https://redacted.ch/torrents.php?id=0000000 / https://redacted.ch/torrents.php?action=download&id=0000000 - classical
expect:
torrentName: A really long name here - Concertos 5 and 6, Suite No 2 [1991] [Album] - FLAC / Lossless / Log / 100% / Cue / CD
title: A really long name here - Concertos 5 and 6, Suite No 2
year: "1991"
category: Album
releaseTags: FLAC / Lossless / Log / 100% / Cue / CD
@ -103,9 +105,10 @@ irc:
baseUrl: https://redacted.ch/
torrentId: "0000000"
tags: classical
pattern: '(.* (?:\[(.*)\] \[(.*)\] - (.*))?) - .*id=(.*) \/ (https?://.+/).+id=(\d+)[ -]*(.*)'
pattern: '((.*) (?:\[(.*)\] \[(.*)\] - (.*))?) - .*id=(.*) \/ (https?://.+/).+id=(\d+)[ -]*(.*)'
vars:
- torrentName
- title
- year
- category
- releaseTags

View file

@ -0,0 +1,362 @@
// Copyright (c) 2021 - 2024, Ludvig Lundgren and the autobrr contributors.
// SPDX-License-Identifier: GPL-2.0-or-later
package indexer
import (
"io"
"testing"
"github.com/autobrr/autobrr/internal/domain"
"github.com/rs/zerolog"
"github.com/stretchr/testify/assert"
)
// TestIndexersParseAndFilter loads indexer definitions from ./definitions,
// parses sample announce lines with them and checks the resulting release
// against a set of filters.
//
// For every indexer case the definition file is opened, every announce is run
// through the definition's IRC parse lines (mirroring announce/announce.go),
// and the parsed release is handed to Filter.CheckFilter (mirroring
// release/service.go). Each filter case states whether it is expected to match
// and lists the expected rejections; only the number of rejections is
// compared, not their text.
//
// A definition that cannot be loaded, an announce line that does not parse, or
// a missing announce line fails the test instead of being skipped, so a broken
// pattern cannot make the filter assertions pass vacuously.
func TestIndexersParseAndFilter(t *testing.T) {
	type fields struct {
		identifier string
		settings   map[string]string
	}

	type filterTest struct {
		filter     *domain.Filter
		match      bool
		rejections []string
	}

	type args struct {
		announceLines []string
		filters       []filterTest
	}

	type subTest struct {
		name  string
		args  args
		match bool
	}

	tests := []struct {
		name     string
		fields   fields
		match    bool
		subTests []subTest
	}{
		{
			name: "ops",
			fields: fields{
				identifier: "orpheus",
				settings: map[string]string{
					"torrent_pass": "pass",
					"api_key":      "key",
				},
			},
			subTests: []subTest{
				{
					name: "announce_1",
					args: args{
						announceLines: []string{"TORRENT: Dirty Dike Bogies & Alcohol [2008] [Album] CD/MP3/320 hip.hop,uk.hip.hop,united.kingdom https://orpheus.network/torrents.php?id=0000000 https://orpheus.network/torrents.php?id=0000000&torrentid=0000000&action=download"},
						filters: []filterTest{
							{
								filter: &domain.Filter{
									Name:            "filter_1",
									MatchCategories: "Album",
									Years:           "2008",
								},
								match: true,
							},
							{
								filter: &domain.Filter{
									Name:            "filter_2",
									MatchCategories: "Single",
									Years:           "2008",
								},
								match:      false,
								rejections: []string{"category not matching. got: Album want: Single"},
							},
						},
					},
					match: false,
				},
				{
					name: "announce_2",
					args: args{
						announceLines: []string{"TORRENT: Dirty Dike Bogies & Alcohol [2024] [EP] CD/FLAC/Lossless hip.hop,uk.hip.hop,united.kingdom https://orpheus.network/torrents.php?id=0000000 https://orpheus.network/torrents.php?id=0000000&torrentid=0000000&action=download"},
						filters: []filterTest{
							{
								filter: &domain.Filter{
									Name:            "filter_1",
									MatchCategories: "EP,Album",
									Years:           "2024",
									Quality:         []string{"Lossless"},
									Sources:         []string{"CD"},
									Formats:         []string{"FLAC"},
								},
								match: true,
							},
							{
								filter: &domain.Filter{
									Name:            "filter_2",
									MatchCategories: "EP,Album",
									Years:           "2024",
									Quality:         []string{"24bit Lossless"},
									Sources:         []string{"CD"},
									Formats:         []string{"FLAC"},
								},
								match:      false,
								rejections: []string{"quality not matching. got: [FLAC Lossless] want: [24bit Lossless]"},
							},
						},
					},
					match: false,
				},
			},
			match: true,
		},
		{
			name: "redacted",
			fields: fields{
				identifier: "red",
				settings: map[string]string{
					"authkey":      "key",
					"torrent_pass": "pass",
					"api_key":      "key",
				},
			},
			subTests: []subTest{
				{
					name: "announce_1",
					args: args{
						announceLines: []string{"Artist - Albumname [2008] [Single] - FLAC / Lossless / Log / 100% / Cue / CD - https://redacted.ch/torrents.php?id=0000000 / https://redacted.ch/torrents.php?action=download&id=0000000 - hip.hop,rhythm.and.blues,2000s"},
						filters: []filterTest{
							{
								filter: &domain.Filter{
									Name:            "filter_1",
									MatchCategories: "Single",
									Years:           "2008",
								},
								match: true,
							},
							{
								filter: &domain.Filter{
									Name:            "filter_2",
									MatchCategories: "Album",
								},
								match:      false,
								rejections: []string{"category not matching. got: Album want: Single"},
							},
						},
					},
					match: false,
				},
				{
					name: "announce_2",
					args: args{
						announceLines: []string{"A really long name here - Concertos 5 and 6, Suite No 2 [1991] [Album] - FLAC / Lossless / Log / 100% / Cue / CD - https://redacted.ch/torrents.php?id=0000000 / https://redacted.ch/torrents.php?action=download&id=0000000 - classical"},
						filters: []filterTest{
							{
								filter: &domain.Filter{
									Name:            "filter_1",
									MatchCategories: "EP,Album",
									Years:           "1991",
									PerfectFlac:     true,
									//Quality: []string{"Lossless"},
									//Sources: []string{"CD"},
									//Formats: []string{"FLAC"},
									Tags: "classical",
								},
								match: true,
							},
							{
								filter: &domain.Filter{
									Name:            "filter_2",
									MatchCategories: "EP,Album",
									Years:           "2024",
									Quality:         []string{"24bit Lossless"},
									Sources:         []string{"CD"},
									Formats:         []string{"FLAC"},
								},
								match:      false,
								rejections: []string{"year not matching. got: 1991 want: 2024", "quality not matching. got: [Cue FLAC Lossless Log100 Log] want: [24bit Lossless]"},
							},
						},
					},
					match: false,
				},
				{
					name: "announce_3",
					args: args{
						announceLines: []string{"The best artist - Album No 2 [2024] [EP] - FLAC / Lossless / Log / 100% / Cue / CD - https://redacted.ch/torrents.php?id=0000000 / https://redacted.ch/torrents.php?action=download&id=0000000 - classical"},
						filters: []filterTest{
							{
								filter: &domain.Filter{
									Name:            "filter_1",
									MatchCategories: "EP",
									Years:           "2024",
									Quality:         []string{"Lossless"},
									Sources:         []string{"CD"},
									Formats:         []string{"FLAC"},
									Log:             true,
									LogScore:        100,
									Cue:             true,
								},
								match: true,
							},
							{
								filter: &domain.Filter{
									Name:            "filter_2",
									MatchCategories: "EP,Album",
									Years:           "2024",
									Quality:         []string{"24bit Lossless"},
									Sources:         []string{"CD"},
									Formats:         []string{"FLAC"},
								},
								match:      false,
								rejections: []string{"quality not matching. got: [FLAC Lossless] want: [24bit Lossless]"},
							},
						},
					},
					match: false,
				},
				{
					name: "announce_4",
					args: args{
						announceLines: []string{"The best artist - Album No 2 [2024] [EP] - FLAC / Lossless / Log / 100% / Cue / CD - https://redacted.ch/torrents.php?id=0000000 / https://redacted.ch/torrents.php?action=download&id=0000000 - classical"},
						filters: []filterTest{
							{
								filter: &domain.Filter{
									Name:            "filter_1",
									MatchCategories: "EP",
									Years:           "2024",
									Quality:         []string{"Lossless"},
									Sources:         []string{"CD"},
									Formats:         []string{"FLAC"},
									Log:             true,
									LogScore:        100,
									Cue:             true,
								},
								match: true,
							},
							{
								filter: &domain.Filter{
									Name:            "filter_2",
									MatchCategories: "EP,Album",
									Years:           "2024",
									Quality:         []string{"24bit Lossless"},
									Sources:         []string{"CD"},
									Formats:         []string{"FLAC"},
								},
								match:      false,
								rejections: []string{"quality not matching. got: [FLAC Lossless] want: [24bit Lossless]"},
							},
						},
					},
					match: false,
				},
				{
					name: "announce_5",
					args: args{
						announceLines: []string{"The best artist - Album No 1 [2024] [EP] - FLAC / Lossless / Log / 87% / Cue / CD - https://redacted.ch/torrents.php?id=0000000 / https://redacted.ch/torrents.php?action=download&id=0000000 - classical"},
						filters: []filterTest{
							{
								filter: &domain.Filter{
									Name:            "filter_1",
									MatchCategories: "EP",
									Years:           "2024",
									Quality:         []string{"Lossless"},
									Sources:         []string{"CD"},
									Formats:         []string{"FLAC"},
									Log:             true,
									LogScore:        100,
									Cue:             true,
								},
								match:      false,
								rejections: []string{"log score. got: 87 want: 100"},
							},
							{
								filter: &domain.Filter{
									Name:            "filter_2",
									MatchCategories: "EP",
									PerfectFlac:     true,
								},
								match:      false,
								rejections: []string{"wanted: perfect flac. got: [Cue FLAC Lossless Log87 Log]"},
							},
						},
					},
					match: false,
				},
			},
			match: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			i, err := OpenAndProcessDefinition("./definitions/" + tt.fields.identifier + ".yaml")
			if err != nil {
				// Stop here: continuing would dereference a nil definition.
				t.Fatalf("could not load definition %q: %v", tt.fields.identifier, err)
			}

			i.SettingsMap = tt.fields.settings

			ll := zerolog.New(io.Discard)

			// indexer subtests
			for _, subT := range tt.subTests {
				t.Run(subT.name, func(t *testing.T) {
					// from announce/announce.go
					tmpVars := map[string]string{}

					for idx, parseLine := range i.IRC.Parse.Lines {
						// The definition decides how many lines make up one
						// announce; the test case must supply all of them.
						if idx >= len(subT.args.announceLines) {
							t.Fatalf("definition expects %d announce lines, test case provides %d", len(i.IRC.Parse.Lines), len(subT.args.announceLines))
						}

						match, err := ParseLine(&ll, parseLine.Pattern, parseLine.Vars, tmpVars, subT.args.announceLines[idx], parseLine.Ignore)
						if err != nil {
							t.Fatalf("could not parse announce line %d: %v", idx, err)
						}

						// A non-matching line must fail loudly; returning
						// quietly would skip every filter assertion below.
						if !match {
							t.Fatalf("announce line %d did not match pattern: %v", idx, parseLine.Pattern)
						}
					}

					rls := domain.NewRelease(i.Identifier)
					rls.Protocol = domain.ReleaseProtocol(i.Protocol)

					// on lines matched
					if err := i.IRC.Parse.Parse(i, tmpVars, rls); err != nil {
						t.Fatalf("could not map parsed vars onto release: %v", err)
					}

					// release/service.go
					for _, filterT := range subT.args.filters {
						t.Run(filterT.filter.Name, func(t *testing.T) {
							filter := filterT.filter

							// save filter on release
							rls.Filter = filter
							rls.FilterName = filter.Name
							rls.FilterID = filter.ID

							// test filter
							rejections, matchedFilter := filter.CheckFilter(rls)
							assert.Len(t, rejections, len(filterT.rejections))
							assert.Equal(t, filterT.match, matchedFilter)
						})
					}
				})
			}
		})
	}
}

View file

@ -516,6 +516,46 @@ func (s *service) LoadIndexerDefinitions() error {
return nil
}
// ErrIndexerDefinitionDeprecated is a sentinel error describing an indexer
// definition written against a deprecated definition version.
// NOTE(review): nothing in the surrounding code returns it yet — confirm where
// it is meant to be used.
var ErrIndexerDefinitionDeprecated = errors.New("DEPRECATED: indexer definition version")
// isValidExtension reports whether ext is a file extension accepted for
// indexer definition files. The comparison is exact and case-sensitive, and
// ext must carry its leading dot (".yaml" or ".yml").
func isValidExtension(ext string) bool {
	switch ext {
	case ".yaml", ".yml":
		return true
	default:
		return false
	}
}
// OpenAndProcessDefinition reads the YAML indexer definition stored at file
// and converts it into a domain.IndexerDefinition.
//
// Fields in the YAML document that the definition struct does not know about
// are tolerated. A definition that does not name an implementation is treated
// as "irc". An error is returned when the file cannot be opened, when it
// cannot be decoded, or when decoding yields no definition at all.
func OpenAndProcessDefinition(file string) (*domain.IndexerDefinition, error) {
	fh, err := os.Open(file)
	if err != nil {
		return nil, errors.Wrap(err, "could not open file: %s", file)
	}
	defer fh.Close()

	decoder := yaml.NewDecoder(fh)
	// Do not fail on unknown fields; strict checking stays switched off.
	decoder.KnownFields(false)

	var custom *domain.IndexerDefinitionCustom
	if decodeErr := decoder.Decode(&custom); decodeErr != nil {
		return nil, errors.Wrap(decodeErr, "could not decode definition file: %s", file)
	}

	// Decoding can succeed and still leave the pointer nil; treat that as an
	// empty definition rather than dereferencing it below.
	if custom == nil {
		return nil, errors.New("empty definition file")
	}

	if custom.Implementation == "" {
		custom.Implementation = "irc"
	}

	// NOTE: the deprecation warning for irc definitions that lack a parse
	// section is not emitted from this function.

	return custom.ToIndexerDefinition(), nil
}
// LoadCustomIndexerDefinitions load definitions from custom path
func (s *service) LoadCustomIndexerDefinitions() error {
if s.config.CustomDefinitions == "" {
@ -539,51 +579,23 @@ func (s *service) LoadCustomIndexerDefinitions() error {
customCount := 0
for _, f := range entries {
fileExtension := filepath.Ext(f.Name())
if fileExtension != ".yaml" && fileExtension != ".yml" {
s.log.Warn().Stack().Msgf("skipping unknown extension definition file: %s", f.Name())
ext := filepath.Ext(f.Name())
if !isValidExtension(ext) {
s.log.Warn().Msgf("unsupported extension %s, definition file: %s", ext, f.Name())
continue
}
file := filepath.Join(s.config.CustomDefinitions, f.Name())
s.log.Trace().Msgf("parsing custom: %s", file)
s.log.Trace().Msgf("parsing custom definition: %s", file)
data, err := os.ReadFile(file)
definition, err := OpenAndProcessDefinition(file)
if err != nil {
s.log.Error().Stack().Err(err).Msgf("failed reading file: %s", file)
return errors.Wrap(err, "could not read file: %s", file)
}
var d *domain.IndexerDefinitionCustom
dec := yaml.NewDecoder(bytes.NewReader(data))
// Do _not_ fail on unknown fields while parsing custom indexer
// definitions for better backwards compatibility. See discussion:
// https://github.com/autobrr/autobrr/pull/1257#issuecomment-1813821391
dec.KnownFields(false)
if err = dec.Decode(&d); err != nil {
s.log.Error().Stack().Err(err).Msgf("failed unmarshal file: %s", file)
return errors.Wrap(err, "could not unmarshal file: %s", file)
}
if d == nil {
s.log.Warn().Msgf("skipping empty file: %s", file)
s.log.Error().Err(err).Msgf("could not open definition file: %s", file)
continue
}
if d.Implementation == "" {
d.Implementation = "irc"
}
// to prevent crashing from non-updated definitions lets skip
if d.Implementation == "irc" && d.IRC != nil {
if d.IRC.Parse == nil {
s.log.Warn().Msgf("DEPRECATED: indexer definition version: %s", file)
}
}
s.definitions[d.Identifier] = *d.ToIndexerDefinition()
s.definitions[definition.Identifier] = *definition
customCount++
}