feat(filters): skip duplicates (#1711)

* feat(filters): skip duplicates

* fix: add interface instead of any

* fix(filters): tonullint

* feat(filters): skip dupes check month day

* chore: cleanup

* feat(db): set autoincrement id

* feat(filters): add repack and proper to dupe profile

* feat(filters): add default dupe profiles

* feat(duplicates): check audio and website

* feat(duplicates): update tests

* feat(duplicates): add toggles on addform

* feat(duplicates): fix sqlite upgrade path and initialize duplicate profiles

* feat(duplicates): simplify sqlite upgrade

avoiding temp table and unwieldy select.  Besides, FK constraints
are turned off anyway in #229.

* feat(duplicates): change CheckIsDuplicateRelease treatment of PROPER and REPACK

"Proper" and "Repack" are not parallel to the other conditions like "Title",
so they do not belong as dedup conditions.  "PROPER" means there was an issue in
the previous release, and so a PROPER is never a duplicate, even if it replaces
another PROPER.  Similarly, "REPACK" means there was an issue in the previous
release by that group, and so it is a duplicate only if we previously took a
release from a DIFFERENT group.

I have not removed Proper and Repack from the UI or the schema yet.

* feat(duplicates): update postgres schema to match sqlite

* feat(duplicates): fix web build errors

* feat(duplicates): fix postgres errors

* feat(filters): do leftjoin for duplicate profile

* fix(filters): partial update dupe profile

* go fmt `internal/domain/filter.go`

* feat(duplicates): restore straightforward logic for proper/repack

* feat(duplicates): remove mostly duplicate TV duplicate profiles

Having one profile seems the cleanest.  If somebody wants multiple
resolutions then they can add Resolution to the duplicate profile.
Tested this profile with both weekly episodic releases and daily
show releases.

* feat(release): add db indexes and sub_title

* feat(release): add IsDuplicate tests

* feat(release): update action handler

* feat(release): add more tests for skip duplicates

* feat(duplicates): check audio

* feat(duplicates): add more tests

* feat(duplicates): match edition cut and more

* fix(duplicates): tests

* fix(duplicates): missing imports

* fix(duplicates): tests

* feat(duplicates): handle sub_title edition and language in ui

* fix(duplicates): tests

* feat(duplicates): check name against normalized hash

* fix(duplicates): tests

* chore: update .gitignore to ignore .pnpm-store

* fix: tests

* fix(filters): tests

* fix: bad conflict merge

* fix: update release type in test

* fix: use vendored hot-toast

* fix: release_test.go

* fix: rss_test.go

* feat(duplicates): improve title hashing for unique check

* feat(duplicates): further improve title hashing for unique check with lang

* feat(duplicates): fix tests

* feat(duplicates): add macros IsDuplicate and DuplicateProfile ID and name

* feat(duplicates): add normalized hash match option

* fix: headlessui-state prop warning

* fix(duplicates): add missing year in daily ep normalize

* fix(duplicates): check rejections len

---------

Co-authored-by: ze0s <ze0s@riseup.net>
This commit is contained in:
kenstir 2024-12-25 16:33:46 -05:00 committed by GitHub
parent d153ac44b8
commit 4009554d10
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
49 changed files with 3792 additions and 743 deletions

View file

@ -46,6 +46,7 @@ type Service interface {
AdditionalRecordLabelCheck(ctx context.Context, f *domain.Filter, release *domain.Release) (bool, error)
CheckSmartEpisodeCanDownload(ctx context.Context, params *domain.SmartEpisodeParams) (bool, error)
GetDownloadsByFilterId(ctx context.Context, filterID int) (*domain.FilterDownloads, error)
CheckIsDuplicateRelease(ctx context.Context, profile *domain.DuplicateReleaseProfile, release *domain.Release) (bool, error)
}
type service struct {
@ -374,6 +375,8 @@ func (s *service) Delete(ctx context.Context, filterID int) error {
func (s *service) CheckFilter(ctx context.Context, f *domain.Filter, release *domain.Release) (bool, error) {
l := s.log.With().Str("method", "CheckFilter").Logger()
l.Debug().Msgf("checking filter: %s with release %s", f.Name, release.TorrentName)
l.Trace().Msgf("checking filter: %s %+v", f.Name, f)
l.Trace().Msgf("checking filter: %s for release: %+v", f.Name, release)
@ -393,110 +396,130 @@ func (s *service) CheckFilter(ctx context.Context, f *domain.Filter, release *do
return false, nil
}
if matchedFilter {
// smartEpisode check
if f.SmartEpisode {
params := &domain.SmartEpisodeParams{
Title: release.Title,
Season: release.Season,
Episode: release.Episode,
Year: release.Year,
Month: release.Month,
Day: release.Day,
Repack: release.Repack,
Proper: release.Proper,
Group: release.Group,
}
canDownloadShow, err := s.CheckSmartEpisodeCanDownload(ctx, params)
if err != nil {
l.Trace().Msgf("failed smart episode check: %s", f.Name)
return false, nil
}
if !canDownloadShow {
l.Trace().Msgf("failed smart episode check: %s", f.Name)
if params.IsDailyEpisode() {
f.RejectReasons.Add("smart episode", fmt.Sprintf("not new (%s) daily: %d-%d-%d", release.Title, release.Year, release.Month, release.Day), fmt.Sprintf("expected newer than (%s) daily: %d-%d-%d", release.Title, release.Year, release.Month, release.Day))
} else {
f.RejectReasons.Add("smart episode", fmt.Sprintf("not new (%s) season: %d ep: %d", release.Title, release.Season, release.Episode), fmt.Sprintf("expected newer than (%s) season: %d ep: %d", release.Title, release.Season, release.Episode))
}
return false, nil
}
}
// if matched, do additional size check if needed, attach actions and return the filter
l.Debug().Msgf("found and matched filter: %s", f.Name)
// If size constraints are set in a filter and the indexer did not
// announce the size, we need to do an additional out of band size check.
if release.AdditionalSizeCheckRequired {
l.Debug().Msgf("(%s) additional size check required", f.Name)
ok, err := s.AdditionalSizeCheck(ctx, f, release)
if err != nil {
l.Error().Err(err).Msgf("(%s) additional size check error", f.Name)
return false, err
}
if !ok {
l.Trace().Msgf("(%s) additional size check not matching what filter wanted", f.Name)
return false, nil
}
}
// check uploader if the indexer supports check via api
if release.AdditionalUploaderCheckRequired {
l.Debug().Msgf("(%s) additional uploader check required", f.Name)
ok, err := s.AdditionalUploaderCheck(ctx, f, release)
if err != nil {
l.Error().Err(err).Msgf("(%s) additional uploader check error", f.Name)
return false, err
}
if !ok {
l.Trace().Msgf("(%s) additional uploader check not matching what filter wanted", f.Name)
return false, nil
}
}
if release.AdditionalRecordLabelCheckRequired {
l.Debug().Msgf("(%s) additional record label check required", f.Name)
ok, err := s.AdditionalRecordLabelCheck(ctx, f, release)
if err != nil {
l.Error().Err(err).Msgf("(%s) additional record label check error", f.Name)
return false, err
}
if !ok {
l.Trace().Msgf("(%s) additional record label check not matching what filter wanted", f.Name)
return false, nil
}
}
// run external filters
if f.External != nil {
externalOk, err := s.RunExternalFilters(ctx, f, f.External, release)
if err != nil {
l.Error().Err(err).Msgf("(%s) external filter check error", f.Name)
return false, err
}
if !externalOk {
l.Debug().Msgf("(%s) external filter check not matching what filter wanted", f.Name)
return false, nil
}
}
return true, nil
if !matchedFilter {
// if no match, return nil
return false, nil
}
// if no match, return nil
return false, nil
// smartEpisode check
if f.SmartEpisode {
params := &domain.SmartEpisodeParams{
Title: release.Title,
Season: release.Season,
Episode: release.Episode,
Year: release.Year,
Month: release.Month,
Day: release.Day,
Repack: release.Repack,
Proper: release.Proper,
Group: release.Group,
}
canDownloadShow, err := s.CheckSmartEpisodeCanDownload(ctx, params)
if err != nil {
l.Trace().Msgf("failed smart episode check: %s", f.Name)
return false, nil
}
if !canDownloadShow {
l.Trace().Msgf("failed smart episode check: %s", f.Name)
if params.IsDailyEpisode() {
f.RejectReasons.Add("smart episode", fmt.Sprintf("not new (%s) daily: %d-%d-%d", release.Title, release.Year, release.Month, release.Day), fmt.Sprintf("expected newer than (%s) daily: %d-%d-%d", release.Title, release.Year, release.Month, release.Day))
} else {
f.RejectReasons.Add("smart episode", fmt.Sprintf("not new (%s) season: %d ep: %d", release.Title, release.Season, release.Episode), fmt.Sprintf("expected newer than (%s) season: %d ep: %d", release.Title, release.Season, release.Episode))
}
return false, nil
}
}
// check duplicates
if f.DuplicateHandling != nil {
l.Debug().Msgf("(%s) check is duplicate with profile %s", f.Name, f.DuplicateHandling.Name)
release.SkipDuplicateProfileID = f.DuplicateHandling.ID
release.SkipDuplicateProfileName = f.DuplicateHandling.Name
isDuplicate, err := s.CheckIsDuplicateRelease(ctx, f.DuplicateHandling, release)
if err != nil {
return false, errors.Wrap(err, "error finding duplicate handle")
}
if isDuplicate {
l.Debug().Msgf("filter %s rejected release %q as duplicate with profile %q", f.Name, release.TorrentName, f.DuplicateHandling.Name)
f.RejectReasons.Add("duplicate", "duplicate", "not duplicate")
// let it continue so external filters can trigger checks
//return false, nil
release.IsDuplicate = true
}
}
// if matched, do additional size check if needed, attach actions and return the filter
l.Debug().Msgf("found and matched filter: %s", f.Name)
// If size constraints are set in a filter and the indexer did not
// announce the size, we need to do an additional out of band size check.
if release.AdditionalSizeCheckRequired {
l.Debug().Msgf("(%s) additional size check required", f.Name)
ok, err := s.AdditionalSizeCheck(ctx, f, release)
if err != nil {
l.Error().Err(err).Msgf("(%s) additional size check error", f.Name)
return false, err
}
if !ok {
l.Trace().Msgf("(%s) additional size check not matching what filter wanted", f.Name)
return false, nil
}
}
// check uploader if the indexer supports check via api
if release.AdditionalUploaderCheckRequired {
l.Debug().Msgf("(%s) additional uploader check required", f.Name)
ok, err := s.AdditionalUploaderCheck(ctx, f, release)
if err != nil {
l.Error().Err(err).Msgf("(%s) additional uploader check error", f.Name)
return false, err
}
if !ok {
l.Trace().Msgf("(%s) additional uploader check not matching what filter wanted", f.Name)
return false, nil
}
}
if release.AdditionalRecordLabelCheckRequired {
l.Debug().Msgf("(%s) additional record label check required", f.Name)
ok, err := s.AdditionalRecordLabelCheck(ctx, f, release)
if err != nil {
l.Error().Err(err).Msgf("(%s) additional record label check error", f.Name)
return false, err
}
if !ok {
l.Trace().Msgf("(%s) additional record label check not matching what filter wanted", f.Name)
return false, nil
}
}
// run external filters
if f.External != nil {
externalOk, err := s.RunExternalFilters(ctx, f, f.External, release)
if err != nil {
l.Error().Err(err).Msgf("(%s) external filter check error", f.Name)
return false, err
}
if !externalOk {
l.Debug().Msgf("(%s) external filter check not matching what filter wanted", f.Name)
return false, nil
}
}
return true, nil
}
// AdditionalSizeCheck performs additional out-of-band checks to determine the
@ -732,10 +755,18 @@ func (s *service) CheckSmartEpisodeCanDownload(ctx context.Context, params *doma
return s.releaseRepo.CheckSmartEpisodeCanDownload(ctx, params)
}
func (s *service) CheckIsDuplicateRelease(ctx context.Context, profile *domain.DuplicateReleaseProfile, release *domain.Release) (bool, error) {
return s.releaseRepo.CheckIsDuplicateRelease(ctx, profile, release)
}
func (s *service) RunExternalFilters(ctx context.Context, f *domain.Filter, externalFilters []domain.FilterExternal, release *domain.Release) (ok bool, err error) {
defer func() {
// try recover panic if anything went wrong with the external filter checks
errors.RecoverPanic(recover(), &err)
if err != nil {
s.log.Error().Err(err).Msgf("filter %s external filter check panic", f.Name)
ok = false
}
}()
// sort filters by index