feat(feeds): optimize existing cache items check (#2078)

* feat(feeds): optimize existing items cache check

* feat(feeds): remove ttl from repo method ExistingItems

* feat(feeds): add db integration test for ExistingItems

* feat(feeds): improve release and filter processing

* feat(feeds): fix failing test
ze0s, 2025-06-07 12:46:08 +02:00 (committed by GitHub)
parent 92ddb919a5
commit 46f6fbe5cc
8 changed files with 266 additions and 45 deletions


@@ -157,6 +157,48 @@ func (r *FeedCacheRepo) Exists(feedId int, key string) (bool, error) {
 	return exists, nil
 }
 
+// ExistingItems checks multiple keys in the cache for a given feed ID
+// and returns a map of existing keys to their values
+func (r *FeedCacheRepo) ExistingItems(ctx context.Context, feedId int, keys []string) (map[string]bool, error) {
+	if len(keys) == 0 {
+		return make(map[string]bool), nil
+	}
+
+	// Build a query that returns all keys that exist in the cache
+	queryBuilder := r.db.squirrel.
+		Select("key").
+		From("feed_cache").
+		Where(sq.Eq{"feed_id": feedId}).
+		Where(sq.Eq{"key": keys})
+
+	query, args, err := queryBuilder.ToSql()
+	if err != nil {
+		return nil, errors.Wrap(err, "error building query")
+	}
+
+	rows, err := r.db.handler.QueryContext(ctx, query, args...)
+	if err != nil {
+		return nil, errors.Wrap(err, "error executing query")
+	}
+	defer rows.Close()
+
+	result := make(map[string]bool)
+	for rows.Next() {
+		var key string
+		if err := rows.Scan(&key); err != nil {
+			return nil, errors.Wrap(err, "error scanning row")
+		}
+		result[key] = true
+	}
+
+	if err := rows.Err(); err != nil {
+		return nil, errors.Wrap(err, "row error")
+	}
+
+	return result, nil
+}
+
 func (r *FeedCacheRepo) Put(feedId int, key string, val []byte, ttl time.Time) error {
 	queryBuilder := r.db.squirrel.
 		Insert("feed_cache").


@@ -155,6 +155,107 @@ func TestFeedCacheRepo_Exists(t *testing.T) {
 	}
 }
 
+func TestFeedCacheRepo_ExistingItems(t *testing.T) {
+	for dbType, db := range testDBs {
+		log := setupLoggerForTest()
+
+		repo := NewFeedCacheRepo(log, db)
+		feedRepo := NewFeedRepo(log, db)
+		indexerRepo := NewIndexerRepo(log, db)
+
+		mockData := getMockFeed()
+		indexerMockData := getMockIndexer()
+
+		t.Run(fmt.Sprintf("ExistingItems_SingleItem_Multi_Keys [%s]", dbType), func(t *testing.T) {
+			// Setup
+			indexer, err := indexerRepo.Store(t.Context(), indexerMockData)
+			assert.NoError(t, err)
+
+			mockData.IndexerID = int(indexer.ID)
+
+			err = feedRepo.Store(t.Context(), mockData)
+			assert.NoError(t, err)
+
+			err = repo.Put(mockData.ID, "test_key", []byte("test_value"), time.Now().Add(time.Hour))
+			assert.NoError(t, err)
+
+			keys := []string{"test_key", "test_key_2"}
+
+			// Execute
+			items, err := repo.ExistingItems(t.Context(), mockData.ID, keys)
+			assert.NoError(t, err)
+			assert.Len(t, items, 1)
+			//assert.True(t, exists)
+
+			// Cleanup
+			_ = feedRepo.Delete(t.Context(), mockData.ID)
+			_ = indexerRepo.Delete(t.Context(), int(indexer.ID))
+			_ = repo.Delete(t.Context(), mockData.ID, "test_key")
+		})
+
+		t.Run(fmt.Sprintf("ExistingItems_MultipleItems [%s]", dbType), func(t *testing.T) {
+			// Setup
+			indexer, err := indexerRepo.Store(t.Context(), indexerMockData)
+			assert.NoError(t, err)
+
+			mockData.IndexerID = int(indexer.ID)
+
+			err = feedRepo.Store(t.Context(), mockData)
+			assert.NoError(t, err)
+
+			err = repo.Put(mockData.ID, "test_key", []byte("test_value"), time.Now().Add(time.Hour))
+			assert.NoError(t, err)
+
+			err = repo.Put(mockData.ID, "test_key_2", []byte("test_value_2"), time.Now().Add(time.Hour))
+			assert.NoError(t, err)
+
+			keys := []string{"test_key", "test_key_2"}
+
+			// Execute
+			items, err := repo.ExistingItems(t.Context(), mockData.ID, keys)
+			assert.NoError(t, err)
+			assert.Len(t, items, 2)
+
+			// Cleanup
+			_ = feedRepo.Delete(t.Context(), mockData.ID)
+			_ = indexerRepo.Delete(t.Context(), int(indexer.ID))
+			_ = repo.Delete(t.Context(), mockData.ID, "test_key")
+		})
+
+		t.Run(fmt.Sprintf("ExistingItems_MultipleItems_Single_Key [%s]", dbType), func(t *testing.T) {
+			// Setup
+			indexer, err := indexerRepo.Store(t.Context(), indexerMockData)
+			assert.NoError(t, err)
+
+			mockData.IndexerID = int(indexer.ID)
+
+			err = feedRepo.Store(t.Context(), mockData)
+			assert.NoError(t, err)
+
+			err = repo.Put(mockData.ID, "test_key", []byte("test_value"), time.Now().Add(time.Hour))
+			assert.NoError(t, err)
+
+			err = repo.Put(mockData.ID, "test_key_2", []byte("test_value_2"), time.Now().Add(time.Hour))
+			assert.NoError(t, err)
+
+			keys := []string{"test_key"}
+
+			// Execute
+			items, err := repo.ExistingItems(t.Context(), mockData.ID, keys)
+			assert.NoError(t, err)
+			assert.Len(t, items, 1)
+
+			// Cleanup
+			_ = feedRepo.Delete(t.Context(), mockData.ID)
+			_ = indexerRepo.Delete(t.Context(), int(indexer.ID))
+			_ = repo.Delete(t.Context(), mockData.ID, "test_key")
+		})
+
+		t.Run(fmt.Sprintf("ExistsItems_Nonexistent_Key [%s]", dbType), func(t *testing.T) {
+			// Execute
+			exists, err := repo.Exists(-1, "nonexistent_key")
+			assert.NoError(t, err)
+			assert.False(t, exists)
+		})
+	}
+}
+
 func TestFeedCacheRepo_Put(t *testing.T) {
 	for dbType, db := range testDBs {
 		log := setupLoggerForTest()


@@ -10,7 +10,8 @@ import (
 )
 
 var (
-	ErrRecordNotFound = sql.ErrNoRows
-	ErrUpdateFailed   = errors.New("update failed")
-	ErrDeleteFailed   = errors.New("delete failed")
+	ErrRecordNotFound                 = sql.ErrNoRows
+	ErrUpdateFailed                   = errors.New("update failed")
+	ErrDeleteFailed                   = errors.New("delete failed")
+	ErrNoActiveFiltersFoundForIndexer = errors.New("no active filters found for indexer")
 )


@@ -13,6 +13,7 @@ type FeedCacheRepo interface {
 	GetByFeed(ctx context.Context, feedId int) ([]FeedCacheItem, error)
 	GetCountByFeed(ctx context.Context, feedId int) (int, error)
 	Exists(feedId int, key string) (bool, error)
+	ExistingItems(ctx context.Context, feedId int, keys []string) (map[string]bool, error)
 	Put(feedId int, key string, val []byte, ttl time.Time) error
 	PutMany(ctx context.Context, items []FeedCacheItem) error
 	Delete(ctx context.Context, feedId int, key string) error
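For callers, the interface change swaps N per-key Exists round trips for one batched query. A minimal sketch of the intended usage pattern, assuming nothing beyond the interface above (the helper name and package are hypothetical, not part of this change):

package feedcache_example // hypothetical package, for illustration only

import (
	"context"

	"github.com/autobrr/autobrr/internal/domain"
)

// filterNewKeys keeps only the keys that are not yet cached for the feed.
// One ExistingItems call replaces len(keys) individual Exists queries.
func filterNewKeys(ctx context.Context, repo domain.FeedCacheRepo, feedID int, keys []string) ([]string, error) {
	existing, err := repo.ExistingItems(ctx, feedID, keys)
	if err != nil {
		return nil, err
	}

	fresh := make([]string, 0, len(keys))
	for _, key := range keys {
		if !existing[key] {
			fresh = append(fresh, key)
		}
	}
	return fresh, nil
}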


@@ -124,7 +124,7 @@ func (j *NewznabJob) process(ctx context.Context) error {
 	}
 
 	// process all new releases
-	go j.ReleaseSvc.ProcessMultiple(releases)
+	go j.ReleaseSvc.ProcessMultipleFromIndexer(releases, j.Feed.Indexer)
 
 	return nil
 }
@@ -164,10 +164,9 @@ func (j *NewznabJob) getFeed(ctx context.Context) ([]newznab.FeedItem, error) {
 		return feed.Channel.Items[i].PubDate.After(feed.Channel.Items[j].PubDate.Time)
 	})
 
-	toCache := make([]domain.FeedCacheItem, 0)
-
-	// set ttl to 1 month
-	ttl := time.Now().AddDate(0, 1, 0)
+	// Collect all valid GUIDs first
+	guidItemMap := make(map[string]*newznab.FeedItem)
+	var guids []string
 
 	for _, item := range feed.Channel.Items {
 		if item.GUID == "" {
@@ -175,13 +174,22 @@ func (j *NewznabJob) getFeed(ctx context.Context) ([]newznab.FeedItem, error) {
 			continue
 		}
 
-		exists, err := j.CacheRepo.Exists(j.Feed.ID, item.GUID)
-		if err != nil {
-			j.Log.Error().Err(err).Msg("could not check if item exists")
-			continue
-		}
-		if exists {
+		guidItemMap[item.GUID] = item
+		guids = append(guids, item.GUID)
+	}
+
+	existingGuids, err := j.CacheRepo.ExistingItems(ctx, j.Feed.ID, guids)
+	if err != nil {
+		j.Log.Error().Err(err).Msg("could not get existing items from cache")
+		return nil, errors.Wrap(err, "could not get existing items from cache")
+	}
+
+	// set ttl to 1 month
+	ttl := time.Now().AddDate(0, 1, 0)
+	toCache := make([]domain.FeedCacheItem, 0)
+
+	for guid, item := range guidItemMap {
+		if existingGuids[guid] {
 			j.Log.Trace().Msgf("cache item exists, skipping release: %s", item.Title)
 			continue
 		}
@@ -190,7 +198,7 @@ func (j *NewznabJob) getFeed(ctx context.Context) ([]newznab.FeedItem, error) {
 		toCache = append(toCache, domain.FeedCacheItem{
 			FeedId: strconv.Itoa(j.Feed.ID),
-			Key:    item.GUID,
+			Key:    guid,
 			Value:  []byte(item.Title),
 			TTL:    ttl,
 		})


@@ -105,7 +105,7 @@ func (j *RSSJob) process(ctx context.Context) error {
 	}
 
 	// process all new releases
-	go j.ReleaseSvc.ProcessMultiple(releases)
+	go j.ReleaseSvc.ProcessMultipleFromIndexer(releases, j.Feed.Indexer)
 
 	return nil
 }
@@ -280,11 +280,8 @@ func (j *RSSJob) getFeed(ctx context.Context) (items []*gofeed.Item, err error) {
 	}
 
 	//sort.Sort(feed)
 
-	toCache := make([]domain.FeedCacheItem, 0)
-
-	// set ttl to 1 month
-	ttl := time.Now().AddDate(0, 1, 0)
+	guidItemMap := make(map[string]*gofeed.Item)
+	var guids []string
 
 	for _, item := range feed.Items {
 		key := item.GUID
@@ -295,12 +292,22 @@ func (j *RSSJob) getFeed(ctx context.Context) (items []*gofeed.Item, err error) {
 			}
 		}
 
-		exists, err := j.CacheRepo.Exists(j.Feed.ID, key)
-		if err != nil {
-			j.Log.Error().Err(err).Msg("could not check if item exists")
-			continue
-		}
-		if exists {
+		guidItemMap[key] = item
+		guids = append(guids, key)
+	}
+
+	existingGuids, err := j.CacheRepo.ExistingItems(ctx, j.Feed.ID, guids)
+	if err != nil {
+		j.Log.Error().Err(err).Msgf("error getting existing items from cache")
+		return
+	}
+
+	// set ttl to 1 month
+	ttl := time.Now().AddDate(0, 1, 0)
+	toCache := make([]domain.FeedCacheItem, 0)
+
+	for guid, item := range guidItemMap {
+		if existingGuids[guid] {
 			j.Log.Trace().Msgf("cache item exists, skipping release: %s", item.Title)
 			continue
 		}
@@ -309,7 +316,7 @@ func (j *RSSJob) getFeed(ctx context.Context) (items []*gofeed.Item, err error) {
 		toCache = append(toCache, domain.FeedCacheItem{
 			FeedId: strconv.Itoa(j.Feed.ID),
-			Key:    key,
+			Key:    guid,
 			Value:  []byte(item.Title),
 			TTL:    ttl,
 		})


@@ -158,7 +158,7 @@ func (j *TorznabJob) process(ctx context.Context) error {
 	}
 
 	// process all new releases
-	go j.ReleaseSvc.ProcessMultiple(releases)
+	go j.ReleaseSvc.ProcessMultipleFromIndexer(releases, j.Feed.Indexer)
 
 	return nil
 }
@@ -259,10 +259,9 @@ func (j *TorznabJob) getFeed(ctx context.Context) ([]torznab.FeedItem, error) {
 		return feed.Channel.Items[i].PubDate.After(feed.Channel.Items[j].PubDate.Time)
 	})
 
-	toCache := make([]domain.FeedCacheItem, 0)
-
-	// set ttl to 1 month
-	ttl := time.Now().AddDate(0, 1, 0)
+	// Collect all valid GUIDs first
+	guidItemMap := make(map[string]*torznab.FeedItem)
+	var guids []string
 
 	for _, item := range feed.Channel.Items {
 		if item.GUID == "" {
@@ -270,12 +269,24 @@ func (j *TorznabJob) getFeed(ctx context.Context) ([]torznab.FeedItem, error) {
 			continue
 		}
 
-		exists, err := j.CacheRepo.Exists(j.Feed.ID, item.GUID)
-		if err != nil {
-			j.Log.Error().Err(err).Msg("could not check if item exists")
-			continue
-		}
-		if exists {
+		guidItemMap[item.GUID] = item
+		guids = append(guids, item.GUID)
+	}
+
+	// Batch check which GUIDs already exist in the cache
+	existingGuids, err := j.CacheRepo.ExistingItems(ctx, j.Feed.ID, guids)
+	if err != nil {
+		j.Log.Error().Err(err).Msg("could not check existing items")
+		return nil, errors.Wrap(err, "could not check existing items")
+	}
+
+	// set ttl to 1 month
+	ttl := time.Now().AddDate(0, 1, 0)
+	toCache := make([]domain.FeedCacheItem, 0)
+
+	// Process items that don't exist in the cache
+	for guid, item := range guidItemMap {
+		if existingGuids[guid] {
 			j.Log.Trace().Msgf("cache item exists, skipping release: %s", item.Title)
 			continue
 		}
@@ -284,12 +295,12 @@ func (j *TorznabJob) getFeed(ctx context.Context) ([]torznab.FeedItem, error) {
 		toCache = append(toCache, domain.FeedCacheItem{
 			FeedId: strconv.Itoa(j.Feed.ID),
-			Key:    item.GUID,
+			Key:    guid,
 			Value:  []byte(item.Title),
 			TTL:    ttl,
 		})
 
-		// only append if we successfully added to cache
+		// Add item to result list
 		items = append(items, *item)
 	}


@@ -30,6 +30,7 @@ type Service interface {
 	Delete(ctx context.Context, req *domain.DeleteReleaseRequest) error
 	Process(release *domain.Release)
 	ProcessMultiple(releases []*domain.Release)
+	ProcessMultipleFromIndexer(releases []*domain.Release, indexer domain.IndexerMinimal) error
 	ProcessManual(ctx context.Context, req *domain.ReleaseProcessReq) error
 	Retry(ctx context.Context, req *domain.ReleaseActionRetryReq) error
@@ -175,8 +176,6 @@ func (s *service) Process(release *domain.Release) {
 		}
 	}()
 
-	defer release.CleanupTemporaryFiles()
-
 	ctx := context.Background()
 
 	// TODO check in config for "Save all releases"
@@ -195,10 +194,23 @@ func (s *service) Process(release *domain.Release) {
 		return
 	}
 
-	if err := s.processFilters(ctx, filters, release); err != nil {
+	if err := s.processRelease(ctx, release, filters); err != nil {
 		s.log.Error().Err(err).Msgf("release.Process: error processing filters for indexer: %s", release.Indexer.Name)
 		return
 	}
+
+	return
+}
+
+func (s *service) processRelease(ctx context.Context, release *domain.Release, filters []*domain.Filter) error {
+	defer release.CleanupTemporaryFiles()
+
+	if err := s.processFilters(ctx, filters, release); err != nil {
+		s.log.Error().Err(err).Msgf("release.Process: error processing filters for indexer: %s", release.Indexer.Name)
+		return err
+	}
+
+	return nil
 }
 
 func (s *service) processFilters(ctx context.Context, filters []*domain.Filter, release *domain.Release) error {
@@ -340,7 +352,6 @@ func (s *service) ProcessMultiple(releases []*domain.Release) {
 	s.log.Debug().Msgf("process (%d) new releases from feed", len(releases))
 
 	for _, rls := range releases {
-		rls := rls
 		if rls == nil {
 			continue
 		}
@@ -348,6 +359,45 @@ func (s *service) ProcessMultiple(releases []*domain.Release) {
 	}
 }
 
+func (s *service) ProcessMultipleFromIndexer(releases []*domain.Release, indexer domain.IndexerMinimal) error {
+	s.log.Debug().Msgf("process (%d) new releases from feed %s", len(releases), indexer.Name)
+
+	defer func() {
+		if r := recover(); r != nil {
+			s.log.Error().Msgf("recovering from panic in release process %s error: %v", "", r)
+			//err := errors.New("panic in release process: %s", release.TorrentName)
+			return
+		}
+	}()
+
+	ctx := context.Background()
+
+	// get filters by priority
+	filters, err := s.filterSvc.FindByIndexerIdentifier(ctx, indexer.Identifier)
+	if err != nil {
+		s.log.Error().Err(err).Msgf("release.Process: error finding filters for indexer: %s", indexer.Name)
+		return err
+	}
+
+	if len(filters) == 0 {
+		s.log.Warn().Msgf("no active filters found for indexer: %s skipping rest..", indexer.Name)
+		return domain.ErrNoActiveFiltersFoundForIndexer
+	}
+
+	for _, release := range releases {
+		if release == nil {
+			continue
+		}
+
+		if err := s.processRelease(ctx, release, filters); err != nil {
+			s.log.Error().Err(err).Msgf("release.ProcessMultipleFromIndexer: error processing filters for indexer: %s", indexer.Name)
+			return nil
+		}
+	}
+
+	return nil
+}
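Unlike Process, which resolves filters per release, ProcessMultipleFromIndexer resolves them once per batch and reuses them across the loop. The new sentinel domain.ErrNoActiveFiltersFoundForIndexer lets a caller treat "no filters configured" as a benign skip rather than a failure. A hedged sketch of such a caller (the feed jobs in this change actually invoke the method with `go` and discard the error; the wrapper and package here are hypothetical):

package example // hypothetical caller, for illustration only

import (
	"errors"
	"log"

	"github.com/autobrr/autobrr/internal/domain"
	"github.com/autobrr/autobrr/internal/release"
)

// dispatch hands a batch of releases from one indexer to the release service.
func dispatch(svc release.Service, releases []*domain.Release, indexer domain.IndexerMinimal) {
	if err := svc.ProcessMultipleFromIndexer(releases, indexer); err != nil {
		// The sentinel marks "nothing to match against", a benign outcome.
		if errors.Is(err, domain.ErrNoActiveFiltersFoundForIndexer) {
			return
		}
		log.Printf("error processing releases from %s: %v", indexer.Name, err)
	}
}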
 func (s *service) runAction(ctx context.Context, action *domain.Action, release *domain.Release, status *domain.ReleaseActionStatus) (*domain.ReleaseActionStatus, error) {
 	// add action status as pending
 	//status := domain.NewReleaseActionStatus(action, release)