feat(feeds): optimize existing cache items check (#2078)

* feat(feeds): optimize existing items cache check

* feat(feeds): remove ttl from repo method ExistingItems

* feat(feeds): add db integration test for ExistingItems

* feat(feeds): improve release and filter processing

* feat(feeds): fix failing test
This commit is contained in:
ze0s 2025-06-07 12:46:08 +02:00 committed by GitHub
parent 92ddb919a5
commit 46f6fbe5cc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 266 additions and 45 deletions

View file

@@ -124,7 +124,7 @@ func (j *NewznabJob) process(ctx context.Context) error {
}
// process all new releases
go j.ReleaseSvc.ProcessMultiple(releases)
go j.ReleaseSvc.ProcessMultipleFromIndexer(releases, j.Feed.Indexer)
return nil
}
@@ -164,10 +164,9 @@ func (j *NewznabJob) getFeed(ctx context.Context) ([]newznab.FeedItem, error) {
return feed.Channel.Items[i].PubDate.After(feed.Channel.Items[j].PubDate.Time)
})
toCache := make([]domain.FeedCacheItem, 0)
// set ttl to 1 month
ttl := time.Now().AddDate(0, 1, 0)
// Collect all valid GUIDs first
guidItemMap := make(map[string]*newznab.FeedItem)
var guids []string
for _, item := range feed.Channel.Items {
if item.GUID == "" {
@@ -175,13 +174,22 @@ func (j *NewznabJob) getFeed(ctx context.Context) ([]newznab.FeedItem, error) {
continue
}
exists, err := j.CacheRepo.Exists(j.Feed.ID, item.GUID)
if err != nil {
j.Log.Error().Err(err).Msg("could not check if item exists")
continue
}
guidItemMap[item.GUID] = item
guids = append(guids, item.GUID)
}
if exists {
existingGuids, err := j.CacheRepo.ExistingItems(ctx, j.Feed.ID, guids)
if err != nil {
j.Log.Error().Err(err).Msg("could not get existing items from cache")
return nil, errors.Wrap(err, "could not get existing items from cache")
}
// set ttl to 1 month
ttl := time.Now().AddDate(0, 1, 0)
toCache := make([]domain.FeedCacheItem, 0)
for guid, item := range guidItemMap {
if existingGuids[guid] {
j.Log.Trace().Msgf("cache item exists, skipping release: %s", item.Title)
continue
}
@@ -190,7 +198,7 @@ func (j *NewznabJob) getFeed(ctx context.Context) ([]newznab.FeedItem, error) {
toCache = append(toCache, domain.FeedCacheItem{
FeedId: strconv.Itoa(j.Feed.ID),
Key: item.GUID,
Key: guid,
Value: []byte(item.Title),
TTL: ttl,
})

View file

@@ -105,7 +105,7 @@ func (j *RSSJob) process(ctx context.Context) error {
}
// process all new releases
go j.ReleaseSvc.ProcessMultiple(releases)
go j.ReleaseSvc.ProcessMultipleFromIndexer(releases, j.Feed.Indexer)
return nil
}
@@ -280,11 +280,8 @@ func (j *RSSJob) getFeed(ctx context.Context) (items []*gofeed.Item, err error)
}
//sort.Sort(feed)
toCache := make([]domain.FeedCacheItem, 0)
// set ttl to 1 month
ttl := time.Now().AddDate(0, 1, 0)
guidItemMap := make(map[string]*gofeed.Item)
var guids []string
for _, item := range feed.Items {
key := item.GUID
@@ -295,12 +292,22 @@ func (j *RSSJob) getFeed(ctx context.Context) (items []*gofeed.Item, err error)
}
}
exists, err := j.CacheRepo.Exists(j.Feed.ID, key)
if err != nil {
j.Log.Error().Err(err).Msg("could not check if item exists")
continue
}
if exists {
guidItemMap[key] = item
guids = append(guids, key)
}
existingGuids, err := j.CacheRepo.ExistingItems(ctx, j.Feed.ID, guids)
if err != nil {
j.Log.Error().Err(err).Msgf("error getting existing items from cache")
return
}
// set ttl to 1 month
ttl := time.Now().AddDate(0, 1, 0)
toCache := make([]domain.FeedCacheItem, 0)
for guid, item := range guidItemMap {
if existingGuids[guid] {
j.Log.Trace().Msgf("cache item exists, skipping release: %s", item.Title)
continue
}
@@ -309,7 +316,7 @@ func (j *RSSJob) getFeed(ctx context.Context) (items []*gofeed.Item, err error)
toCache = append(toCache, domain.FeedCacheItem{
FeedId: strconv.Itoa(j.Feed.ID),
Key: key,
Key: guid,
Value: []byte(item.Title),
TTL: ttl,
})

View file

@@ -158,7 +158,7 @@ func (j *TorznabJob) process(ctx context.Context) error {
}
// process all new releases
go j.ReleaseSvc.ProcessMultiple(releases)
go j.ReleaseSvc.ProcessMultipleFromIndexer(releases, j.Feed.Indexer)
return nil
}
@@ -259,10 +259,9 @@ func (j *TorznabJob) getFeed(ctx context.Context) ([]torznab.FeedItem, error) {
return feed.Channel.Items[i].PubDate.After(feed.Channel.Items[j].PubDate.Time)
})
toCache := make([]domain.FeedCacheItem, 0)
// set ttl to 1 month
ttl := time.Now().AddDate(0, 1, 0)
// Collect all valid GUIDs first
guidItemMap := make(map[string]*torznab.FeedItem)
var guids []string
for _, item := range feed.Channel.Items {
if item.GUID == "" {
@@ -270,12 +269,24 @@ func (j *TorznabJob) getFeed(ctx context.Context) ([]torznab.FeedItem, error) {
continue
}
exists, err := j.CacheRepo.Exists(j.Feed.ID, item.GUID)
if err != nil {
j.Log.Error().Err(err).Msg("could not check if item exists")
continue
}
if exists {
guidItemMap[item.GUID] = item
guids = append(guids, item.GUID)
}
// Batch check which GUIDs already exist in the cache
existingGuids, err := j.CacheRepo.ExistingItems(ctx, j.Feed.ID, guids)
if err != nil {
j.Log.Error().Err(err).Msg("could not check existing items")
return nil, errors.Wrap(err, "could not check existing items")
}
// set ttl to 1 month
ttl := time.Now().AddDate(0, 1, 0)
toCache := make([]domain.FeedCacheItem, 0)
// Process items that don't exist in the cache
for guid, item := range guidItemMap {
if existingGuids[guid] {
j.Log.Trace().Msgf("cache item exists, skipping release: %s", item.Title)
continue
}
@@ -284,12 +295,12 @@ func (j *TorznabJob) getFeed(ctx context.Context) ([]torznab.FeedItem, error) {
toCache = append(toCache, domain.FeedCacheItem{
FeedId: strconv.Itoa(j.Feed.ID),
Key: item.GUID,
Key: guid,
Value: []byte(item.Title),
TTL: ttl,
})
// only append if we successfully added to cache
// Add item to result list
items = append(items, *item)
}