Mirror of https://github.com/idanoo/autobrr, synced 2025-07-23 00:39:13 +00:00

Feed dates are not always initialized, so on some feed items the date defaults to the Unix epoch. When this happens the results can be unpredictable and not what users intend. To keep sane defaults, we ignore the date if it is before April 1st, 1970, since nothing of interest happened before then.
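Concretely, the guard boils down to one time comparison before the max-age filter runs. A minimal runnable sketch of that check (publishedParsed is a hypothetical stand-in for a feed item's parsed date):

package main

import (
	"fmt"
	"time"
)

func main() {
	// cutoff below which feed dates are treated as uninitialized
	cutoff := time.Date(1970, time.April, 1, 0, 0, 0, 0, time.UTC)

	// hypothetical item whose missing date defaulted to the unix epoch
	publishedParsed := time.Unix(0, 0).UTC()

	// false here means the date is ignored and the item is processed
	// without max-age filtering
	fmt.Println(publishedParsed.After(cutoff))
}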
301 lines | 7 KiB | Go
package feed

import (
	"context"
	"encoding/xml"
	"fmt"
	"net/url"
	"regexp"
	"time"

	"github.com/autobrr/autobrr/internal/domain"
	"github.com/autobrr/autobrr/internal/release"
	"github.com/autobrr/autobrr/pkg/errors"

	"github.com/mmcdole/gofeed"
	"github.com/rs/zerolog"
)
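// RSSJob fetches an RSS feed on a schedule, deduplicates items against the
// feed cache, and hands any new releases to the release service.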
type RSSJob struct {
	Feed              *domain.Feed
	Name              string
	IndexerIdentifier string
	Log               zerolog.Logger
	URL               string
	Repo              domain.FeedRepo
	CacheRepo         domain.FeedCacheRepo
	ReleaseSvc        release.Service
	Timeout           time.Duration

	attempts int
	errors   []error

	JobID int
}
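// NewRSSJob builds an RSSJob with its dependencies: the feed definition,
// repositories, release service, and request timeout.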
func NewRSSJob(feed *domain.Feed, name string, indexerIdentifier string, log zerolog.Logger, url string, repo domain.FeedRepo, cacheRepo domain.FeedCacheRepo, releaseSvc release.Service, timeout time.Duration) *RSSJob {
	return &RSSJob{
		Feed:              feed,
		Name:              name,
		IndexerIdentifier: indexerIdentifier,
		Log:               log,
		URL:               url,
		Repo:              repo,
		CacheRepo:         cacheRepo,
		ReleaseSvc:        releaseSvc,
		Timeout:           timeout,
	}
}
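// Run executes a single feed refresh. On failure the error is logged and
// recorded; on success the attempt counter and error list are reset.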
func (j *RSSJob) Run() {
	if err := j.process(); err != nil {
		j.Log.Error().Err(err).Int("attempts", j.attempts).Msg("rss feed process error")

		j.errors = append(j.errors, err)
		return
	}

	j.attempts = 0
	j.errors = []error{}
}
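// process fetches any new feed items, converts each one into a release, and
// dispatches the batch to the release service for further filtering.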
func (j *RSSJob) process() error {
	items, err := j.getFeed()
	if err != nil {
		j.Log.Error().Err(err).Msgf("error fetching rss feed items")
		return errors.Wrap(err, "error getting rss feed items")
	}

	j.Log.Debug().Msgf("found (%d) new items to process", len(items))

	if len(items) == 0 {
		return nil
	}

	releases := make([]*domain.Release, 0)

	for _, item := range items {
		j.Log.Debug().Msgf("item: %v", item.Title)

		rls := j.processItem(item)
		if rls != nil {
			releases = append(releases, rls)
		}
	}

	// process all new releases
	go j.ReleaseSvc.ProcessMultiple(releases)

	return nil
}
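// processItem maps a single feed item onto a domain.Release: it parses the
// title, resolves the download URL, and fills in size, categories, uploader,
// info hash, and freeleech status. Items older than the configured max age
// return nil.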
func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
	now := time.Now()

	if j.Feed.MaxAge > 0 {
		// feed dates are not always initialized and can default to the unix
		// epoch, so only apply the max age filter to dates after April 1st 1970
		if item.PublishedParsed != nil && item.PublishedParsed.After(time.Date(1970, time.April, 1, 0, 0, 0, 0, time.UTC)) {
			if !isNewerThanMaxAge(j.Feed.MaxAge, *item.PublishedParsed, now) {
				return nil
			}
		}
	}

	rls := domain.NewRelease(j.IndexerIdentifier)
	rls.Implementation = domain.ReleaseImplementationRSS

	rls.ParseString(item.Title)

	if len(item.Enclosures) > 0 {
		e := item.Enclosures[0]
		if e.Type == "application/x-bittorrent" && e.URL != "" {
			rls.TorrentURL = e.URL
		}
		if e.Length != "" {
			rls.ParseSizeBytesString(e.Length)
		}
	}

	if rls.TorrentURL == "" && item.Link != "" {
		rls.TorrentURL = item.Link
	}

	if rls.TorrentURL != "" {
		// handle no baseurl with only relative url
		// grab url from feed url and create full url
		if parsedURL, _ := url.Parse(rls.TorrentURL); parsedURL != nil && len(parsedURL.Hostname()) == 0 {
			if parentURL, _ := url.Parse(j.URL); parentURL != nil {
				parentURL.Path, parentURL.RawPath = "", ""

				// unescape the query params for max compatibility
				unescapedURL, _ := url.QueryUnescape(parentURL.JoinPath(rls.TorrentURL).String())
				rls.TorrentURL = unescapedURL
			}
		}
	}

	rls.Categories = append(rls.Categories, item.Categories...)

	for _, v := range item.Categories {
		if len(rls.Category) != 0 {
			rls.Category += ", "
		}

		rls.Category += v
	}

	for _, v := range item.Authors {
		if len(rls.Uploader) != 0 {
			rls.Uploader += ", "
		}

		rls.Uploader += v.Name
	}

	if rls.Size == 0 {
		// parse size bytes string
		if sz, ok := item.Custom["size"]; ok {
			rls.ParseSizeBytesString(sz)
		}
	}

	// additional size parsing
	// some feeds have a fixed size for the enclosure, so check custom elements
	// and parse the size from there if it differs
	if customTorrent, ok := item.Custom["torrent"]; ok {
		var element itemCustomElement
		if err := xml.Unmarshal([]byte("<torrent>"+customTorrent+"</torrent>"), &element); err != nil {
			j.Log.Error().Err(err).Msg("could not unmarshal item.Custom.Torrent")
		}

		if element.ContentLength > 0 && uint64(element.ContentLength) != rls.Size {
			rls.Size = uint64(element.ContentLength)
		}

		if rls.TorrentHash == "" && element.InfoHash != "" {
			rls.TorrentHash = element.InfoHash
		}
	}

	// basic freeleech parsing
	if isFreeleech([]string{item.Title, item.Description}) {
		rls.Freeleech = true
		rls.Bonus = []string{"Freeleech"}
	}

	// add cookie to release for download if needed
	if j.Feed.Cookie != "" {
		rls.RawCookie = j.Feed.Cookie
	}

	return rls
}
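// getFeed fetches and parses the remote feed, stores the raw payload on the
// feed record, and returns only the items not already present in the cache.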
func (j *RSSJob) getFeed() (items []*gofeed.Item, err error) {
	ctx, cancel := context.WithTimeout(context.Background(), j.Timeout)
	defer cancel()

	feed, err := NewFeedParser(j.Timeout, j.Feed.Cookie).ParseURLWithContext(ctx, j.URL)
	if err != nil {
		return nil, errors.Wrap(err, "error fetching rss feed items")
	}

	// get feed as JSON string
	feedData := feed.String()

	if err := j.Repo.UpdateLastRunWithData(context.Background(), j.Feed.ID, feedData); err != nil {
		j.Log.Error().Err(err).Msgf("error updating last run for feed id: %v", j.Feed.ID)
	}

	j.Log.Debug().Msgf("refreshing rss feed: %v, found (%d) items", j.Name, len(feed.Items))

	if len(feed.Items) == 0 {
		return
	}

	bucketKey := fmt.Sprintf("%v+%v", j.IndexerIdentifier, j.Name)

	//sort.Sort(feed)

	bucketCount, err := j.CacheRepo.GetCountByBucket(ctx, bucketKey)
	if err != nil {
		j.Log.Error().Err(err).Msg("could not get bucket count")
		return nil, err
	}

	// set ttl to 1 month
	ttl := time.Now().AddDate(0, 1, 0)

	for _, item := range feed.Items {
		key := item.GUID
		if len(key) == 0 {
			key = item.Title
			if len(key) == 0 {
				continue
			}
		}

		exists, err := j.CacheRepo.Exists(bucketKey, key)
		if err != nil {
			j.Log.Error().Err(err).Msg("could not check if item exists")
			continue
		}
		if exists {
			j.Log.Trace().Msgf("cache item exists, skipping release: %v", item.Title)
			continue
		}

		if err := j.CacheRepo.Put(bucketKey, key, []byte(item.Title), ttl); err != nil {
			j.Log.Error().Err(err).Str("entry", key).Msg("cache.Put: error storing item in cache")
			continue
		}

		// the first time we fetch a feed the cached bucket count is 0, so only
		// append when the count is greater than 0; this avoids treating the
		// entire backlog as new items
		if bucketCount > 0 {
			items = append(items, item)
		}
	}

	// send to filters
	return
}
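// isNewerThanMaxAge reports whether item is within maxAge seconds of now.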
func isNewerThanMaxAge(maxAge int, item, now time.Time) bool {
	// now minus max age
	nowMaxAge := now.Add(time.Duration(-maxAge) * time.Second)

	return item.After(nowMaxAge)
}
// isFreeleech does basic freeleech parsing by matching the word "freeleech"
// (case-insensitive) in any of the given strings
func isFreeleech(str []string) bool {
	re := regexp.MustCompile(`(?mi)(\bfreeleech\b)`)

	for _, s := range str {
		if re.MatchString(s) {
			return true
		}
	}

	return false
}
// itemCustomElement is used to parse additional torrent metadata from custom
// feed elements, used by some feeds like the Aviztas network
type itemCustomElement struct {
	ContentLength int64  `xml:"contentLength"`
	InfoHash      string `xml:"infoHash"`
}