// Copyright (c) 2021 - 2023, Ludvig Lundgren and the autobrr contributors.
// SPDX-License-Identifier: GPL-2.0-or-later

package feed

import (
	"context"
	"encoding/xml"
	"net/url"
	"regexp"
	"time"

	"github.com/autobrr/autobrr/internal/domain"
	"github.com/autobrr/autobrr/internal/release"
	"github.com/autobrr/autobrr/pkg/errors"

	"github.com/mmcdole/gofeed"
	"github.com/rs/zerolog"
)

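// rxpSize matches a human-readable size embedded in free text, with or
// without a space between the value and the unit, e.g. "1.45 GB", "700MB",
// "1.2 GiB". rxpFreeleech matches the word "freeleech" on a word boundary.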
var (
	rxpSize      = regexp.MustCompile(`(?mi)(([0-9.]+)\s*(b|kb|kib|kilobyte|mb|mib|megabyte|gb|gib|gigabyte|tb|tib|terabyte))`)
	rxpFreeleech = regexp.MustCompile(`(?mi)(\bfreeleech\b)`)
)

type RSSJob struct {
	Feed              *domain.Feed
	Name              string
	IndexerIdentifier string
	Log               zerolog.Logger
	URL               string
	Repo              domain.FeedRepo
	CacheRepo         domain.FeedCacheRepo
	ReleaseSvc        release.Service
	Timeout           time.Duration

	attempts int
	errors   []error

	JobID int
}

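// NewRSSJob builds an RSSJob from the given feed configuration and its
// dependencies.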
func NewRSSJob(feed *domain.Feed, name string, indexerIdentifier string, log zerolog.Logger, url string, repo domain.FeedRepo, cacheRepo domain.FeedCacheRepo, releaseSvc release.Service, timeout time.Duration) *RSSJob {
	return &RSSJob{
		Feed:              feed,
		Name:              name,
		IndexerIdentifier: indexerIdentifier,
		Log:               log,
		URL:               url,
		Repo:              repo,
		CacheRepo:         cacheRepo,
		ReleaseSvc:        releaseSvc,
		Timeout:           timeout,
	}
}

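// Run executes a single refresh of the feed. It is intended to be run on a
// schedule; errors are collected on the job across attempts.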
func (j *RSSJob) Run() {
	ctx := context.Background()

	if err := j.process(ctx); err != nil {
		j.Log.Error().Err(err).Int("attempts", j.attempts).Msg("rss feed process error")

		j.errors = append(j.errors, err)
		return
	}

	j.attempts = 0
	j.errors = []error{}
}

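// process fetches new items from the feed, converts each one to a release,
// and hands the batch off to the release service for filtering.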
func (j *RSSJob) process(ctx context.Context) error {
	items, err := j.getFeed(ctx)
	if err != nil {
		j.Log.Error().Err(err).Msg("error fetching rss feed items")
		return errors.Wrap(err, "error getting rss feed items")
	}

	j.Log.Debug().Msgf("found (%d) new items to process", len(items))

	if len(items) == 0 {
		return nil
	}

	releases := make([]*domain.Release, 0)

	for _, item := range items {
		item := item
		j.Log.Debug().Msgf("item: %v", item.Title)

		rls := j.processItem(item)
		if rls != nil {
			releases = append(releases, rls)
		}
	}

	// process all new releases
	go j.ReleaseSvc.ProcessMultiple(releases)

	return nil
}

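// processItem maps a single gofeed.Item onto a domain.Release: it parses the
// title, resolves the download URL, and extracts size, categories, uploader,
// freeleech status, and info hash from the various places feeds put them.
// It returns nil when the item is older than the feed's max age.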
func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
	now := time.Now()

	if j.Feed.MaxAge > 0 {
		if item.PublishedParsed != nil && item.PublishedParsed.After(time.Date(1970, time.April, 1, 0, 0, 0, 0, time.UTC)) {
			if !isNewerThanMaxAge(j.Feed.MaxAge, *item.PublishedParsed, now) {
				return nil
			}
		}
	}

	rls := domain.NewRelease(j.IndexerIdentifier)
	rls.Implementation = domain.ReleaseImplementationRSS

	rls.ParseString(item.Title)

	if j.Feed.Settings != nil && j.Feed.Settings.DownloadType == domain.FeedDownloadTypeMagnet {
		rls.MagnetURI = item.Link
		rls.DownloadURL = ""
	}

	if len(item.Enclosures) > 0 {
		e := item.Enclosures[0]
		if e.Type == "application/x-bittorrent" && e.URL != "" {
			rls.DownloadURL = e.URL
		}
		// ignore empty lengths and the value "39399", which is presumably a
		// placeholder some feeds set for every item
		if e.Length != "" && e.Length != "39399" {
			rls.ParseSizeBytesString(e.Length)
		}
	}

	if rls.DownloadURL == "" && item.Link != "" {
		rls.DownloadURL = item.Link
	}

	if rls.DownloadURL != "" {
		// handle relative download links (no scheme or host) by grabbing the
		// host from the feed URL and building a full URL
		if parsedURL, _ := url.Parse(rls.DownloadURL); parsedURL != nil && len(parsedURL.Hostname()) == 0 {
			if parentURL, _ := url.Parse(j.URL); parentURL != nil {
				parentURL.Path, parentURL.RawPath = "", ""

				// unescape the query params for max compatibility
				unescapedURL, _ := url.QueryUnescape(parentURL.JoinPath(rls.DownloadURL).String())
				rls.DownloadURL = unescapedURL
			}
		}
	}

	// collect the raw categories once, and also build a comma-separated string
	rls.Categories = append(rls.Categories, item.Categories...)

	for _, v := range item.Categories {
		if len(rls.Category) != 0 {
			rls.Category += ", "
		}

		rls.Category += v
	}

	for _, v := range item.Authors {
		if len(rls.Uploader) != 0 {
			rls.Uploader += ", "
		}

		rls.Uploader += v.Name
	}

	// when the custom size and the enclosure size differ,
	// ParseSizeBytesString picks the largest one
	if size, ok := item.Custom["size"]; ok {
		rls.ParseSizeBytesString(size)
	}

	// additional size parsing: some feeds report a fixed size in the
	// enclosure, so check for a custom <torrent> element and prefer the size
	// from there when it differs
	if customTorrent, ok := item.Custom["torrent"]; ok {
		var element itemCustomElement
		if err := xml.Unmarshal([]byte("<torrent>"+customTorrent+"</torrent>"), &element); err != nil {
			j.Log.Error().Err(err).Msg("could not unmarshal item.Custom.Torrent")
		}

		if element.ContentLength > 0 {
			if uint64(element.ContentLength) != rls.Size {
				rls.Size = uint64(element.ContentLength)
			}
		}

		if rls.TorrentHash == "" && element.InfoHash != "" {
			rls.TorrentHash = element.InfoHash
		}
	}

	// basic freeleech parsing
	if isFreeleech([]string{item.Title, item.Description}) {
		rls.Freeleech = true
		rls.Bonus = []string{"Freeleech"}
	}

	if item.Description != "" {
		rls.Description = item.Description

		// if no size was found anywhere else, fall back to parsing a
		// human-readable size out of the description
		if rls.Size == 0 {
			hrSize := readSizeFromDescription(item.Description)
			rls.ParseSizeBytesString(hrSize)

			j.Log.Trace().Msgf("set new size %d from description %s", rls.Size, hrSize)
		}
	}

	// add cookie to release for download if needed
	if j.Feed.Cookie != "" {
		rls.RawCookie = j.Feed.Cookie
	}

	return rls
}

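// getFeed fetches and parses the feed, records the run in the repo, and
// returns only the items that have not been seen before, using the feed
// cache keyed by GUID (falling back to title) with a one-month TTL.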
func (j *RSSJob) getFeed(ctx context.Context) (items []*gofeed.Item, err error) {
	ctx, cancel := context.WithTimeout(ctx, j.Timeout)
	defer cancel()

	feed, err := NewFeedParser(j.Timeout, j.Feed.Cookie).ParseURLWithContext(ctx, j.URL)
	if err != nil {
		return nil, errors.Wrap(err, "error fetching rss feed items")
	}

	// get feed as JSON string
	feedData := feed.String()

	if err := j.Repo.UpdateLastRunWithData(ctx, j.Feed.ID, feedData); err != nil {
		j.Log.Error().Err(err).Msgf("error updating last run for feed id: %v", j.Feed.ID)
	}

	j.Log.Debug().Msgf("refreshing rss feed: %v, found (%d) items", j.Name, len(feed.Items))

	if len(feed.Items) == 0 {
		return
	}

	//sort.Sort(feed)

	// set ttl to 1 month
	ttl := time.Now().AddDate(0, 1, 0)

	for _, i := range feed.Items {
		item := i

		key := item.GUID
		if len(key) == 0 {
			key = item.Title
			if len(key) == 0 {
				continue
			}
		}

		exists, err := j.CacheRepo.Exists(j.Feed.ID, key)
		if err != nil {
			j.Log.Error().Err(err).Msg("could not check if item exists")
			continue
		}
		if exists {
			j.Log.Trace().Msgf("cache item exists, skipping release: %s", item.Title)
			continue
		}

		j.Log.Debug().Msgf("found new release: %s", i.Title)

		if err := j.CacheRepo.Put(j.Feed.ID, key, []byte(item.Title), ttl); err != nil {
			j.Log.Error().Err(err).Str("entry", key).Msg("cache.Put: error storing item in cache")
			continue
		}

		// only append if we successfully added to cache
		items = append(items, item)
	}

	// send to filters
	return
}

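// isNewerThanMaxAge reports whether item was published within the last
// maxAge seconds relative to now. For example, with a maxAge of 3600 an
// item published 30 minutes ago passes, while one published two hours ago
// does not.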
func isNewerThanMaxAge(maxAge int, item, now time.Time) bool {
	// now minus max age
	nowMaxAge := now.Add(time.Duration(-maxAge) * time.Second)

	return item.After(nowMaxAge)
}

// isFreeleech reports whether any of the given strings mentions freeleech.
func isFreeleech(str []string) bool {
	for _, s := range str {
		if rxpFreeleech.MatchString(s) {
			return true
		}
	}

	return false
}

// readSizeFromDescription extracts the first human-readable size found in a
// description string, or returns "" when none is present.
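// For example, a description like "Size: 1.45GB | Files: 2" yields "1.45GB",
// which ParseSizeBytesString can then convert to bytes.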
func readSizeFromDescription(str string) string {
	matches := rxpSize.FindStringSubmatch(str)
	if matches == nil {
		return ""
	}

	return matches[1]
}

// itemCustomElement holds the non-standard <torrent> element embedded by
// some feeds, such as the Aviztas network.
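// processItem wraps the raw custom value in <torrent> tags before
// unmarshalling, so the decoded input looks roughly like:
//
//	<torrent>
//	  <contentLength>123456789</contentLength>
//	  <infoHash>aabbccddeeff00112233445566778899aabbccdd</infoHash>
//	</torrent>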
type itemCustomElement struct {
	ContentLength int64  `xml:"contentLength"`
	InfoHash      string `xml:"infoHash"`
}