autobrr/internal/feed/rss.go
ze0s 604c7896bd
chore: add LICENSE GPLv2-or-later (#897)
* chore: add LICENSE

* chore: add LICENSE to README
2023-05-01 16:21:59 +02:00

309 lines
7.4 KiB
Go

// Copyright (c) 2021 - 2023, Ludvig Lundgren and the autobrr contributors.
// SPDX-License-Identifier: GPL-2.0-or-later
package feed
import (
"context"
"encoding/xml"
"fmt"
"net/url"
"regexp"
"time"
"github.com/autobrr/autobrr/internal/domain"
"github.com/autobrr/autobrr/internal/release"
"github.com/autobrr/autobrr/pkg/errors"
"github.com/mmcdole/gofeed"
"github.com/rs/zerolog"
)
// RSSJob holds the state and dependencies needed to periodically fetch one
// RSS feed, detect items that have not been seen before and hand the
// resulting releases to the release service.
type RSSJob struct {
// Feed is the feed definition; its ID, Cookie, MaxAge and Settings are read
// while fetching and processing items.
Feed *domain.Feed
// Name is used in log output and as part of the cache bucket key.
Name string
// IndexerIdentifier is attached to every release created by this job and is
// part of the cache bucket key.
IndexerIdentifier string
Log zerolog.Logger
// URL is the feed URL that gets fetched; it is also the base used to
// resolve relative item links.
URL string
// Repo stores the last-run data of the feed.
Repo domain.FeedRepo
// CacheRepo tracks which feed items have already been seen.
CacheRepo domain.FeedCacheRepo
// ReleaseSvc receives the releases built from new feed items.
ReleaseSvc release.Service
// Timeout bounds a single feed fetch.
Timeout time.Duration
// attempts is logged when a run fails and reset to 0 after a successful run.
attempts int
// errors collects the errors of failed runs; it is reset after a successful run.
errors []error
// JobID is not read anywhere in this file.
// NOTE(review): presumably the scheduler's id for this job — confirm against the caller.
JobID int
}
// NewRSSJob returns a new RSSJob wired up with the given feed definition,
// identifiers, logger, feed URL, repositories, release service and fetch
// timeout. The failure bookkeeping fields start at their zero values.
func NewRSSJob(feed *domain.Feed, name string, indexerIdentifier string, log zerolog.Logger, url string, repo domain.FeedRepo, cacheRepo domain.FeedCacheRepo, releaseSvc release.Service, timeout time.Duration) *RSSJob {
	job := new(RSSJob)

	// feed identity
	job.Feed = feed
	job.Name = name
	job.IndexerIdentifier = indexerIdentifier
	job.URL = url
	job.Timeout = timeout

	// dependencies
	job.Log = log
	job.Repo = repo
	job.CacheRepo = cacheRepo
	job.ReleaseSvc = releaseSvc

	return job
}
// Run executes one fetch-and-process cycle for the feed.
//
// On failure the attempt counter is incremented, the error is logged together
// with the number of consecutive failed attempts and recorded on the job.
// On success the attempt counter and the recorded errors are reset.
func (j *RSSJob) Run() {
	ctx := context.Background()

	if err := j.process(ctx); err != nil {
		// Count the failed run before logging so that the "attempts" field
		// reflects the number of consecutive failures instead of always
		// being 0.
		j.attempts++

		j.Log.Error().Err(err).Int("attempts", j.attempts).Msg("rss feed process error")
		j.errors = append(j.errors, err)

		return
	}

	j.attempts = 0
	j.errors = []error{}
}
// process fetches the new items of the feed, converts each of them into a
// release and passes the collected releases to the release service in a
// separate goroutine.
//
// It returns an error only when the feed itself could not be fetched; when
// there are no new items it returns nil without calling the release service.
func (j *RSSJob) process(ctx context.Context) error {
	feedItems, err := j.getFeed(ctx)
	if err != nil {
		j.Log.Error().Err(err).Msgf("error fetching rss feed items")
		return errors.Wrap(err, "error getting rss feed items")
	}

	count := len(feedItems)
	j.Log.Debug().Msgf("found (%d) new items to process", count)

	if count == 0 {
		return nil
	}

	parsed := make([]*domain.Release, 0)

	for idx := range feedItems {
		entry := feedItems[idx]

		j.Log.Debug().Msgf("item: %v", entry.Title)

		// items filtered out by processItem come back as nil
		if rls := j.processItem(entry); rls != nil {
			parsed = append(parsed, rls)
		}
	}

	// hand all new releases over without blocking this run
	go j.ReleaseSvc.ProcessMultiple(parsed)

	return nil
}
// processItem converts a single feed item into a release.
//
// It returns nil when the feed has a MaxAge configured and the item carries a
// usable publish date that is older than that max age. Otherwise it returns a
// release populated from the item's title, link, first enclosure, categories,
// authors and custom elements.
func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
	now := time.Now()

	if j.Feed.MaxAge > 0 {
		// Publish dates on or before 1970-04-01 are not used for the age check.
		// NOTE(review): presumably this guards against feeds emitting
		// zero/epoch timestamps — confirm.
		if item.PublishedParsed != nil && item.PublishedParsed.After(time.Date(1970, time.April, 1, 0, 0, 0, 0, time.UTC)) {
			if !isNewerThanMaxAge(j.Feed.MaxAge, *item.PublishedParsed, now) {
				return nil
			}
		}
	}

	rls := domain.NewRelease(j.IndexerIdentifier)
	rls.Implementation = domain.ReleaseImplementationRSS

	rls.ParseString(item.Title)

	if j.Feed.Settings != nil && j.Feed.Settings.DownloadType == domain.FeedDownloadTypeMagnet {
		rls.MagnetURI = item.Link
		rls.TorrentURL = ""
	}

	if len(item.Enclosures) > 0 {
		e := item.Enclosures[0]
		if e.Type == "application/x-bittorrent" && e.URL != "" {
			rls.TorrentURL = e.URL
		}
		// The length "39399" is deliberately ignored.
		// NOTE(review): presumably a fixed placeholder size some feeds
		// report for every enclosure — confirm.
		if e.Length != "" && e.Length != "39399" {
			rls.ParseSizeBytesString(e.Length)
		}
	}

	// NOTE(review): for magnet feeds without a torrent enclosure this fallback
	// puts the same link that was stored in MagnetURI back into TorrentURL —
	// confirm that is intended.
	if rls.TorrentURL == "" && item.Link != "" {
		rls.TorrentURL = item.Link
	}

	if rls.TorrentURL != "" {
		// handle no baseurl with only relative url
		// grab url from feed url and create full url
		if parsedURL, _ := url.Parse(rls.TorrentURL); parsedURL != nil && len(parsedURL.Hostname()) == 0 {
			if parentURL, _ := url.Parse(j.URL); parentURL != nil {
				parentURL.Path, parentURL.RawPath = "", ""

				// unescape the query params for max compatibility
				escapedUrl, _ := url.QueryUnescape(parentURL.JoinPath(rls.TorrentURL).String())
				rls.TorrentURL = escapedUrl
			}
		}
	}

	// Append the categories exactly once. Doing this inside the loop below
	// would add the whole slice again for every category.
	rls.Categories = append(rls.Categories, item.Categories...)

	for _, v := range item.Categories {
		if len(rls.Category) != 0 {
			rls.Category += ", "
		}

		rls.Category += v
	}

	for _, v := range item.Authors {
		if len(rls.Uploader) != 0 {
			rls.Uploader += ", "
		}

		rls.Uploader += v.Name
	}

	// When custom->size and enclosures->size differ, `ParseSizeBytesString` will pick the largest one.
	if size, ok := item.Custom["size"]; ok {
		rls.ParseSizeBytesString(size)
	}

	// additional size parsing
	// some feeds have a fixed size for enclosure so lets check for custom elements
	// and parse size from there if it differs
	if customTorrent, ok := item.Custom["torrent"]; ok {
		var element itemCustomElement
		if err := xml.Unmarshal([]byte("<torrent>"+customTorrent+"</torrent>"), &element); err != nil {
			// best effort: on failure the checks below see whatever was decoded, if anything
			j.Log.Error().Err(err).Msg("could not unmarshal item.Custom.Torrent")
		}

		if element.ContentLength > 0 {
			rls.Size = uint64(element.ContentLength)
		}

		if rls.TorrentHash == "" && element.InfoHash != "" {
			rls.TorrentHash = element.InfoHash
		}
	}

	// basic freeleech parsing
	if isFreeleech([]string{item.Title, item.Description}) {
		rls.Freeleech = true
		rls.Bonus = []string{"Freeleech"}
	}

	// add cookie to release for download if needed
	if j.Feed.Cookie != "" {
		rls.RawCookie = j.Feed.Cookie
	}

	return rls
}
// getFeed fetches and parses the feed and returns the items that have not
// been seen before.
//
// Every item that is not yet cached is stored in the cache bucket of this
// feed, keyed by GUID (falling back to the title; items with neither are
// skipped). On the very first fetch, when the bucket is still empty, the
// items are cached but not returned, so only items appearing after that are
// treated as new.
//
// An error is returned when the feed cannot be fetched or the bucket count
// cannot be read. Cache errors for individual items are logged and the item
// is skipped.
func (j *RSSJob) getFeed(ctx context.Context) (items []*gofeed.Item, err error) {
	ctx, cancel := context.WithTimeout(ctx, j.Timeout)
	defer cancel()

	feed, err := NewFeedParser(j.Timeout, j.Feed.Cookie).ParseURLWithContext(ctx, j.URL)
	if err != nil {
		return nil, errors.Wrap(err, "error fetching rss feed items")
	}

	// get feed as JSON string
	feedData := feed.String()

	// best effort: a failure to store the last run must not stop processing
	if err := j.Repo.UpdateLastRunWithData(ctx, j.Feed.ID, feedData); err != nil {
		j.Log.Error().Err(err).Msgf("error updating last run for feed id: %v", j.Feed.ID)
	}

	j.Log.Debug().Msgf("refreshing rss feed: %v, found (%d) items", j.Name, len(feed.Items))

	if len(feed.Items) == 0 {
		return nil, nil
	}

	bucketKey := fmt.Sprintf("%v+%v", j.IndexerIdentifier, j.Name)

	bucketCount, err := j.CacheRepo.GetCountByBucket(ctx, bucketKey)
	if err != nil {
		j.Log.Error().Err(err).Msg("could not get cache bucket count")
		return nil, err
	}

	// set ttl to 1 month
	ttl := time.Now().AddDate(0, 1, 0)

	for _, item := range feed.Items {
		key := item.GUID
		if len(key) == 0 {
			key = item.Title
			if len(key) == 0 {
				continue
			}
		}

		exists, err := j.CacheRepo.Exists(bucketKey, key)
		if err != nil {
			j.Log.Error().Err(err).Msg("could not check if item exists")
			continue
		}
		if exists {
			j.Log.Trace().Msgf("cache item exists, skipping release: %s", item.Title)
			continue
		}

		j.Log.Debug().Msgf("found new release: %s", item.Title)

		if err := j.CacheRepo.Put(bucketKey, key, []byte(item.Title), ttl); err != nil {
			j.Log.Error().Err(err).Str("entry", key).Msg("cache.Put: error storing item in cache")
			continue
		}

		// first time we fetch the feed the cached bucket count will be 0
		// only append to items if it's bigger than 0, so we get new items only
		if bucketCount > 0 {
			items = append(items, item)
		}
	}

	// send to filters
	return items, nil
}
// isNewerThanMaxAge reports whether item lies strictly after the point in
// time that is maxAge seconds before now.
func isNewerThanMaxAge(maxAge int, item, now time.Time) bool {
	// oldest instant that is still accepted (exclusive)
	cutoff := now.Add(-time.Duration(maxAge) * time.Second)

	return item.After(cutoff)
}
// rssFreeleechRegexp matches the standalone word "freeleech",
// case-insensitively. It is compiled once at package initialisation instead
// of on every call.
var rssFreeleechRegexp = regexp.MustCompile(`(?mi)(\bfreeleech\b)`)

// isFreeleech does basic freeleech parsing: it reports whether any of the
// given strings contains the standalone word "freeleech" (case-insensitive).
// It returns false for an empty or nil slice.
func isFreeleech(str []string) bool {
	for _, s := range str {
		if rssFreeleechRegexp.MatchString(s) {
			return true
		}
	}

	return false
}
// itemCustomElement is the decode target for an item's custom <torrent>
// element.
// Used for some feeds like the AvistaZ network.
type itemCustomElement struct {
// ContentLength is decoded from the <contentLength> child element; when
// greater than zero it is used as the release size.
ContentLength int64 `xml:"contentLength"`
// InfoHash is decoded from the <infoHash> child element; it is used as the
// torrent hash when the release does not have one yet.
InfoHash string `xml:"infoHash"`
}