mirror of
https://github.com/idanoo/autobrr
synced 2025-07-23 08:49:13 +00:00
feat(feeds): improve file size parsing (#1162)
* In rss feeds, sometimes the only place the size is mentioned is in the description field. If the size has not already been determined from another source try to read it from there. * Accept sizes with no space between value and unit of measure * feat(feeds): get size from description add test * fix(feeds): tests --------- Co-authored-by: ze0s <ze0s@riseup.net>
This commit is contained in:
parent
03ef86ac96
commit
dbb3ff3a3b
2 changed files with 50 additions and 6 deletions
|
@ -18,6 +18,11 @@ import (
|
|||
"github.com/rs/zerolog"
|
||||
)
|
||||
|
||||
var (
|
||||
rxpSize = regexp.MustCompile(`(?mi)(([0-9.]+)\s*(b|kb|kib|kilobyte|mb|mib|megabyte|gb|gib|gigabyte|tb|tib|terabyte))`)
|
||||
rxpFreeleech = regexp.MustCompile(`(?mi)(\bfreeleech\b)`)
|
||||
)
|
||||
|
||||
type RSSJob struct {
|
||||
Feed *domain.Feed
|
||||
Name string
|
||||
|
@ -194,6 +199,13 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
|
|||
|
||||
if item.Description != "" {
|
||||
rls.Description = item.Description
|
||||
|
||||
if rls.Size == 0 {
|
||||
hrSize := readSizeFromDescription(item.Description)
|
||||
rls.ParseSizeBytesString(hrSize)
|
||||
|
||||
j.Log.Trace().Msgf("Set new size %d from description %s", rls.Size, hrSize)
|
||||
}
|
||||
}
|
||||
|
||||
// add cookie to release for download if needed
|
||||
|
@ -281,9 +293,7 @@ func isNewerThanMaxAge(maxAge int, item, now time.Time) bool {
|
|||
// isFreeleech basic freeleech parsing
|
||||
func isFreeleech(str []string) bool {
|
||||
for _, s := range str {
|
||||
var re = regexp.MustCompile(`(?mi)(\bfreeleech\b)`)
|
||||
|
||||
match := re.FindAllString(s, -1)
|
||||
match := rxpFreeleech.FindAllString(s, -1)
|
||||
|
||||
if len(match) > 0 {
|
||||
return true
|
||||
|
@ -293,6 +303,16 @@ func isFreeleech(str []string) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// readSizeFromDescription get size from description
|
||||
func readSizeFromDescription(str string) string {
|
||||
matches := rxpSize.FindStringSubmatch(str)
|
||||
if matches == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
return matches[1]
|
||||
}
|
||||
|
||||
// itemCustomElement
|
||||
// used for some feeds like Aviztas network
|
||||
type itemCustomElement struct {
|
||||
|
|
|
@ -67,7 +67,7 @@ func TestRSSJob_processItem(t *testing.T) {
|
|||
Link: "/details.php?id=00000&hit=1",
|
||||
GUID: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP",
|
||||
}},
|
||||
want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 0x0, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)},
|
||||
want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 1490000000, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)},
|
||||
},
|
||||
{
|
||||
name: "with_baseurl",
|
||||
|
@ -96,7 +96,7 @@ func TestRSSJob_processItem(t *testing.T) {
|
|||
Link: "https://fake-feed.com/details.php?id=00000&hit=1",
|
||||
GUID: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP",
|
||||
}},
|
||||
want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 0x0, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)},
|
||||
want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 1490000000, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)},
|
||||
},
|
||||
{
|
||||
name: "time_parse",
|
||||
|
@ -126,7 +126,7 @@ func TestRSSJob_processItem(t *testing.T) {
|
|||
GUID: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP",
|
||||
//PublishedParsed: &nowMinusTime,
|
||||
}},
|
||||
want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 0x0, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)},
|
||||
want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 1490000000, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)},
|
||||
},
|
||||
{
|
||||
name: "time_parse",
|
||||
|
@ -219,3 +219,27 @@ func Test_isMaxAge(t *testing.T) {
|
|||
})
|
||||
}
|
||||
}
|
||||
|
||||
func Test_readSizeFromDescription(t *testing.T) {
|
||||
type args struct {
|
||||
str string
|
||||
}
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
want string
|
||||
}{
|
||||
{name: "size", args: args{"Size: 12GB"}, want: "12GB"},
|
||||
{name: "size_1", args: args{"Size: 12 GB"}, want: "12 GB"},
|
||||
{name: "size_2", args: args{"Size: 12 GiB"}, want: "12 GiB"},
|
||||
{name: "size_3", args: args{"Size: 537 MiB"}, want: "537 MiB"},
|
||||
{name: "size_4", args: args{"<strong>Size</strong>: 20.48 GiB<br>"}, want: "20.48 GiB"},
|
||||
{name: "size_5", args: args{"file.name-GROUP / 20.48 GiB / x265"}, want: "20.48 GiB"},
|
||||
{name: "size_6", args: args{"<strong>Uploaded</strong>: 38 minutes ago<br>"}, want: ""},
|
||||
}
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equalf(t, tt.want, readSizeFromDescription(tt.args.str), "readSizeFromDescription(%v)", tt.args.str)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue