From dbb3ff3a3b8ad77ff4c7eb9359856ae5d4204e06 Mon Sep 17 00:00:00 2001 From: jolinarofmalkshur <146637583+jolinarofmalkshur@users.noreply.github.com> Date: Sun, 1 Oct 2023 23:47:47 +0300 Subject: [PATCH] feat(feeds): improve file size parsing (#1162) * In rss feeds, sometimes the only place the size is mentioned is in the description field. If the size has not already been determined from another source try to read it from there. * Accept sizes with no space between value and unit of measure * feat(feeds): get size from description add test * fix(feeds): tests --------- Co-authored-by: ze0s --- internal/feed/rss.go | 26 +++++++++++++++++++++++--- internal/feed/rss_test.go | 30 +++++++++++++++++++++++++++--- 2 files changed, 50 insertions(+), 6 deletions(-) diff --git a/internal/feed/rss.go b/internal/feed/rss.go index b4ef7bf..a7baaf3 100644 --- a/internal/feed/rss.go +++ b/internal/feed/rss.go @@ -18,6 +18,11 @@ import ( "github.com/rs/zerolog" ) +var ( + rxpSize = regexp.MustCompile(`(?mi)(([0-9.]+)\s*(b|kb|kib|kilobyte|mb|mib|megabyte|gb|gib|gigabyte|tb|tib|terabyte))`) + rxpFreeleech = regexp.MustCompile(`(?mi)(\bfreeleech\b)`) +) + type RSSJob struct { Feed *domain.Feed Name string @@ -194,6 +199,13 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release { if item.Description != "" { rls.Description = item.Description + + if rls.Size == 0 { + hrSize := readSizeFromDescription(item.Description) + rls.ParseSizeBytesString(hrSize) + + j.Log.Trace().Msgf("Set new size %d from description %s", rls.Size, hrSize) + } } // add cookie to release for download if needed @@ -281,9 +293,7 @@ func isNewerThanMaxAge(maxAge int, item, now time.Time) bool { // isFreeleech basic freeleech parsing func isFreeleech(str []string) bool { for _, s := range str { - var re = regexp.MustCompile(`(?mi)(\bfreeleech\b)`) - - match := re.FindAllString(s, -1) + match := rxpFreeleech.FindAllString(s, -1) if len(match) > 0 { return true @@ -293,6 +303,16 @@ func isFreeleech(str []string) bool { return false } +// readSizeFromDescription get size from description +func readSizeFromDescription(str string) string { + matches := rxpSize.FindStringSubmatch(str) + if matches == nil { + return "" + } + + return matches[1] +} + // itemCustomElement // used for some feeds like Aviztas network type itemCustomElement struct { diff --git a/internal/feed/rss_test.go b/internal/feed/rss_test.go index 82f02fb..9a5032c 100644 --- a/internal/feed/rss_test.go +++ b/internal/feed/rss_test.go @@ -67,7 +67,7 @@ func TestRSSJob_processItem(t *testing.T) { Link: "/details.php?id=00000&hit=1", GUID: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", }}, - want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 0x0, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)}, + want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 1490000000, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)}, }, { name: "with_baseurl", @@ -96,7 +96,7 @@ func TestRSSJob_processItem(t *testing.T) { Link: "https://fake-feed.com/details.php?id=00000&hit=1", GUID: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", }}, - want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 0x0, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)}, + want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 1490000000, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)}, }, { name: "time_parse", @@ -126,7 +126,7 @@ func TestRSSJob_processItem(t *testing.T) { GUID: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", //PublishedParsed: &nowMinusTime, }}, - want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 0x0, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)}, + want: &domain.Release{ID: 0, FilterStatus: "PENDING", Rejections: []string{}, Indexer: "mock-feed", FilterName: "", Protocol: "torrent", Implementation: "RSS", Timestamp: now, GroupID: "", TorrentID: "", DownloadURL: "https://fake-feed.com/details.php?id=00000&hit=1", TorrentTmpFile: "", TorrentDataRawBytes: []uint8(nil), TorrentHash: "", TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP", Size: 1490000000, Title: "Some Release Title", Description: "Category: Example\n Size: 1.49 GB\n Status: 27 seeders and 1 leechers\n Speed: 772.16 kB/s\n Added: 2022-09-29 16:06:08\n", Category: "", Season: 0, Episode: 0, Year: 2022, Resolution: "720p", Source: "WEB", Codec: []string{"H.264"}, Container: "", HDR: []string(nil), Audio: []string(nil), AudioChannels: "", Group: "GROUP", Region: "", Language: nil, Proper: false, Repack: false, Website: "", Artists: "", Type: "", LogScore: 0, Origin: "", Tags: []string{}, ReleaseTags: "", Freeleech: false, FreeleechPercent: 0, Bonus: []string(nil), Uploader: "", PreTime: "", Other: []string(nil), RawCookie: "", AdditionalSizeCheckRequired: false, FilterID: 0, Filter: (*domain.Filter)(nil), ActionStatus: []domain.ReleaseActionStatus(nil)}, }, { name: "time_parse", @@ -219,3 +219,27 @@ func Test_isMaxAge(t *testing.T) { }) } } + +func Test_readSizeFromDescription(t *testing.T) { + type args struct { + str string + } + tests := []struct { + name string + args args + want string + }{ + {name: "size", args: args{"Size: 12GB"}, want: "12GB"}, + {name: "size_1", args: args{"Size: 12 GB"}, want: "12 GB"}, + {name: "size_2", args: args{"Size: 12 GiB"}, want: "12 GiB"}, + {name: "size_3", args: args{"Size: 537 MiB"}, want: "537 MiB"}, + {name: "size_4", args: args{"Size: 20.48 GiB
"}, want: "20.48 GiB"}, + {name: "size_5", args: args{"file.name-GROUP / 20.48 GiB / x265"}, want: "20.48 GiB"}, + {name: "size_6", args: args{"Uploaded: 38 minutes ago
"}, want: ""}, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + assert.Equalf(t, tt.want, readSizeFromDescription(tt.args.str), "readSizeFromDescription(%v)", tt.args.str) + }) + } +}