feat(feeds): improve RSS size parsing (#1367)

* fix(feeds): Parse multiple sizes.

* refactor: Test_pullSizeFromDescription

* refactor: make test human readable

Added a helper function.

* multi

* Agnewwwwww

* .

* humanize

* humanize

---------

Co-authored-by: soup <soup@r4tio.dev>
This commit is contained in:
Kyle Sanderson 2024-01-27 12:52:11 -08:00 committed by GitHub
parent abb7829abe
commit cdd91d27e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 66 additions and 23 deletions

View file

@ -22,6 +22,7 @@ import (
var ( var (
rxpSize = regexp.MustCompile(`(?mi)(([0-9.]+)\s*(b|kb|kib|kilobyte|mb|mib|megabyte|gb|gib|gigabyte|tb|tib|terabyte))`) rxpSize = regexp.MustCompile(`(?mi)(([0-9.]+)\s*(b|kb|kib|kilobyte|mb|mib|megabyte|gb|gib|gigabyte|tb|tib|terabyte))`)
rxpFreeleech = regexp.MustCompile(`(?mi)(\bfreeleech\b)`) rxpFreeleech = regexp.MustCompile(`(?mi)(\bfreeleech\b)`)
rxpHTML = regexp.MustCompile(`(?mi)<.*?>`)
) )
type RSSJob struct { type RSSJob struct {
@ -190,7 +191,7 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
} }
if element.ContentLength > 0 { if element.ContentLength > 0 {
if uint64(element.ContentLength) != rls.Size { if uint64(element.ContentLength) > rls.Size {
rls.Size = uint64(element.ContentLength) rls.Size = uint64(element.ContentLength)
} }
} }
@ -210,10 +211,8 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
rls.Description = item.Description rls.Description = item.Description
if rls.Size == 0 { if rls.Size == 0 {
hrSize := readSizeFromDescription(item.Description) readSizeFromDescription(item.Description, rls)
rls.ParseSizeBytesString(hrSize) j.Log.Trace().Msgf("Set new size %d from description", rls.Size)
j.Log.Trace().Msgf("Set new size %d from description %s", rls.Size, hrSize)
} }
} }
@ -326,13 +325,11 @@ func isFreeleech(str []string) bool {
} }
// readSizeFromDescription get size from description // readSizeFromDescription get size from description
func readSizeFromDescription(str string) string { func readSizeFromDescription(str string, r *domain.Release) {
matches := rxpSize.FindStringSubmatch(str) clean := rxpHTML.ReplaceAllString(str, " ")
if matches == nil { for _, sz := range rxpSize.FindAllString(clean, -1) {
return "" r.ParseSizeBytesString(sz)
} }
return matches[1]
} }
// itemCustomElement // itemCustomElement

View file

@ -10,6 +10,7 @@ import (
"github.com/autobrr/autobrr/internal/domain" "github.com/autobrr/autobrr/internal/domain"
"github.com/autobrr/autobrr/internal/release" "github.com/autobrr/autobrr/internal/release"
"github.com/dustin/go-humanize"
"github.com/mmcdole/gofeed" "github.com/mmcdole/gofeed"
"github.com/rs/zerolog" "github.com/rs/zerolog"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
@ -221,25 +222,70 @@ func Test_isMaxAge(t *testing.T) {
} }
func Test_readSizeFromDescription(t *testing.T) { func Test_readSizeFromDescription(t *testing.T) {
type args struct { t.Parallel()
str string
}
tests := []struct { tests := []struct {
name string name string
args args str string
want string want string
}{ }{
{name: "size", args: args{"Size: 12GB"}, want: "12GB"}, {
{name: "size_1", args: args{"Size: 12 GB"}, want: "12 GB"}, name: "with size in GB",
{name: "size_2", args: args{"Size: 12 GiB"}, want: "12 GiB"}, str: "Size: 12GB",
{name: "size_3", args: args{"Size: 537 MiB"}, want: "537 MiB"}, want: "12GB",
{name: "size_4", args: args{"<strong>Size</strong>: 20.48 GiB<br>"}, want: "20.48 GiB"}, },
{name: "size_5", args: args{"file.name-GROUP / 20.48 GiB / x265"}, want: "20.48 GiB"}, {
{name: "size_6", args: args{"<strong>Uploaded</strong>: 38 minutes ago<br>"}, want: ""}, name: "with size in GB with space",
str: "Size: 12 GB",
want: "12GB",
},
{
name: "with size in GiB",
str: "Size: 12 GiB",
want: "12GiB",
},
{
name: "with size in MiB",
str: "Size: 537 MiB",
want: "537MiB",
},
{
name: "with HTML tags",
str: "<strong>Size</strong>: 20.48 GiB<br>",
want: "20.48GiB",
},
{
name: "with additional text",
str: "file.name-GROUP / 20.48 GiB / x265",
want: "20.48GiB",
},
{
name: "without size info",
str: "<strong>Uploaded</strong>: 38 minutes ago<br>",
want: "0B",
},
{
name: "multiple sizes",
str: "<strong>Uploaded</strong>: 38B minutes ago<br>Size: 32GB",
want: "32GB",
},
} }
for _, tt := range tests { for _, tt := range tests {
tt := tt
t.Run(tt.name, func(t *testing.T) { t.Run(tt.name, func(t *testing.T) {
assert.Equalf(t, tt.want, readSizeFromDescription(tt.args.str), "readSizeFromDescription(%v)", tt.args.str) t.Parallel()
wantBytes, err := humanize.ParseBytes(tt.want)
if err != nil {
t.Fatalf("Failed to parse size string %q: %v", tt.want, err)
}
r := &domain.Release{}
readSizeFromDescription(tt.str, r)
if r.Size != wantBytes {
t.Errorf("readSizeFromDescription(%q) got %v bytes, want %v bytes", tt.str, r.Size, wantBytes)
}
}) })
} }
} }