mirror of
https://github.com/idanoo/autobrr
synced 2025-07-23 08:49:13 +00:00
feat(feeds): improve RSS size parsing (#1367)
* fix(feeds): Parse multiple sizes.
* refactor: Test_pullSizeFromDescription
* refactor: make test human readable added helper function
* multi
* Agnewwwwww
* .
* humanize
* humanize

---------

Co-authored-by: soup <soup@r4tio.dev>
This commit is contained in:
parent
abb7829abe
commit
cdd91d27e5
2 changed files with 66 additions and 23 deletions
|
@ -22,6 +22,7 @@ import (
|
|||
var (
|
||||
rxpSize = regexp.MustCompile(`(?mi)(([0-9.]+)\s*(b|kb|kib|kilobyte|mb|mib|megabyte|gb|gib|gigabyte|tb|tib|terabyte))`)
|
||||
rxpFreeleech = regexp.MustCompile(`(?mi)(\bfreeleech\b)`)
|
||||
rxpHTML = regexp.MustCompile(`(?mi)<.*?>`)
|
||||
)
|
||||
|
||||
type RSSJob struct {
|
||||
|
@ -190,7 +191,7 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
|
|||
}
|
||||
|
||||
if element.ContentLength > 0 {
|
||||
if uint64(element.ContentLength) != rls.Size {
|
||||
if uint64(element.ContentLength) > rls.Size {
|
||||
rls.Size = uint64(element.ContentLength)
|
||||
}
|
||||
}
|
||||
|
@ -210,10 +211,8 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
|
|||
rls.Description = item.Description
|
||||
|
||||
if rls.Size == 0 {
|
||||
hrSize := readSizeFromDescription(item.Description)
|
||||
rls.ParseSizeBytesString(hrSize)
|
||||
|
||||
j.Log.Trace().Msgf("Set new size %d from description %s", rls.Size, hrSize)
|
||||
readSizeFromDescription(item.Description, rls)
|
||||
j.Log.Trace().Msgf("Set new size %d from description", rls.Size)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -326,13 +325,11 @@ func isFreeleech(str []string) bool {
|
|||
}
|
||||
|
||||
// readSizeFromDescription reads the size from the description.
|
||||
func readSizeFromDescription(str string) string {
|
||||
matches := rxpSize.FindStringSubmatch(str)
|
||||
if matches == nil {
|
||||
return ""
|
||||
func readSizeFromDescription(str string, r *domain.Release) {
|
||||
clean := rxpHTML.ReplaceAllString(str, " ")
|
||||
for _, sz := range rxpSize.FindAllString(clean, -1) {
|
||||
r.ParseSizeBytesString(sz)
|
||||
}
|
||||
|
||||
return matches[1]
|
||||
}
|
||||
|
||||
// itemCustomElement
|
||||
|
|
|
@ -10,6 +10,7 @@ import (
|
|||
"github.com/autobrr/autobrr/internal/domain"
|
||||
"github.com/autobrr/autobrr/internal/release"
|
||||
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/mmcdole/gofeed"
|
||||
"github.com/rs/zerolog"
|
||||
"github.com/stretchr/testify/assert"
|
||||
|
@ -221,25 +222,70 @@ func Test_isMaxAge(t *testing.T) {
|
|||
}
|
||||
|
||||
func Test_readSizeFromDescription(t *testing.T) {
|
||||
type args struct {
|
||||
str string
|
||||
}
|
||||
t.Parallel()
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
args args
|
||||
str string
|
||||
want string
|
||||
}{
|
||||
{name: "size", args: args{"Size: 12GB"}, want: "12GB"},
|
||||
{name: "size_1", args: args{"Size: 12 GB"}, want: "12 GB"},
|
||||
{name: "size_2", args: args{"Size: 12 GiB"}, want: "12 GiB"},
|
||||
{name: "size_3", args: args{"Size: 537 MiB"}, want: "537 MiB"},
|
||||
{name: "size_4", args: args{"<strong>Size</strong>: 20.48 GiB<br>"}, want: "20.48 GiB"},
|
||||
{name: "size_5", args: args{"file.name-GROUP / 20.48 GiB / x265"}, want: "20.48 GiB"},
|
||||
{name: "size_6", args: args{"<strong>Uploaded</strong>: 38 minutes ago<br>"}, want: ""},
|
||||
{
|
||||
name: "with size in GB",
|
||||
str: "Size: 12GB",
|
||||
want: "12GB",
|
||||
},
|
||||
{
|
||||
name: "with size in GB with space",
|
||||
str: "Size: 12 GB",
|
||||
want: "12GB",
|
||||
},
|
||||
{
|
||||
name: "with size in GiB",
|
||||
str: "Size: 12 GiB",
|
||||
want: "12GiB",
|
||||
},
|
||||
{
|
||||
name: "with size in MiB",
|
||||
str: "Size: 537 MiB",
|
||||
want: "537MiB",
|
||||
},
|
||||
{
|
||||
name: "with HTML tags",
|
||||
str: "<strong>Size</strong>: 20.48 GiB<br>",
|
||||
want: "20.48GiB",
|
||||
},
|
||||
{
|
||||
name: "with additional text",
|
||||
str: "file.name-GROUP / 20.48 GiB / x265",
|
||||
want: "20.48GiB",
|
||||
},
|
||||
{
|
||||
name: "without size info",
|
||||
str: "<strong>Uploaded</strong>: 38 minutes ago<br>",
|
||||
want: "0B",
|
||||
},
|
||||
{
|
||||
name: "multiple sizes",
|
||||
str: "<strong>Uploaded</strong>: 38B minutes ago<br>Size: 32GB",
|
||||
want: "32GB",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
tt := tt
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
assert.Equalf(t, tt.want, readSizeFromDescription(tt.args.str), "readSizeFromDescription(%v)", tt.args.str)
|
||||
t.Parallel()
|
||||
|
||||
wantBytes, err := humanize.ParseBytes(tt.want)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to parse size string %q: %v", tt.want, err)
|
||||
}
|
||||
|
||||
r := &domain.Release{}
|
||||
readSizeFromDescription(tt.str, r)
|
||||
if r.Size != wantBytes {
|
||||
t.Errorf("readSizeFromDescription(%q) got %v bytes, want %v bytes", tt.str, r.Size, wantBytes)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue