feat(feeds): improve RSS size parsing (#1367)

* fix(feeds): Parse multiple sizes.

* refactor: Test_pullSizeFromDescription

* refactor: make test human readable

added helper function

* multi

* Agnewwwwww

* .

* humanize

* humanize

---------

Co-authored-by: soup <soup@r4tio.dev>
This commit is contained in:
Kyle Sanderson 2024-01-27 12:52:11 -08:00 committed by GitHub
parent abb7829abe
commit cdd91d27e5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 66 additions and 23 deletions

View file

@ -22,6 +22,7 @@ import (
// Package-level regular expressions, compiled once at init time.
// All three set the `m` and `i` flags; none of the patterns uses ^ or $, so
// only case-insensitivity (`i`) affects what they match.
var (
// rxpSize matches a human-readable size: a run of digits and dots, optional
// whitespace, then a unit (b, kb/kib/kilobyte, mb/mib/megabyte,
// gb/gib/gigabyte, tb/tib/terabyte). readSizeFromDescription runs it over
// the feed item description.
// NOTE(review): the unit is not followed by a word boundary, so the bare
// `b` alternative also matches the start of a longer word (e.g. "264 b" in
// "x264 bluray"), and `[0-9.]+` accepts dot-only or multi-dot runs.
rxpSize = regexp.MustCompile(`(?mi)(([0-9.]+)\s*(b|kb|kib|kilobyte|mb|mib|megabyte|gb|gib|gigabyte|tb|tib|terabyte))`)
// rxpFreeleech matches the standalone word "freeleech" in any letter case.
rxpFreeleech = regexp.MustCompile(`(?mi)(\bfreeleech\b)`)
// rxpHTML matches the shortest `<...>` span. readSizeFromDescription replaces
// each match with a space before looking for sizes.
// NOTE(review): `.` does not match a newline without the `s` flag, so a tag
// split across lines is left in place.
rxpHTML = regexp.MustCompile(`(?mi)<.*?>`)
)
type RSSJob struct {
@ -190,7 +191,7 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
}
if element.ContentLength > 0 {
if uint64(element.ContentLength) != rls.Size {
if uint64(element.ContentLength) > rls.Size {
rls.Size = uint64(element.ContentLength)
}
}
@ -210,10 +211,8 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
rls.Description = item.Description
if rls.Size == 0 {
hrSize := readSizeFromDescription(item.Description)
rls.ParseSizeBytesString(hrSize)
j.Log.Trace().Msgf("Set new size %d from description %s", rls.Size, hrSize)
readSizeFromDescription(item.Description, rls)
j.Log.Trace().Msgf("Set new size %d from description", rls.Size)
}
}
@ -326,13 +325,11 @@ func isFreeleech(str []string) bool {
}
// readSizeFromDescription gets the size from the description.
func readSizeFromDescription(str string) string {
matches := rxpSize.FindStringSubmatch(str)
if matches == nil {
return ""
func readSizeFromDescription(str string, r *domain.Release) {
clean := rxpHTML.ReplaceAllString(str, " ")
for _, sz := range rxpSize.FindAllString(clean, -1) {
r.ParseSizeBytesString(sz)
}
return matches[1]
}
// itemCustomElement