fix(feeds): handle unicode escaped url characters (#1942)

* fix(rss): handle unicode escaped url characters

* refactor: simplify URL encoding function name

Co-authored-by: nuxen <47067662+nuxencs@users.noreply.github.com>

* feat(feeds): sanitize download url

---------

Co-authored-by: nuxen <47067662+nuxencs@users.noreply.github.com>
Co-authored-by: ze0s <ze0s@riseup.net>
This commit is contained in:
soup 2025-01-18 22:27:38 +01:00 committed by GitHub
parent b2be5a703f
commit f308286484
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 118 additions and 3 deletions

View file

@ -16,6 +16,7 @@ import (
"github.com/autobrr/autobrr/internal/proxy"
"github.com/autobrr/autobrr/internal/release"
"github.com/autobrr/autobrr/pkg/errors"
"github.com/autobrr/autobrr/pkg/sanitize"
"github.com/dustin/go-humanize"
"github.com/mmcdole/gofeed"
@ -148,7 +149,7 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
}
if rls.DownloadURL == "" && item.Link != "" {
rls.DownloadURL = item.Link
rls.DownloadURL = sanitize.URLEncoding(item.Link)
}
if rls.DownloadURL != "" {
@ -158,8 +159,8 @@ func (j *RSSJob) processItem(item *gofeed.Item) *domain.Release {
if parentURL, _ := url.Parse(j.URL); parentURL != nil {
parentURL.Path, parentURL.RawPath = "", ""
// unescape the query params for max compatibility
escapedUrl, _ := url.QueryUnescape(parentURL.JoinPath(rls.DownloadURL).String())
downloadURL := sanitize.URLEncoding(rls.DownloadURL)
escapedUrl, _ := url.QueryUnescape(parentURL.JoinPath(downloadURL).String())
rls.DownloadURL = escapedUrl
}
}

View file

@ -452,6 +452,96 @@ func TestRSSJob_processItem(t *testing.T) {
ActionStatus: []domain.ReleaseActionStatus(nil),
},
},
{
name: "unicode_escaped_url_chars",
fields: fields{
Feed: &domain.Feed{
MaxAge: 3600,
Indexer: domain.IndexerMinimal{
ID: 0,
Name: "Mock Feed",
Identifier: "mock-feed",
IdentifierExternal: "Mock Indexer",
},
},
Name: "test feed",
Log: zerolog.Logger{},
URL: "https://fake-feed.com/rss",
Repo: nil,
ReleaseSvc: nil,
attempts: 0,
errors: nil,
JobID: 0,
},
args: args{item: &gofeed.Item{
Title: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP",
Description: `Category: Example
Size: 1.49 GB`,
Link: "https://fake-feed.com\u002fdownload.php\u003fid\u003d00000\u0026hit\u003d1\u0026type\u003dtorrent\u0026name\u003dSome%20Movie%20Title\u0026hash\u003dabc123\u0040group\u003aname\u0023section\u0025test\u002bextra",
GUID: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP",
}},
want: &domain.Release{
ID: 0,
FilterStatus: "PENDING",
Rejections: []string{},
Indexer: domain.IndexerMinimal{0, "Mock Feed", "mock-feed", "Mock Indexer"},
FilterName: "",
Protocol: "torrent",
Implementation: "RSS",
AnnounceType: domain.AnnounceTypeNew,
Timestamp: now,
GroupID: "",
TorrentID: "",
DownloadURL: "https://fake-feed.com/download.php?id=00000&hit=1&type=torrent&name=Some%20Movie%20Title&hash=abc123@group:name#section%test+extra",
TorrentTmpFile: "",
TorrentDataRawBytes: []uint8(nil),
TorrentHash: "",
TorrentName: "Some.Release.Title.2022.09.22.720p.WEB.h264-GROUP",
NormalizedHash: "edfbe552ccde335f34b801e15930bc35",
Size: 1490000000,
Title: "Some Release Title",
Description: "Category: Example\n Size: 1.49 GB",
Category: "",
Season: 0,
Episode: 0,
Year: 2022,
Month: 9,
Day: 22,
Resolution: "720p",
Source: "WEB",
Codec: []string{"H.264"},
Container: "",
HDR: []string(nil),
Audio: []string(nil),
AudioChannels: "",
Group: "GROUP",
Region: "",
Language: []string{},
Proper: false,
Repack: false,
Edition: []string{},
Cut: []string{},
Website: "",
Artists: "",
Type: rls.Episode,
LogScore: 0,
Origin: "",
Tags: []string{},
ReleaseTags: "",
Freeleech: false,
FreeleechPercent: 0,
Bonus: []string(nil),
Uploader: "",
PreTime: "",
Other: []string{},
RawCookie: "",
AdditionalSizeCheckRequired: false,
AdditionalUploaderCheckRequired: false,
FilterID: 0,
Filter: (*domain.Filter)(nil),
ActionStatus: []domain.ReleaseActionStatus(nil),
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {

View file

@ -12,6 +12,30 @@ func String(str string) string {
return str
}
// urlEscapeReplacer rewrites literal \uXXXX escape sequences of reserved URL
// characters into the characters themselves in a single pass over the input.
// Escapes whose code point contains a hex letter are listed in both lowercase
// and uppercase form so that `\u003d` and `\u003D` are treated alike. None of
// the replacement characters can be part of a \uXXXX sequence, so one pass is
// sufficient and no replacement can produce a new match.
var urlEscapeReplacer = strings.NewReplacer(
	`\u0026`, "&",
	`\u003d`, "=", `\u003D`, "=",
	`\u003f`, "?", `\u003F`, "?",
	`\u002f`, "/", `\u002F`, "/",
	`\u003a`, ":", `\u003A`, ":",
	`\u0023`, "#",
	`\u0040`, "@",
	`\u0025`, "%",
	`\u002b`, "+", `\u002B`, "+",
)

// URLEncoding replaces literal unicode escape sequences (a backslash, "u" and
// four hex digits) for the characters & = ? / : # @ % + in str with the
// characters they denote, then trims leading and trailing whitespace.
//
// Any other escape sequence, and all remaining text, is returned unchanged.
func URLEncoding(str string) string {
	return strings.TrimSpace(urlEscapeReplacer.Replace(str))
}
func FilterString(str string) string {
// replace newline with comma
str = strings.ReplaceAll(str, "\r", ",")