fix(sanitize): improve handling of newlines, tabs, etc. (#1733)

* fix(sanitize): filter properly \t,,, / etc

* jesus christ.

* bah. probably helps if a human reads the tests.

* k done.

* should be safe now?

* edna?

* real life man
This commit is contained in:
Kyle Sanderson 2024-09-19 07:44:19 -07:00 committed by GitHub
parent a4452e4fdc
commit e9cd6b0049
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 172 additions and 16 deletions

View file

@ -890,8 +890,8 @@ func containsMatchFuzzy(tags []string, filters []string) bool {
if filter == "" {
continue
}
filter = strings.ToLower(filter)
filter = strings.Trim(filter, " ")
// check if line contains * or ?, if so try wildcard match, otherwise try substring match
a := strings.ContainsAny(filter, "?|*")
if a {
@ -916,15 +916,14 @@ func containsMatch(tags []string, filters []string) bool {
continue
}
tag = strings.ToLower(tag)
tag = strings.Trim(tag, " ")
clear(advanced)
for _, filter := range filters {
if filter == "" {
continue
}
filter = strings.ToLower(filter)
filter = strings.Trim(filter, " ")
// check if line contains * or ?, if so try wildcard match, otherwise try substring match
a := strings.ContainsAny(filter, "?|*")
if a {
@ -948,7 +947,6 @@ func containsAllMatch(tags []string, filters []string) bool {
continue
}
filter = strings.ToLower(filter)
filter = strings.Trim(filter, " ")
found := false
wildFilter := strings.ContainsAny(filter, "?|*")
@ -958,7 +956,6 @@ func containsAllMatch(tags []string, filters []string) bool {
continue
}
tag = strings.ToLower(tag)
tag = strings.Trim(tag, " ")
if tag == filter {
found = true
@ -990,7 +987,6 @@ func containsMatchBasic(tags []string, filters []string) bool {
continue
}
filter = strings.ToLower(filter)
filter = strings.Trim(filter, " ")
if tag == filter {
return true
@ -1008,14 +1004,14 @@ func containsAnySlice(tags []string, filters []string) bool {
continue
}
tag = strings.ToLower(tag)
clear(advanced)
clear(advanced)
for _, filter := range filters {
if filter == "" {
continue
}
filter = strings.ToLower(filter)
filter = strings.Trim(filter, " ")
// check if line contains * or ?, if so try wildcard match, otherwise try substring match
a := strings.ContainsAny(filter, "?|*")
if a {
@ -1038,7 +1034,6 @@ func checkFreeleechPercent(announcePercent int, filterPercent string) bool {
for _, s := range filters {
s = strings.Replace(s, "%", "", -1)
s = strings.Trim(s, " ")
if strings.Contains(s, "-") {
minMax := strings.Split(s, "-")
@ -1085,7 +1080,6 @@ func matchHDR(releaseValues []string, filterValues []string) bool {
continue
}
filter = strings.ToLower(filter)
filter = strings.Trim(filter, " ")
parts := strings.Split(filter, " ")
if len(parts) == 2 {

View file

@ -1914,7 +1914,7 @@ func TestFilter_CheckFilter1(t *testing.T) {
{
name: "test_43",
fields: fields{
Shows: ",Dutchess,Preacher",
Shows: ",Dutchess, preacher",
Seasons: "1",
Episodes: "0",
Resolutions: []string{"2160p"},
@ -1990,6 +1990,8 @@ func TestFilter_CheckFilter1(t *testing.T) {
Indexers: tt.fields.Indexers,
Downloads: tt.fields.Downloads,
}
f.Sanitize()
tt.args.r.ParseString(tt.args.r.TorrentName)
rejections, match := f.CheckFilter(tt.args.r)
assert.Equalf(t, tt.wantRejections, rejections, "CheckFilter(%v)", tt.args.r)

View file

@ -1,6 +1,8 @@
package sanitize
import "strings"
import (
"strings"
)
func String(str string) string {
str = strings.TrimSpace(str)
@ -8,12 +10,48 @@ func String(str string) string {
}
// FilterString normalizes a user-supplied filter list into a clean,
// comma-separated string.
//
// Carriage returns, newlines and vertical tabs are turned into commas, tabs
// into spaces, and form feeds are dropped. Afterwards runs of spaces are
// collapsed to a single space, spaces adjacent to a comma are removed, runs of
// commas are collapsed to a single comma, and leading/trailing commas and
// spaces are trimmed. An input that holds only separators yields "".
func FilterString(str string) string {
	// Strip surrounding whitespace up front; unlike the final Trim this also
	// removes Unicode whitespace at the edges.
	str = strings.TrimSpace(str)

	// Map every line-like separator onto the canonical "," separator.
	str = strings.ReplaceAll(str, "\r", ",")
	str = strings.ReplaceAll(str, "\n", ",")
	str = strings.ReplaceAll(str, "\v", ",")
	str = strings.ReplaceAll(str, "\t", " ")
	str = strings.ReplaceAll(str, "\f", "")

	// Order matters: spaces are collapsed first so that at most one space can
	// sit on either side of a comma, then those spaces are dropped, and only
	// then are the now-adjacent commas merged.
	str = repeatedReplaceAll(str, "  ", " ")
	str = repeatedReplaceAll(str, ", ", ",")
	str = repeatedReplaceAll(str, " ,", ",")
	str = repeatedReplaceAll(str, ",,", ",")

	return strings.Trim(str, ", ")
}

// repeatedReplaceAll applies strings.ReplaceAll(src, old, repl) over and over
// until a pass no longer changes the length of src, and returns the result.
//
// A single ReplaceAll pass is not enough to collapse runs (",,,," becomes ",,"
// after one pass), hence the loop. Callers must pass a repl that is shorter
// than old and does not re-introduce it; otherwise the loop may not terminate.
func repeatedReplaceAll(src, old, repl string) string {
	for prevLen := 0; prevLen != len(src); {
		prevLen = len(src)
		src = strings.ReplaceAll(src, old, repl)
	}
	return src
}
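/*
Alternative regexp-based implementation of FilterString, kept for reference
only; it is commented out and not compiled.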
var interestingChars = regexp.MustCompile(`[^,\r\n\t\f\v]+`)
func FilterString(str string) string {
str = strings.Join(interestingChars.FindAllString(str, -1), ",")
for i := 0; i != len(str); {
i = len(str)
str = strings.ReplaceAll(str, " ", " ")
}
str = strings.ReplaceAll(str, " ,", ",")
str = strings.ReplaceAll(str, ", ", ",")
for i := 0; i != len(str); {
i = len(str)
str = strings.ReplaceAll(str, ",,", ",")
}
str = strings.Trim(str, ", ")
return str
}
*/

View file

@ -0,0 +1,122 @@
package sanitize
import (
"testing"
)
// TestStringAndFilterString runs String and FilterString over the same table
// of inputs and checks each result against its own expected value, one
// subtest per table entry.
func TestStringAndFilterString(t *testing.T) {
	type testCase struct {
		name       string
		input      string
		wantString string // expected result of String(input)
		wantFilter string // expected result of FilterString(input)
	}

	cases := []testCase{
		{"No Whitespace", "Hello", "Hello", "Hello"},
		{"Leading and Trailing Spaces", " Hello World ", "Hello World", "Hello World"},
		{"Multiple Words with Tabs", "Hello\tWorld", "Hello\tWorld", "Hello World"},
		{"Comma Separation", "Hello,World", "Hello,World", "Hello,World"},
		{"Newlines and Special Characters", "Hello\nWorld\r\nTest", "Hello\nWorld\r\nTest", "Hello,World,Test"},
		{"Empty String", "", "", ""},
		{"Whitespace Only", " ", "", ""},
		{"Form Feeds and Vertical Tabs", "Hello\fWorld\vTest", "Hello\fWorld\vTest", "HelloWorld,Test"},
		{"Multiple Special Characters", "Test,\nWorld\tForm\fFeed\vVertical", "Test,\nWorld\tForm\fFeed\vVertical", "Test,World FormFeed,Vertical"},
		{"Whitespace with Newlines and Tabs", " \n\t Test \n World \t ", "Test \n World", "Test,World"},
		{"Combination of Special Characters", "\t\n\fTest\v,World\n", "Test\v,World", "Test,World"},
		{"Special Characters Only", "\r\n\t\f\v", "", ""},
		{"No Interesting Characters", ",\n,\r,\t,\f,\v,", ",\n,\r,\t,\f,\v,", ""},
		{"Complex String with Symbols", "Hello @ World!", "Hello @ World!", "Hello @ World!"},
		{"Whitespace Between Words", "Hello World", "Hello World", "Hello World"},
		{"To the Moon Commas", "Hello,,,,,World", "Hello,,,,,World", "Hello,World"},
	}

	for idx, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// The case index is included in the message to make a failing
			// entry easy to locate in the table.
			gotString := String(tc.input)
			if gotString != tc.wantString {
				t.Errorf("%d String() = %q, want %q", idx, gotString, tc.wantString)
			}

			gotFilter := FilterString(tc.input)
			if gotFilter != tc.wantFilter {
				t.Errorf("%d FilterString() = %q, want %q", idx, gotFilter, tc.wantFilter)
			}
		})
	}
}