fix(sanitize): improve handling for newline tabs etc (#1733)

* fix(sanitize): filter properly \t,,, / etc * jesus christ. * bah. probably helps if a human reads the tests. * k done. * should be safe now? * edna? * real life man
2025-07-22 16:29:12 +00:00 · 2024-09-19 07:44:19 -07:00 · 2024-09-19 07:44:19 -07:00 · e9cd6b0049
commit e9cd6b0049
parent a4452e4fdc
4 changed files with 172 additions and 16 deletions
--- a/internal/domain/filter.go
+++ b/internal/domain/filter.go
@ -890,8 +890,8 @@ func containsMatchFuzzy(tags []string, filters []string) bool {
 			if filter == "" {
 				continue
 			}
+
 			filter = strings.ToLower(filter)
-			filter = strings.Trim(filter, " ")
 			// check if line contains * or ?, if so try wildcard match, otherwise try substring match
 			a := strings.ContainsAny(filter, "?|*")
 			if a {
@ -916,15 +916,14 @@ func containsMatch(tags []string, filters []string) bool {
 			continue
 		}
 		tag = strings.ToLower(tag)
-		tag = strings.Trim(tag, " ")

 		clear(advanced)
 		for _, filter := range filters {
 			if filter == "" {
 				continue
 			}
+
 			filter = strings.ToLower(filter)
-			filter = strings.Trim(filter, " ")
 			// check if line contains * or ?, if so try wildcard match, otherwise try substring match
 			a := strings.ContainsAny(filter, "?|*")
 			if a {
@ -948,7 +947,6 @@ func containsAllMatch(tags []string, filters []string) bool {
 			continue
 		}
 		filter = strings.ToLower(filter)
-		filter = strings.Trim(filter, " ")
 		found := false

 		wildFilter := strings.ContainsAny(filter, "?|*")
@ -958,7 +956,6 @@ func containsAllMatch(tags []string, filters []string) bool {
 				continue
 			}
 			tag = strings.ToLower(tag)
-			tag = strings.Trim(tag, " ")

 			if tag == filter {
 				found = true
@ -990,7 +987,6 @@ func containsMatchBasic(tags []string, filters []string) bool {
 				continue
 			}
 			filter = strings.ToLower(filter)
-			filter = strings.Trim(filter, " ")

 			if tag == filter {
 				return true
@ -1008,14 +1004,14 @@ func containsAnySlice(tags []string, filters []string) bool {
 			continue
 		}
 		tag = strings.ToLower(tag)
-		clear(advanced)

+		clear(advanced)
 		for _, filter := range filters {
 			if filter == "" {
 				continue
 			}
+
 			filter = strings.ToLower(filter)
-			filter = strings.Trim(filter, " ")
 			// check if line contains * or ?, if so try wildcard match, otherwise try substring match
 			a := strings.ContainsAny(filter, "?|*")
 			if a {
@ -1038,7 +1034,6 @@ func checkFreeleechPercent(announcePercent int, filterPercent string) bool {

 	for _, s := range filters {
 		s = strings.Replace(s, "%", "", -1)
-		s = strings.Trim(s, " ")

 		if strings.Contains(s, "-") {
 			minMax := strings.Split(s, "-")
@ -1085,7 +1080,6 @@ func matchHDR(releaseValues []string, filterValues []string) bool {
 			continue
 		}
 		filter = strings.ToLower(filter)
-		filter = strings.Trim(filter, " ")

 		parts := strings.Split(filter, " ")
 		if len(parts) == 2 {
--- a/internal/domain/filter_test.go
+++ b/internal/domain/filter_test.go
@ -1914,7 +1914,7 @@ func TestFilter_CheckFilter1(t *testing.T) {
 		{
 			name: "test_43",
 			fields: fields{
-				Shows:       ",Dutchess,Preacher",
+				Shows:       ",Dutchess, preacher",
 				Seasons:     "1",
 				Episodes:    "0",
 				Resolutions: []string{"2160p"},
@ -1990,6 +1990,8 @@ func TestFilter_CheckFilter1(t *testing.T) {
 				Indexers:             tt.fields.Indexers,
 				Downloads:            tt.fields.Downloads,
 			}
+
+			f.Sanitize()
 			tt.args.r.ParseString(tt.args.r.TorrentName)
 			rejections, match := f.CheckFilter(tt.args.r)
 			assert.Equalf(t, tt.wantRejections, rejections, "CheckFilter(%v)", tt.args.r)
--- a/pkg/sanitize/sanitize.go
+++ b/pkg/sanitize/sanitize.go
@ -1,6 +1,8 @@
 package sanitize

-import "strings"
+import (
+	"strings"
+)

 func String(str string) string {
 	str = strings.TrimSpace(str)
@ -8,12 +10,48 @@ func String(str string) string {
 }

+// FilterString normalizes a comma-separated filter value.
+//
+// \r, \n and \v are turned into commas, \t into a space, and \f is removed.
+// Runs of spaces and runs of commas are then collapsed to a single character,
+// spaces next to a comma are dropped, and leading/trailing commas and spaces
+// are trimmed.
 func FilterString(str string) string {
-	str = strings.TrimSpace(str)
-	str = strings.Trim(str, ",")
-
 	// replace newline with comma
+	str = strings.ReplaceAll(str, "\r", ",")
 	str = strings.ReplaceAll(str, "\n", ",")
-	str = strings.ReplaceAll(str, ",,", ",")
+	str = strings.ReplaceAll(str, "\v", ",")
+	str = strings.ReplaceAll(str, "\t", " ")
+	str = strings.ReplaceAll(str, "\f", "")

+	// commas are collapsed last: stripping the spaces around them can leave
+	// two commas adjacent (", ," becomes ",,")
+	str = repeatedReplaceAll(str, "  ", " ")
+	str = repeatedReplaceAll(str, ", ", ",")
+	str = repeatedReplaceAll(str, " ,", ",")
+	str = repeatedReplaceAll(str, ",,", ",")
+
+	// drop whatever separators and padding remain at either end
+	str = strings.Trim(str, ", ")
 	return str
 }
+
+// repeatedReplaceAll applies strings.ReplaceAll(src, old, new) over and over
+// until a pass no longer changes the length of src, so that a run such as
+// ",,,,," collapses all the way down to a single ",".
+//
+// Progress is detected by length, so repetition is only meaningful for
+// substitutions that shrink the string. Two kinds of input can never settle
+// and are therefore not repeated:
+//   - an empty old: there is nothing to search for, src is returned as is;
+//   - a new that contains old: every pass would re-create a match, so exactly
+//     one pass is applied.
+func repeatedReplaceAll(src, old, new string) string {
+	if old == "" {
+		// ReplaceAll would insert new between every rune, growing src forever
+		return src
+	}
+
+	if strings.Contains(new, old) {
+		return strings.ReplaceAll(src, old, new)
+	}
+
+	for {
+		next := strings.ReplaceAll(src, old, new)
+		if len(next) == len(src) {
+			// same length as before the pass: treat as settled
+			return next
+		}
+
+		src = next
+	}
+}
+
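+/*
+Unused regexp-based draft of FilterString, kept for reference only. It is not
+equivalent to the active FilterString: here \t and \f act as item separators
+(they end up as commas), whereas the active version turns \t into a space and
+removes \f.
+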
+var interestingChars = regexp.MustCompile(`[^,\r\n\t\f\v]+`)
+
+func FilterString(str string) string {
+	str = strings.Join(interestingChars.FindAllString(str, -1), ",")
+	for i := 0; i != len(str); {
+		i = len(str)
+		str = strings.ReplaceAll(str, "  ", " ")
+	}
+
+	str = strings.ReplaceAll(str, " ,", ",")
+	str = strings.ReplaceAll(str, ", ", ",")
+	for i := 0; i != len(str); {
+		i = len(str)
+		str = strings.ReplaceAll(str, ",,", ",")
+	}
+	str = strings.Trim(str, ", ")
+	return str
+}
+*/
--- a/pkg/sanitize/sanitize_test.go
+++ b/pkg/sanitize/sanitize_test.go
@ -0,0 +1,122 @@
+package sanitize
+
+import (
+	"testing"
+)
+
+// TestStringAndFilterString feeds every input to both String and FilterString
+// and compares each result with the value listed for it in the table.
+func TestStringAndFilterString(t *testing.T) {
+	tests := []struct {
+		name           string
+		input          string
+		expectedString string // want from String(input)
+		expectedFilter string // want from FilterString(input)
+	}{
+		{
+			name:           "No Whitespace",
+			input:          "Hello",
+			expectedString: "Hello",
+			expectedFilter: "Hello",
+		},
+		{
+			name:           "Leading and Trailing Spaces",
+			input:          "  Hello World  ",
+			expectedString: "Hello World",
+			expectedFilter: "Hello World",
+		},
+		{
+			name:           "Multiple Words with Tabs",
+			input:          "Hello\tWorld",
+			expectedString: "Hello\tWorld",
+			expectedFilter: "Hello World",
+		},
+		{
+			name:           "Comma Separation",
+			input:          "Hello,World",
+			expectedString: "Hello,World",
+			expectedFilter: "Hello,World",
+		},
+		{
+			name:           "Newlines and Special Characters",
+			input:          "Hello\nWorld\r\nTest",
+			expectedString: "Hello\nWorld\r\nTest",
+			expectedFilter: "Hello,World,Test",
+		},
+		{
+			name:           "Empty String",
+			input:          "",
+			expectedString: "",
+			expectedFilter: "",
+		},
+		{
+			name:           "Whitespace Only",
+			input:          "    ",
+			expectedString: "",
+			expectedFilter: "",
+		},
+		{
+			name:           "Form Feeds and Vertical Tabs",
+			input:          "Hello\fWorld\vTest",
+			expectedString: "Hello\fWorld\vTest",
+			expectedFilter: "HelloWorld,Test",
+		},
+		{
+			name:           "Multiple Special Characters",
+			input:          "Test,\nWorld\tForm\fFeed\vVertical",
+			expectedString: "Test,\nWorld\tForm\fFeed\vVertical",
+			expectedFilter: "Test,World FormFeed,Vertical",
+		},
+		{
+			name:           "Whitespace with Newlines and Tabs",
+			input:          " \n\t Test \n World \t ",
+			expectedString: "Test \n World",
+			expectedFilter: "Test,World",
+		},
+		{
+			name:           "Combination of Special Characters",
+			input:          "\t\n\fTest\v,World\n",
+			expectedString: "Test\v,World",
+			expectedFilter: "Test,World",
+		},
+		{
+			name:           "Special Characters Only",
+			input:          "\r\n\t\f\v",
+			expectedString: "",
+			expectedFilter: "",
+		},
+		{
+			name:           "No Interesting Characters",
+			input:          ",\n,\r,\t,\f,\v,",
+			expectedString: ",\n,\r,\t,\f,\v,",
+			expectedFilter: "",
+		},
+		{
+			name:           "Complex String with Symbols",
+			input:          "Hello @ World!",
+			expectedString: "Hello @ World!",
+			expectedFilter: "Hello @ World!",
+		},
+		{
+			name:           "Whitespace Between Words",
+			input:          "Hello    World",
+			expectedString: "Hello    World",
+			expectedFilter: "Hello World",
+		},
+		{
+			name:           "To the Moon Commas",
+			input:          "Hello,,,,,World",
+			expectedString: "Hello,,,,,World",
+			expectedFilter: "Hello,World",
+		},
+	}
+
+	// one subtest per table entry; i is the entry's index in tests and is only
+	// used to label failure messages
+	for i, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			if got := String(tt.input); got != tt.expectedString {
+				t.Errorf("%d String() = %q, want %q", i, got, tt.expectedString)
+			}
+			if got := FilterString(tt.input); got != tt.expectedFilter {
+				t.Errorf("%d FilterString() = %q, want %q", i, got, tt.expectedFilter)
+			}
+		})
+	}
+}