From 4f20b2b3f690e1ffec993fbb46852550df2c69ce Mon Sep 17 00:00:00 2001
From: soup
Date: Tue, 14 Nov 2023 21:41:46 +0100
Subject: [PATCH] feat(tests): verify external autobrr.com URLs (#1253)

* feat(tests): Verify autobrr.com URLs

* test: drop body content check

* feat: Improved URL Scanning

* cleaned up comments

* refactor: enhance URL checker for readability and flexibility

- Introduce AutobrrURLChecker struct to group related variables and functions
- Replace ioutil.ReadFile with os.ReadFile for reading files
- Adjust HTTP client setup and request creation for better error handling
- Trim trailing slashes from URLs and deduplicate URLs before making requests
- Reduce sleep duration between requests to 500ms

* fix: move test into test/docs
---
 test/docs/docs_test.go | 117 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 test/docs/docs_test.go

diff --git a/test/docs/docs_test.go b/test/docs/docs_test.go
new file mode 100644
index 0000000..06e0cef
--- /dev/null
+++ b/test/docs/docs_test.go
@@ -0,0 +1,117 @@
+package http
+
+import (
+	"fmt"
+	"io/fs"
+	"net/http"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"testing"
+	"time"
+)
+
+type AutobrrURLChecker struct {
+	BasePath        string
+	AutobrrURLRegex []*regexp.Regexp
+	ValidExtensions map[string]bool
+	SleepDuration   time.Duration
+}
+
+func NewAutobrrURLChecker() *AutobrrURLChecker {
+	return &AutobrrURLChecker{
+		BasePath: "../..", // Base directory to start scanning from
+		AutobrrURLRegex: []*regexp.Regexp{ // Regular expressions to match URLs for checking
+			regexp.MustCompile(`https?://autobrr\.com/[^ \s"')]+`),
+		},
+		ValidExtensions: map[string]bool{ // File extensions to be checked
+			".go":  true,
+			".tsx": true,
+			".md":  true,
+			".yml": true,
+		},
+		SleepDuration: 500 * time.Millisecond, // Duration to wait between requests to avoid rate limiting
+		// I could not find any information from Netlify about acceptable use here.
+	}
+}
+
+func processFile(filePath string, checker *AutobrrURLChecker) ([]string, error) {
+	content, err := os.ReadFile(filePath)
+	if err != nil {
+		return nil, fmt.Errorf("error reading file %s: %w", filePath, err)
+	}
+
+	var allURLMatches []string
+	for _, regex := range checker.AutobrrURLRegex {
+		urlmatches := regex.FindAllString(string(content), -1)
+		allURLMatches = append(allURLMatches, urlmatches...)
+	}
+
+	return allURLMatches, nil
+}
+
+func TestAutobrrURLsInRepository(t *testing.T) {
+	checker := NewAutobrrURLChecker()
+	uniqueURLSet := make(map[string]bool)
+
+	err := filepath.WalkDir(checker.BasePath, func(path string, entry fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if entry.IsDir() || !checker.ValidExtensions[filepath.Ext(path)] {
+			return nil
+		}
+
+		fileURLs, err := processFile(path, checker)
+		if err != nil {
+			t.Errorf("Error processing file %s: %v", path, err)
+			return err
+		}
+
+		for _, url := range fileURLs {
+			normalizedURL := strings.TrimRight(strings.Split(url, "#")[0], "/") // Trim the URL by removing any trailing slashes and any URL fragments.
+			uniqueURLSet[normalizedURL] = true
+		}
+		return nil
+	})
+
+	if err != nil {
+		t.Errorf("Error walking the repository directory tree: %v", err)
+		return
+	}
+
+	client := &http.Client{
+		Timeout: time.Second * 10,
+	}
+
+	// Use a slice to store the URLs after they are de-duplicated
+	deduplicatedURLs := make([]string, 0, len(uniqueURLSet))
+	for url := range uniqueURLSet {
+		deduplicatedURLs = append(deduplicatedURLs, url)
+	}
+
+	for _, url := range deduplicatedURLs {
+		t.Run(url, func(t *testing.T) {
+			req, err := http.NewRequest("GET", url, nil)
+			if err != nil {
+				t.Errorf("Failed to create request for url %s: %v", url, err)
+				return
+			}
+			req.Header.Set("User-Agent", "autobrr")
+
+			resp, err := client.Do(req)
+			if err != nil {
+				t.Errorf("Failed to GET url %s: %v", url, err)
+				return
+			}
+			defer resp.Body.Close()
+
+			if resp.StatusCode == http.StatusNotFound {
+				t.Errorf("URL %s returned 404 Not Found", url)
+			}
+
+			time.Sleep(checker.SleepDuration)
+		})
+	}
+}
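
Note for reviewers (not part of the patch): the scan-and-normalize flow the test relies on can be exercised on its own. Below is a minimal standalone sketch of the same idea; the scanned file name, the main-package layout, and the printed output are assumptions made purely for illustration, not anything the patch ships.

// standalone sketch, illustrative only
package main

import (
	"fmt"
	"os"
	"regexp"
	"strings"
)

// Same pattern the test compiles for autobrr.com links.
var autobrrURL = regexp.MustCompile(`https?://autobrr\.com/[^ \s"')]+`)

func main() {
	// Scan a single file instead of walking the whole repository (assumed file name).
	content, err := os.ReadFile("README.md")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	seen := make(map[string]bool)
	for _, u := range autobrrURL.FindAllString(string(content), -1) {
		// Mirror the test's normalization: drop the URL fragment, trim trailing slashes.
		normalized := strings.TrimRight(strings.Split(u, "#")[0], "/")
		if !seen[normalized] {
			seen[normalized] = true
			fmt.Println(normalized)
		}
	}
}

With the layout introduced by this patch, the test itself would typically be run from the module root with something like: go test ./test/docs/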