From 4f20b2b3f690e1ffec993fbb46852550df2c69ce Mon Sep 17 00:00:00 2001
From: soup
Date: Tue, 14 Nov 2023 21:41:46 +0100
Subject: [PATCH] feat(tests): verify external autobrr.com URLs (#1253)

* feat(tests): Verify autobrr.com URLs

* test: drop body content check

* feat: Improved URL Scanning

* cleaned up comments

* refactor: enhance URL checker for readability and flexibility

- Introduce AutobrrURLChecker struct to group related variables and functions
- Replace ioutil.ReadFile with os.ReadFile for reading files
- Adjust HTTP client setup and request creation for better error handling
- Trim trailing slashes from URLs and deduplicate URLs before making requests
- Reduce sleep duration between requests to 500ms

* fix: move test into test/docs
---
 test/docs/docs_test.go | 117 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)
 create mode 100644 test/docs/docs_test.go

diff --git a/test/docs/docs_test.go b/test/docs/docs_test.go
new file mode 100644
index 0000000..06e0cef
--- /dev/null
+++ b/test/docs/docs_test.go
@@ -0,0 +1,117 @@
+package http
+
+import (
+	"fmt"
+	"io/fs"
+	"net/http"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strings"
+	"testing"
+	"time"
+)
+
+type AutobrrURLChecker struct {
+	BasePath        string
+	AutobrrURLRegex []*regexp.Regexp
+	ValidExtensions map[string]bool
+	SleepDuration   time.Duration
+}
+
+func NewAutobrrURLChecker() *AutobrrURLChecker {
+	return &AutobrrURLChecker{
+		BasePath: "../..", // Base directory to start scanning from
+		AutobrrURLRegex: []*regexp.Regexp{ // Regular expressions to match URLs for checking
+			regexp.MustCompile(`https?://autobrr\.com/[^ \s"')]+`),
+		},
+		ValidExtensions: map[string]bool{ // File extensions to be checked
+			".go":  true,
+			".tsx": true,
+			".md":  true,
+			".yml": true,
+		},
+		SleepDuration: 500 * time.Millisecond, // Duration to wait between requests to avoid rate limiting
+		// I could not find any information from Netlify about acceptable use here.
+	}
+}
+
+func processFile(filePath string, checker *AutobrrURLChecker) ([]string, error) {
+	content, err := os.ReadFile(filePath)
+	if err != nil {
+		return nil, fmt.Errorf("error reading file %s: %w", filePath, err)
+	}
+
+	var allURLMatches []string
+	for _, regex := range checker.AutobrrURLRegex {
+		urlmatches := regex.FindAllString(string(content), -1)
+		allURLMatches = append(allURLMatches, urlmatches...)
+	}
+
+	return allURLMatches, nil
+}
+
+func TestAutobrrURLsInRepository(t *testing.T) {
+	checker := NewAutobrrURLChecker()
+	uniqueURLSet := make(map[string]bool)
+
+	err := filepath.WalkDir(checker.BasePath, func(path string, entry fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+		if entry.IsDir() || !checker.ValidExtensions[filepath.Ext(path)] {
+			return nil
+		}
+
+		fileURLs, err := processFile(path, checker)
+		if err != nil {
+			t.Errorf("Error processing file %s: %v", path, err)
+			return err
+		}
+
+		for _, url := range fileURLs {
+			normalizedURL := strings.TrimRight(strings.Split(url, "#")[0], "/") // Trim the URL by removing any trailing slashes and any URL fragments.
+			uniqueURLSet[normalizedURL] = true
+		}
+		return nil
+	})
+
+	if err != nil {
+		t.Errorf("Error walking the repository directory tree: %v", err)
+		return
+	}
+
+	client := &http.Client{
+		Timeout: time.Second * 10,
+	}
+
+	// Use a slice to store the URLs after they are de-duplicated
+	deduplicatedURLs := make([]string, 0, len(uniqueURLSet))
+	for url := range uniqueURLSet {
+		deduplicatedURLs = append(deduplicatedURLs, url)
+	}
+
+	for _, url := range deduplicatedURLs {
+		t.Run(url, func(t *testing.T) {
+			req, err := http.NewRequest("GET", url, nil)
+			if err != nil {
+				t.Errorf("Failed to create request for url %s: %v", url, err)
+				return
+			}
+			req.Header.Set("User-Agent", "autobrr")
+
+			resp, err := client.Do(req)
+			if err != nil {
+				t.Errorf("Failed to GET url %s: %v", url, err)
+				return
+			}
+			defer resp.Body.Close()
+
+			if resp.StatusCode == http.StatusNotFound {
+				t.Errorf("URL %s returned 404 Not Found", url)
+			}
+
+			time.Sleep(checker.SleepDuration)
+		})
+	}
+}
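
Note for reviewers (not part of the patch): the scan-and-normalize flow the test relies on can be exercised on its own. Below is a minimal standalone sketch of the same idea; the scanned file name, the main-package layout, and the printed output are assumptions made purely for illustration, not anything the patch ships.

// standalone sketch, illustrative only
package main

import (
	"fmt"
	"os"
	"regexp"
	"strings"
)

// Same pattern the test compiles for autobrr.com links.
var autobrrURL = regexp.MustCompile(`https?://autobrr\.com/[^ \s"')]+`)

func main() {
	// Scan a single file instead of walking the whole repository (assumed file name).
	content, err := os.ReadFile("README.md")
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}

	seen := make(map[string]bool)
	for _, u := range autobrrURL.FindAllString(string(content), -1) {
		// Mirror the test's normalization: drop the URL fragment, trim trailing slashes.
		normalized := strings.TrimRight(strings.Split(u, "#")[0], "/")
		if !seen[normalized] {
			seen[normalized] = true
			fmt.Println(normalized)
		}
	}
}

With the layout introduced by this patch, the test itself would typically be run from the module root with something like: go test ./test/docs/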