autobrr/test/docs/docs_test.go
commit 4f20b2b3f6 by soup, 2023-11-14 21:41:46 +01:00
feat(tests): verify external autobrr.com URLs (#1253)

* feat(tests): Verify autobrr.com URLs
* test: drop body content check
* feat: Improved URL Scanning
* cleaned up comments
* refactor: enhance URL checker for readability and flexibility
  - Introduce AutobrrURLChecker struct to group related variables and functions
  - Replace ioutil.ReadFile with os.ReadFile for reading files
  - Adjust HTTP client setup and request creation for better error handling
  - Trim trailing slashes from URLs and deduplicate URLs before making requests
  - Reduce sleep duration between requests to 500ms
* fix: move test into test/docs


package docs
import (
	"fmt"
	"io/fs"
	"net/http"
	"os"
	"path/filepath"
	"regexp"
	"strings"
	"testing"
	"time"
)
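
// AutobrrURLChecker holds the configuration for scanning the repository:
// where to start walking, which URL patterns to look for, which file
// extensions to scan, and how long to pause between HTTP requests.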
type AutobrrURLChecker struct {
	BasePath        string
	AutobrrURLRegex []*regexp.Regexp
	ValidExtensions map[string]bool
	SleepDuration   time.Duration
}
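
// NewAutobrrURLChecker returns a checker with the defaults used by this
// test: scan from the repository root, match autobrr.com URLs in source
// and documentation files, and throttle requests between checks.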
func NewAutobrrURLChecker() *AutobrrURLChecker {
	return &AutobrrURLChecker{
		BasePath: "../..", // Base directory to start scanning from
		AutobrrURLRegex: []*regexp.Regexp{ // Regular expressions to match URLs for checking
			regexp.MustCompile(`https?://autobrr\.com/[^\s"')]+`),
		},
		ValidExtensions: map[string]bool{ // File extensions to be checked
			".go":  true,
			".tsx": true,
			".md":  true,
			".yml": true,
		},
		// Duration to wait between requests to avoid rate limiting. Netlify
		// publishes no acceptable-use numbers, so 500ms is a conservative guess.
		SleepDuration: 500 * time.Millisecond,
	}
}
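
// processFile reads a single file and returns every autobrr.com URL that
// matches one of the checker's patterns. The result may contain duplicates;
// de-duplication happens in the caller.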
func processFile(filePath string, checker *AutobrrURLChecker) ([]string, error) {
	content, err := os.ReadFile(filePath)
	if err != nil {
		return nil, fmt.Errorf("error reading file %s: %w", filePath, err)
	}

	var allURLMatches []string
	for _, regex := range checker.AutobrrURLRegex {
		urlmatches := regex.FindAllString(string(content), -1)
		allURLMatches = append(allURLMatches, urlmatches...)
	}

	return allURLMatches, nil
}
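
// TestAutobrrURLsInRepository walks the repository, collects every
// autobrr.com URL from files with a whitelisted extension, de-duplicates
// them, and then issues one GET per URL, failing on a 404 response.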
func TestAutobrrURLsInRepository(t *testing.T) {
	checker := NewAutobrrURLChecker()
	uniqueURLSet := make(map[string]bool)

	err := filepath.WalkDir(checker.BasePath, func(path string, entry fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		if entry.IsDir() || !checker.ValidExtensions[filepath.Ext(path)] {
			return nil
		}

		fileURLs, err := processFile(path, checker)
		if err != nil {
			// Report the unreadable file but keep walking, so one bad file
			// does not abort the scan of the rest of the repository.
			t.Errorf("Error processing file %s: %v", path, err)
			return nil
		}

		for _, url := range fileURLs {
			// Normalize by dropping any URL fragment and trailing slashes,
			// so variants of the same page collapse into a single entry.
			normalizedURL := strings.TrimRight(strings.Split(url, "#")[0], "/")
			uniqueURLSet[normalizedURL] = true
		}
		return nil
	})
	if err != nil {
		t.Fatalf("Error walking the repository directory tree: %v", err)
	}
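
	// Request each unique URL exactly once. A single shared client with a
	// timeout keeps one hung endpoint from stalling the entire test run.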
	client := &http.Client{
		Timeout: time.Second * 10,
	}

	// Copy the de-duplicated set into a slice before issuing requests
	deduplicatedURLs := make([]string, 0, len(uniqueURLSet))
	for url := range uniqueURLSet {
		deduplicatedURLs = append(deduplicatedURLs, url)
	}

	for _, url := range deduplicatedURLs {
		t.Run(url, func(t *testing.T) {
			req, err := http.NewRequest(http.MethodGet, url, nil)
			if err != nil {
				t.Errorf("Failed to create request for url %s: %v", url, err)
				return
			}
			req.Header.Set("User-Agent", "autobrr")

			resp, err := client.Do(req)
			if err != nil {
				t.Errorf("Failed to GET url %s: %v", url, err)
				return
			}
			defer resp.Body.Close()

			// Only a hard 404 fails the test; other statuses (redirects,
			// transient server errors) are tolerated to keep the check low-noise.
			if resp.StatusCode == http.StatusNotFound {
				t.Errorf("URL %s returned 404 Not Found", url)
			}

			time.Sleep(checker.SleepDuration)
		})
	}
}