autobrr/internal/list/title.go
soup 0391629862
chore(license): update copyright year in headers (#1929)
* chore: update copyright year in license headers

* Revert "chore: update copyright year in license headers"

This reverts commit 3e58129c431b9a491089ce36b908f9bb6ba38ed3.

* chore: update copyright year in license headers

* fix: sort go imports

* fix: add missing license headers
2025-01-06 22:23:19 +01:00

111 lines
3.7 KiB
Go

// Copyright (c) 2021 - 2025, Ludvig Lundgren and the autobrr contributors.
// SPDX-License-Identifier: GPL-2.0-or-later
package list
import (
"fmt"
"regexp"
"strings"
)
// Regex patterns used by processTitle to turn a title into wildcard patterns.
// https://www.regular-expressions.info/unicode.html#category
// https://www.ncbi.nlm.nih.gov/staff/beck/charents/hex.html
var (
// replaceRegexp matches a single character that is Unicode punctuation (\p{P}),
// a Unicode separator (\p{Z}), in the range U+00C0 to U+017E, or U+00AE.
// processTitle substitutes "?" for every match.
replaceRegexp = regexp.MustCompile(`[\p{P}\p{Z}\x{00C0}-\x{017E}\x{00AE}]`)
// questionmarkRegexp matches a run of two or more "?"; processTitle collapses
// each run into a single "*".
questionmarkRegexp = regexp.MustCompile(`[?]{2,}`)
// regionCodeRegexp matches from an opening parenthesis through a closing
// parenthesis that is the last character of the string, with at least one
// character in between. processTitle deletes the match.
regionCodeRegexp = regexp.MustCompile(`\(.+\)$`)
// parenthesesEndRegexp matches a ")" that is the last character of the string.
parenthesesEndRegexp = regexp.MustCompile(`\)$`)
)
// yearRegexp = regexp.MustCompile(`\(\d{4}\)$`)

// processTitle expands title into a set of wildcard patterns.
//
// A title that is empty or only whitespace yields nil. A title in which every
// character is matched by replaceRegexp is added as-is. For any other title,
// four base strings are derived:
//
//   - the title with a trailing ")" replaced by "?"
//   - the same, with apostrophes removed
//   - the title with a trailing parenthesised group removed and trailing
//     spaces trimmed
//   - the same, with apostrophes removed
//
// In each base string every replaceRegexp match becomes "?" and every run of
// two or more "?" becomes "*". Both the resulting pattern and a copy with
// trailing "?", "*" and spaces trimmed are added.
//
// matchRelease is passed through unchanged to Titles.Add. The result comes
// from Titles.Titles, so duplicates are collapsed.
func processTitle(title string, matchRelease bool) []string {
	if strings.TrimSpace(title) == "" {
		return nil
	}

	patterns := NewTitleSlice()

	// Nothing would survive the substitution below, so keep the raw title.
	if replaceRegexp.ReplaceAllString(title, "") == "" {
		patterns.Add(title, matchRelease)
		return patterns.Titles()
	}

	dropApostrophes := func(s string) string {
		return strings.ReplaceAll(s, "'", "")
	}

	closed := parenthesesEndRegexp.ReplaceAllString(title, "?")
	regionless := strings.TrimRight(regionCodeRegexp.ReplaceAllString(title, ""), " ")

	bases := []string{
		closed,
		dropApostrophes(closed),
		regionless,
		dropApostrophes(regionless),
	}

	for _, base := range bases {
		pattern := replaceRegexp.ReplaceAllString(base, "?")
		pattern = questionmarkRegexp.ReplaceAllString(pattern, "*")

		patterns.Add(pattern, matchRelease)
		patterns.Add(strings.TrimRight(pattern, "?* "), matchRelease)
	}

	return patterns.Titles()
}
// Titles is a de-duplicating collection of title patterns. Patterns are kept
// as keys of a set, so adding the same pattern twice stores it once.
type Titles struct {
	tm map[string]struct{}
}

// NewTitleSlice returns an empty Titles collection.
func NewTitleSlice() *Titles {
	return &Titles{tm: make(map[string]struct{})}
}

// Add stores title in the collection.
//
// When matchRelease is true, leading and trailing "?" are trimmed and the
// remainder is wrapped as "*<title>*". A title that is empty or consists only
// of "*" (after the "?" trimming, when it applies) is ignored, because the
// stored pattern would otherwise be a catch-all such as "**".
//
// Add may be called on a zero-value Titles; the set is allocated on first use.
func (ts *Titles) Add(title string, matchRelease bool) {
	if matchRelease {
		title = strings.Trim(title, "?")
	}

	// Covers "", "*" and longer runs of "*".
	if strings.Trim(title, "*") == "" {
		return
	}

	if matchRelease {
		title = fmt.Sprintf("*%v*", title)
	}

	if ts.tm == nil {
		ts.tm = make(map[string]struct{})
	}
	ts.tm[title] = struct{}{}
}

// Titles returns the stored patterns as a non-nil slice. The patterns come
// from ranging over a map, so their order is unspecified.
func (ts *Titles) Titles() []string {
	titles := make([]string, 0, len(ts.tm))
	for key := range ts.tm {
		titles = append(titles, key)
	}
	return titles
}