autobrr/internal/list/title.go
Fabricio Silva b724429b97
feat(lists): add anilist support (#1949)
* fix(lists): clear selected list

* chore(web): improve onChange set values for select_wide

* feat(web): add anilist lists

* feat(web): Filter is required on ListForm

* fix(web): ListForm reset url when change type

* feat(lists): add anilist support

* feat(lists): filter duplicates for anilist

* feat(anilist): handle special characters

* fix(lists): better title matching
fix(lists): add alternatives to apostrophe replacement

* test(title): add some anime cases

* feat(anilist): replace unicodes with regex

* feat(lists): move additional anilist processing to autobrr instead of brr api

* feat(lists): clean Unicode Block “Latin Extended-A” chars

---------

Co-authored-by: martylukyy <35452459+martylukyy@users.noreply.github.com>
2025-01-31 19:17:23 +01:00

111 lines
3.8 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

// Copyright (c) 2021 - 2025, Ludvig Lundgren and the autobrr contributors.
// SPDX-License-Identifier: GPL-2.0-or-later
package list
import (
"fmt"
"regexp"
"strings"
)
// Regex patterns
// https://www.regular-expressions.info/unicode.html#category
// https://www.ncbi.nlm.nih.gov/staff/beck/charents/hex.html
var (
// replaceRegexp matches one character that is Unicode punctuation (\p{P}),
// a Unicode separator (\p{Z}), any code point from U+00C0 through U+017E
// (the accented Latin letters of Latin-1 Supplement and Latin Extended-A
// fall in this range), or U+00AE (the registered sign).
replaceRegexp = regexp.MustCompile(`[\p{P}\p{Z}\x{00C0}-\x{017E}\x{00AE}]`)
// questionmarkRegexp matches a run of two or more consecutive "?".
questionmarkRegexp = regexp.MustCompile(`[?]{2,}`)
// regionCodeRegexp matches "(", one or more non-whitespace characters, then
// ")" - for example "(US)". Nothing restricts the content to letters, so a
// group such as "(2020)" matches as well.
regionCodeRegexp = regexp.MustCompile(`\(\S+\)`)
// parenthesesEndRegexp matches a ")" that is the last character of the input.
parenthesesEndRegexp = regexp.MustCompile(`\)$`)
)
// yearRegexp = regexp.MustCompile(`\(\d{4}\)$`)
// processTitle expands a single title into a set of wildcard patterns.
//
// A title that is empty or only whitespace yields nil. A title made up solely
// of characters matched by replaceRegexp is added verbatim, because
// wildcarding it would leave nothing literal to match on.
//
// Every other title is expanded from two starting points:
//
//   - the title with a trailing ")" turned into "?"
//   - the title with every regionCodeRegexp match (e.g. "(US)") removed and
//     trailing spaces trimmed
//
// Each starting point is used both as-is and with apostrophe-like characters
// removed. For every resulting string, each replaceRegexp match becomes "?",
// runs of two or more "?" collapse into "*", and the pattern is added both in
// full and with trailing "?", "*" and " " trimmed.
//
// matchRelease is forwarded unchanged to Titles.Add. The result is whatever
// Titles.Titles returns for the collected patterns.
func processTitle(title string, matchRelease bool) []string {
	if strings.TrimSpace(title) == "" {
		return nil
	}

	t := NewTitleSlice()

	// Nothing would survive the replacement below, so keep the raw title.
	if replaceRegexp.ReplaceAllString(title, "") == "" {
		t.Add(title, matchRelease)
		return t.Titles()
	}

	// Apostrophe look-alikes are removed outright rather than wildcarded so
	// that "Don't" also yields a "Dont" pattern. The typographic quotes are
	// written as escapes so they cannot be silently lost or confused with the
	// ASCII apostrophe by editors and review tools.
	stripApostrophes := strings.NewReplacer(
		"'", "",
		"´", "",
		"`", "",
		"\u2018", "", // left single quotation mark
		"\u2019", "", // right single quotation mark
	)

	// addWildcarded turns s into a wildcard pattern and records it together
	// with the variant that has its trailing wildcards and spaces trimmed.
	addWildcarded := func(s string) {
		s = replaceRegexp.ReplaceAllString(s, "?")
		s = questionmarkRegexp.ReplaceAllString(s, "*")
		t.Add(s, matchRelease)
		t.Add(strings.TrimRight(s, "?* "), matchRelease)
	}

	// Starting point 1: the full title, closing parenthesis wildcarded.
	closed := parenthesesEndRegexp.ReplaceAllString(title, "?")
	// Starting point 2: the title without parenthesised groups such as "(US)".
	noRegion := strings.TrimRight(regionCodeRegexp.ReplaceAllString(title, ""), " ")

	addWildcarded(closed)
	addWildcarded(stripApostrophes.Replace(closed))
	addWildcarded(noRegion)
	addWildcarded(stripApostrophes.Replace(noRegion))

	return t.Titles()
}
// Titles is a set of title patterns that remembers the order in which the
// patterns were first added.
type Titles struct {
	tm    map[string]struct{} // membership index used to reject duplicates
	order []string            // unique titles, in first-insertion order
}

// NewTitleSlice returns an empty Titles set.
func NewTitleSlice() *Titles {
	return &Titles{tm: map[string]struct{}{}}
}

// Add records title in the set unless it is already present.
//
// An empty title and a bare "*" are ignored. When matchRelease is true,
// leading and trailing "?" are trimmed and the result is wrapped in "*" on
// both sides. A title that is empty or a bare "*" after trimming is ignored
// too, since wrapping it would produce a pattern that matches everything.
func (ts *Titles) Add(title string, matchRelease bool) {
	if title == "" || title == "*" {
		return
	}

	if matchRelease {
		title = strings.Trim(title, "?")
		// e.g. "?" or "???" would otherwise turn into the match-all "**".
		if title == "" || title == "*" {
			return
		}
		title = fmt.Sprintf("*%v*", title)
	}

	// Allow a zero-value Titles to be used without NewTitleSlice.
	if ts.tm == nil {
		ts.tm = map[string]struct{}{}
	}

	if _, seen := ts.tm[title]; seen {
		return
	}
	ts.tm[title] = struct{}{}
	ts.order = append(ts.order, title)
}

// Titles returns the unique titles in the order they were first added. The
// result is never nil and is a copy, so callers may modify it freely.
func (ts *Titles) Titles() []string {
	titles := make([]string, 0, len(ts.order))
	return append(titles, ts.order...)
}