refactor(logs): improve log sanitization performance (#779)

* refactor log sanitization for better performance

* Fix token error, add mutex for safer concurrency

* serve sanitized version directly from memory

* further improvements

made it check for "module":"irc" before running the irc regex checks

* Revert "further improvements"

This reverts commit 260037c3a58e1e760613167d3e15136b313c0612.

* update irc regexes to handle special characters

- added prefixes to the irc patterns
- improved tests

* more improvements for irc regexes

* fixed errors

* another edge case bites the dust

* regex improvements

* lock to 1 worker if there are 2 or fewer CPU cores

* removed unused code

* revert to single thread

Moved regex patterns and replacements into a separate array for easier maintenance and readability.

Optimized the regex patterns that share the same replacement string to avoid redundancy.

Modify SanitizeLogFile to accept io.Writer for direct output

* style

* only check relevant lines

- only check relevant lines
- don't break unless length=0

* revert breaking change

* handle api and auth cases

---------

Co-authored-by: Kyle Sanderson <kyle.leet@gmail.com>
This commit is contained in:
soup 2023-03-26 23:25:23 +02:00 committed by GitHub
parent 5fed0921c5
commit c2fcd91da7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 201 additions and 136 deletions

View file

@ -1,169 +1,181 @@
package http
import (
"bytes"
"io/ioutil"
"os"
"strings"
"testing"
)
// TestSanitizeLogFile is a table-driven test for SanitizeLogFile. Each case
// supplies a raw log line (input) and the same line as it should look once the
// secret portion has been replaced with the literal "REDACTED" (expected). The
// input is written to a temporary file, SanitizeLogFile is run on that file,
// and the result is compared against expected.
//
// NOTE(review): this listing appears to interleave the removed and the added
// lines of a diff without +/- markers. Several struct literals assign
// input/expected twice, and the loop body declares tmpFile and sanitizedData
// twice, so the block will not compile exactly as shown. The lines that call
// SanitizeLogFile(path) and read a returned file path look like the older
// variant; the lines that pass a *bytes.Buffer and wrap the body in t.Run look
// like the newer variant — confirm against the repository before editing.
func TestSanitizeLogFile(t *testing.T) {
testCases := []struct {
name string
input string
expected string
}{
// Download URLs: the key segment / authkey / torrent_pass value is expected
// to be replaced with REDACTED while the rest of the URL is left intact.
{
input: "https://beyond-hd.me/torrent/download/auto.t0rrent1d.rssk3y",
expected: "https://beyond-hd.me/torrent/download/auto.t0rrent1d.REDACTED",
name: "BHD_URL",
input: "\"module\":\"filter\" https://beyond-hd.me/torrent/download/auto.t0rrent1d.rssk3y",
expected: "\"module\":\"filter\" https://beyond-hd.me/torrent/download/auto.t0rrent1d.REDACTED",
},
{
input: "https://aither.cc/torrent/download/t0rrent1d.rssk3y",
expected: "https://aither.cc/torrent/download/t0rrent1d.REDACTED",
name: "Standard_UNIT3D_URL",
input: "\"module\":\"filter\" https://aither.cc/torrent/download/t0rrent1d.rssk3y",
expected: "\"module\":\"filter\" https://aither.cc/torrent/download/t0rrent1d.REDACTED",
},
{
input: "https://www.torrentleech.org/rss/download/t0rrent1d/rssk3y/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent",
expected: "https://www.torrentleech.org/rss/download/t0rrent1d/REDACTED/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent",
name: "TL_URL",
input: "\"module\":\"filter\" https://www.torrentleech.org/rss/download/t0rrent1d/rssk3y/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent",
expected: "\"module\":\"filter\" https://www.torrentleech.org/rss/download/t0rrent1d/REDACTED/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent",
},
{
input: "https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=4uthk3y&torrent_pass=t0rrentp4ss",
expected: "https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=REDACTED&torrent_pass=REDACTED",
name: "auth_key_torrent_pass",
input: "\"module\":\"filter\" https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=4uthk3y&torrent_pass=t0rrentp4ss",
expected: "\"module\":\"filter\" https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=REDACTED&torrent_pass=REDACTED",
},
// Bot/service command lines (invite, announce, enter, identify, ...): the key
// or password token is expected to become REDACTED; user and channel names
// stay as written. Most of these cases set no name field.
{
input: "Voyager autobot us3rn4me 1RCK3Y",
expected: "Voyager autobot us3rn4me REDACTED",
input: "\"module\":\"irc\" LiMEY_ !invite 1irck3y us3rn4me",
expected: "\"module\":\"irc\" LiMEY_ !invite REDACTED us3rn4me",
},
{
input: "Satsuki enter #announce us3rn4me 1RCK3Y",
expected: "Satsuki enter #announce us3rn4me REDACTED",
input: "\"module\":\"irc\" Voyager autobot us3rn4me 1irck3y",
expected: "\"module\":\"irc\" Voyager autobot us3rn4me REDACTED",
},
{
input: "Millie announce 1RCK3Y",
expected: "Millie announce REDACTED",
input: "\"module\":\"irc\" Satsuki enter #announce us3rn4me 1irck3y",
expected: "\"module\":\"irc\" Satsuki enter #announce us3rn4me REDACTED",
},
{
input: "DBBot announce 1RCK3Y",
expected: "DBBot announce REDACTED",
input: "\"module\":\"irc\" Sauron bot #ant-announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Sauron bot #ant-announce us3rn4me REDACTED",
},
{
input: "ENDOR !invite us3rnøme 1RCK3Y",
expected: "ENDOR !invite us3rnøme REDACTED",
input: "\"module\":\"irc\" Millie announce IRCKEY",
expected: "\"module\":\"irc\" Millie announce REDACTED",
},
{
input: "Vertigo ENTER #GGn-Announce us3rn4me 1RCK3Y",
expected: "Vertigo ENTER #GGn-Announce us3rn4me REDACTED",
input: "\"module\":\"irc\" DBBot announce IRCKEY",
expected: "\"module\":\"irc\" DBBot announce REDACTED",
},
{
input: "midgards announce 1RCK3Y",
expected: "midgards announce REDACTED",
input: "\"module\":\"irc\" PT-BOT invite IRCKEY",
expected: "\"module\":\"irc\" PT-BOT invite REDACTED",
},
{
input: "HeBoT !invite 1RCK3Y",
expected: "HeBoT !invite REDACTED",
input: "\"module\":\"irc\" midgards announce IRCKEY",
expected: "\"module\":\"irc\" midgards announce REDACTED",
},
{
input: "NBOT !invite 1RCK3Y",
expected: "NBOT !invite REDACTED",
input: "\"module\":\"irc\" HeBoT !invite IRCKEY",
expected: "\"module\":\"irc\" HeBoT !invite REDACTED",
},
{
input: "Muffit bot #nbl-announce us3rn4me 1RCK3Y",
expected: "Muffit bot #nbl-announce us3rn4me REDACTED",
input: "\"module\":\"irc\" NBOT !invite IRCKEY",
expected: "\"module\":\"irc\" NBOT !invite REDACTED",
},
{
input: "hermes enter #announce us3rn4me 1RCK3Y",
expected: "hermes enter #announce us3rn4me REDACTED",
input: "\"module\":\"irc\" PS-Info pass IRCKEY",
expected: "\"module\":\"irc\" PS-Info pass REDACTED",
},
{
input: "LiMEY_ !invite 1RCK3Y us3rn4me",
expected: "LiMEY_ !invite REDACTED us3rn4me",
input: "\"module\":\"irc\" Synd1c4t3 invite IRCKEY",
expected: "\"module\":\"irc\" Synd1c4t3 invite REDACTED",
},
{
input: "PS-Info pass 1RCK3Y",
expected: "PS-Info pass REDACTED",
input: "\"module\":\"irc\" UHDBot invite IRCKEY",
expected: "\"module\":\"irc\" UHDBot invite REDACTED",
},
{
input: "PT-BOT invite 1RCK3Y",
expected: "PT-BOT invite REDACTED",
input: "\"module\":\"irc\" ENDOR !invite us3rn4me IRCKEY",
expected: "\"module\":\"irc\" ENDOR !invite us3rn4me REDACTED",
},
{
input: "Hummingbird ENTER us3rn4me 1RCK3Y #ptp-announce-dev",
expected: "Hummingbird ENTER us3rn4me REDACTED #ptp-announce-dev",
input: "\"module\":\"irc\" Vertigo ENTER #GGn-Announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Vertigo ENTER #GGn-Announce us3rn4me REDACTED",
},
{
input: "Drone enter #red-announce us3rn4me 1RCK3Y",
expected: "Drone enter #red-announce us3rn4me REDACTED",
input: "\"module\":\"irc\" immortal invite us3rn4me IRCKEY",
expected: "\"module\":\"irc\" immortal invite us3rn4me REDACTED",
},
{
input: "SceneHD .invite 1RCK3Y #announce",
expected: "SceneHD .invite REDACTED #announce",
input: "\"module\":\"irc\" Muffit bot #nbl-announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Muffit bot #nbl-announce us3rn4me REDACTED",
},
{
input: "erica letmeinannounce us3rn4me 1RCK3Y",
expected: "erica letmeinannounce us3rn4me REDACTED",
input: "\"module\":\"irc\" hermes enter #announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" hermes enter #announce us3rn4me REDACTED",
},
{
input: "Synd1c4t3 invite 1RCK3Y",
expected: "Synd1c4t3 invite REDACTED",
input: "\"module\":\"irc\" Hummingbird ENTER us3rn4me IRCKEY #ptp-announce-dev",
expected: "\"module\":\"irc\" Hummingbird ENTER us3rn4me REDACTED #ptp-announce-dev",
},
{
input: "UHDBot invite 1RCK3Y",
expected: "UHDBot invite REDACTED",
input: "\"module\":\"irc\" Drone enter #red-announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Drone enter #red-announce us3rn4me REDACTED",
},
{
input: "Sauron bot #ant-announce us3rn4me 1RCK3Y",
expected: "Sauron bot #ant-announce us3rn4me REDACTED",
input: "\"module\":\"irc\" RevoTT !invite us3rn4me IRCKEY",
expected: "\"module\":\"irc\" RevoTT !invite us3rn4me REDACTED",
},
{
input: "RevoTT !invite us3rn4me P4SSK3Y",
expected: "RevoTT !invite us3rn4me REDACTED",
input: "\"module\":\"irc\" SceneHD .invite IRCKEY #announce",
expected: "\"module\":\"irc\" SceneHD .invite REDACTED #announce",
},
{
input: "Cerberus identify us3rn4me P1D",
expected: "Cerberus identify us3rn4me REDACTED",
input: "\"module\":\"irc\" erica letmeinannounce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" erica letmeinannounce us3rn4me REDACTED",
},
{
input: "NickServ IDENTIFY dasøl13sa#!",
expected: "NickServ IDENTIFY REDACTED",
input: "\"module\":\"irc\" Cerberus identify us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Cerberus identify us3rn4me REDACTED",
},
{
input: "--> AUTHENTICATE poasd!232kljøasdj!%",
expected: "--> AUTHENTICATE REDACTED",
input: "\"module\":\"irc\" NickServ IDENTIFY Nvbkødn~vzjHkPEimnJ6PmJw8ayiE#wg",
expected: "\"module\":\"irc\" NickServ IDENTIFY REDACTED",
},
{
input: "\"module\":\"irc\" PRIVMSG NickServ IDENTIFY zAPEJEA8ryYnpj3AiE3KJ",
expected: "\"module\":\"irc\" PRIVMSG NickServ IDENTIFY REDACTED",
},
}
for _, testCase := range testCases {
// Create a temporary file with sample log data
tmpFile, err := ioutil.TempFile("", "test-log-*.log")
if err != nil {
t.Fatal(err)
}
// This defer sits directly in the for loop, so it fires only when
// TestSanitizeLogFile itself returns, not at the end of each iteration.
defer os.Remove(tmpFile.Name())
t.Run(testCase.name, func(t *testing.T) {
// Create a temporary file with sample log data
tmpFile, err := ioutil.TempFile("", "test-log-*.log")
if err != nil {
t.Fatal(err)
}
defer os.Remove(tmpFile.Name())
// Write sample log data to the temporary file
_, err = tmpFile.WriteString(testCase.input)
if err != nil {
tmpFile.Close()
t.Fatal(err)
}
err = tmpFile.Close()
if err != nil {
t.Fatal(err)
}
// Write the test case input to the temporary file
_, err = tmpFile.WriteString(testCase.input + "\n")
if err != nil {
tmpFile.Close()
t.Fatal(err)
}
err = tmpFile.Close()
if err != nil {
t.Fatal(err)
}
// Call SanitizeLogFile on the temporary file
sanitizedTmpFilePath, err := SanitizeLogFile(tmpFile.Name())
if err != nil {
t.Fatal(err)
}
defer os.Remove(sanitizedTmpFilePath)
// Create a bytes.Buffer to store the sanitized content
sanitizedContent := &bytes.Buffer{}
// Read the content of the sanitized temporary file
sanitizedData, err := ioutil.ReadFile(sanitizedTmpFilePath)
if err != nil {
t.Fatal(err)
}
// Call SanitizeLogFile on the temporary file
err = SanitizeLogFile(tmpFile.Name(), sanitizedContent)
if err != nil {
t.Fatal(err)
}
// Check if the sanitized data matches the expected content
if string(sanitizedData) != testCase.expected {
t.Errorf("Sanitized data does not match expected data for input: %s\nExpected:\n%s\nActual:\n%s", testCase.input, testCase.expected, sanitizedData)
}
// Read the content of the sanitized content
sanitizedData := sanitizedContent.String()
// Check if the sanitized data matches the expected content
// This is a substring check, not an equality check: the expected line plus
// its trailing newline only has to appear somewhere in the output.
if !strings.Contains(sanitizedData, testCase.expected+"\n") {
t.Errorf("Sanitized data does not match expected data\nExpected:\n%s\nActual:\n%s", testCase.expected, sanitizedData)
}
})
}
}