refactor(logs): improve log sanitization performance (#779)

* refactor log sanitization for better performance

* Fix token error, add mutex for safer concurrency

* serve sanitized version directly from memory

* further improvements

made it check for "module":"irc" before running the irc regex checks

* Revert "further improvements"

This reverts commit 260037c3a58e1e760613167d3e15136b313c0612.

* update irc regexes to handle special characters

- added prefixes to the irc patterns
- improved tests

* more improvements for irc regexes

* fixed errors

* another edge case bites the dust

* regex improvements

* lock to 1 worker if there are 2 or fewer CPU cores

* removed unused code

* revert to single thread

Moved regex patterns and replacements into a separate array for easier maintenance and readability.

Optimized the regex patterns that share the same replacement string to avoid redundancy.

Modify SanitizeLogFile to accept io.Writer for direct output

* style

* only check relevant lines

- only check relevant lines
- don't break unless length=0

* revert breaking change

* handle api and auth cases

---------

Co-authored-by: Kyle Sanderson <kyle.leet@gmail.com>
This commit is contained in:
soup 2023-03-26 23:25:23 +02:00 committed by GitHub
parent 5fed0921c5
commit c2fcd91da7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 201 additions and 136 deletions

View file

@ -1,8 +1,10 @@
package http
import (
"bufio"
"io"
"io/fs"
"io/ioutil"
"log"
"net/http"
"os"
"path"
@ -87,48 +89,103 @@ func (h logsHandler) files(w http.ResponseWriter, r *http.Request) {
render.JSON(w, r, response)
}
var ( // regexes for sanitizing log files
keyValueRegex = regexp.MustCompile(`(torrent_pass|passkey|authkey|secret_key|apikey)=([a-zA-Z0-9]+)`)
combinedRegex = regexp.MustCompile(`(https?://[^\s]+/((rss/download/[a-zA-Z0-9]+/)|torrent/download/((auto\.[a-zA-Z0-9]+\.|[a-zA-Z0-9]+\.))))([a-zA-Z0-9]+)`)
inviteRegex = regexp.MustCompile(`(Voyager autobot [\p{L}0-9]+ |Satsuki enter #announce [\p{L}0-9]+ |Millie announce |DBBot announce |ENDOR !invite [\p{L}0-9]+ |Vertigo ENTER #GGn-Announce [\p{L}0-9]+ |midgards announce |HeBoT !invite |NBOT !invite |Muffit bot #nbl-announce [\p{L}0-9]+ |hermes enter #announce [\p{L}0-9]+ |LiMEY_ !invite |PS-Info pass |PT-BOT invite |Hummingbird ENTER [\p{L}0-9]+ |Drone enter #red-announce [\p{L}0-9]+ |SceneHD \.invite |erica letmeinannounce [\p{L}0-9]+ |Synd1c4t3 invite |UHDBot invite |Sauron bot #ant-announce [\p{L}0-9]+ |RevoTT !invite [\p{L}0-9]+ |Cerberus identify [\p{L}0-9]+ )([\p{L}0-9]+)`)
nickservRegex = regexp.MustCompile(`(NickServ IDENTIFY )([\p{L}0-9!#%&*+/:;<=>?@^_` + "`" + `{|}~]+)`)
saslRegex = regexp.MustCompile(`(--> AUTHENTICATE )([\p{L}0-9!#%&*+/:;<=>?@^_` + "`" + `{|}~]+)`)
// regexReplacements holds the redaction rules for log lines, in the order
// they are applied.
//
// Order matters: SanitizeLogFile selects rules by index. The first two
// entries (key=value credentials and download URLs) are applied to
// feed/filter/release/action lines, and every later entry is applied to IRC
// lines. Keep new non-IRC rules within the first two slots or update the
// caller accordingly.
//
// Nicknames are matched with [\p{L}\w]+ so that a nick containing non-ASCII
// letters (for example "us3rnøme") does not stop the rule from matching and
// leave the key that follows it unredacted.
var (
	regexReplacements = []struct {
		pattern *regexp.Regexp
		repl    string
	}{
		{
			// Credentials passed as key=value pairs, e.g. in query strings.
			pattern: regexp.MustCompile(`(torrent_pass|passkey|authkey|auth|secret_key|api|apikey)=([a-zA-Z0-9]+)`),
			repl:    "${1}=REDACTED",
		},
		{
			// Download URLs whose last path/dot segment is the RSS key.
			pattern: regexp.MustCompile(`(https?://[^\s]+/((rss/download/[a-zA-Z0-9]+/)|torrent/download/((auto\.[a-zA-Z0-9]+\.|[a-zA-Z0-9]+\.))))([a-zA-Z0-9]+)`),
			repl:    "${1}REDACTED",
		},
		{
			// NickServ password.
			pattern: regexp.MustCompile(`(NickServ IDENTIFY )([\p{L}0-9!#%&*+/:;<=>?@^_` + "`" + `{|}~]+)`),
			repl:    "${1}REDACTED",
		},
		{
			// SASL AUTHENTICATE payload.
			pattern: regexp.MustCompile(`(AUTHENTICATE )([\p{L}0-9!#%&*+/:;<=>?@^_` + "`" + `{|}~]+)`),
			repl:    "${1}REDACTED",
		},
		{
			// Announce-bot invite commands that end with the key. Commands
			// of the form "<bot> <command> <nick> <key>" carry the nick as
			// part of the captured prefix; the others are "<bot> <command> <key>".
			pattern: regexp.MustCompile(
				`(` +
					`(?:Voyager autobot\s+[\p{L}\w]+` +
					`|Satsuki enter #announce\s+[\p{L}\w]+` +
					`|Sauron bot #ant-announce\s+[\p{L}\w]+` +
					`|Millie announce` +
					`|DBBot announce` +
					`|PT-BOT invite` +
					`|midgards announce` +
					`|HeBoT !invite` +
					`|NBOT !invite` +
					`|PS-Info pass` +
					`|Synd1c4t3 invite` +
					`|UHDBot invite` +
					`|ENDOR !invite\s+[\p{L}\w]+` +
					`|immortal invite\s+[\p{L}\w]+` +
					`|Muffit bot #nbl-announce\s+[\p{L}\w]+` +
					`|hermes enter #announce\s+[\p{L}\w]+` +
					`|Drone enter #red-announce\s+[\p{L}\w]+` +
					`|RevoTT !invite\s+[\p{L}\w]+` +
					`|erica letmeinannounce\s+[\p{L}\w]+` +
					`|Cerberus identify\s+[\p{L}\w]+)` +
					`)(?:\s+[a-zA-Z0-9]+)`),
			repl: "$1 REDACTED",
		},
		{
			// LiMEY_ takes the key first and the nick second.
			pattern: regexp.MustCompile(`(LiMEY_ !invite\s+)([a-zA-Z0-9]+)(\s+[\p{L}\w]+)`),
			repl:    "${1}REDACTED${3}",
		},
		{
			// Whitespace (not an arbitrary character) must separate nick and
			// key, otherwise the separator can swallow part of the nick and
			// the wrong token gets redacted.
			pattern: regexp.MustCompile(`(Vertigo ENTER #GGn-Announce\s+)([\p{L}\w]+)\s+([a-zA-Z0-9]+)`),
			repl:    "$1$2 REDACTED",
		},
		{
			pattern: regexp.MustCompile(`(Hummingbird ENTER\s+[\p{L}\w]+)\s+([a-zA-Z0-9]+)(\s+#ptp-announce-dev)`),
			repl:    "$1 REDACTED$3",
		},
		{
			pattern: regexp.MustCompile(`(SceneHD..invite).([a-zA-Z0-9]+)(\s+#announce)`),
			repl:    "$1 REDACTED$3",
		},
	}
)
func SanitizeLogFile(filePath string) (string, error) {
data, err := ioutil.ReadFile(filePath)
func SanitizeLogFile(filePath string, output io.Writer) error {
inFile, err := os.Open(filePath)
if err != nil {
return "", err
return err
}
defer inFile.Close()
reader := bufio.NewReader(inFile)
writer := bufio.NewWriter(output)
defer writer.Flush()
for {
// Read the next line from the file
line, err := reader.ReadString('\n')
if err != nil {
if err != io.EOF {
log.Printf("Error reading line from input file: %v", err)
}
break
}
sanitizedData := string(data)
// Sanitize the line using regexReplacements array
bIRC := strings.Contains(line, `"module":"irc"`)
bFilter := (strings.Contains(line, `"module":"feed"`) ||
strings.Contains(line, `"module":"filter"`)) ||
strings.Contains(line, `"repo":"release"`) ||
strings.Contains(line, `"module":"action"`)
// torrent_pass, passkey, authkey, secret_key, apikey, rsskey
sanitizedData = keyValueRegex.ReplaceAllString(sanitizedData, "${1}=REDACTED")
sanitizedData = combinedRegex.ReplaceAllString(sanitizedData, "${1}REDACTED")
// irc related
sanitizedData = inviteRegex.ReplaceAllString(sanitizedData, "${1}REDACTED")
sanitizedData = nickservRegex.ReplaceAllString(sanitizedData, "${1}REDACTED")
sanitizedData = saslRegex.ReplaceAllString(sanitizedData, "${1}REDACTED")
tmpFile, err := ioutil.TempFile("", "sanitized-log-*.log")
if err != nil {
return "", err
for i := 0; i < len(regexReplacements); i++ {
// Apply the first two patterns only if the line contains "module":"feed",
// "module":"filter", "repo":"release", or "module":"action"
if i < 2 {
if bFilter {
line = regexReplacements[i].pattern.ReplaceAllString(line, regexReplacements[i].repl)
}
} else if bIRC {
// Check for "module":"irc" before applying other patterns
line = regexReplacements[i].pattern.ReplaceAllString(line, regexReplacements[i].repl)
}
}
_, err = tmpFile.WriteString(sanitizedData)
if err != nil {
tmpFile.Close()
return "", err
// Write the sanitized line to the writer
if _, err = writer.WriteString(line); err != nil {
log.Printf("Error writing line to output: %v", err)
return err
}
}
err = tmpFile.Close()
if err != nil {
return "", err
}
return tmpFile.Name(), nil
return nil
}
func (h logsHandler) downloadFile(w http.ResponseWriter, r *http.Request) {
@ -168,9 +225,11 @@ func (h logsHandler) downloadFile(w http.ResponseWriter, r *http.Request) {
filePath := filepath.Join(logsDir, logFile)
// Sanitize the log file
sanitizedFilePath, err := SanitizeLogFile(filePath)
if err != nil {
w.Header().Set("Content-Disposition", "attachment; filename="+strconv.Quote(logFile))
w.Header().Set("Content-Type", "application/octet-stream")
// Sanitize the log file and directly write the output to the HTTP socket
if err := SanitizeLogFile(filePath, w); err != nil {
render.Status(r, http.StatusInternalServerError)
render.JSON(w, r, errorResponse{
Message: err.Error(),
@ -178,12 +237,6 @@ func (h logsHandler) downloadFile(w http.ResponseWriter, r *http.Request) {
})
return
}
defer os.Remove(sanitizedFilePath)
w.Header().Set("Content-Disposition", "attachment; filename="+strconv.Quote(logFile))
w.Header().Set("Content-Type", "application/octet-stream")
http.ServeFile(w, r, sanitizedFilePath)
}
type logFile struct {

View file

@ -1,135 +1,147 @@
package http
import (
"bytes"
"io/ioutil"
"os"
"strings"
"testing"
)
func TestSanitizeLogFile(t *testing.T) {
testCases := []struct {
name string
input string
expected string
}{
{
input: "https://beyond-hd.me/torrent/download/auto.t0rrent1d.rssk3y",
expected: "https://beyond-hd.me/torrent/download/auto.t0rrent1d.REDACTED",
name: "BHD_URL",
input: "\"module\":\"filter\" https://beyond-hd.me/torrent/download/auto.t0rrent1d.rssk3y",
expected: "\"module\":\"filter\" https://beyond-hd.me/torrent/download/auto.t0rrent1d.REDACTED",
},
{
input: "https://aither.cc/torrent/download/t0rrent1d.rssk3y",
expected: "https://aither.cc/torrent/download/t0rrent1d.REDACTED",
name: "Standard_UNIT3D_URL",
input: "\"module\":\"filter\" https://aither.cc/torrent/download/t0rrent1d.rssk3y",
expected: "\"module\":\"filter\" https://aither.cc/torrent/download/t0rrent1d.REDACTED",
},
{
input: "https://www.torrentleech.org/rss/download/t0rrent1d/rssk3y/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent",
expected: "https://www.torrentleech.org/rss/download/t0rrent1d/REDACTED/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent",
name: "TL_URL",
input: "\"module\":\"filter\" https://www.torrentleech.org/rss/download/t0rrent1d/rssk3y/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent",
expected: "\"module\":\"filter\" https://www.torrentleech.org/rss/download/t0rrent1d/REDACTED/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent",
},
{
input: "https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=4uthk3y&torrent_pass=t0rrentp4ss",
expected: "https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=REDACTED&torrent_pass=REDACTED",
name: "auth_key_torrent_pass",
input: "\"module\":\"filter\" https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=4uthk3y&torrent_pass=t0rrentp4ss",
expected: "\"module\":\"filter\" https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=REDACTED&torrent_pass=REDACTED",
},
{
input: "Voyager autobot us3rn4me 1RCK3Y",
expected: "Voyager autobot us3rn4me REDACTED",
input: "\"module\":\"irc\" LiMEY_ !invite 1irck3y us3rn4me",
expected: "\"module\":\"irc\" LiMEY_ !invite REDACTED us3rn4me",
},
{
input: "Satsuki enter #announce us3rn4me 1RCK3Y",
expected: "Satsuki enter #announce us3rn4me REDACTED",
input: "\"module\":\"irc\" Voyager autobot us3rn4me 1irck3y",
expected: "\"module\":\"irc\" Voyager autobot us3rn4me REDACTED",
},
{
input: "Millie announce 1RCK3Y",
expected: "Millie announce REDACTED",
input: "\"module\":\"irc\" Satsuki enter #announce us3rn4me 1irck3y",
expected: "\"module\":\"irc\" Satsuki enter #announce us3rn4me REDACTED",
},
{
input: "DBBot announce 1RCK3Y",
expected: "DBBot announce REDACTED",
input: "\"module\":\"irc\" Sauron bot #ant-announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Sauron bot #ant-announce us3rn4me REDACTED",
},
{
input: "ENDOR !invite us3rnøme 1RCK3Y",
expected: "ENDOR !invite us3rnøme REDACTED",
input: "\"module\":\"irc\" Millie announce IRCKEY",
expected: "\"module\":\"irc\" Millie announce REDACTED",
},
{
input: "Vertigo ENTER #GGn-Announce us3rn4me 1RCK3Y",
expected: "Vertigo ENTER #GGn-Announce us3rn4me REDACTED",
input: "\"module\":\"irc\" DBBot announce IRCKEY",
expected: "\"module\":\"irc\" DBBot announce REDACTED",
},
{
input: "midgards announce 1RCK3Y",
expected: "midgards announce REDACTED",
input: "\"module\":\"irc\" PT-BOT invite IRCKEY",
expected: "\"module\":\"irc\" PT-BOT invite REDACTED",
},
{
input: "HeBoT !invite 1RCK3Y",
expected: "HeBoT !invite REDACTED",
input: "\"module\":\"irc\" midgards announce IRCKEY",
expected: "\"module\":\"irc\" midgards announce REDACTED",
},
{
input: "NBOT !invite 1RCK3Y",
expected: "NBOT !invite REDACTED",
input: "\"module\":\"irc\" HeBoT !invite IRCKEY",
expected: "\"module\":\"irc\" HeBoT !invite REDACTED",
},
{
input: "Muffit bot #nbl-announce us3rn4me 1RCK3Y",
expected: "Muffit bot #nbl-announce us3rn4me REDACTED",
input: "\"module\":\"irc\" NBOT !invite IRCKEY",
expected: "\"module\":\"irc\" NBOT !invite REDACTED",
},
{
input: "hermes enter #announce us3rn4me 1RCK3Y",
expected: "hermes enter #announce us3rn4me REDACTED",
input: "\"module\":\"irc\" PS-Info pass IRCKEY",
expected: "\"module\":\"irc\" PS-Info pass REDACTED",
},
{
input: "LiMEY_ !invite 1RCK3Y us3rn4me",
expected: "LiMEY_ !invite REDACTED us3rn4me",
input: "\"module\":\"irc\" Synd1c4t3 invite IRCKEY",
expected: "\"module\":\"irc\" Synd1c4t3 invite REDACTED",
},
{
input: "PS-Info pass 1RCK3Y",
expected: "PS-Info pass REDACTED",
input: "\"module\":\"irc\" UHDBot invite IRCKEY",
expected: "\"module\":\"irc\" UHDBot invite REDACTED",
},
{
input: "PT-BOT invite 1RCK3Y",
expected: "PT-BOT invite REDACTED",
input: "\"module\":\"irc\" ENDOR !invite us3rn4me IRCKEY",
expected: "\"module\":\"irc\" ENDOR !invite us3rn4me REDACTED",
},
{
input: "Hummingbird ENTER us3rn4me 1RCK3Y #ptp-announce-dev",
expected: "Hummingbird ENTER us3rn4me REDACTED #ptp-announce-dev",
input: "\"module\":\"irc\" Vertigo ENTER #GGn-Announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Vertigo ENTER #GGn-Announce us3rn4me REDACTED",
},
{
input: "Drone enter #red-announce us3rn4me 1RCK3Y",
expected: "Drone enter #red-announce us3rn4me REDACTED",
input: "\"module\":\"irc\" immortal invite us3rn4me IRCKEY",
expected: "\"module\":\"irc\" immortal invite us3rn4me REDACTED",
},
{
input: "SceneHD .invite 1RCK3Y #announce",
expected: "SceneHD .invite REDACTED #announce",
input: "\"module\":\"irc\" Muffit bot #nbl-announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Muffit bot #nbl-announce us3rn4me REDACTED",
},
{
input: "erica letmeinannounce us3rn4me 1RCK3Y",
expected: "erica letmeinannounce us3rn4me REDACTED",
input: "\"module\":\"irc\" hermes enter #announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" hermes enter #announce us3rn4me REDACTED",
},
{
input: "Synd1c4t3 invite 1RCK3Y",
expected: "Synd1c4t3 invite REDACTED",
input: "\"module\":\"irc\" Hummingbird ENTER us3rn4me IRCKEY #ptp-announce-dev",
expected: "\"module\":\"irc\" Hummingbird ENTER us3rn4me REDACTED #ptp-announce-dev",
},
{
input: "UHDBot invite 1RCK3Y",
expected: "UHDBot invite REDACTED",
input: "\"module\":\"irc\" Drone enter #red-announce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Drone enter #red-announce us3rn4me REDACTED",
},
{
input: "Sauron bot #ant-announce us3rn4me 1RCK3Y",
expected: "Sauron bot #ant-announce us3rn4me REDACTED",
input: "\"module\":\"irc\" RevoTT !invite us3rn4me IRCKEY",
expected: "\"module\":\"irc\" RevoTT !invite us3rn4me REDACTED",
},
{
input: "RevoTT !invite us3rn4me P4SSK3Y",
expected: "RevoTT !invite us3rn4me REDACTED",
input: "\"module\":\"irc\" SceneHD .invite IRCKEY #announce",
expected: "\"module\":\"irc\" SceneHD .invite REDACTED #announce",
},
{
input: "Cerberus identify us3rn4me P1D",
expected: "Cerberus identify us3rn4me REDACTED",
input: "\"module\":\"irc\" erica letmeinannounce us3rn4me IRCKEY",
expected: "\"module\":\"irc\" erica letmeinannounce us3rn4me REDACTED",
},
{
input: "NickServ IDENTIFY dasøl13sa#!",
expected: "NickServ IDENTIFY REDACTED",
input: "\"module\":\"irc\" Cerberus identify us3rn4me IRCKEY",
expected: "\"module\":\"irc\" Cerberus identify us3rn4me REDACTED",
},
{
input: "--> AUTHENTICATE poasd!232kljøasdj!%",
expected: "--> AUTHENTICATE REDACTED",
input: "\"module\":\"irc\" NickServ IDENTIFY Nvbkødn~vzjHkPEimnJ6PmJw8ayiE#wg",
expected: "\"module\":\"irc\" NickServ IDENTIFY REDACTED",
},
{
input: "\"module\":\"irc\" PRIVMSG NickServ IDENTIFY zAPEJEA8ryYnpj3AiE3KJ",
expected: "\"module\":\"irc\" PRIVMSG NickServ IDENTIFY REDACTED",
},
}
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
// Create a temporary file with sample log data
tmpFile, err := ioutil.TempFile("", "test-log-*.log")
if err != nil {
@ -137,8 +149,8 @@ func TestSanitizeLogFile(t *testing.T) {
}
defer os.Remove(tmpFile.Name())
// Write sample log data to the temporary file
_, err = tmpFile.WriteString(testCase.input)
// Write the test case input to the temporary file
_, err = tmpFile.WriteString(testCase.input + "\n")
if err != nil {
tmpFile.Close()
t.Fatal(err)
@ -148,22 +160,22 @@ func TestSanitizeLogFile(t *testing.T) {
t.Fatal(err)
}
// Call SanitizeLogFile on the temporary file
sanitizedTmpFilePath, err := SanitizeLogFile(tmpFile.Name())
if err != nil {
t.Fatal(err)
}
defer os.Remove(sanitizedTmpFilePath)
// Create a bytes.Buffer to store the sanitized content
sanitizedContent := &bytes.Buffer{}
// Read the content of the sanitized temporary file
sanitizedData, err := ioutil.ReadFile(sanitizedTmpFilePath)
// Call SanitizeLogFile on the temporary file
err = SanitizeLogFile(tmpFile.Name(), sanitizedContent)
if err != nil {
t.Fatal(err)
}
// Read the content of the sanitized content
sanitizedData := sanitizedContent.String()
// Check if the sanitized data matches the expected content
if string(sanitizedData) != testCase.expected {
t.Errorf("Sanitized data does not match expected data for input: %s\nExpected:\n%s\nActual:\n%s", testCase.input, testCase.expected, sanitizedData)
}
if !strings.Contains(sanitizedData, testCase.expected+"\n") {
t.Errorf("Sanitized data does not match expected data\nExpected:\n%s\nActual:\n%s", testCase.expected, sanitizedData)
}
})
}
}