From c2fcd91da7eb1878ba0ecde2c953862c859896ee Mon Sep 17 00:00:00 2001 From: soup Date: Sun, 26 Mar 2023 23:25:23 +0200 Subject: [PATCH] refactor(logs): improve log sanitization performance (#779) * refactor log sanitization for better performance * Fix token error, add mutex for safer concurrency * serve sanitized version directly from memory * further improvements made it check for "module":"irc" before running the irc regex checks * Revert "further improvements" This reverts commit 260037c3a58e1e760613167d3e15136b313c0612. * update irc regexes to handle special characters - added prefixes to the irc patterns - improved tests * more improvements for irc regexes * fixed errors * another edge case bites the dust * regex improvements * lock to 1 worker if cpu cores is 2 or less * removed unused code * revert to single thread Moved regex patterns and replacements into a separate array for easier maintenance and readability. Optimized the regex patterns that share the same replacement string to avoid redundancy. Modify SanitizeLogFile to accept io.Writer for direct output * style * only check relevant lines - only check relevant lines - dont break unless length=0 * revert breaking change * handle api and auth cases --------- Co-authored-by: Kyle Sanderson --- internal/http/logs.go | 147 ++++++++++++++------- internal/http/logs_sanitize_test.go | 190 +++++++++++++++------------- 2 files changed, 201 insertions(+), 136 deletions(-) diff --git a/internal/http/logs.go b/internal/http/logs.go index ebcb899..6f01421 100644 --- a/internal/http/logs.go +++ b/internal/http/logs.go @@ -1,8 +1,10 @@ package http import ( + "bufio" + "io" "io/fs" - "io/ioutil" + "log" "net/http" "os" "path" @@ -87,48 +89,103 @@ func (h logsHandler) files(w http.ResponseWriter, r *http.Request) { render.JSON(w, r, response) } -var ( // regexes for sanitizing log files - keyValueRegex = regexp.MustCompile(`(torrent_pass|passkey|authkey|secret_key|apikey)=([a-zA-Z0-9]+)`) - combinedRegex = regexp.MustCompile(`(https?://[^\s]+/((rss/download/[a-zA-Z0-9]+/)|torrent/download/((auto\.[a-zA-Z0-9]+\.|[a-zA-Z0-9]+\.))))([a-zA-Z0-9]+)`) - inviteRegex = regexp.MustCompile(`(Voyager autobot [\p{L}0-9]+ |Satsuki enter #announce [\p{L}0-9]+ |Millie announce |DBBot announce |ENDOR !invite [\p{L}0-9]+ |Vertigo ENTER #GGn-Announce [\p{L}0-9]+ |midgards announce |HeBoT !invite |NBOT !invite |Muffit bot #nbl-announce [\p{L}0-9]+ |hermes enter #announce [\p{L}0-9]+ |LiMEY_ !invite |PS-Info pass |PT-BOT invite |Hummingbird ENTER [\p{L}0-9]+ |Drone enter #red-announce [\p{L}0-9]+ |SceneHD \.invite |erica letmeinannounce [\p{L}0-9]+ |Synd1c4t3 invite |UHDBot invite |Sauron bot #ant-announce [\p{L}0-9]+ |RevoTT !invite [\p{L}0-9]+ |Cerberus identify [\p{L}0-9]+ )([\p{L}0-9]+)`) - nickservRegex = regexp.MustCompile(`(NickServ IDENTIFY )([\p{L}0-9!#%&*+/:;<=>?@^_` + "`" + `{|}~]+)`) - saslRegex = regexp.MustCompile(`(--> AUTHENTICATE )([\p{L}0-9!#%&*+/:;<=>?@^_` + "`" + `{|}~]+)`) +var ( + regexReplacements = []struct { + pattern *regexp.Regexp + repl string + }{ + { + pattern: regexp.MustCompile(`(torrent_pass|passkey|authkey|auth|secret_key|api|apikey)=([a-zA-Z0-9]+)`), + repl: "${1}=REDACTED", + }, + { + pattern: regexp.MustCompile(`(https?://[^\s]+/((rss/download/[a-zA-Z0-9]+/)|torrent/download/((auto\.[a-zA-Z0-9]+\.|[a-zA-Z0-9]+\.))))([a-zA-Z0-9]+)`), + repl: "${1}REDACTED", + }, + { + pattern: regexp.MustCompile(`(NickServ IDENTIFY )([\p{L}0-9!#%&*+/:;<=>?@^_` + "`" + `{|}~]+)`), + repl: "${1}REDACTED", + }, + { + pattern: regexp.MustCompile(`(AUTHENTICATE )([\p{L}0-9!#%&*+/:;<=>?@^_` + "`" + `{|}~]+)`), + repl: "${1}REDACTED", + }, + { + pattern: regexp.MustCompile( + `(?m)(` + + `(?:Voyager autobot\s+\w+|Satsuki enter #announce\s+\w+|Sauron bot #ant-announce\s+\w+|Millie announce|DBBot announce|PT-BOT invite|midgards announce|HeBoT !invite|NBOT !invite|PS-Info pass|Synd1c4t3 invite|UHDBot invite|ENDOR !invite(\s+)\w+|immortal invite(\s+)\w+|Muffit bot #nbl-announce\s+\w+|hermes enter #announce\s+\w+|Drone enter #red-announce\s+\w+|RevoTT !invite\s+\w+|erica letmeinannounce\s+\w+|Cerberus identify\s+\w+)` + + `)(?:\s+[a-zA-Z0-9]+)`), + repl: "$1 REDACTED", + }, + { + pattern: regexp.MustCompile(`(LiMEY_ !invite\s+)([a-zA-Z0-9]+)(\s+\w+)`), + repl: "${1}REDACTED${3}", + }, + { + pattern: regexp.MustCompile(`(Vertigo ENTER #GGn-Announce\s+)(\w+).([a-zA-Z0-9]+)`), + repl: "$1$2 REDACTED", + }, + { + pattern: regexp.MustCompile(`(Hummingbird ENTER\s+\w+).([a-zA-Z0-9]+)(\s+#ptp-announce-dev)`), + repl: "$1 REDACTED$3", + }, + { + pattern: regexp.MustCompile(`(SceneHD..invite).([a-zA-Z0-9]+)(\s+#announce)`), + repl: "$1 REDACTED$3", + }, + } ) -func SanitizeLogFile(filePath string) (string, error) { - data, err := ioutil.ReadFile(filePath) +func SanitizeLogFile(filePath string, output io.Writer) error { + inFile, err := os.Open(filePath) if err != nil { - return "", err + return err + } + defer inFile.Close() + + reader := bufio.NewReader(inFile) + writer := bufio.NewWriter(output) + defer writer.Flush() + + for { + // Read the next line from the file + line, err := reader.ReadString('\n') + + if err != nil { + if err != io.EOF { + log.Printf("Error reading line from input file: %v", err) + } + break + } + + // Sanitize the line using regexReplacements array + bIRC := strings.Contains(line, `"module":"irc"`) + bFilter := (strings.Contains(line, `"module":"feed"`) || + strings.Contains(line, `"module":"filter"`)) || + strings.Contains(line, `"repo":"release"`) || + strings.Contains(line, `"module":"action"`) + + for i := 0; i < len(regexReplacements); i++ { + // Apply the first two patterns only if the line contains "module":"feed", + // "module":"filter", "repo":"release", or "module":"action" + if i < 2 { + if bFilter { + line = regexReplacements[i].pattern.ReplaceAllString(line, regexReplacements[i].repl) + } + } else if bIRC { + // Check for "module":"irc" before applying other patterns + line = regexReplacements[i].pattern.ReplaceAllString(line, regexReplacements[i].repl) + } + } + + // Write the sanitized line to the writer + if _, err = writer.WriteString(line); err != nil { + log.Printf("Error writing line to output: %v", err) + return err + } } - sanitizedData := string(data) - - // torrent_pass, passkey, authkey, secret_key, apikey, rsskey - sanitizedData = keyValueRegex.ReplaceAllString(sanitizedData, "${1}=REDACTED") - sanitizedData = combinedRegex.ReplaceAllString(sanitizedData, "${1}REDACTED") - - // irc related - sanitizedData = inviteRegex.ReplaceAllString(sanitizedData, "${1}REDACTED") - sanitizedData = nickservRegex.ReplaceAllString(sanitizedData, "${1}REDACTED") - sanitizedData = saslRegex.ReplaceAllString(sanitizedData, "${1}REDACTED") - - tmpFile, err := ioutil.TempFile("", "sanitized-log-*.log") - if err != nil { - return "", err - } - - _, err = tmpFile.WriteString(sanitizedData) - if err != nil { - tmpFile.Close() - return "", err - } - - err = tmpFile.Close() - if err != nil { - return "", err - } - - return tmpFile.Name(), nil + return nil } func (h logsHandler) downloadFile(w http.ResponseWriter, r *http.Request) { @@ -168,9 +225,11 @@ func (h logsHandler) downloadFile(w http.ResponseWriter, r *http.Request) { filePath := filepath.Join(logsDir, logFile) - // Sanitize the log file - sanitizedFilePath, err := SanitizeLogFile(filePath) - if err != nil { + w.Header().Set("Content-Disposition", "attachment; filename="+strconv.Quote(logFile)) + w.Header().Set("Content-Type", "application/octet-stream") + + // Sanitize the log file and directly write the output to the HTTP socket + if err := SanitizeLogFile(filePath, w); err != nil { render.Status(r, http.StatusInternalServerError) render.JSON(w, r, errorResponse{ Message: err.Error(), @@ -178,12 +237,6 @@ func (h logsHandler) downloadFile(w http.ResponseWriter, r *http.Request) { }) return } - defer os.Remove(sanitizedFilePath) - - w.Header().Set("Content-Disposition", "attachment; filename="+strconv.Quote(logFile)) - w.Header().Set("Content-Type", "application/octet-stream") - - http.ServeFile(w, r, sanitizedFilePath) } type logFile struct { diff --git a/internal/http/logs_sanitize_test.go b/internal/http/logs_sanitize_test.go index 495d011..8b9c9c7 100644 --- a/internal/http/logs_sanitize_test.go +++ b/internal/http/logs_sanitize_test.go @@ -1,169 +1,181 @@ package http import ( + "bytes" "io/ioutil" "os" + "strings" "testing" ) func TestSanitizeLogFile(t *testing.T) { testCases := []struct { + name string input string expected string }{ { - input: "https://beyond-hd.me/torrent/download/auto.t0rrent1d.rssk3y", - expected: "https://beyond-hd.me/torrent/download/auto.t0rrent1d.REDACTED", + name: "BHD_URL", + input: "\"module\":\"filter\" https://beyond-hd.me/torrent/download/auto.t0rrent1d.rssk3y", + expected: "\"module\":\"filter\" https://beyond-hd.me/torrent/download/auto.t0rrent1d.REDACTED", }, { - input: "https://aither.cc/torrent/download/t0rrent1d.rssk3y", - expected: "https://aither.cc/torrent/download/t0rrent1d.REDACTED", + name: "Standard_UNIT3D_URL", + input: "\"module\":\"filter\" https://aither.cc/torrent/download/t0rrent1d.rssk3y", + expected: "\"module\":\"filter\" https://aither.cc/torrent/download/t0rrent1d.REDACTED", }, { - input: "https://www.torrentleech.org/rss/download/t0rrent1d/rssk3y/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent", - expected: "https://www.torrentleech.org/rss/download/t0rrent1d/REDACTED/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent", + name: "TL_URL", + input: "\"module\":\"filter\" https://www.torrentleech.org/rss/download/t0rrent1d/rssk3y/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent", + expected: "\"module\":\"filter\" https://www.torrentleech.org/rss/download/t0rrent1d/REDACTED/Dark+Places+1974+1080p+BluRay+x264-GAZER.torrent", }, { - input: "https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=4uthk3y&torrent_pass=t0rrentp4ss", - expected: "https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=REDACTED&torrent_pass=REDACTED", + name: "auth_key_torrent_pass", + input: "\"module\":\"filter\" https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=4uthk3y&torrent_pass=t0rrentp4ss", + expected: "\"module\":\"filter\" https://alpharatio.cc/torrents.php?action=download&id=t0rrent1d&authkey=REDACTED&torrent_pass=REDACTED", }, { - input: "Voyager autobot us3rn4me 1RCK3Y", - expected: "Voyager autobot us3rn4me REDACTED", + input: "\"module\":\"irc\" LiMEY_ !invite 1irck3y us3rn4me", + expected: "\"module\":\"irc\" LiMEY_ !invite REDACTED us3rn4me", }, { - input: "Satsuki enter #announce us3rn4me 1RCK3Y", - expected: "Satsuki enter #announce us3rn4me REDACTED", + input: "\"module\":\"irc\" Voyager autobot us3rn4me 1irck3y", + expected: "\"module\":\"irc\" Voyager autobot us3rn4me REDACTED", }, { - input: "Millie announce 1RCK3Y", - expected: "Millie announce REDACTED", + input: "\"module\":\"irc\" Satsuki enter #announce us3rn4me 1irck3y", + expected: "\"module\":\"irc\" Satsuki enter #announce us3rn4me REDACTED", }, { - input: "DBBot announce 1RCK3Y", - expected: "DBBot announce REDACTED", + input: "\"module\":\"irc\" Sauron bot #ant-announce us3rn4me IRCKEY", + expected: "\"module\":\"irc\" Sauron bot #ant-announce us3rn4me REDACTED", }, { - input: "ENDOR !invite us3rnøme 1RCK3Y", - expected: "ENDOR !invite us3rnøme REDACTED", + input: "\"module\":\"irc\" Millie announce IRCKEY", + expected: "\"module\":\"irc\" Millie announce REDACTED", }, { - input: "Vertigo ENTER #GGn-Announce us3rn4me 1RCK3Y", - expected: "Vertigo ENTER #GGn-Announce us3rn4me REDACTED", + input: "\"module\":\"irc\" DBBot announce IRCKEY", + expected: "\"module\":\"irc\" DBBot announce REDACTED", }, { - input: "midgards announce 1RCK3Y", - expected: "midgards announce REDACTED", + input: "\"module\":\"irc\" PT-BOT invite IRCKEY", + expected: "\"module\":\"irc\" PT-BOT invite REDACTED", }, { - input: "HeBoT !invite 1RCK3Y", - expected: "HeBoT !invite REDACTED", + input: "\"module\":\"irc\" midgards announce IRCKEY", + expected: "\"module\":\"irc\" midgards announce REDACTED", }, { - input: "NBOT !invite 1RCK3Y", - expected: "NBOT !invite REDACTED", + input: "\"module\":\"irc\" HeBoT !invite IRCKEY", + expected: "\"module\":\"irc\" HeBoT !invite REDACTED", }, { - input: "Muffit bot #nbl-announce us3rn4me 1RCK3Y", - expected: "Muffit bot #nbl-announce us3rn4me REDACTED", + input: "\"module\":\"irc\" NBOT !invite IRCKEY", + expected: "\"module\":\"irc\" NBOT !invite REDACTED", }, { - input: "hermes enter #announce us3rn4me 1RCK3Y", - expected: "hermes enter #announce us3rn4me REDACTED", + input: "\"module\":\"irc\" PS-Info pass IRCKEY", + expected: "\"module\":\"irc\" PS-Info pass REDACTED", }, { - input: "LiMEY_ !invite 1RCK3Y us3rn4me", - expected: "LiMEY_ !invite REDACTED us3rn4me", + input: "\"module\":\"irc\" Synd1c4t3 invite IRCKEY", + expected: "\"module\":\"irc\" Synd1c4t3 invite REDACTED", }, { - input: "PS-Info pass 1RCK3Y", - expected: "PS-Info pass REDACTED", + input: "\"module\":\"irc\" UHDBot invite IRCKEY", + expected: "\"module\":\"irc\" UHDBot invite REDACTED", }, { - input: "PT-BOT invite 1RCK3Y", - expected: "PT-BOT invite REDACTED", + input: "\"module\":\"irc\" ENDOR !invite us3rn4me IRCKEY", + expected: "\"module\":\"irc\" ENDOR !invite us3rn4me REDACTED", }, { - input: "Hummingbird ENTER us3rn4me 1RCK3Y #ptp-announce-dev", - expected: "Hummingbird ENTER us3rn4me REDACTED #ptp-announce-dev", + input: "\"module\":\"irc\" Vertigo ENTER #GGn-Announce us3rn4me IRCKEY", + expected: "\"module\":\"irc\" Vertigo ENTER #GGn-Announce us3rn4me REDACTED", }, { - input: "Drone enter #red-announce us3rn4me 1RCK3Y", - expected: "Drone enter #red-announce us3rn4me REDACTED", + input: "\"module\":\"irc\" immortal invite us3rn4me IRCKEY", + expected: "\"module\":\"irc\" immortal invite us3rn4me REDACTED", }, { - input: "SceneHD .invite 1RCK3Y #announce", - expected: "SceneHD .invite REDACTED #announce", + input: "\"module\":\"irc\" Muffit bot #nbl-announce us3rn4me IRCKEY", + expected: "\"module\":\"irc\" Muffit bot #nbl-announce us3rn4me REDACTED", }, { - input: "erica letmeinannounce us3rn4me 1RCK3Y", - expected: "erica letmeinannounce us3rn4me REDACTED", + input: "\"module\":\"irc\" hermes enter #announce us3rn4me IRCKEY", + expected: "\"module\":\"irc\" hermes enter #announce us3rn4me REDACTED", }, { - input: "Synd1c4t3 invite 1RCK3Y", - expected: "Synd1c4t3 invite REDACTED", + input: "\"module\":\"irc\" Hummingbird ENTER us3rn4me IRCKEY #ptp-announce-dev", + expected: "\"module\":\"irc\" Hummingbird ENTER us3rn4me REDACTED #ptp-announce-dev", }, { - input: "UHDBot invite 1RCK3Y", - expected: "UHDBot invite REDACTED", + input: "\"module\":\"irc\" Drone enter #red-announce us3rn4me IRCKEY", + expected: "\"module\":\"irc\" Drone enter #red-announce us3rn4me REDACTED", }, { - input: "Sauron bot #ant-announce us3rn4me 1RCK3Y", - expected: "Sauron bot #ant-announce us3rn4me REDACTED", + input: "\"module\":\"irc\" RevoTT !invite us3rn4me IRCKEY", + expected: "\"module\":\"irc\" RevoTT !invite us3rn4me REDACTED", }, { - input: "RevoTT !invite us3rn4me P4SSK3Y", - expected: "RevoTT !invite us3rn4me REDACTED", + input: "\"module\":\"irc\" SceneHD .invite IRCKEY #announce", + expected: "\"module\":\"irc\" SceneHD .invite REDACTED #announce", }, { - input: "Cerberus identify us3rn4me P1D", - expected: "Cerberus identify us3rn4me REDACTED", + input: "\"module\":\"irc\" erica letmeinannounce us3rn4me IRCKEY", + expected: "\"module\":\"irc\" erica letmeinannounce us3rn4me REDACTED", }, { - input: "NickServ IDENTIFY dasøl13sa#!", - expected: "NickServ IDENTIFY REDACTED", + input: "\"module\":\"irc\" Cerberus identify us3rn4me IRCKEY", + expected: "\"module\":\"irc\" Cerberus identify us3rn4me REDACTED", }, { - input: "--> AUTHENTICATE poasd!232kljøasdj!%", - expected: "--> AUTHENTICATE REDACTED", + input: "\"module\":\"irc\" NickServ IDENTIFY Nvbkødn~vzjHkPEimnJ6PmJw8ayiE#wg", + expected: "\"module\":\"irc\" NickServ IDENTIFY REDACTED", + }, + { + input: "\"module\":\"irc\" PRIVMSG NickServ IDENTIFY zAPEJEA8ryYnpj3AiE3KJ", + expected: "\"module\":\"irc\" PRIVMSG NickServ IDENTIFY REDACTED", }, } for _, testCase := range testCases { - // Create a temporary file with sample log data - tmpFile, err := ioutil.TempFile("", "test-log-*.log") - if err != nil { - t.Fatal(err) - } - defer os.Remove(tmpFile.Name()) + t.Run(testCase.name, func(t *testing.T) { + // Create a temporary file with sample log data + tmpFile, err := ioutil.TempFile("", "test-log-*.log") + if err != nil { + t.Fatal(err) + } + defer os.Remove(tmpFile.Name()) - // Write sample log data to the temporary file - _, err = tmpFile.WriteString(testCase.input) - if err != nil { - tmpFile.Close() - t.Fatal(err) - } - err = tmpFile.Close() - if err != nil { - t.Fatal(err) - } + // Write the test case input to the temporary file + _, err = tmpFile.WriteString(testCase.input + "\n") + if err != nil { + tmpFile.Close() + t.Fatal(err) + } + err = tmpFile.Close() + if err != nil { + t.Fatal(err) + } - // Call SanitizeLogFile on the temporary file - sanitizedTmpFilePath, err := SanitizeLogFile(tmpFile.Name()) - if err != nil { - t.Fatal(err) - } - defer os.Remove(sanitizedTmpFilePath) + // Create a bytes.Buffer to store the sanitized content + sanitizedContent := &bytes.Buffer{} - // Read the content of the sanitized temporary file - sanitizedData, err := ioutil.ReadFile(sanitizedTmpFilePath) - if err != nil { - t.Fatal(err) - } + // Call SanitizeLogFile on the temporary file + err = SanitizeLogFile(tmpFile.Name(), sanitizedContent) + if err != nil { + t.Fatal(err) + } - // Check if the sanitized data matches the expected content - if string(sanitizedData) != testCase.expected { - t.Errorf("Sanitized data does not match expected data for input: %s\nExpected:\n%s\nActual:\n%s", testCase.input, testCase.expected, sanitizedData) - } + // Read the content of the sanitized content + sanitizedData := sanitizedContent.String() + + // Check if the sanitized data matches the expected content + if !strings.Contains(sanitizedData, testCase.expected+"\n") { + t.Errorf("Sanitized data does not match expected data\nExpected:\n%s\nActual:\n%s", testCase.expected, sanitizedData) + } + }) } }