Skip to content

Commit

Permalink
Include log text in snapshots (#597)
Browse files Browse the repository at this point in the history
  • Loading branch information
seanlinsley authored Oct 4, 2024
1 parent 9b1a707 commit a25800b
Show file tree
Hide file tree
Showing 18 changed files with 334 additions and 701 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// +heroku goVersion go1.20
// +heroku goVersion go1.21

module github.com/pganalyze/collector

Expand Down Expand Up @@ -98,4 +98,4 @@ require (
google.golang.org/grpc v1.58.3 // indirect
)

go 1.20
go 1.21
57 changes: 0 additions & 57 deletions grant/logs.go

This file was deleted.

3 changes: 1 addition & 2 deletions input/system/heroku/logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ import (
"time"

"github.com/kr/logfmt"
"github.com/pganalyze/collector/grant"
"github.com/pganalyze/collector/logs"
"github.com/pganalyze/collector/output"
"github.com/pganalyze/collector/state"
Expand Down Expand Up @@ -69,7 +68,7 @@ func processSystemMetrics(ctx context.Context, timestamp time.Time, content []by

prefixedLogger := logger.WithPrefix(server.Config.SectionName)

grant, err := grant.GetDefaultGrant(ctx, server, globalCollectionOpts, prefixedLogger)
grant, err := output.GetGrant(ctx, server, globalCollectionOpts, prefixedLogger)
if err != nil {
prefixedLogger.PrintError("Could not get default grant for system snapshot: %s", err)
return
Expand Down
3 changes: 1 addition & 2 deletions logs/analyze.go
Original file line number Diff line number Diff line change
Expand Up @@ -2266,8 +2266,7 @@ func AnalyzeBackendLogLines(logLines []state.LogLine) (logLinesOut []state.LogLi
logLinesOut = append(logLinesOut, logLine)
}

// Ensure no other part of the system accidentally sends log line contents, as
// they should be considered opaque from here on
// Remove log line content. Note that ReplaceSecrets adds it back after secrets have been removed.
for idx := range logLinesOut {
logLinesOut[idx].Content = ""
}
Expand Down
4 changes: 2 additions & 2 deletions logs/querysample/tracing.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import (

const otelSpanName = "EXPLAIN Plan"

func urlToSample(server *state.Server, grant state.GrantLogs, sample state.PostgresQuerySample) string {
func urlToSample(server *state.Server, grant state.Grant, sample state.PostgresQuerySample) string {
fp := util.FingerprintQuery(sample.Query, server.Config.FilterQueryText, -1)
fpBin := make([]byte, 8)
binary.BigEndian.PutUint64(fpBin, fp)
Expand Down Expand Up @@ -66,7 +66,7 @@ func startAndEndTime(traceState trace.TraceState, sample state.PostgresQuerySamp
return
}

func ExportQuerySamplesAsTraceSpans(ctx context.Context, server *state.Server, logger *util.Logger, grant state.GrantLogs, samples []state.PostgresQuerySample) {
func ExportQuerySamplesAsTraceSpans(ctx context.Context, server *state.Server, logger *util.Logger, grant state.Grant, samples []state.PostgresQuerySample) {
exportCount := 0
for _, sample := range samples {
if !sample.HasExplain {
Expand Down
63 changes: 15 additions & 48 deletions logs/replace.go
Original file line number Diff line number Diff line change
@@ -1,74 +1,41 @@
package logs

import (
"slices"
"sort"

"github.com/pganalyze/collector/state"
)

type logRange struct {
start int64
end int64 // When working with this range, the character at the index of end is *excluded*
}

const replacementChar = 'X'

// ReplaceSecrets - Replaces the secrets of the specified kind with the replacement character in the text
func ReplaceSecrets(input []byte, logLines []state.LogLine, filterLogSecret []state.LogSecretKind) []byte {
var goodRanges []logRange
const replacement = "[redacted]"

func ReplaceSecrets(input []byte, logLines []state.LogLine, filterLogSecret []state.LogSecretKind) {
filterUnidentified := false
for _, k := range filterLogSecret {
if k == state.UnidentifiedLogSecret {
filterUnidentified = true
}
}

for _, logLine := range logLines {
goodRanges = append(goodRanges, logRange{start: logLine.ByteStart, end: logLine.ByteContentStart})
if logLine.ReviewedForSecrets {
for idx, logLine := range logLines {
if filterUnidentified && logLines[idx].Classification == 0 {
logLines[idx].Content = replacement + "\n"
} else {
content := input[logLines[idx].ByteContentStart:logLines[idx].ByteEnd]
sort.Slice(logLine.SecretMarkers, func(i, j int) bool {
return logLine.SecretMarkers[i].ByteStart < logLine.SecretMarkers[j].ByteEnd
})

// We're creating a good range when we find a filtered secret or the end (everything before is marked as good)
nextIdxToEvaluate := logLine.ByteContentStart
bytesChecked := 0
offset := 0
for _, m := range logLine.SecretMarkers {
filter := false
for _, k := range filterLogSecret {
if m.Kind == k {
filter = true
if m.Kind == k && m.ByteStart > bytesChecked {
content = slices.Replace(content, m.ByteStart-offset, m.ByteEnd-offset, []byte(replacement)...)
bytesChecked = m.ByteEnd
offset += (m.ByteEnd - m.ByteStart) - len(replacement)
}
}
if filter {
firstFilteredIdx := logLine.ByteContentStart + int64(m.ByteStart)
goodRanges = append(goodRanges, logRange{start: nextIdxToEvaluate, end: firstFilteredIdx})
nextIdxToEvaluate = logLine.ByteContentStart + int64(m.ByteEnd)
}
}
// No more markers means the rest of the line is safe
if nextIdxToEvaluate < logLine.ByteEnd {
goodRanges = append(goodRanges, logRange{start: nextIdxToEvaluate, end: logLine.ByteEnd})
}
} else if !filterUnidentified {
goodRanges = append(goodRanges, logRange{start: logLine.ByteContentStart, end: logLine.ByteEnd})
}
}
sort.Slice(goodRanges, func(i, j int) bool {
return goodRanges[i].start < goodRanges[j].start
})

lastGood := int64(0)
for _, r := range goodRanges {
for i := lastGood; i < r.start; i++ {
input[i] = replacementChar
}
lastGood = r.end
}
if len(goodRanges) > 0 || filterUnidentified {
for i := lastGood; i < int64(len(input)); i++ {
input[i] = replacementChar
logLines[idx].Content = string(content)
}
}
return input
}
32 changes: 13 additions & 19 deletions logs/replace_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,42 +22,32 @@ var replaceTests = []replaceTestpair{
{
filterLogSecret: "all",
input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 1242.570 ms statement: SELECT 1\n",
output: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 1242.570 ms statement: XXXXXXXX\n",
output: "duration: 1242.570 ms statement: [redacted]\n",
},
{
filterLogSecret: "statement_text",
input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 1242.570 ms statement: SELECT 1\n",
output: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 1242.570 ms statement: XXXXXXXX\n",
output: "duration: 1242.570 ms statement: [redacted]\n",
},
{
filterLogSecret: "statement_parameter",
input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 4079.697 ms execute <unnamed>: \nSELECT * FROM x WHERE y = $1 LIMIT $2\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:DETAIL: parameters: $1 = 'long string', $2 = '1'\n",
output: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 4079.697 ms execute <unnamed>: \nSELECT * FROM x WHERE y = $1 LIMIT $2\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:DETAIL: parameters: $1 = 'XXXXXXXXXXX', $2 = 'X'\n",
input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 4079.697 ms execute <unnamed>: \nSELECT * FROM x WHERE y = $1 LIMIT $2\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:DETAIL: parameters: $1 = 'long string', $2 = '1', $3 = 'long string'\n",
output: "duration: 4079.697 ms execute <unnamed>: \nSELECT * FROM x WHERE y = $1 LIMIT $2\nparameters: $1 = '[redacted]', $2 = '[redacted]', $3 = '[redacted]'\n",
},
{
filterLogSecret: "none",
input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: division by zero\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: Unknown Data\n",
output: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: division by zero\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: Unknown Data\n",
output: "division by zero\nUnknown Data\n",
},
{
filterLogSecret: "unidentified",
input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: division by zero\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: Unknown Data\n",
output: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: division by zero\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:ERROR: XXXXXXXXXXXXX",
},
{
filterLogSecret: "none",
input: "Unknown Data\n",
output: "Unknown Data\n",
},
{
filterLogSecret: "unidentified",
input: "Unknown Data\n",
output: "XXXXXXXXXXXXX",
output: "division by zero\n[redacted]\n",
},
{
filterLogSecret: "statement_text, statement_parameter, unidentified",
input: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 4079.697 ms execute <unnamed>: \nSELECT * FROM x WHERE y = $1 LIMIT $2\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:DETAIL: parameters: $1 = 'long string', $2 = '1'\n",
output: "2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:LOG: duration: 4079.697 ms execute <unnamed>: \nXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n2018-03-11 20:00:02 UTC:1.1.1.1(2):a@b:[3]:DETAIL: parameters: $1 = 'XXXXXXXXXXX', $2 = 'X'\n",
output: "duration: 4079.697 ms execute <unnamed>: \n[redacted]\n[redacted]\n",
},
}

Expand All @@ -67,12 +57,16 @@ func TestReplaceSecrets(t *testing.T) {
server := state.MakeServer(config.ServerConfig{}, false)
server.LogParser = logs.NewLogParser(logs.LogPrefixAmazonRds, nil, false)
logLines, _ := logs.ParseAndAnalyzeBuffer(reader, time.Time{}, server)
output := logs.ReplaceSecrets([]byte(pair.input), logLines, state.ParseFilterLogSecret(pair.filterLogSecret))
logs.ReplaceSecrets([]byte(pair.input), logLines, state.ParseFilterLogSecret(pair.filterLogSecret))

cfg := pretty.CompareConfig
cfg.SkipZeroFields = true

if diff := cfg.Compare(pair.output, string(output)); diff != "" {
output := ""
for _, logLine := range logLines {
output += logLine.Content
}
if diff := cfg.Compare(pair.output, output); diff != "" {
t.Errorf("For filter \"%s\", text:\n%vdiff: (-want +got)\n%s", pair.filterLogSecret, pair.input, diff)
}
}
Expand Down
6 changes: 5 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,11 @@ func main() {
reader := strings.NewReader(content)
logReader := logs.NewMaybeHerokuLogReader(reader)
logLines, _ := logs.ParseAndAnalyzeBuffer(logReader, time.Time{}, state.MakeServer(config.ServerConfig{}, false))
output := logs.ReplaceSecrets(contentBytes, logLines, state.ParseFilterLogSecret(filterLogSecret))
logs.ReplaceSecrets(contentBytes, logLines, state.ParseFilterLogSecret(filterLogSecret))
output := ""
for _, logLine := range logLines {
output += logLine.Content
}
fmt.Printf("%s", output)
return
}
Expand Down
12 changes: 3 additions & 9 deletions output/compact_logs.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,19 +9,13 @@ import (
"github.com/pganalyze/collector/util"
)

// UploadAndSendLogs - Filters the log file, then uploads it to the storage and sends the metadata to the API
func UploadAndSendLogs(ctx context.Context, server *state.Server, grant state.GrantLogs, collectionOpts state.CollectionOpts, logger *util.Logger, logState state.TransientLogState) error {
if collectionOpts.SubmitCollectedData && grant.EncryptionKey.CiphertextBlob != "" {
logState.LogFiles = EncryptAndUploadLogfiles(ctx, server.Config.HTTPClientWithRetry, grant.Logdata, grant.EncryptionKey, logger, logState.LogFiles)
}

// UploadAndSendLogs - Filters the log file, then uploads it
func UploadAndSendLogs(ctx context.Context, server *state.Server, grant state.Grant, collectionOpts state.CollectionOpts, logger *util.Logger, logState state.TransientLogState) error {
ls, r := transform.LogStateToLogSnapshot(server, logState)
s := pganalyze_collector.CompactSnapshot{
BaseRefs: &r,
Data: &pganalyze_collector.CompactSnapshot_LogSnapshot{LogSnapshot: &ls},
}

snapshotGrant := state.Grant{Valid: true, S3URL: grant.Snapshot.S3URL, S3Fields: grant.Snapshot.S3Fields}

snapshotGrant := state.Grant{Valid: true, S3URL: grant.S3URL, S3Fields: grant.S3Fields}
return uploadAndSubmitCompactSnapshot(ctx, s, snapshotGrant, server, collectionOpts, logger, logState.CollectedAt, false, "logs")
}
4 changes: 2 additions & 2 deletions grant/default.go → output/grant.go
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package grant
package output

import (
"context"
Expand All @@ -12,7 +12,7 @@ import (
"github.com/pganalyze/collector/util"
)

func GetDefaultGrant(ctx context.Context, server *state.Server, globalCollectionOpts state.CollectionOpts, logger *util.Logger) (state.Grant, error) {
func GetGrant(ctx context.Context, server *state.Server, globalCollectionOpts state.CollectionOpts, logger *util.Logger) (state.Grant, error) {
grant := state.Grant{Config: pganalyze_collector.ServerMessage_Config{Features: &pganalyze_collector.ServerMessage_Features{}}}
req, err := http.NewRequestWithContext(ctx, "GET", server.Config.APIBaseURL+"/v2/snapshots/grant", nil)
if err != nil {
Expand Down
Loading

0 comments on commit a25800b

Please sign in to comment.