From 64c28edbdf45fd600819f52b5bfe27c1381f652c Mon Sep 17 00:00:00 2001 From: David Ventura Date: Sat, 30 Mar 2024 15:03:50 +0100 Subject: [PATCH 01/13] Implement CaddyParser --- logscan/caddy_parser.go | 103 ++++++++++++++++++++++++++++++++++++++++ logscan/logscan.go | 8 +++- 2 files changed, 110 insertions(+), 1 deletion(-) create mode 100644 logscan/caddy_parser.go diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go new file mode 100644 index 00000000..8fd9eba8 --- /dev/null +++ b/logscan/caddy_parser.go @@ -0,0 +1,103 @@ +package logscan + +import ( + "encoding/json" + "fmt" + "math" + "time" +) + +type CaddyLogEntry struct { + Timestamp float64 `json:"ts"` + Request Request `json:"request"` + Duration float64 `json:"duration"` + Size_ int `json:"size"` + Status_ int `json:"status"` + RespHeaders Headers `json:"resp_headers"` +} + +type Request struct { + RemoteAddr string `json:"remote_addr"` + Proto string `json:"proto"` + Method string `json:"method"` + Host string `json:"host"` + URI string `json:"uri"` + Headers Headers `json:"headers"` +} + +type Headers struct { + UserAgent []string `json:"User-Agent"` + Referer []string `json:"Referer"` + ContentType []string `json:"Content-Type"` + XForwardedFor []string `json:"X-Forwarded-For"` + AcceptLanguage []string `json:"Accept-Language"` +} + +type CaddyParser struct { +} + +func (p CaddyParser) Parse(line string) (Line, bool, error) { + var logEntry CaddyLogEntry + err := json.Unmarshal([]byte(line), &logEntry) + if err != nil { + fmt.Println("Error unmarshalling JSON:", err) + return nil, false, err + } + + return logEntry, false, nil +} + +var _ LineParser = CaddyParser{} +var _ Line = CaddyLogEntry{} + +func (l CaddyLogEntry) Host() string { return l.Request.Host } +func (l CaddyLogEntry) RemoteAddr() string { return l.Request.RemoteAddr } +func (l CaddyLogEntry) Method() string { return l.Request.Method } +func (l CaddyLogEntry) HTTP() string { return l.Request.Proto } +func (l CaddyLogEntry) Path() string { return l.Request.URI } +func (l CaddyLogEntry) Status() int { return l.Status_ } +func (l CaddyLogEntry) Size() int { return l.Size_ } + +func (l CaddyLogEntry) Query() string { + return "" // TODO +} +func (l CaddyLogEntry) XForwardedFor() string { + if len(l.Request.Headers.XForwardedFor) > 0 { + return l.Request.Headers.XForwardedFor[0] + } + return "" +} +func (l CaddyLogEntry) Referrer() string { + if len(l.Request.Headers.Referer) > 0 { + return l.Request.Headers.Referer[0] + } + return "" +} +func (l CaddyLogEntry) UserAgent() string { + if len(l.Request.Headers.UserAgent) > 0 { + return l.Request.Headers.UserAgent[0] + } + return "" +} +func (l CaddyLogEntry) ContentType() string { + if len(l.Request.Headers.ContentType) > 0 { + return l.Request.Headers.ContentType[0] + } + return "" +} +func (l CaddyLogEntry) Language() string { + if len(l.Request.Headers.AcceptLanguage) > 0 { + return l.Request.Headers.AcceptLanguage[0] + } + return "" +} + +func (l CaddyLogEntry) Timing() time.Duration { + return time.Duration(l.Duration) * time.Millisecond +} + +func (l CaddyLogEntry) Datetime(scan *Scanner) (time.Time, error) { + sec, dec := math.Modf(l.Timestamp) + t := time.Unix(int64(sec), int64(dec*(1e9))) + return t, nil +} diff --git a/logscan/logscan.go b/logscan/logscan.go index 165cc1d2..05dc9282 100644 --- a/logscan/logscan.go +++ b/logscan/logscan.go @@ -134,7 +134,13 @@ func NewFollow(ctx context.Context, file, format, date, tyme, datetime string, e } func makeNew(format, date, tyme, datetime string, exclude []string) (*Scanner, error) { - p, err := newRegexParser(format, date, tyme, datetime, exclude) + var p LineParser + var err error + if format == "caddy" { + p = CaddyParser{} + } else { + p, err = newRegexParser(format, date, tyme, datetime, exclude) + } if err != nil { return nil, err } From 071f0058f1435b3ccf409089d9e915042889299b Mon Sep 17 00:00:00 2001 From: David Ventura Date: Sat, 30 Mar 2024 16:49:56 +0100 Subject: [PATCH 02/13] add test --- logscan/caddy_parser.go | 21 +++++----- logscan/caddy_parser_test.go | 72 +++++++++++++++++++++++++++++++++++ logscan/caddy_testdata/1.json | 66 ++++++++++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 10 deletions(-) create mode 100644 logscan/caddy_parser_test.go create mode 100644 logscan/caddy_testdata/1.json diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go index 8fd9eba8..41d8a232 100644 --- a/logscan/caddy_parser.go +++ b/logscan/caddy_parser.go @@ -61,6 +61,17 @@ func (l CaddyLogEntry) Size() int { return l.Size_ } func (l CaddyLogEntry) Query() string { return "" // TODO } + +func (l CaddyLogEntry) Timing() time.Duration { + // TODO: `Second` should depend on the log format + return time.Duration(l.Duration * float64(time.Second)) +} + +func (l CaddyLogEntry) Datetime(scan *Scanner) (time.Time, error) { + sec, dec := math.Modf(l.Timestamp) + t := time.Unix(int64(sec), int64(dec*(1e9))) + return t, nil +} func (l CaddyLogEntry) XForwardedFor() string { if len(l.Request.Headers.XForwardedFor) > 0 { return l.Request.Headers.XForwardedFor[0] @@ -91,13 +102,3 @@ func (l CaddyLogEntry) Language() string { } return "" } - -func (l CaddyLogEntry) Timing() time.Duration { - return time.Duration(l.Duration) * time.Millisecond -} - -func (l CaddyLogEntry) Datetime(scan *Scanner) (time.Time, error) { - sec, dec := math.Modf(l.Timestamp) - t := time.Unix(int64(sec), int64(dec*(1e9))) - return t, nil -} diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go new file mode 100644 index 00000000..4a86391a --- /dev/null +++ b/logscan/caddy_parser_test.go @@ -0,0 +1,72 @@ +package logscan + +import ( + "io/ioutil" + "testing" + "time" +) + +func TestParseLine(t *testing.T) { + data, err := ioutil.ReadFile("./caddy_testdata/1.json") + if err != nil { + t.Fatal(err) + } + p := CaddyParser{} + line, skip, err := p.Parse(string(data)) + if skip { + t.Fatalf("Entry skipped") + } + if err != nil { + t.Fatalf("Failed to parse: %#v", err) + } + + if line.Host() != "host.example.com" { + t.Fatalf("Unexpected Host: %#v", line.Host()) + } + if line.RemoteAddr() != "1.2.3.4:5678" { + t.Fatalf("Unexpected RemoteAddr: %#v", line.RemoteAddr()) + } + if line.Method() != "GET" { + t.Fatalf("Unexpected Method: %#v", line.Method()) + } + if line.HTTP() != "HTTP/1.1" { + t.Fatalf("Unexpected HTTP: %#v", line.HTTP()) + } + if line.Path() != "/absolute_uri.html" { + t.Fatalf("Unexpected Path: %#v", line.Path()) + } + if line.Status() != 200 { + t.Fatalf("Unexpected Status: %#v", line.Status()) + } + if line.Size() != 2803 { + t.Fatalf("Unexpected Size: %#v", line.Size()) + } + if line.Query() != "" { + t.Fatalf("Unexpected Query: %#v", line.Query()) + } + if line.Timing() != 1234567 { + t.Fatalf("Unexpected Timing: %#v", line.Timing()) + } + dt, err := line.Datetime(nil) + if err != nil { + t.Fatalf("Failed to parse Datetime: %#v", err) + } + if dt != time.Date(2024, 02, 01, 14, 32, 01, 656359195, time.Local) { + t.Fatalf("Unexpected Datetime: %#v", dt) + } + if line.XForwardedFor() != "" { + t.Fatalf("Unexpected XForwardedFor: %#v", line.XForwardedFor()) + } + if line.Referrer() != "https://another.example.com/" { + t.Fatalf("Unexpected Referrer: %#v", line.Referrer()) + } + if line.UserAgent() != "This is the user agent" { + t.Fatalf("Unexpected UserAgent: %#v", line.UserAgent()) + } + if line.ContentType() != "" { + t.Fatalf("Unexpected ContentType: %#v", line.ContentType()) + } + if line.Language() != "en" { + t.Fatalf("Unexpected Language: %#v", line.Language()) + } +} diff --git a/logscan/caddy_testdata/1.json b/logscan/caddy_testdata/1.json new file mode 100644 index 00000000..8b174631 --- /dev/null +++ b/logscan/caddy_testdata/1.json @@ -0,0 +1,66 @@ +{ + "level": "info", + "ts": 1706794321.6563592, + "logger": "http.log.access.log1", + "msg": "handled request", + "request": { + "remote_addr": "1.2.3.4:5678", + "proto": "HTTP/1.1", + "method": "GET", + "host": "host.example.com", + "uri": "/absolute_uri.html", + "headers": { + "Connection": [ + "Close" + ], + "User-Agent": [ + "This is the user agent" + ], + "Accept-Language": [ + "en" + ], + "Referer": [ + "https://another.example.com/" + ], + "Accept-Encoding": [ + "gzip, deflate" + ], + "Accept": [ + "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" + ] + }, + "tls": { + "resumed": false, + "version": 772, + "cipher_suite": 4865, + "proto": "", + "proto_mutual": true, + "server_name": "host.example.com" + } + }, + "common_log": "1.2.3.4 - - [01/Feb/2024:14:32:01 +0100] \"GET /absolute_uri.html HTTP/1.1\" 200 2803", + "duration": 0.001234567, + "size": 2803, + "status": 200, + "resp_headers": { + "Content-Encoding": [ + "gzip" + ], + "Vary": [ + "Accept-Encoding" + ], + "Server": [ + "Caddy" + ], + "Etag": [ + "\"s0rqswb68\"" + ], + "Content-Type": [ + "text/html; charset=utf-8" + ], + "Last-Modified": [ + "Sun, 10 Sep 2023 12:02:56 GMT" + ] + } +} + From d19d28b72280b6d0335630a85dae00dbea5809f1 Mon Sep 17 00:00:00 2001 From: David Ventura Date: Sat, 30 Mar 2024 16:58:56 +0100 Subject: [PATCH 03/13] Add support for query params --- logscan/caddy_parser.go | 15 +++++++++++++-- logscan/caddy_parser_test.go | 2 +- logscan/caddy_testdata/1.json | 2 +- 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go index 41d8a232..a527371f 100644 --- a/logscan/caddy_parser.go +++ b/logscan/caddy_parser.go @@ -4,6 +4,7 @@ import ( "encoding/json" "fmt" "math" + "net/url" "time" ) @@ -54,12 +55,22 @@ func (l CaddyLogEntry) Host() string { return l.Request.Host } func (l CaddyLogEntry) RemoteAddr() string { return l.Request.RemoteAddr } func (l CaddyLogEntry) Method() string { return l.Request.Method } func (l CaddyLogEntry) HTTP() string { return l.Request.Proto } -func (l CaddyLogEntry) Path() string { return l.Request.URI } func (l CaddyLogEntry) Status() int { return l.Status_ } func (l CaddyLogEntry) Size() int { return l.Size_ } +func (l CaddyLogEntry) Path() string { + u, err := url.Parse(l.Request.URI) + if err != nil { + return "" + } + return u.Path +} func (l CaddyLogEntry) Query() string { - return "" // TODO + u, err := url.Parse(l.Request.URI) + if err != nil { + return "" + } + return u.RawQuery } func (l CaddyLogEntry) Timing() time.Duration { diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go index 4a86391a..1bcd5679 100644 --- a/logscan/caddy_parser_test.go +++ b/logscan/caddy_parser_test.go @@ -41,7 +41,7 @@ func TestParseLine(t *testing.T) { if line.Size() != 2803 { t.Fatalf("Unexpected Size: %#v", line.Size()) } - if line.Query() != "" { + if line.Query() != "queryparam=value" { t.Fatalf("Unexpected Query: %#v", line.Query()) } if line.Timing() != 1234567 { diff --git a/logscan/caddy_testdata/1.json b/logscan/caddy_testdata/1.json index 8b174631..6553824a 100644 --- a/logscan/caddy_testdata/1.json +++ b/logscan/caddy_testdata/1.json @@ -8,7 +8,7 @@ "proto": "HTTP/1.1", "method": "GET", "host": "host.example.com", - "uri": "/absolute_uri.html", + "uri": "/absolute_uri.html?queryparam=value", "headers": { "Connection": [ "Close" From df82269fec95bcf1885c3175e20576bde8452bc6 Mon Sep 17 00:00:00 2001 From: David Ventura Date: Sat, 30 Mar 2024 18:11:00 +0100 Subject: [PATCH 04/13] Implement Datetime parsing --- cmd/goatcounter/import.go | 3 ++ logscan/caddy_parser.go | 58 ++++++++++++++++++++++++++++++------ logscan/caddy_parser_test.go | 39 +++++++++++++++++++++++- logscan/logscan.go | 5 ++-- logscan/logscan_test.go | 2 +- logscan/regex_parser.go | 4 +-- 6 files changed, 96 insertions(+), 15 deletions(-) diff --git a/cmd/goatcounter/import.go b/cmd/goatcounter/import.go index 75fcbc21..91a644d4 100644 --- a/cmd/goatcounter/import.go +++ b/cmd/goatcounter/import.go @@ -195,6 +195,9 @@ Date and time parsing: rfc3339nano 2006-01-02T15:04:05.999999999Z07:00 The full documentation is available at https://pkg.go.dev/time + + The 'caddy' format _also_ accepts: + 'unix_seconds_float', 'unix_milli_float' and 'unix_nano' for datetime ` func cmdImport(f zli.Flags, ready chan<- struct{}, stop chan struct{}) error { diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go index a527371f..3d468fde 100644 --- a/logscan/caddy_parser.go +++ b/logscan/caddy_parser.go @@ -8,13 +8,14 @@ import ( "time" ) +// https://caddyserver.com/docs/caddyfile/directives/log type CaddyLogEntry struct { - Timestamp float64 `json:"ts"` - Request Request `json:"request"` - Duration float64 `json:"duration"` - Size_ int `json:"size"` - Status_ int `json:"status"` - RespHeaders Headers `json:"resp_headers"` + Timestamp interface{} `json:"ts"` + Request Request `json:"request"` + Duration float64 `json:"duration"` + Size_ int `json:"size"` + Status_ int `json:"status"` + RespHeaders Headers `json:"resp_headers"` } type Request struct { @@ -35,6 +36,7 @@ type Headers struct { } type CaddyParser struct { + datetime string } func (p CaddyParser) Parse(line string) (Line, bool, error) { @@ -75,12 +77,50 @@ func (l CaddyLogEntry) Query() string { func (l CaddyLogEntry) Timing() time.Duration { // TODO: `Second` should depend on the log format + // {seconds, nano, string} where string in {1m32.05s, 6.31ms} return time.Duration(l.Duration * float64(time.Second)) } -func (l CaddyLogEntry) Datetime(scan *Scanner) (time.Time, error) { - sec, dec := math.Modf(l.Timestamp) - t := time.Unix(int64(sec), int64(dec*(1e9))) +func (l CaddyLogEntry) Datetime(s *Scanner) (time.Time, error) { + /* time_format can be + + - unix_seconds_float Floating-point number of seconds since the Unix epoch. + - unix_milli_float Floating-point number of milliseconds since the Unix epoch. + - unix_nano Integer number of nanoseconds since the Unix epoch. + - iso8601 Example: 2006-01-02T15:04:05.000Z0700 + - rfc3339 Example: 2006-01-02T15:04:05Z07:00 + - rfc3339_nano Example: 2006-01-02T15:04:05.999999999Z07:00 + - wall Example: 2006/01/02 15:04:05 + - wall_milli Example: 2006/01/02 15:04:05.000 + - wall_nano Example: 2006/01/02 15:04:05.000000000 + - common_log Example: 02/Jan/2006:15:04:05 -0700 + + Or, any compatible time layout string; see the Go documentation for full details. + */ + + parser := s.lp.(CaddyParser) + var t time.Time + var err error + switch parser.datetime { + case "", "unix_seconds_float": + // Caddy's default + v := l.Timestamp.(float64) + sec, dec := math.Modf(v) + t = time.Unix(int64(sec), int64(dec*(1e9))) + case "unix_milli_float": + v := l.Timestamp.(float64) + sec, dec := math.Modf(v / 1000) + t = time.Unix(int64(sec), int64(dec*(1e9))) + case "unix_nano": + v := l.Timestamp.(float64) + t = time.UnixMicro(int64(v / 1000)) + default: + v := l.Timestamp.(string) + t, err = time.Parse(parser.datetime, v) + if err != nil { + return time.Unix(0, 0).UTC(), err + } + } return t, nil } func (l CaddyLogEntry) XForwardedFor() string { diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go index 1bcd5679..ef05603a 100644 --- a/logscan/caddy_parser_test.go +++ b/logscan/caddy_parser_test.go @@ -47,7 +47,7 @@ func TestParseLine(t *testing.T) { if line.Timing() != 1234567 { t.Fatalf("Unexpected Timing: %#v", line.Timing()) } - dt, err := line.Datetime(nil) + dt, err := line.Datetime(p) if err != nil { t.Fatalf("Failed to parse Datetime: %#v", err) } @@ -70,3 +70,40 @@ func TestParseLine(t *testing.T) { t.Fatalf("Unexpected Language: %#v", line.Language()) } } + +func TestParseLineDatetimeFormat(t *testing.T) { + + epoch := time.Unix(0, 0).UTC() + var testdata = []struct { + format string + input string + delta time.Duration + }{ + {"", `{"ts":1.5}`, 1500 * time.Millisecond}, // default value + {"unix_seconds_float", `{"ts":1.5}`, 1500 * time.Millisecond}, + {"unix_milli_float", `{"ts":1500}`, 1500 * time.Millisecond}, + {"unix_milli_float", `{"ts":1500.1}`, 1_500_100 * time.Microsecond}, + {"unix_nano", `{"ts":1500000000}`, 1_500_000_000 * time.Nanosecond}, + {time.RFC3339, `{"ts":"1970-01-01T00:00:05+00:00"}`, 5 * time.Second}, + } + for _, tt := range testdata { + t.Run(tt.format, func(t *testing.T) { + p := CaddyParser{datetime: tt.format} + line, skip, err := p.Parse(tt.input) + if skip { + t.Fatalf("Entry skipped") + } + if err != nil { + t.Fatalf("Failed to parse: %#v", err) + } + dt, err := line.Datetime(p) + if err != nil { + t.Fatalf("Failed to parse Datetime: %#v", err) + } + expected := epoch.Add(tt.delta) + if dt.UTC() != expected.UTC() { + t.Fatalf("Unexpected Datetime: %#v vs %#v", dt, expected) + } + }) + } +} diff --git a/logscan/logscan.go b/logscan/logscan.go index 05dc9282..f69ccda2 100644 --- a/logscan/logscan.go +++ b/logscan/logscan.go @@ -51,7 +51,7 @@ type Line interface { Language() string Timing() time.Duration - Datetime(scan *Scanner) (time.Time, error) + Datetime(s *Scanner) (time.Time, error) } const ( @@ -136,8 +136,9 @@ func NewFollow(ctx context.Context, file, format, date, tyme, datetime string, e func makeNew(format, date, tyme, datetime string, exclude []string) (*Scanner, error) { var p LineParser var err error + if format == "caddy" { - p = CaddyParser{} + p = CaddyParser{datetime: datetime} } else { p, err = newRegexParser(format, date, tyme, datetime, exclude) } diff --git a/logscan/logscan_test.go b/logscan/logscan_test.go index 64727b01..989cbf68 100644 --- a/logscan/logscan_test.go +++ b/logscan/logscan_test.go @@ -127,7 +127,7 @@ func TestNew(t *testing.T) { t.Errorf("\nwant: %v\ngot: %v", w, data) } - dt, err := data.Datetime(scan) + dt, err := data.Datetime(scan.lp) if err != nil { t.Logf("%q %q %q", w["date"], w["time"], w["datetime"]) t.Fatal(err) diff --git a/logscan/regex_parser.go b/logscan/regex_parser.go index 476cf35c..73a883be 100644 --- a/logscan/regex_parser.go +++ b/logscan/regex_parser.go @@ -224,8 +224,8 @@ func (l RegexLine) Timing() time.Duration { return 0 } -func (l RegexLine) Datetime(scan *Scanner) (time.Time, error) { - parser := scan.lp.(*RegexParser) +func (l RegexLine) Datetime(scanner *Scanner) (time.Time, error) { + parser := scanner.lp.(*RegexParser) s, ok := l["date"] if ok { t, err := time.Parse(parser.date, s) From 0e9ebcc3b0a0f01adab42b814b36f70093d39375 Mon Sep 17 00:00:00 2001 From: David Ventura Date: Sat, 30 Mar 2024 18:46:24 +0100 Subject: [PATCH 05/13] Flip Datetime to be publicly exposed --- cmd/goatcounter/import.go | 2 +- logscan/caddy_parser.go | 4 ++-- logscan/logscan.go | 6 +++++- logscan/regex_parser.go | 4 ++-- 4 files changed, 10 insertions(+), 6 deletions(-) diff --git a/cmd/goatcounter/import.go b/cmd/goatcounter/import.go index 91a644d4..a170f067 100644 --- a/cmd/goatcounter/import.go +++ b/cmd/goatcounter/import.go @@ -378,7 +378,7 @@ func importLog( UserAgent: line.UserAgent(), } - hit.CreatedAt, err = line.Datetime(scan) + hit.CreatedAt, err = scan.Datetime(line) if err != nil { zlog.Error(err) continue diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go index 3d468fde..f9823711 100644 --- a/logscan/caddy_parser.go +++ b/logscan/caddy_parser.go @@ -81,7 +81,7 @@ func (l CaddyLogEntry) Timing() time.Duration { return time.Duration(l.Duration * float64(time.Second)) } -func (l CaddyLogEntry) Datetime(s *Scanner) (time.Time, error) { +func (l CaddyLogEntry) Datetime(lp LineParser) (time.Time, error) { /* time_format can be - unix_seconds_float Floating-point number of seconds since the Unix epoch. @@ -98,7 +98,7 @@ func (l CaddyLogEntry) Datetime(s *Scanner) (time.Time, error) { Or, any compatible time layout string; see the Go documentation for full details. */ - parser := s.lp.(CaddyParser) + parser := lp.(CaddyParser) var t time.Time var err error switch parser.datetime { diff --git a/logscan/logscan.go b/logscan/logscan.go index f69ccda2..dbd8aa2e 100644 --- a/logscan/logscan.go +++ b/logscan/logscan.go @@ -51,7 +51,7 @@ type Line interface { Language() string Timing() time.Duration - Datetime(s *Scanner) (time.Time, error) + Datetime(lp LineParser) (time.Time, error) } const ( @@ -179,3 +179,7 @@ start: return parsed, line, s.lineno, nil } + +func (s *Scanner) Datetime(l Line) (time.Time, error) { + return l.Datetime(s.lp) +} diff --git a/logscan/regex_parser.go b/logscan/regex_parser.go index 73a883be..13500454 100644 --- a/logscan/regex_parser.go +++ b/logscan/regex_parser.go @@ -224,8 +224,8 @@ func (l RegexLine) Timing() time.Duration { return 0 } -func (l RegexLine) Datetime(scanner *Scanner) (time.Time, error) { - parser := scanner.lp.(*RegexParser) +func (l RegexLine) Datetime(lp LineParser) (time.Time, error) { + parser := lp.(*RegexParser) s, ok := l["date"] if ok { t, err := time.Parse(parser.date, s) From 7854f502d2ec37ca085c3ba21f866b906ebd5dc4 Mon Sep 17 00:00:00 2001 From: David Ventura Date: Sat, 30 Mar 2024 18:51:50 +0100 Subject: [PATCH 06/13] Parse URIs with leading slashes --- logscan/caddy_parser.go | 2 +- logscan/caddy_parser_test.go | 15 ++++++++++++++- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go index f9823711..b8cba17f 100644 --- a/logscan/caddy_parser.go +++ b/logscan/caddy_parser.go @@ -60,7 +60,7 @@ func (l CaddyLogEntry) HTTP() string { return l.Request.Proto } func (l CaddyLogEntry) Status() int { return l.Status_ } func (l CaddyLogEntry) Size() int { return l.Size_ } func (l CaddyLogEntry) Path() string { - u, err := url.Parse(l.Request.URI) + u, err := url.ParseRequestURI(l.Request.URI) if err != nil { return "" } diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go index ef05603a..6140382f 100644 --- a/logscan/caddy_parser_test.go +++ b/logscan/caddy_parser_test.go @@ -72,7 +72,6 @@ func TestParseLine(t *testing.T) { } func TestParseLineDatetimeFormat(t *testing.T) { - epoch := time.Unix(0, 0).UTC() var testdata = []struct { format string @@ -107,3 +106,17 @@ func TestParseLineDatetimeFormat(t *testing.T) { }) } } + +func TestParseUrl(t *testing.T) { + p := CaddyParser{} + line, skip, err := p.Parse(`{"request": {"uri": "//asd"}}`) + if skip { + t.Fatalf("Entry skipped") + } + if err != nil { + t.Fatalf("Failed to parse: %#v", err) + } + if line.Path() != "//asd" { + t.Fatalf("Unexpected Path: %#v", line.Path()) + } +} From a92d7fa26a8167cc038ca39cb3573e0e85a323ba Mon Sep 17 00:00:00 2001 From: David Ventura Date: Sun, 31 Mar 2024 18:41:48 +0200 Subject: [PATCH 07/13] Convert field names to enum and implement exclusion for Caddy logs --- logscan/caddy_parser.go | 64 ++++++++++++++++++++++- logscan/logscan.go | 83 ++++++++++++++++++++++++++++-- logscan/regex_parser.go | 109 +++++++++------------------------------- 3 files changed, 167 insertions(+), 89 deletions(-) diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go index b8cba17f..72236fb5 100644 --- a/logscan/caddy_parser.go +++ b/logscan/caddy_parser.go @@ -5,7 +5,10 @@ import ( "fmt" "math" "net/url" + "strings" "time" + + "github.com/bmatcuk/doublestar/v4" ) // https://caddyserver.com/docs/caddyfile/directives/log @@ -36,7 +39,8 @@ type Headers struct { } type CaddyParser struct { - datetime string + datetime string + excludePatterns []excludePattern } func (p CaddyParser) Parse(line string) (Line, bool, error) { @@ -47,6 +51,11 @@ func (p CaddyParser) Parse(line string) (Line, bool, error) { return nil, false, err } + for _, e := range p.excludePatterns { + if logEntry.matchesPattern(e) { + return nil, true, nil + } + } return logEntry, false, nil } @@ -153,3 +162,56 @@ func (l CaddyLogEntry) Language() string { } return "" } + +func (l CaddyLogEntry) fieldValue(name string) string { + switch name { + default: + panic(fmt.Sprintf("Received invalid field request: %s", name)) + case fieldUserAgent: + return l.UserAgent() + case fieldHost: + return l.Host() + case fieldRemoteAddr: + return l.RemoteAddr() + case fieldAcceptLanguage: + return l.Language() + case fieldContentType: + return l.ContentType() + case fieldHttp: + return l.HTTP() + case fieldMethod: + return l.Method() + case fieldPath: + return l.Path() + case fieldQuery: + return l.Query() + case fieldReferrer: + return l.Referrer() + case fieldSize: + return fmt.Sprint(l.Size()) + case fieldStatus: + return fmt.Sprint(l.Status()) + case fieldXff: + return l.XForwardedFor() + } +} + +func (l CaddyLogEntry) matchesPattern(e excludePattern) bool { + var m bool + fieldValue := l.fieldValue(e.field) + switch e.kind { + default: + m = strings.Contains(fieldValue, e.pattern) + case excludeGlob: + // We use doublestar instead of filepath.Match() because the latter + // doesn't support "**" and "{a,b}" patterns, both of which are very + // useful here. + m, _ = doublestar.Match(e.pattern, fieldValue) + case excludeRe: + m = e.re.MatchString(fieldValue) + } + if e.negate { + return !m + } + return m +} diff --git a/logscan/logscan.go b/logscan/logscan.go index dbd8aa2e..1ea1f387 100644 --- a/logscan/logscan.go +++ b/logscan/logscan.go @@ -10,9 +10,11 @@ import ( "fmt" "io" "regexp" + "slices" "strings" "time" + "github.com/bmatcuk/doublestar/v4" "zgo.at/errors" "zgo.at/follow" "zgo.at/zlog" @@ -20,10 +22,26 @@ import ( var reFormat = regexp.MustCompile(`\\\$[\w-_]+`) -var fields = []string{"ignore", "time", "date", "datetime", "remote_addr", - "xff", "method", "status", "http", "path", "query", "referrer", - "user_agent", "host", "content_type", "timing_sec", "timing_milli", - "timing_micro", "size"} +const ( + fieldAcceptLanguage = "accept_language" + fieldContentType = "content_type" + fieldHost = "host" + fieldHttp = "http" + fieldMethod = "method" + fieldPath = "path" + fieldQuery = "query" + fieldReferrer = "referrer" + fieldRemoteAddr = "remote_addr" + fieldSize = "size" + fieldStatus = "status" + fieldUserAgent = "user_agent" + fieldXff = "xff" +) + +var fields = []string{"ignore", "time", "date", "datetime", fieldRemoteAddr, + fieldXff, fieldMethod, fieldStatus, fieldHttp, fieldPath, fieldQuery, fieldReferrer, + fieldUserAgent, fieldHost, fieldContentType, "timing_sec", "timing_milli", + "timing_micro", fieldSize} const ( excludeContains = 0 @@ -183,3 +201,60 @@ start: func (s *Scanner) Datetime(l Line) (time.Time, error) { return l.Datetime(s.lp) } + +type excludePattern struct { + kind int // exclude* constant + negate bool // ! present + field string // "path", "content_type" + pattern string // ".gif", "*.gif" + re *regexp.Regexp // only if kind=excludeRe +} + +func processExcludes(exclude []string) ([]excludePattern, error) { + // "static" needs to expand to two values. + for i, e := range exclude { + switch e { + case "static": + // Note: maybe check if using glob patterns is faster? + exclude[i] = `path:re:.*\.(:?js|css|gif|jpe?g|png|svg|ico|web[mp]|mp[34])$` + exclude = append(exclude, `content_type:re:^(?:text/(?:css|javascript)|image/(?:png|gif|jpeg|svg\+xml|webp)).*?`) + case "html": + exclude[i] = "content_type:^text/html.*?" + case "redirect": + exclude[i] = "status:glob:30[0123]" + } + } + + patterns := make([]excludePattern, 0, len(exclude)) + for _, e := range exclude { + var p excludePattern + if strings.HasPrefix(e, "!") { + p.negate = true + e = e[1:] + } + + p.field, p.pattern, _ = strings.Cut(e, ":") + if !slices.Contains(fields, p.field) { + return nil, fmt.Errorf("invalid field %q in exclude pattern %q", p.field, e) + } + if p.pattern == "" { + return nil, fmt.Errorf("no pattern in %q", e) + } + + var err error + switch { + case strings.HasPrefix(p.pattern, "glob:"): + p.kind, p.pattern = excludeGlob, p.pattern[5:] + _, err = doublestar.Match(p.pattern, "") + case strings.HasPrefix(p.pattern, "re:"): + p.kind, p.pattern = excludeRe, p.pattern[3:] + p.re, err = regexp.Compile(p.pattern) + } + if err != nil { + return nil, fmt.Errorf("invalid exclude pattern: %q: %w", e, err) + } + patterns = append(patterns, p) + } + + return patterns, nil +} diff --git a/logscan/regex_parser.go b/logscan/regex_parser.go index 13500454..85386454 100644 --- a/logscan/regex_parser.go +++ b/logscan/regex_parser.go @@ -3,7 +3,6 @@ package logscan import ( "fmt" "regexp" - "slices" "strconv" "strings" "time" @@ -33,7 +32,7 @@ func (p RegexParser) Parse(line string) (Line, bool, error) { } } for _, e := range p.exclude { - if parsed.exclude(e) { + if parsed.matchesPattern(e) { return nil, true, nil } } @@ -93,30 +92,29 @@ func newRegexParser(format, date, tyme, datetime string, exclude []string) (*Reg } } - case "host": + case fieldHost: p = `(?:xn--)?[a-zA-Z0-9.-]+` - case "remote_addr": + case fieldRemoteAddr: p = `[0-9a-fA-F:.]+` - case "xff": + case fieldXff: p = `[0-9a-fA-F:. ,]+` - - case "method": + case fieldMethod: p = `[A-Z]{3,10}` - case "status": + case fieldStatus: p = `\d{3}` - case "http": + case fieldHttp: p = `HTTP/[\d.]+` - case "path": + case fieldPath: p = `/.*?` case "timing_sec": p = `[\d.]+` case "timing_milli", "timing_micro": p = `\d+` - case "size": + case fieldSize: p = `(?:\d+|-)` - case "referrer", "user_agent": + case fieldReferrer, fieldUserAgent: p = `.*?` - case "query", "content_type": + case fieldQuery, fieldContentType: // Default } return "(?P<" + m + ">" + p + ")" @@ -135,78 +133,21 @@ func newRegexParser(format, date, tyme, datetime string, exclude []string) (*Reg }, nil } -type excludePattern struct { - kind int // exclude* constant - negate bool // ! present - field string // "path", "content_type" - pattern string // ".gif", "*.gif" - re *regexp.Regexp // only if kind=excludeRe -} - -func processExcludes(exclude []string) ([]excludePattern, error) { - // "static" needs to expand to two values. - for i, e := range exclude { - switch e { - case "static": - // Note: maybe check if using glob patterns is faster? - exclude[i] = `path:re:.*\.(:?js|css|gif|jpe?g|png|svg|ico|web[mp]|mp[34])$` - exclude = append(exclude, `content_type:re:^(?:text/(?:css|javascript)|image/(?:png|gif|jpeg|svg\+xml|webp)).*?`) - case "html": - exclude[i] = "content_type:^text/html.*?" - case "redirect": - exclude[i] = "status:glob:30[0123]" - } - } - - patterns := make([]excludePattern, 0, len(exclude)) - for _, e := range exclude { - var p excludePattern - if strings.HasPrefix(e, "!") { - p.negate = true - e = e[1:] - } - - p.field, p.pattern, _ = strings.Cut(e, ":") - if !slices.Contains(fields, p.field) { - return nil, fmt.Errorf("invalid field %q in exclude pattern %q", p.field, e) - } - if p.pattern == "" { - return nil, fmt.Errorf("no pattern in %q", e) - } - - var err error - switch { - case strings.HasPrefix(p.pattern, "glob:"): - p.kind, p.pattern = excludeGlob, p.pattern[5:] - _, err = doublestar.Match(p.pattern, "") - case strings.HasPrefix(p.pattern, "re:"): - p.kind, p.pattern = excludeRe, p.pattern[3:] - p.re, err = regexp.Compile(p.pattern) - } - if err != nil { - return nil, fmt.Errorf("invalid exclude pattern: %q: %w", e, err) - } - patterns = append(patterns, p) - } - - return patterns, nil -} - type RegexLine map[string]string -func (l RegexLine) Host() string { return l["host"] } -func (l RegexLine) RemoteAddr() string { return l["remote_addr"] } -func (l RegexLine) XForwardedFor() string { return l["xff"] } -func (l RegexLine) Method() string { return l["method"] } -func (l RegexLine) HTTP() string { return l["http"] } -func (l RegexLine) Path() string { return l["path"] } -func (l RegexLine) Query() string { return l["query"] } -func (l RegexLine) Referrer() string { return l["referrer"] } -func (l RegexLine) UserAgent() string { return l["user_agent"] } -func (l RegexLine) ContentType() string { return l["content_type"] } -func (l RegexLine) Status() int { return toI(l["status"]) } -func (l RegexLine) Size() int { return toI(l["size"]) } -func (l RegexLine) Language() string { return l["accept_language"] } +func (l RegexLine) Host() string { return l[fieldHost] } +func (l RegexLine) RemoteAddr() string { return l[fieldRemoteAddr] } +func (l RegexLine) XForwardedFor() string { return l[fieldXff] } +func (l RegexLine) Method() string { return l[fieldMethod] } +func (l RegexLine) HTTP() string { return l[fieldHttp] } +func (l RegexLine) Path() string { return l[fieldPath] } +func (l RegexLine) Query() string { return l[fieldQuery] } +func (l RegexLine) Referrer() string { return l[fieldReferrer] } +func (l RegexLine) UserAgent() string { return l[fieldUserAgent] } +func (l RegexLine) ContentType() string { return l[fieldContentType] } +func (l RegexLine) Status() int { return toI(l[fieldStatus]) } +func (l RegexLine) Size() int { return toI(l[fieldSize]) } +func (l RegexLine) Language() string { return l[fieldAcceptLanguage] } func (l RegexLine) Timing() time.Duration { s, ok := l["timing_sec"] @@ -259,7 +200,7 @@ func toUi64(s string) uint64 { var _ Line = RegexLine{} -func (l RegexLine) exclude(e excludePattern) bool { +func (l RegexLine) matchesPattern(e excludePattern) bool { var m bool switch e.kind { default: From 57a9aa4c93e3927e27ce23a6d30c4ab3ed32d577 Mon Sep 17 00:00:00 2001 From: David Ventura Date: Mon, 1 Apr 2024 12:30:24 +0200 Subject: [PATCH 08/13] Pass excludePatterns to all parsers --- logscan/logscan.go | 9 +++++++-- logscan/regex_parser.go | 10 +++------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/logscan/logscan.go b/logscan/logscan.go index 1ea1f387..9b390b1f 100644 --- a/logscan/logscan.go +++ b/logscan/logscan.go @@ -155,10 +155,15 @@ func makeNew(format, date, tyme, datetime string, exclude []string) (*Scanner, e var p LineParser var err error + excludePatt, err := processExcludes(exclude) + if err != nil { + return nil, err + } + if format == "caddy" { - p = CaddyParser{datetime: datetime} + p = CaddyParser{datetime: datetime, excludePatterns: excludePatt} } else { - p, err = newRegexParser(format, date, tyme, datetime, exclude) + p, err = newRegexParser(format, date, tyme, datetime, excludePatt) } if err != nil { return nil, err diff --git a/logscan/regex_parser.go b/logscan/regex_parser.go index 85386454..f25ea473 100644 --- a/logscan/regex_parser.go +++ b/logscan/regex_parser.go @@ -42,18 +42,14 @@ func (p RegexParser) Parse(line string) (Line, bool, error) { var _ LineParser = RegexParser{} -func newRegexParser(format, date, tyme, datetime string, exclude []string) (*RegexParser, error) { +func newRegexParser(format, date, tyme, datetime string, exclude []excludePattern) (*RegexParser, error) { of := format format, date, tyme, datetime = getFormat(format, date, tyme, datetime) if format == "" { return nil, errors.Errorf("unknown format: %s", of) } - excludePatt, err := processExcludes(exclude) - if err != nil { - return nil, err - } - + var err error pat := reFormat.ReplaceAllStringFunc(regexp.QuoteMeta(format), func(m string) string { m = m[2:] @@ -129,7 +125,7 @@ func newRegexParser(format, date, tyme, datetime string, exclude []string) (*Reg date: date, time: tyme, datetime: datetime, - exclude: excludePatt, + exclude: exclude, }, nil } From 1964d3f225d1fd79af382897f537e180bb82cc28 Mon Sep 17 00:00:00 2001 From: David Ventura Date: Mon, 8 Apr 2024 21:54:39 +0200 Subject: [PATCH 09/13] Add test case with multiple lines --- logscan/caddy_parser_test.go | 60 +++++++++++++++++++++++++++++++++++ logscan/caddy_testdata/2.json | 5 +++ 2 files changed, 65 insertions(+) create mode 100644 logscan/caddy_testdata/2.json diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go index 6140382f..cf320caa 100644 --- a/logscan/caddy_parser_test.go +++ b/logscan/caddy_parser_test.go @@ -1,7 +1,11 @@ package logscan import ( + "context" + "io" "io/ioutil" + "os" + "reflect" "testing" "time" ) @@ -120,3 +124,59 @@ func TestParseUrl(t *testing.T) { t.Fatalf("Unexpected Path: %#v", line.Path()) } } + +func TestParseMultipleLines(t *testing.T) { + want := []CaddyLogEntry{ + CaddyLogEntry{Timestamp: 1706788852.6825173, + Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + Duration: 0.000455129, + Size_: 0, + Status_: 304, + RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + CaddyLogEntry{Timestamp: 1706788853.7180748, + Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, + Duration: 0.000356122, + Size_: 0, + Status_: 304, + RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + CaddyLogEntry{Timestamp: 1706788854.7159958, + Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, + Duration: 0.000728, Size_: 0, Status_: 304, RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + CaddyLogEntry{Timestamp: 1706788855.7197819, + Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, + Duration: 0.000275939, + Size_: 0, + Status_: 304, + RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + CaddyLogEntry{Timestamp: 1706788856.6911514, + Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, + Duration: 0.000210732, + Size_: 0, + Status_: 304, + RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}, + }, + } + fd, err := os.Open("./caddy_testdata/2.json") + if err != nil { + t.Fatal(err) + } + scan, err := New(fd, `caddy`, "", "", "", []string{}) + if err != nil { + t.Fatal(err) + } + got := []CaddyLogEntry{} + for { + data, _, _, err := scan.Line(context.Background()) + if err == io.EOF { + break + } + if err != nil { + t.Fatal(err) + } + got = append(got, data.(CaddyLogEntry)) + } + + if !reflect.DeepEqual(got, want) { + t.Errorf("\ngot: %#v\nwant: %#v", got, want) + } +} diff --git a/logscan/caddy_testdata/2.json b/logscan/caddy_testdata/2.json new file mode 100644 index 00000000..726fc261 --- /dev/null +++ b/logscan/caddy_testdata/2.json @@ -0,0 +1,5 @@ +{"level":"info","ts":1706788852.6825173,"logger":"http.log.access.log1","msg":"handled request","request":{"remote_addr":"1.2.3.4:41844","proto":"HTTP/2.0","method":"HEAD","host":"host.example.com","uri":"/path.html","headers":{"Cache-Control":["max-age=0"],"Sec-Ch-Ua-Platform":["\"Windows\""],"Sec-Fetch-Site":["same-origin"],"Sec-Fetch-Dest":["empty"],"Sec-Ch-Ua":["\"Not A(Brand\";v=\"99\", \"Google Chrome\";v=\"121\", \"Chromium\";v=\"121\""],"If-Modified-Since":["Thu, 01 Feb 2024 08:54:41 GMT"],"Sec-Ch-Ua-Mobile":["?0"],"User-Agent":["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"],"Accept":["*/*"],"Sec-Fetch-Mode":["cors"],"Accept-Encoding":["gzip, deflate, br"]},"tls":{"resumed":true,"version":772,"cipher_suite":4865,"proto":"h2","proto_mutual":true,"server_name":"host.example.com"}},"common_log":"1.2.3.4 - - [01/Feb/2024:13:00:52 +0100] \"HEAD /path.html HTTP/2.0\" 304 0","duration":0.000455129,"size":0,"status":304,"resp_headers":{"Server":["Caddy"],"Etag":["\"s0rqsw1124\""]}} +{"level":"info","ts":1706788853.7180748,"logger":"http.log.access.log1","msg":"handled request","request":{"remote_addr":"1.2.3.4:41844","proto":"HTTP/2.0","method":"HEAD","host":"host.example.com","uri":"/path.html","headers":{"Accept-Encoding":["gzip, deflate, br"],"Accept-Language":["ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"],"Cache-Control":["max-age=0"],"Sec-Ch-Ua":["\"Not A(Brand\";v=\"99\", \"Google Chrome\";v=\"121\", \"Chromium\";v=\"121\""],"Sec-Ch-Ua-Mobile":["?0"],"Accept":["*/*"],"Sec-Fetch-Dest":["empty"],"If-Modified-Since":["Thu, 01 Feb 2024 08:54:41 GMT"],"Sec-Ch-Ua-Platform":["\"Windows\""],"Sec-Fetch-Site":["same-origin"],"Sec-Fetch-Mode":["cors"]},"tls":{"resumed":true,"version":772,"cipher_suite":4865,"proto":"h2","proto_mutual":true,"server_name":"host.example.com"}},"common_log":"1.2.3.4 - - [01/Feb/2024:13:00:53 +0100] \"HEAD /path.html HTTP/2.0\" 304 0","duration":0.000356122,"size":0,"status":304,"resp_headers":{"Server":["Caddy"],"Etag":["\"s0rqsw1124\""]}} +{"level":"info","ts":1706788854.7159958,"logger":"http.log.access.log1","msg":"handled request","request":{"remote_addr":"1.2.3.4:41844","proto":"HTTP/2.0","method":"HEAD","host":"host.example.com","uri":"/path.html","headers":{"Sec-Fetch-Mode":["cors"],"Cache-Control":["max-age=0"],"Sec-Ch-Ua":["\"Not A(Brand\";v=\"99\", \"Google Chrome\";v=\"121\", \"Chromium\";v=\"121\""],"Sec-Ch-Ua-Platform":["\"Windows\""],"Accept":["*/*"],"Sec-Fetch-Site":["same-origin"],"Accept-Language":["ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"],"If-Modified-Since":["Thu, 01 Feb 2024 08:54:41 GMT"],"Sec-Ch-Ua-Mobile":["?0"],"User-Agent":["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"],"Sec-Fetch-Dest":["empty"],"Accept-Encoding":["gzip, deflate, br"]},"tls":{"resumed":true,"version":772,"cipher_suite":4865,"proto":"h2","proto_mutual":true,"server_name":"host.example.com"}},"common_log":"1.2.3.4 - - [01/Feb/2024:13:00:54 +0100] \"HEAD /path.html HTTP/2.0\" 304 0","duration":0.000728,"size":0,"status":304,"resp_headers":{"Server":["Caddy"],"Etag":["\"s0rqsw1124\""]}} +{"level":"info","ts":1706788855.7197819,"logger":"http.log.access.log1","msg":"handled request","request":{"remote_addr":"1.2.3.4:41844","proto":"HTTP/2.0","method":"HEAD","host":"host.example.com","uri":"/path.html","headers":{"Sec-Ch-Ua-Platform":["\"Windows\""],"Sec-Fetch-Site":["same-origin"],"Sec-Fetch-Mode":["cors"],"Sec-Fetch-Dest":["empty"],"Accept-Language":["ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"],"Sec-Ch-Ua-Mobile":["?0"],"User-Agent":["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"],"If-Modified-Since":["Thu, 01 Feb 2024 08:54:41 GMT"],"Accept":["*/*"],"Accept-Encoding":["gzip, deflate, br"],"Cache-Control":["max-age=0"],"Sec-Ch-Ua":["\"Not A(Brand\";v=\"99\", \"Google Chrome\";v=\"121\", \"Chromium\";v=\"121\""]},"tls":{"resumed":true,"version":772,"cipher_suite":4865,"proto":"h2","proto_mutual":true,"server_name":"host.example.com"}},"common_log":"1.2.3.4 - - [01/Feb/2024:13:00:55 +0100] \"HEAD /path.html HTTP/2.0\" 304 0","duration":0.000275939,"size":0,"status":304,"resp_headers":{"Server":["Caddy"],"Etag":["\"s0rqsw1124\""]}} +{"level":"info","ts":1706788856.6911514,"logger":"http.log.access.log1","msg":"handled request","request":{"remote_addr":"1.2.3.4:41844","proto":"HTTP/2.0","method":"HEAD","host":"host.example.com","uri":"/path.html","headers":{"Sec-Ch-Ua":["\"Not A(Brand\";v=\"99\", \"Google Chrome\";v=\"121\", \"Chromium\";v=\"121\""],"If-Modified-Since":["Thu, 01 Feb 2024 08:54:41 GMT"],"Sec-Ch-Ua-Mobile":["?0"],"Sec-Fetch-Dest":["empty"],"Sec-Fetch-Mode":["cors"],"Accept-Encoding":["gzip, deflate, br"],"Accept-Language":["ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"],"Cache-Control":["max-age=0"],"User-Agent":["Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"],"Sec-Ch-Ua-Platform":["\"Windows\""],"Accept":["*/*"],"Sec-Fetch-Site":["same-origin"]},"tls":{"resumed":true,"version":772,"cipher_suite":4865,"proto":"h2","proto_mutual":true,"server_name":"host.example.com"}},"common_log":"1.2.3.4 - - [01/Feb/2024:13:00:56 +0100] \"HEAD /path.html HTTP/2.0\" 304 0","duration":0.000210732,"size":0,"status":304,"resp_headers":{"Etag":["\"s0rqsw1124\""],"Server":["Caddy"]}} From 0aec32bd47a915ac1eaaaac19b4967c2ad0211a5 Mon Sep 17 00:00:00 2001 From: Martin Tournoij Date: Tue, 9 Apr 2024 11:06:48 +0100 Subject: [PATCH 10/13] Some basic stylistic changes; put matchesPattern() in a generic function --- cmd/goatcounter/import.go | 1 + logscan/caddy_parser.go | 122 ++++++++++++++--------------------- logscan/caddy_parser_test.go | 27 ++++---- logscan/logscan.go | 23 ++++++- logscan/logscan_test.go | 7 ++ logscan/regex_parser.go | 23 +------ 6 files changed, 92 insertions(+), 111 deletions(-) diff --git a/cmd/goatcounter/import.go b/cmd/goatcounter/import.go index a170f067..69ff023a 100644 --- a/cmd/goatcounter/import.go +++ b/cmd/goatcounter/import.go @@ -76,6 +76,7 @@ Flags: combined-vhost NCSA Combined Log with virtual host common Common Log Format (CLF) common-vhost Common Log Format (CLF) with virtual host + caddy Caddy JSON logs log:[fmt] Custom log format; see "goatcounter help logfile" for details. diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go index 72236fb5..9b708948 100644 --- a/logscan/caddy_parser.go +++ b/logscan/caddy_parser.go @@ -5,43 +5,39 @@ import ( "fmt" "math" "net/url" - "strings" "time" - - "github.com/bmatcuk/doublestar/v4" ) // https://caddyserver.com/docs/caddyfile/directives/log -type CaddyLogEntry struct { - Timestamp interface{} `json:"ts"` - Request Request `json:"request"` - Duration float64 `json:"duration"` - Size_ int `json:"size"` - Status_ int `json:"status"` - RespHeaders Headers `json:"resp_headers"` -} - -type Request struct { - RemoteAddr string `json:"remote_addr"` - Proto string `json:"proto"` - Method string `json:"method"` - Host string `json:"host"` - URI string `json:"uri"` - Headers Headers `json:"headers"` -} - -type Headers struct { - UserAgent []string `json:"User-Agent"` - Referer []string `json:"Referer"` - ContentType []string `json:"Content-Type"` - XForwardedFor []string `json:"X-Forwarded-For"` - AcceptLanguage []string `json:"Accept-Language"` -} - -type CaddyParser struct { - datetime string - excludePatterns []excludePattern -} +type ( + CaddyParser struct { + datetime string + excludePatterns []excludePattern + } + CaddyLogEntry struct { + Timestamp any `json:"ts"` + Request CaddyRequest `json:"request"` + Duration float64 `json:"duration"` + Size_ int `json:"size"` + Status_ int `json:"status"` + RespHeaders CaddyHeaders `json:"resp_headers"` + } + CaddyRequest struct { + RemoteAddr string `json:"remote_addr"` + Proto string `json:"proto"` + Method string `json:"method"` + Host string `json:"host"` + URI string `json:"uri"` + Headers CaddyHeaders `json:"headers"` + } + CaddyHeaders struct { + UserAgent []string `json:"User-Agent"` + Referer []string `json:"Referer"` + ContentType []string `json:"Content-Type"` + XForwardedFor []string `json:"X-Forwarded-For"` + AcceptLanguage []string `json:"Accept-Language"` + } +) func (p CaddyParser) Parse(line string) (Line, bool, error) { var logEntry CaddyLogEntry @@ -59,9 +55,6 @@ func (p CaddyParser) Parse(line string) (Line, bool, error) { return logEntry, false, nil } -var _ LineParser = CaddyParser{} -var _ Line = CaddyLogEntry{} - func (l CaddyLogEntry) Host() string { return l.Request.Host } func (l CaddyLogEntry) RemoteAddr() string { return l.Request.RemoteAddr } func (l CaddyLogEntry) Method() string { return l.Request.Method } @@ -91,25 +84,26 @@ func (l CaddyLogEntry) Timing() time.Duration { } func (l CaddyLogEntry) Datetime(lp LineParser) (time.Time, error) { - /* time_format can be - - - unix_seconds_float Floating-point number of seconds since the Unix epoch. - - unix_milli_float Floating-point number of milliseconds since the Unix epoch. - - unix_nano Integer number of nanoseconds since the Unix epoch. - - iso8601 Example: 2006-01-02T15:04:05.000Z0700 - - rfc3339 Example: 2006-01-02T15:04:05Z07:00 - - rfc3339_nano Example: 2006-01-02T15:04:05.999999999Z07:00 - - wall Example: 2006/01/02 15:04:05 - - wall_milli Example: 2006/01/02 15:04:05.000 - - wall_nano Example: 2006/01/02 15:04:05.000000000 - - common_log Example: 02/Jan/2006:15:04:05 -0700 - - Or, any compatible time layout string; see the Go documentation for full details. - */ - - parser := lp.(CaddyParser) - var t time.Time - var err error + // time_format can be: + // + // - unix_seconds_float Floating-point number of seconds since the Unix epoch. + // - unix_milli_float Floating-point number of milliseconds since the Unix epoch. + // - unix_nano Integer number of nanoseconds since the Unix epoch. + // - iso8601 Example: 2006-01-02T15:04:05.000Z0700 + // - rfc3339 Example: 2006-01-02T15:04:05Z07:00 + // - rfc3339_nano Example: 2006-01-02T15:04:05.999999999Z07:00 + // - wall Example: 2006/01/02 15:04:05 + // - wall_milli Example: 2006/01/02 15:04:05.000 + // - wall_nano Example: 2006/01/02 15:04:05.000000000 + // - common_log Example: 02/Jan/2006:15:04:05 -0700 + // + // Or any compatible time layout string; see the Go documentation for full details. + + var ( + parser = lp.(CaddyParser) + t time.Time + err error + ) switch parser.datetime { case "", "unix_seconds_float": // Caddy's default @@ -197,21 +191,5 @@ func (l CaddyLogEntry) fieldValue(name string) string { } func (l CaddyLogEntry) matchesPattern(e excludePattern) bool { - var m bool - fieldValue := l.fieldValue(e.field) - switch e.kind { - default: - m = strings.Contains(fieldValue, e.pattern) - case excludeGlob: - // We use doublestar instead of filepath.Match() because the latter - // doesn't support "**" and "{a,b}" patterns, both of which are very - // useful here. - m, _ = doublestar.Match(e.pattern, fieldValue) - case excludeRe: - m = e.re.MatchString(fieldValue) - } - if e.negate { - return !m - } - return m + return matchesPattern(e, l.fieldValue(e.field)) } diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go index cf320caa..b48fe5d6 100644 --- a/logscan/caddy_parser_test.go +++ b/logscan/caddy_parser_test.go @@ -3,7 +3,6 @@ package logscan import ( "context" "io" - "io/ioutil" "os" "reflect" "testing" @@ -11,7 +10,7 @@ import ( ) func TestParseLine(t *testing.T) { - data, err := ioutil.ReadFile("./caddy_testdata/1.json") + data, err := os.ReadFile("./caddy_testdata/1.json") if err != nil { t.Fatal(err) } @@ -128,39 +127,39 @@ func TestParseUrl(t *testing.T) { func TestParseMultipleLines(t *testing.T) { want := []CaddyLogEntry{ CaddyLogEntry{Timestamp: 1706788852.6825173, - Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, Duration: 0.000455129, Size_: 0, Status_: 304, - RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, CaddyLogEntry{Timestamp: 1706788853.7180748, - Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, + Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, Duration: 0.000356122, Size_: 0, Status_: 304, - RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, CaddyLogEntry{Timestamp: 1706788854.7159958, - Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, - Duration: 0.000728, Size_: 0, Status_: 304, RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, + Duration: 0.000728, Size_: 0, Status_: 304, RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, CaddyLogEntry{Timestamp: 1706788855.7197819, - Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, + Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, Duration: 0.000275939, Size_: 0, Status_: 304, - RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, + RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, CaddyLogEntry{Timestamp: 1706788856.6911514, - Request: Request{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: Headers{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, + Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, Duration: 0.000210732, Size_: 0, Status_: 304, - RespHeaders: Headers{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}, + RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}, }, } - fd, err := os.Open("./caddy_testdata/2.json") + fp, err := os.Open("./caddy_testdata/2.json") if err != nil { t.Fatal(err) } - scan, err := New(fd, `caddy`, "", "", "", []string{}) + scan, err := New(fp, `caddy`, "", "", "", []string{}) if err != nil { t.Fatal(err) } diff --git a/logscan/logscan.go b/logscan/logscan.go index 9b390b1f..c15e907b 100644 --- a/logscan/logscan.go +++ b/logscan/logscan.go @@ -169,9 +169,7 @@ func makeNew(format, date, tyme, datetime string, exclude []string) (*Scanner, e return nil, err } - return &Scanner{ - lp: p, - }, nil + return &Scanner{lp: p}, nil } // Line processes a single line. @@ -263,3 +261,22 @@ func processExcludes(exclude []string) ([]excludePattern, error) { return patterns, nil } + +func matchesPattern(e excludePattern, v string) bool { + var m bool + switch e.kind { + default: + m = strings.Contains(v, e.pattern) + case excludeGlob: + // We use doublestar instead of filepath.Match() because the latter + // doesn't support "**" and "{a,b}" patterns, both of which are very + // useful here. + m, _ = doublestar.Match(e.pattern, v) + case excludeRe: + m = e.re.MatchString(v) + } + if e.negate { + return !m + } + return m +} diff --git a/logscan/logscan_test.go b/logscan/logscan_test.go index 989cbf68..21e8eb5f 100644 --- a/logscan/logscan_test.go +++ b/logscan/logscan_test.go @@ -17,6 +17,13 @@ import ( "zgo.at/zstd/ztest" ) +var ( + _ LineParser = RegexParser{} + _ Line = RegexLine{} + _ LineParser = CaddyParser{} + _ Line = CaddyLogEntry{} +) + func TestErrors(t *testing.T) { _, err := New(strings.NewReader(""), "log:$xxx", "", "", "", nil) if !ztest.ErrorContains(err, "unknown format specifier: $xxx") { diff --git a/logscan/regex_parser.go b/logscan/regex_parser.go index f25ea473..d1c4170c 100644 --- a/logscan/regex_parser.go +++ b/logscan/regex_parser.go @@ -4,10 +4,8 @@ import ( "fmt" "regexp" "strconv" - "strings" "time" - "github.com/bmatcuk/doublestar/v4" "zgo.at/errors" ) @@ -40,8 +38,6 @@ func (p RegexParser) Parse(line string) (Line, bool, error) { return parsed, false, nil } -var _ LineParser = RegexParser{} - func newRegexParser(format, date, tyme, datetime string, exclude []excludePattern) (*RegexParser, error) { of := format format, date, tyme, datetime = getFormat(format, date, tyme, datetime) @@ -194,23 +190,6 @@ func toUi64(s string) uint64 { return n } -var _ Line = RegexLine{} - func (l RegexLine) matchesPattern(e excludePattern) bool { - var m bool - switch e.kind { - default: - m = strings.Contains(l[e.field], e.pattern) - case excludeGlob: - // We use doublestar instead of filepath.Match() because the latter - // doesn't support "**" and "{a,b}" patterns, both of which are very - // useful here. - m, _ = doublestar.Match(e.pattern, l[e.field]) - case excludeRe: - m = e.re.MatchString(l[e.field]) - } - if e.negate { - return !m - } - return m + return matchesPattern(e, l[e.field]) } From e50fa17f635f9102366bb6ff79ee427a33b17fd1 Mon Sep 17 00:00:00 2001 From: Martin Tournoij Date: Tue, 9 Apr 2024 11:10:12 +0100 Subject: [PATCH 11/13] Put all testfiles in testdata/ --- logscan/caddy_parser_test.go | 6 +- logscan/logscan_test.go | 3 + .../{caddy_testdata => testdata/caddy}/1.json | 58 ++++++------------- .../{caddy_testdata => testdata/caddy}/2.json | 0 4 files changed, 23 insertions(+), 44 deletions(-) rename logscan/{caddy_testdata => testdata/caddy}/1.json (50%) rename logscan/{caddy_testdata => testdata/caddy}/2.json (100%) diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go index b48fe5d6..923f6e2a 100644 --- a/logscan/caddy_parser_test.go +++ b/logscan/caddy_parser_test.go @@ -10,7 +10,7 @@ import ( ) func TestParseLine(t *testing.T) { - data, err := os.ReadFile("./caddy_testdata/1.json") + data, err := os.ReadFile("./testdata/caddy/1.json") if err != nil { t.Fatal(err) } @@ -76,7 +76,7 @@ func TestParseLine(t *testing.T) { func TestParseLineDatetimeFormat(t *testing.T) { epoch := time.Unix(0, 0).UTC() - var testdata = []struct { + testdata := []struct { format string input string delta time.Duration @@ -155,7 +155,7 @@ func TestParseMultipleLines(t *testing.T) { RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}, }, } - fp, err := os.Open("./caddy_testdata/2.json") + fp, err := os.Open("./testdata/caddy/2.json") if err != nil { t.Fatal(err) } diff --git a/logscan/logscan_test.go b/logscan/logscan_test.go index 21e8eb5f..78c88d8c 100644 --- a/logscan/logscan_test.go +++ b/logscan/logscan_test.go @@ -93,6 +93,9 @@ func TestNew(t *testing.T) { } for _, f := range files { + if f.IsDir() { + continue + } t.Run(f.Name(), func(t *testing.T) { fp, err := os.Open("./testdata/" + f.Name()) if err != nil { diff --git a/logscan/caddy_testdata/1.json b/logscan/testdata/caddy/1.json similarity index 50% rename from logscan/caddy_testdata/1.json rename to logscan/testdata/caddy/1.json index 6553824a..357c1d9c 100644 --- a/logscan/caddy_testdata/1.json +++ b/logscan/testdata/caddy/1.json @@ -3,6 +3,10 @@ "ts": 1706794321.6563592, "logger": "http.log.access.log1", "msg": "handled request", + "common_log": "1.2.3.4 - - [01/Feb/2024:14:32:01 +0100] \"GET /absolute_uri.html HTTP/1.1\" 200 2803", + "duration": 0.001234567, + "size": 2803, + "status": 200, "request": { "remote_addr": "1.2.3.4:5678", "proto": "HTTP/1.1", @@ -10,24 +14,12 @@ "host": "host.example.com", "uri": "/absolute_uri.html?queryparam=value", "headers": { - "Connection": [ - "Close" - ], - "User-Agent": [ - "This is the user agent" - ], - "Accept-Language": [ - "en" - ], - "Referer": [ - "https://another.example.com/" - ], - "Accept-Encoding": [ - "gzip, deflate" - ], - "Accept": [ - "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" - ] + "Connection": ["Close"], + "User-Agent": ["This is the user agent"], + "Accept-Language": ["en"], + "Referer": ["https://another.example.com/"], + "Accept-Encoding": ["gzip, deflate"], + "Accept": ["text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"] }, "tls": { "resumed": false, @@ -38,29 +30,13 @@ "server_name": "host.example.com" } }, - "common_log": "1.2.3.4 - - [01/Feb/2024:14:32:01 +0100] \"GET /absolute_uri.html HTTP/1.1\" 200 2803", - "duration": 0.001234567, - "size": 2803, - "status": 200, "resp_headers": { - "Content-Encoding": [ - "gzip" - ], - "Vary": [ - "Accept-Encoding" - ], - "Server": [ - "Caddy" - ], - "Etag": [ - "\"s0rqswb68\"" - ], - "Content-Type": [ - "text/html; charset=utf-8" - ], - "Last-Modified": [ - "Sun, 10 Sep 2023 12:02:56 GMT" - ] + "Content-Encoding": ["gzip"], + "Vary": ["Accept-Encoding"], + "Server": ["Caddy"], + "Etag": ["\"s0rqswb68\""], + "Content-Type": ["text/html; charset=utf-8"], + "Last-Modified": ["Sun, 10 Sep 2023 12:02:56 GMT"] } } - + diff --git a/logscan/caddy_testdata/2.json b/logscan/testdata/caddy/2.json similarity index 100% rename from logscan/caddy_testdata/2.json rename to logscan/testdata/caddy/2.json From 5e08829e5deb47beb00f86c3804d63853f324aeb Mon Sep 17 00:00:00 2001 From: Martin Tournoij Date: Tue, 9 Apr 2024 11:17:23 +0100 Subject: [PATCH 12/13] Reformat test cases for readability --- logscan/caddy_parser_test.go | 109 ++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 27 deletions(-) diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go index 923f6e2a..6d06203f 100644 --- a/logscan/caddy_parser_test.go +++ b/logscan/caddy_parser_test.go @@ -126,33 +126,88 @@ func TestParseUrl(t *testing.T) { func TestParseMultipleLines(t *testing.T) { want := []CaddyLogEntry{ - CaddyLogEntry{Timestamp: 1706788852.6825173, - Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, - Duration: 0.000455129, - Size_: 0, - Status_: 304, - RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, - CaddyLogEntry{Timestamp: 1706788853.7180748, - Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, - Duration: 0.000356122, - Size_: 0, - Status_: 304, - RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, - CaddyLogEntry{Timestamp: 1706788854.7159958, - Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, - Duration: 0.000728, Size_: 0, Status_: 304, RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, - CaddyLogEntry{Timestamp: 1706788855.7197819, - Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, - Duration: 0.000275939, - Size_: 0, - Status_: 304, - RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}}, - CaddyLogEntry{Timestamp: 1706788856.6911514, - Request: CaddyRequest{RemoteAddr: "1.2.3.4:41844", Proto: "HTTP/2.0", Method: "HEAD", Host: "host.example.com", URI: "/path.html", Headers: CaddyHeaders{UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}}}, - Duration: 0.000210732, - Size_: 0, - Status_: 304, - RespHeaders: CaddyHeaders{UserAgent: []string(nil), Referer: []string(nil), ContentType: []string(nil), XForwardedFor: []string(nil), AcceptLanguage: []string(nil)}, + CaddyLogEntry{ + Timestamp: 1706788852.6825173, + Duration: 0.000455129, + Size_: 0, + Status_: 304, + Request: CaddyRequest{ + RemoteAddr: "1.2.3.4:41844", + Proto: "HTTP/2.0", + Method: "HEAD", + Host: "host.example.com", + URI: "/path.html", + Headers: CaddyHeaders{ + UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, + }, + }, + }, + CaddyLogEntry{ + Timestamp: 1706788853.7180748, + Duration: 0.000356122, + Size_: 0, + Status_: 304, + Request: CaddyRequest{ + RemoteAddr: "1.2.3.4:41844", + Proto: "HTTP/2.0", + Method: "HEAD", + Host: "host.example.com", + URI: "/path.html", + Headers: CaddyHeaders{ + AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}, + }, + }, + }, + CaddyLogEntry{ + Timestamp: 1706788854.7159958, + Duration: 0.000728, + Size_: 0, + Status_: 304, + Request: CaddyRequest{ + RemoteAddr: "1.2.3.4:41844", + Proto: "HTTP/2.0", + Method: "HEAD", + Host: "host.example.com", + URI: "/path.html", + Headers: CaddyHeaders{ + UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, + AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}, + }, + }, + }, + CaddyLogEntry{ + Timestamp: 1706788855.7197819, + Duration: 0.000275939, + Size_: 0, + Status_: 304, + Request: CaddyRequest{ + RemoteAddr: "1.2.3.4:41844", + Proto: "HTTP/2.0", + Method: "HEAD", + Host: "host.example.com", + URI: "/path.html", + Headers: CaddyHeaders{ + UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, + AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}, + }, + }, + }, + CaddyLogEntry{ + Timestamp: 1706788856.6911514, + Duration: 0.000210732, + Size_: 0, + Status_: 304, + Request: CaddyRequest{ + RemoteAddr: "1.2.3.4:41844", + Proto: "HTTP/2.0", + Method: "HEAD", + Host: "host.example.com", + URI: "/path.html", + Headers: CaddyHeaders{ + UserAgent: []string{"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/121.0.0.0 Safari/537.36"}, + AcceptLanguage: []string{"ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7"}, + }, + }, }, } fp, err := os.Open("./testdata/caddy/2.json") From beec73b434c342d0b2c2778842e61cfc721c135d Mon Sep 17 00:00:00 2001 From: Martin Tournoij Date: Mon, 30 Sep 2024 15:06:56 +0100 Subject: [PATCH 13/13] Fix test and staticcheck --- logscan/caddy_parser.go | 2 +- logscan/caddy_parser_test.go | 4 ++-- logscan/logscan.go | 4 ++-- logscan/regex_parser.go | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/logscan/caddy_parser.go b/logscan/caddy_parser.go index 9b708948..488f5974 100644 --- a/logscan/caddy_parser.go +++ b/logscan/caddy_parser.go @@ -171,7 +171,7 @@ func (l CaddyLogEntry) fieldValue(name string) string { return l.Language() case fieldContentType: return l.ContentType() - case fieldHttp: + case fieldHTTP: return l.HTTP() case fieldMethod: return l.Method() diff --git a/logscan/caddy_parser_test.go b/logscan/caddy_parser_test.go index 6d06203f..832302f8 100644 --- a/logscan/caddy_parser_test.go +++ b/logscan/caddy_parser_test.go @@ -54,8 +54,8 @@ func TestParseLine(t *testing.T) { if err != nil { t.Fatalf("Failed to parse Datetime: %#v", err) } - if dt != time.Date(2024, 02, 01, 14, 32, 01, 656359195, time.Local) { - t.Fatalf("Unexpected Datetime: %#v", dt) + if w := time.Date(2024, 02, 01, 13, 32, 01, 656359195, time.UTC); dt.UTC() != w { + t.Fatalf("Unexpected Datetime:\nhave: %#v\nwant: %#v", dt.UTC(), w) } if line.XForwardedFor() != "" { t.Fatalf("Unexpected XForwardedFor: %#v", line.XForwardedFor()) diff --git a/logscan/logscan.go b/logscan/logscan.go index c15e907b..af06cde9 100644 --- a/logscan/logscan.go +++ b/logscan/logscan.go @@ -26,7 +26,7 @@ const ( fieldAcceptLanguage = "accept_language" fieldContentType = "content_type" fieldHost = "host" - fieldHttp = "http" + fieldHTTP = "http" fieldMethod = "method" fieldPath = "path" fieldQuery = "query" @@ -39,7 +39,7 @@ const ( ) var fields = []string{"ignore", "time", "date", "datetime", fieldRemoteAddr, - fieldXff, fieldMethod, fieldStatus, fieldHttp, fieldPath, fieldQuery, fieldReferrer, + fieldXff, fieldMethod, fieldStatus, fieldHTTP, fieldPath, fieldQuery, fieldReferrer, fieldUserAgent, fieldHost, fieldContentType, "timing_sec", "timing_milli", "timing_micro", fieldSize} diff --git a/logscan/regex_parser.go b/logscan/regex_parser.go index a4d599cf..ec56545c 100644 --- a/logscan/regex_parser.go +++ b/logscan/regex_parser.go @@ -94,7 +94,7 @@ func newRegexParser(format, date, tyme, datetime string, exclude []excludePatter p = `[A-Z]{3,10}` case fieldStatus: p = `\d{3}` - case fieldHttp: + case fieldHTTP: p = `HTTP/[\d.]+` case fieldPath: p = `/.*?` @@ -131,7 +131,7 @@ func (l RegexLine) Host() string { return l[fieldHost] } func (l RegexLine) RemoteAddr() string { return l[fieldRemoteAddr] } func (l RegexLine) XForwardedFor() string { return l[fieldXff] } func (l RegexLine) Method() string { return l[fieldMethod] } -func (l RegexLine) HTTP() string { return l[fieldHttp] } +func (l RegexLine) HTTP() string { return l[fieldHTTP] } func (l RegexLine) Path() string { return l[fieldPath] } func (l RegexLine) Query() string { return l[fieldQuery] } func (l RegexLine) Referrer() string { return l[fieldReferrer] }