Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Caddy JSON log parser #730

Open
wants to merge 14 commits into
base: master
Choose a base branch
from
5 changes: 4 additions & 1 deletion cmd/goatcounter/import.go
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,9 @@ Date and time parsing:
rfc3339nano 2006-01-02T15:04:05.999999999Z07:00

The full documentation is available at https://pkg.go.dev/time

The 'caddy' format _also_ accepts:
'unix_seconds_float', 'unix_milli_float' and 'unix_nano' for datetime
`

func cmdImport(f zli.Flags, ready chan<- struct{}, stop chan struct{}) error {
Expand Down Expand Up @@ -375,7 +378,7 @@ func importLog(
UserAgent: line.UserAgent(),
}

hit.CreatedAt, err = line.Datetime(scan)
hit.CreatedAt, err = scan.Datetime(line)
if err != nil {
zlog.Error(err)
continue
Expand Down
217 changes: 217 additions & 0 deletions logscan/caddy_parser.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,217 @@
package logscan

import (
"encoding/json"
"fmt"
"math"
"net/url"
"strings"
"time"

"github.com/bmatcuk/doublestar/v4"
)

// https://caddyserver.com/docs/caddyfile/directives/log
type CaddyLogEntry struct {
Timestamp interface{} `json:"ts"`
Request Request `json:"request"`
Duration float64 `json:"duration"`
Size_ int `json:"size"`
Status_ int `json:"status"`
RespHeaders Headers `json:"resp_headers"`
}

type Request struct {
RemoteAddr string `json:"remote_addr"`
Proto string `json:"proto"`
Method string `json:"method"`
Host string `json:"host"`
URI string `json:"uri"`
Headers Headers `json:"headers"`
}

type Headers struct {
UserAgent []string `json:"User-Agent"`
Referer []string `json:"Referer"`
ContentType []string `json:"Content-Type"`
XForwardedFor []string `json:"X-Forwarded-For"`
AcceptLanguage []string `json:"Accept-Language"`
}

type CaddyParser struct {
datetime string
excludePatterns []excludePattern
}

func (p CaddyParser) Parse(line string) (Line, bool, error) {
var logEntry CaddyLogEntry
err := json.Unmarshal([]byte(line), &logEntry)
if err != nil {
fmt.Println("Error unmarshalling JSON:", err)
return nil, false, err
}

for _, e := range p.excludePatterns {
if logEntry.matchesPattern(e) {
return nil, true, nil
}
}
return logEntry, false, nil
}

var _ LineParser = CaddyParser{}
var _ Line = CaddyLogEntry{}

func (l CaddyLogEntry) Host() string { return l.Request.Host }
func (l CaddyLogEntry) RemoteAddr() string { return l.Request.RemoteAddr }
func (l CaddyLogEntry) Method() string { return l.Request.Method }
func (l CaddyLogEntry) HTTP() string { return l.Request.Proto }
func (l CaddyLogEntry) Status() int { return l.Status_ }
func (l CaddyLogEntry) Size() int { return l.Size_ }
func (l CaddyLogEntry) Path() string {
u, err := url.ParseRequestURI(l.Request.URI)
if err != nil {
return ""
}
return u.Path
}

func (l CaddyLogEntry) Query() string {
u, err := url.Parse(l.Request.URI)
if err != nil {
return ""
}
return u.RawQuery
}

func (l CaddyLogEntry) Timing() time.Duration {
// TODO: `Second` should depend on the log format
// {seconds, nano, string} where string in {1m32.05s, 6.31ms}
return time.Duration(l.Duration * float64(time.Second))
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wasn't clear on the best way of implementing this

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think this can probably be auto-detected based on the type? Otherwise probably need to add a new flag for it or something. Need to look a bit at what the options are here – if you add a (failing) testcase for all possible options then I would have a better idea.

}

func (l CaddyLogEntry) Datetime(lp LineParser) (time.Time, error) {
/* time_format can be

- unix_seconds_float Floating-point number of seconds since the Unix epoch.
- unix_milli_float Floating-point number of milliseconds since the Unix epoch.
- unix_nano Integer number of nanoseconds since the Unix epoch.
- iso8601 Example: 2006-01-02T15:04:05.000Z0700
- rfc3339 Example: 2006-01-02T15:04:05Z07:00
- rfc3339_nano Example: 2006-01-02T15:04:05.999999999Z07:00
- wall Example: 2006/01/02 15:04:05
- wall_milli Example: 2006/01/02 15:04:05.000
- wall_nano Example: 2006/01/02 15:04:05.000000000
- common_log Example: 02/Jan/2006:15:04:05 -0700

Or, any compatible time layout string; see the Go documentation for full details.
*/

parser := lp.(CaddyParser)
var t time.Time
var err error
switch parser.datetime {
case "", "unix_seconds_float":
// Caddy's default
v := l.Timestamp.(float64)
sec, dec := math.Modf(v)
t = time.Unix(int64(sec), int64(dec*(1e9)))
case "unix_milli_float":
v := l.Timestamp.(float64)
sec, dec := math.Modf(v / 1000)
t = time.Unix(int64(sec), int64(dec*(1e9)))
case "unix_nano":
v := l.Timestamp.(float64)
t = time.UnixMicro(int64(v / 1000))
default:
v := l.Timestamp.(string)
t, err = time.Parse(parser.datetime, v)
if err != nil {
return time.Unix(0, 0).UTC(), err
}
}
return t, nil
}
func (l CaddyLogEntry) XForwardedFor() string {
if len(l.Request.Headers.XForwardedFor) > 0 {
return l.Request.Headers.XForwardedFor[0]
}
return ""
}
func (l CaddyLogEntry) Referrer() string {
if len(l.Request.Headers.Referer) > 0 {
return l.Request.Headers.Referer[0]
}
return ""
}
func (l CaddyLogEntry) UserAgent() string {
if len(l.Request.Headers.UserAgent) > 0 {
return l.Request.Headers.UserAgent[0]
}
return ""
}
func (l CaddyLogEntry) ContentType() string {
if len(l.Request.Headers.ContentType) > 0 {
return l.Request.Headers.ContentType[0]
}
return ""
}
func (l CaddyLogEntry) Language() string {
if len(l.Request.Headers.AcceptLanguage) > 0 {
return l.Request.Headers.AcceptLanguage[0]
}
return ""
}

func (l CaddyLogEntry) fieldValue(name string) string {
switch name {
default:
panic(fmt.Sprintf("Received invalid field request: %s", name))
case fieldUserAgent:
return l.UserAgent()
case fieldHost:
return l.Host()
case fieldRemoteAddr:
return l.RemoteAddr()
case fieldAcceptLanguage:
return l.Language()
case fieldContentType:
return l.ContentType()
case fieldHttp:
return l.HTTP()
case fieldMethod:
return l.Method()
case fieldPath:
return l.Path()
case fieldQuery:
return l.Query()
case fieldReferrer:
return l.Referrer()
case fieldSize:
return fmt.Sprint(l.Size())
case fieldStatus:
return fmt.Sprint(l.Status())
case fieldXff:
return l.XForwardedFor()
}
}

func (l CaddyLogEntry) matchesPattern(e excludePattern) bool {
var m bool
fieldValue := l.fieldValue(e.field)
switch e.kind {
default:
m = strings.Contains(fieldValue, e.pattern)
case excludeGlob:
// We use doublestar instead of filepath.Match() because the latter
// doesn't support "**" and "{a,b}" patterns, both of which are very
// useful here.
m, _ = doublestar.Match(e.pattern, fieldValue)
case excludeRe:
m = e.re.MatchString(fieldValue)
}
if e.negate {
return !m
}
return m
}
122 changes: 122 additions & 0 deletions logscan/caddy_parser_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
package logscan

import (
"io/ioutil"
"testing"
"time"
)

func TestParseLine(t *testing.T) {
data, err := ioutil.ReadFile("./caddy_testdata/1.json")
if err != nil {
t.Fatal(err)
}
p := CaddyParser{}
line, skip, err := p.Parse(string(data))
if skip {
t.Fatalf("Entry skipped")
}
if err != nil {
t.Fatalf("Failed to parse: %#v", err)
}

if line.Host() != "host.example.com" {
t.Fatalf("Unexpected Host: %#v", line.Host())
}
if line.RemoteAddr() != "1.2.3.4:5678" {
t.Fatalf("Unexpected RemoteAddr: %#v", line.RemoteAddr())
}
if line.Method() != "GET" {
t.Fatalf("Unexpected Method: %#v", line.Method())
}
if line.HTTP() != "HTTP/1.1" {
t.Fatalf("Unexpected HTTP: %#v", line.HTTP())
}
if line.Path() != "/absolute_uri.html" {
t.Fatalf("Unexpected Path: %#v", line.Path())
}
if line.Status() != 200 {
t.Fatalf("Unexpected Status: %#v", line.Status())
}
if line.Size() != 2803 {
t.Fatalf("Unexpected Size: %#v", line.Size())
}
if line.Query() != "queryparam=value" {
t.Fatalf("Unexpected Query: %#v", line.Query())
}
if line.Timing() != 1234567 {
t.Fatalf("Unexpected Timing: %#v", line.Timing())
}
dt, err := line.Datetime(p)
if err != nil {
t.Fatalf("Failed to parse Datetime: %#v", err)
}
if dt != time.Date(2024, 02, 01, 14, 32, 01, 656359195, time.Local) {
t.Fatalf("Unexpected Datetime: %#v", dt)
}
if line.XForwardedFor() != "" {
t.Fatalf("Unexpected XForwardedFor: %#v", line.XForwardedFor())
}
if line.Referrer() != "https://another.example.com/" {
t.Fatalf("Unexpected Referrer: %#v", line.Referrer())
}
if line.UserAgent() != "This is the user agent" {
t.Fatalf("Unexpected UserAgent: %#v", line.UserAgent())
}
if line.ContentType() != "" {
t.Fatalf("Unexpected ContentType: %#v", line.ContentType())
}
if line.Language() != "en" {
t.Fatalf("Unexpected Language: %#v", line.Language())
}
}

func TestParseLineDatetimeFormat(t *testing.T) {
epoch := time.Unix(0, 0).UTC()
var testdata = []struct {
format string
input string
delta time.Duration
}{
{"", `{"ts":1.5}`, 1500 * time.Millisecond}, // default value
{"unix_seconds_float", `{"ts":1.5}`, 1500 * time.Millisecond},
{"unix_milli_float", `{"ts":1500}`, 1500 * time.Millisecond},
{"unix_milli_float", `{"ts":1500.1}`, 1_500_100 * time.Microsecond},
{"unix_nano", `{"ts":1500000000}`, 1_500_000_000 * time.Nanosecond},
{time.RFC3339, `{"ts":"1970-01-01T00:00:05+00:00"}`, 5 * time.Second},
}
for _, tt := range testdata {
t.Run(tt.format, func(t *testing.T) {
p := CaddyParser{datetime: tt.format}
line, skip, err := p.Parse(tt.input)
if skip {
t.Fatalf("Entry skipped")
}
if err != nil {
t.Fatalf("Failed to parse: %#v", err)
}
dt, err := line.Datetime(p)
if err != nil {
t.Fatalf("Failed to parse Datetime: %#v", err)
}
expected := epoch.Add(tt.delta)
if dt.UTC() != expected.UTC() {
t.Fatalf("Unexpected Datetime: %#v vs %#v", dt, expected)
}
})
}
}

func TestParseUrl(t *testing.T) {
p := CaddyParser{}
line, skip, err := p.Parse(`{"request": {"uri": "//asd"}}`)
if skip {
t.Fatalf("Entry skipped")
}
if err != nil {
t.Fatalf("Failed to parse: %#v", err)
}
if line.Path() != "//asd" {
t.Fatalf("Unexpected Path: %#v", line.Path())
}
}
Loading