-
-
Notifications
You must be signed in to change notification settings - Fork 202
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Implement Caddy JSON log parser #730
Open
DavidVentura
wants to merge
14
commits into
arp242:master
Choose a base branch
from
DavidVentura:implement-caddy
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from 8 commits
Commits
Show all changes
14 commits
Select commit
Hold shift + click to select a range
64c28ed
Implement CaddyParser
DavidVentura 071f005
add test
DavidVentura d19d28b
Add support for query params
DavidVentura df82269
Implement Datetime parsing
DavidVentura 0e9ebcc
Flip Datetime to be publicly exposed
DavidVentura 7854f50
Parse URIs with leading slashes
DavidVentura a92d7fa
Convert field names to enum and implement exclusion for Caddy logs
DavidVentura 57a9aa4
Pass excludePatterns to all parsers
DavidVentura 1964d3f
Add test case with multiple lines
DavidVentura 0aec32b
Some basic stylistic changes; put matchesPattern() in a generic function
arp242 e50fa17
Put all testfiles in testdata/
arp242 5e08829
Reformat test cases for readability
arp242 ebe6874
Merge branch 'master' into implement-caddy
arp242 beec73b
Fix test and staticcheck
arp242 File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,217 @@ | ||
package logscan | ||
|
||
import ( | ||
"encoding/json" | ||
"fmt" | ||
"math" | ||
"net/url" | ||
"strings" | ||
"time" | ||
|
||
"github.com/bmatcuk/doublestar/v4" | ||
) | ||
|
||
// https://caddyserver.com/docs/caddyfile/directives/log | ||
type CaddyLogEntry struct { | ||
Timestamp interface{} `json:"ts"` | ||
Request Request `json:"request"` | ||
Duration float64 `json:"duration"` | ||
Size_ int `json:"size"` | ||
Status_ int `json:"status"` | ||
RespHeaders Headers `json:"resp_headers"` | ||
} | ||
|
||
type Request struct { | ||
RemoteAddr string `json:"remote_addr"` | ||
Proto string `json:"proto"` | ||
Method string `json:"method"` | ||
Host string `json:"host"` | ||
URI string `json:"uri"` | ||
Headers Headers `json:"headers"` | ||
} | ||
|
||
type Headers struct { | ||
UserAgent []string `json:"User-Agent"` | ||
Referer []string `json:"Referer"` | ||
ContentType []string `json:"Content-Type"` | ||
XForwardedFor []string `json:"X-Forwarded-For"` | ||
AcceptLanguage []string `json:"Accept-Language"` | ||
} | ||
|
||
type CaddyParser struct { | ||
datetime string | ||
excludePatterns []excludePattern | ||
} | ||
|
||
func (p CaddyParser) Parse(line string) (Line, bool, error) { | ||
var logEntry CaddyLogEntry | ||
err := json.Unmarshal([]byte(line), &logEntry) | ||
if err != nil { | ||
fmt.Println("Error unmarshalling JSON:", err) | ||
return nil, false, err | ||
} | ||
|
||
for _, e := range p.excludePatterns { | ||
if logEntry.matchesPattern(e) { | ||
return nil, true, nil | ||
} | ||
} | ||
return logEntry, false, nil | ||
} | ||
|
||
var _ LineParser = CaddyParser{} | ||
var _ Line = CaddyLogEntry{} | ||
|
||
func (l CaddyLogEntry) Host() string { return l.Request.Host } | ||
func (l CaddyLogEntry) RemoteAddr() string { return l.Request.RemoteAddr } | ||
func (l CaddyLogEntry) Method() string { return l.Request.Method } | ||
func (l CaddyLogEntry) HTTP() string { return l.Request.Proto } | ||
func (l CaddyLogEntry) Status() int { return l.Status_ } | ||
func (l CaddyLogEntry) Size() int { return l.Size_ } | ||
func (l CaddyLogEntry) Path() string { | ||
u, err := url.ParseRequestURI(l.Request.URI) | ||
if err != nil { | ||
return "" | ||
} | ||
return u.Path | ||
} | ||
|
||
func (l CaddyLogEntry) Query() string { | ||
u, err := url.Parse(l.Request.URI) | ||
if err != nil { | ||
return "" | ||
} | ||
return u.RawQuery | ||
} | ||
|
||
func (l CaddyLogEntry) Timing() time.Duration { | ||
// TODO: `Second` should depend on the log format | ||
// {seconds, nano, string} where string in {1m32.05s, 6.31ms} | ||
return time.Duration(l.Duration * float64(time.Second)) | ||
} | ||
|
||
func (l CaddyLogEntry) Datetime(lp LineParser) (time.Time, error) { | ||
/* time_format can be | ||
|
||
- unix_seconds_float Floating-point number of seconds since the Unix epoch. | ||
- unix_milli_float Floating-point number of milliseconds since the Unix epoch. | ||
- unix_nano Integer number of nanoseconds since the Unix epoch. | ||
- iso8601 Example: 2006-01-02T15:04:05.000Z0700 | ||
- rfc3339 Example: 2006-01-02T15:04:05Z07:00 | ||
- rfc3339_nano Example: 2006-01-02T15:04:05.999999999Z07:00 | ||
- wall Example: 2006/01/02 15:04:05 | ||
- wall_milli Example: 2006/01/02 15:04:05.000 | ||
- wall_nano Example: 2006/01/02 15:04:05.000000000 | ||
- common_log Example: 02/Jan/2006:15:04:05 -0700 | ||
|
||
Or, any compatible time layout string; see the Go documentation for full details. | ||
*/ | ||
|
||
parser := lp.(CaddyParser) | ||
var t time.Time | ||
var err error | ||
switch parser.datetime { | ||
case "", "unix_seconds_float": | ||
// Caddy's default | ||
v := l.Timestamp.(float64) | ||
sec, dec := math.Modf(v) | ||
t = time.Unix(int64(sec), int64(dec*(1e9))) | ||
case "unix_milli_float": | ||
v := l.Timestamp.(float64) | ||
sec, dec := math.Modf(v / 1000) | ||
t = time.Unix(int64(sec), int64(dec*(1e9))) | ||
case "unix_nano": | ||
v := l.Timestamp.(float64) | ||
t = time.UnixMicro(int64(v / 1000)) | ||
default: | ||
v := l.Timestamp.(string) | ||
t, err = time.Parse(parser.datetime, v) | ||
if err != nil { | ||
return time.Unix(0, 0).UTC(), err | ||
} | ||
} | ||
return t, nil | ||
} | ||
func (l CaddyLogEntry) XForwardedFor() string { | ||
if len(l.Request.Headers.XForwardedFor) > 0 { | ||
return l.Request.Headers.XForwardedFor[0] | ||
} | ||
return "" | ||
} | ||
func (l CaddyLogEntry) Referrer() string { | ||
if len(l.Request.Headers.Referer) > 0 { | ||
return l.Request.Headers.Referer[0] | ||
} | ||
return "" | ||
} | ||
func (l CaddyLogEntry) UserAgent() string { | ||
if len(l.Request.Headers.UserAgent) > 0 { | ||
return l.Request.Headers.UserAgent[0] | ||
} | ||
return "" | ||
} | ||
func (l CaddyLogEntry) ContentType() string { | ||
if len(l.Request.Headers.ContentType) > 0 { | ||
return l.Request.Headers.ContentType[0] | ||
} | ||
return "" | ||
} | ||
func (l CaddyLogEntry) Language() string { | ||
if len(l.Request.Headers.AcceptLanguage) > 0 { | ||
return l.Request.Headers.AcceptLanguage[0] | ||
} | ||
return "" | ||
} | ||
|
||
func (l CaddyLogEntry) fieldValue(name string) string { | ||
switch name { | ||
default: | ||
panic(fmt.Sprintf("Received invalid field request: %s", name)) | ||
case fieldUserAgent: | ||
return l.UserAgent() | ||
case fieldHost: | ||
return l.Host() | ||
case fieldRemoteAddr: | ||
return l.RemoteAddr() | ||
case fieldAcceptLanguage: | ||
return l.Language() | ||
case fieldContentType: | ||
return l.ContentType() | ||
case fieldHttp: | ||
return l.HTTP() | ||
case fieldMethod: | ||
return l.Method() | ||
case fieldPath: | ||
return l.Path() | ||
case fieldQuery: | ||
return l.Query() | ||
case fieldReferrer: | ||
return l.Referrer() | ||
case fieldSize: | ||
return fmt.Sprint(l.Size()) | ||
case fieldStatus: | ||
return fmt.Sprint(l.Status()) | ||
case fieldXff: | ||
return l.XForwardedFor() | ||
} | ||
} | ||
|
||
func (l CaddyLogEntry) matchesPattern(e excludePattern) bool { | ||
var m bool | ||
fieldValue := l.fieldValue(e.field) | ||
switch e.kind { | ||
default: | ||
m = strings.Contains(fieldValue, e.pattern) | ||
case excludeGlob: | ||
// We use doublestar instead of filepath.Match() because the latter | ||
// doesn't support "**" and "{a,b}" patterns, both of which are very | ||
// useful here. | ||
m, _ = doublestar.Match(e.pattern, fieldValue) | ||
case excludeRe: | ||
m = e.re.MatchString(fieldValue) | ||
} | ||
if e.negate { | ||
return !m | ||
} | ||
return m | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
package logscan | ||
|
||
import ( | ||
"io/ioutil" | ||
"testing" | ||
"time" | ||
) | ||
|
||
func TestParseLine(t *testing.T) { | ||
data, err := ioutil.ReadFile("./caddy_testdata/1.json") | ||
if err != nil { | ||
t.Fatal(err) | ||
} | ||
p := CaddyParser{} | ||
line, skip, err := p.Parse(string(data)) | ||
if skip { | ||
t.Fatalf("Entry skipped") | ||
} | ||
if err != nil { | ||
t.Fatalf("Failed to parse: %#v", err) | ||
} | ||
|
||
if line.Host() != "host.example.com" { | ||
t.Fatalf("Unexpected Host: %#v", line.Host()) | ||
} | ||
if line.RemoteAddr() != "1.2.3.4:5678" { | ||
t.Fatalf("Unexpected RemoteAddr: %#v", line.RemoteAddr()) | ||
} | ||
if line.Method() != "GET" { | ||
t.Fatalf("Unexpected Method: %#v", line.Method()) | ||
} | ||
if line.HTTP() != "HTTP/1.1" { | ||
t.Fatalf("Unexpected HTTP: %#v", line.HTTP()) | ||
} | ||
if line.Path() != "/absolute_uri.html" { | ||
t.Fatalf("Unexpected Path: %#v", line.Path()) | ||
} | ||
if line.Status() != 200 { | ||
t.Fatalf("Unexpected Status: %#v", line.Status()) | ||
} | ||
if line.Size() != 2803 { | ||
t.Fatalf("Unexpected Size: %#v", line.Size()) | ||
} | ||
if line.Query() != "queryparam=value" { | ||
t.Fatalf("Unexpected Query: %#v", line.Query()) | ||
} | ||
if line.Timing() != 1234567 { | ||
t.Fatalf("Unexpected Timing: %#v", line.Timing()) | ||
} | ||
dt, err := line.Datetime(p) | ||
if err != nil { | ||
t.Fatalf("Failed to parse Datetime: %#v", err) | ||
} | ||
if dt != time.Date(2024, 02, 01, 14, 32, 01, 656359195, time.Local) { | ||
t.Fatalf("Unexpected Datetime: %#v", dt) | ||
} | ||
if line.XForwardedFor() != "" { | ||
t.Fatalf("Unexpected XForwardedFor: %#v", line.XForwardedFor()) | ||
} | ||
if line.Referrer() != "https://another.example.com/" { | ||
t.Fatalf("Unexpected Referrer: %#v", line.Referrer()) | ||
} | ||
if line.UserAgent() != "This is the user agent" { | ||
t.Fatalf("Unexpected UserAgent: %#v", line.UserAgent()) | ||
} | ||
if line.ContentType() != "" { | ||
t.Fatalf("Unexpected ContentType: %#v", line.ContentType()) | ||
} | ||
if line.Language() != "en" { | ||
t.Fatalf("Unexpected Language: %#v", line.Language()) | ||
} | ||
} | ||
|
||
func TestParseLineDatetimeFormat(t *testing.T) { | ||
epoch := time.Unix(0, 0).UTC() | ||
var testdata = []struct { | ||
format string | ||
input string | ||
delta time.Duration | ||
}{ | ||
{"", `{"ts":1.5}`, 1500 * time.Millisecond}, // default value | ||
{"unix_seconds_float", `{"ts":1.5}`, 1500 * time.Millisecond}, | ||
{"unix_milli_float", `{"ts":1500}`, 1500 * time.Millisecond}, | ||
{"unix_milli_float", `{"ts":1500.1}`, 1_500_100 * time.Microsecond}, | ||
{"unix_nano", `{"ts":1500000000}`, 1_500_000_000 * time.Nanosecond}, | ||
{time.RFC3339, `{"ts":"1970-01-01T00:00:05+00:00"}`, 5 * time.Second}, | ||
} | ||
for _, tt := range testdata { | ||
t.Run(tt.format, func(t *testing.T) { | ||
p := CaddyParser{datetime: tt.format} | ||
line, skip, err := p.Parse(tt.input) | ||
if skip { | ||
t.Fatalf("Entry skipped") | ||
} | ||
if err != nil { | ||
t.Fatalf("Failed to parse: %#v", err) | ||
} | ||
dt, err := line.Datetime(p) | ||
if err != nil { | ||
t.Fatalf("Failed to parse Datetime: %#v", err) | ||
} | ||
expected := epoch.Add(tt.delta) | ||
if dt.UTC() != expected.UTC() { | ||
t.Fatalf("Unexpected Datetime: %#v vs %#v", dt, expected) | ||
} | ||
}) | ||
} | ||
} | ||
|
||
func TestParseUrl(t *testing.T) { | ||
p := CaddyParser{} | ||
line, skip, err := p.Parse(`{"request": {"uri": "//asd"}}`) | ||
if skip { | ||
t.Fatalf("Entry skipped") | ||
} | ||
if err != nil { | ||
t.Fatalf("Failed to parse: %#v", err) | ||
} | ||
if line.Path() != "//asd" { | ||
t.Fatalf("Unexpected Path: %#v", line.Path()) | ||
} | ||
} |
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I wasn't clear on the best way of implementing this
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this can probably be auto-detected based on the type? Otherwise probably need to add a new flag for it or something. Need to look a bit at what the options are here – if you add a (failing) testcase for all possible options then I would have a better idea.