Skip to content

Commit

Permalink
Introduce revamped health/readiness
Browse files Browse the repository at this point in the history
This moves the health code into its own package. It also allows a
readiness endpoint (`/readyz`) to be enabled by passing the `--healthz`
flag to audito-maldito.

The health system was also refactored to track a set of named components
and report whether each of them is ready. These components are then
listed as part of the readyz endpoint output.

In the future, the intent is to also add a livez endpoint.

Signed-off-by: Juan Antonio Osorio <[email protected]>
  • Loading branch information
JAORMX committed Apr 18, 2023
1 parent 826afd5 commit 5c08335
Show file tree
Hide file tree
Showing 11 changed files with 226 additions and 130 deletions.
28 changes: 20 additions & 8 deletions internal/app/app.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"github.com/metal-toolbox/audito-maldito/internal/auditd"
"github.com/metal-toolbox/audito-maldito/internal/auditd/dirreader"
"github.com/metal-toolbox/audito-maldito/internal/common"
"github.com/metal-toolbox/audito-maldito/internal/health"
"github.com/metal-toolbox/audito-maldito/internal/journald"
"github.com/metal-toolbox/audito-maldito/internal/metrics"
"github.com/metal-toolbox/audito-maldito/internal/processors"
Expand All @@ -40,11 +41,12 @@ OPTIONS
var logger *zap.SugaredLogger

//nolint
func Run(ctx context.Context, osArgs []string, h *common.Health, optLoggerConfig *zap.Config) error {
func Run(ctx context.Context, osArgs []string, h *health.Health, optLoggerConfig *zap.Config) error {
var bootID string
var auditlogpath string
var auditLogDirPath string
var enableMetrics bool
var enableHealthz bool
logLevel := zapcore.DebugLevel // TODO: Switch default back to zapcore.ErrorLevel.

flagSet := flag.NewFlagSet(osArgs[0], flag.ContinueOnError)
Expand All @@ -55,6 +57,7 @@ func Run(ctx context.Context, osArgs []string, h *common.Health, optLoggerConfig
flagSet.StringVar(&auditLogDirPath, "audit-dir-path", "/var/log/audit", "Path to the Linux audit log directory")
flagSet.Var(&logLevel, "log-level", "Set the log level according to zapcore.Level")
flagSet.BoolVar(&enableMetrics, "metrics", false, "Enable Prometheus HTTP /metrics server")
flagSet.BoolVar(&enableHealthz, "healthz", false, "Enable HTTP health endpoints server")
flagSet.Usage = func() {
os.Stderr.WriteString(usage)
flagSet.PrintDefaults()
Expand Down Expand Up @@ -114,11 +117,20 @@ func Run(ctx context.Context, osArgs []string, h *common.Health, optLoggerConfig
return fmt.Errorf("failed to open audit log file: %w", auditfileerr)
}

server := &http.Server{Addr: ":2112"}

if enableMetrics {
server := &http.Server{Addr: ":2112"}
http.Handle("/metrics", promhttp.Handler())
}

if enableHealthz {
http.Handle("/readyz", h.ReadyzHandler())
// TODO: Add livez endpoint
}

if enableMetrics || enableHealthz {
eg.Go(func() error {
http.Handle("/metrics", promhttp.Handler())
logger.Infoln("Starting HTTP metrics server on :2112")
logger.Infoln("Starting HTTP server on :2112")
if err := server.ListenAndServe(); err != nil {
logger.Errorf("Failed to start HTTP metrics server: %v", err)
return err
Expand All @@ -139,10 +151,10 @@ func Run(ctx context.Context, osArgs []string, h *common.Health, optLoggerConfig
auditLogDirPath, err)
}

h.AddReadiness()
h.AddReadiness(dirreader.DirReaderComponentName)
go func() {
<-logDirReader.InitFilesDone()
h.OnReady()
h.OnReady(dirreader.DirReaderComponentName)
}()

eg.Go(func() error {
Expand Down Expand Up @@ -210,7 +222,7 @@ func Run(ctx context.Context, osArgs []string, h *common.Health, optLoggerConfig
}
})
} else {
h.AddReadiness()
h.AddReadiness(journald.JournaldReaderComponentName)
eg.Go(func() error {
jp := journald.Processor{
BootID: bootID,
Expand All @@ -232,7 +244,7 @@ func Run(ctx context.Context, osArgs []string, h *common.Health, optLoggerConfig
})
}

h.AddReadiness()
h.AddReadiness(auditd.AuditdProcessorComponentName)
eg.Go(func() error {
ap := auditd.Auditd{
After: time.UnixMicro(int64(lastReadJournalTS)),
Expand Down
11 changes: 9 additions & 2 deletions internal/auditd/auditd.go
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ import (

"github.com/metal-toolbox/audito-maldito/internal/auditd/sessiontracker"
"github.com/metal-toolbox/audito-maldito/internal/common"
"github.com/metal-toolbox/audito-maldito/internal/health"
)

const (
// AuditdProcessorComponentName is the name under which the auditd
// processor is tracked by the health check. Read passes it to
// Health.OnReady to mark this component as ready.
AuditdProcessorComponentName = "auditd-processor"
)

// libaudit variables.
Expand Down Expand Up @@ -48,7 +55,7 @@ type Auditd struct {
// EventW is the auditevent.EventWriter to write events to.
EventW *auditevent.EventWriter

Health *common.Health
Health *health.Health
}

// TODO: Write documentation about creating a splunk query that shows
Expand Down Expand Up @@ -82,7 +89,7 @@ func (o *Auditd) Read(ctx context.Context) error {
staleDataTicker := time.NewTicker(staleDataCleanupInterval)
defer staleDataTicker.Stop()

o.Health.OnReady()
o.Health.OnReady(AuditdProcessorComponentName)

for {
select {
Expand Down
5 changes: 3 additions & 2 deletions internal/auditd/auditd_good_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import (
"go.uber.org/zap"

"github.com/metal-toolbox/audito-maldito/internal/common"
"github.com/metal-toolbox/audito-maldito/internal/health"
"github.com/metal-toolbox/audito-maldito/internal/testtools"
)

Expand Down Expand Up @@ -82,7 +83,7 @@ func TestAuditd_Read_GoodRemoteUserLoginFirst(t *testing.T) {
Events: events,
T: t,
}),
Health: common.NewSingleReadinessHealth(),
Health: health.NewSingleReadinessHealth(AuditdProcessorComponentName),
}

exited := make(chan error, 1)
Expand Down Expand Up @@ -145,7 +146,7 @@ func TestAuditd_Read_GoodAuditdEventsFirst(t *testing.T) {
Events: events,
T: t,
}),
Health: common.NewSingleReadinessHealth(),
Health: health.NewSingleReadinessHealth(AuditdProcessorComponentName),
}

exited := make(chan error, 1)
Expand Down
7 changes: 4 additions & 3 deletions internal/auditd/auditd_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import (
"github.com/metal-toolbox/audito-maldito/internal/auditd/sessiontracker"
fakest "github.com/metal-toolbox/audito-maldito/internal/auditd/sessiontracker/fakes"
"github.com/metal-toolbox/audito-maldito/internal/common"
"github.com/metal-toolbox/audito-maldito/internal/health"
"github.com/metal-toolbox/audito-maldito/internal/testtools"
)

Expand All @@ -38,7 +39,7 @@ func TestAuditd_Read_RemoteLoginError(t *testing.T) {
Events: events,
T: t,
}),
Health: common.NewSingleReadinessHealth(),
Health: health.NewSingleReadinessHealth(AuditdProcessorComponentName),
}

errs := make(chan error, 1)
Expand Down Expand Up @@ -82,7 +83,7 @@ func TestAuditd_Read_ParseAuditLogError(t *testing.T) {
Events: events,
T: t,
}),
Health: common.NewSingleReadinessHealth(),
Health: health.NewSingleReadinessHealth(AuditdProcessorComponentName),
}

errs := make(chan error, 1)
Expand Down Expand Up @@ -127,7 +128,7 @@ func TestAuditd_Read_AuditEventError(t *testing.T) {
Events: events,
T: t,
}),
Health: common.NewSingleReadinessHealth(),
Health: health.NewSingleReadinessHealth(AuditdProcessorComponentName),
}

cancelEventWFn()
Expand Down
6 changes: 6 additions & 0 deletions internal/auditd/dirreader/dirreader.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ import (
"github.com/fsnotify/fsnotify"
)

const (
// DirReaderComponentName is the component name for the audit log
// directory reader. Callers use it to register the reader with the
// health checks and to report it as ready.
DirReaderComponentName = "auditlog-dirreader"
)

// StartLogDirReader creates and starts a LogDirReader for
// the specified directory path (e.g., "/var/log/audit").
//
Expand Down
77 changes: 0 additions & 77 deletions internal/common/health.go

This file was deleted.

Loading

0 comments on commit 5c08335

Please sign in to comment.