From 888e63fc1598757844717f349af352535a889d60 Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 4 Dec 2024 09:25:44 +0530 Subject: [PATCH 01/18] Added skip audit/unenroll flag to uninstall command --- .../1733248787-flag-to-skip-fleet-audit.yaml | 32 +++++++++++++++++++ internal/pkg/agent/cmd/install.go | 4 +-- internal/pkg/agent/cmd/uninstall.go | 4 ++- internal/pkg/agent/install/uninstall.go | 4 +-- 4 files changed, 39 insertions(+), 5 deletions(-) create mode 100644 changelog/fragments/1733248787-flag-to-skip-fleet-audit.yaml diff --git a/changelog/fragments/1733248787-flag-to-skip-fleet-audit.yaml b/changelog/fragments/1733248787-flag-to-skip-fleet-audit.yaml new file mode 100644 index 00000000000..023cb7e589f --- /dev/null +++ b/changelog/fragments/1733248787-flag-to-skip-fleet-audit.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; a 80ish characters long description of the change. +summary: Add a flag to skip audit/unenroll call to fleet server during uninstall + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +description: This change adds a flag to skip audit/unenroll call to fleet server. While uninstalling elastic-agent it tries to notify fleet server about the uninstallation. But in somecases users might know that the fleet server is unreachable and this notification logs multiple failures continuously. Adding this flag skips this call. + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: "elastic-agent" + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +#issue: https://github.com/owner/repo/1234 diff --git a/internal/pkg/agent/cmd/install.go b/internal/pkg/agent/cmd/install.go index b6f8447b779..f189b71e029 100644 --- a/internal/pkg/agent/cmd/install.go +++ b/internal/pkg/agent/cmd/install.go @@ -252,7 +252,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error { return err } } else { - err := install.Uninstall(cmd.Context(), cfgFile, topPath, "", log, progBar) + err := install.Uninstall(cmd.Context(), cfgFile, topPath, "", log, progBar, false) if err != nil { progBar.Describe("Uninstall from binary failed") return err @@ -276,7 +276,7 @@ func installCmd(streams *cli.IOStreams, cmd *cobra.Command) error { defer func() { if err != nil { progBar.Describe("Uninstalling") - innerErr := install.Uninstall(cmd.Context(), cfgFile, topPath, "", log, progBar) + innerErr := install.Uninstall(cmd.Context(), cfgFile, topPath, "", log, progBar, false) if innerErr != nil { progBar.Describe("Failed to Uninstall") } else { diff --git a/internal/pkg/agent/cmd/uninstall.go b/internal/pkg/agent/cmd/uninstall.go index 0fb69157082..2c7964ed3f6 100644 --- a/internal/pkg/agent/cmd/uninstall.go +++ b/internal/pkg/agent/cmd/uninstall.go @@ -36,6 +36,7 @@ Unless -f is used this command will ask confirmation before performing removal. cmd.Flags().BoolP("force", "f", false, "Force overwrite the current and do not prompt for confirmation") cmd.Flags().String("uninstall-token", "", "Uninstall token required for protected agent uninstall") + cmd.Flags().Bool("skip-fleet-audit", false, "Skip fleet audit/unenroll") return cmd } @@ -60,6 +61,7 @@ func uninstallCmd(streams *cli.IOStreams, cmd *cobra.Command) error { force, _ := cmd.Flags().GetBool("force") uninstallToken, _ := cmd.Flags().GetString("uninstall-token") + skipFleetAudit, _ := cmd.Flags().GetBool("skip-fleet-audit") if status == install.Broken { if !force { fmt.Fprintf(streams.Out, "Elastic Agent is installed but currently broken: %s\n", reason) @@ -94,7 +96,7 @@ func uninstallCmd(streams *cli.IOStreams, cmd *cobra.Command) error { fmt.Fprint(os.Stderr, logBuff.String()) }() - err = install.Uninstall(cmd.Context(), paths.ConfigFile(), paths.Top(), uninstallToken, log, progBar) + err = install.Uninstall(cmd.Context(), paths.ConfigFile(), paths.Top(), uninstallToken, log, progBar, skipFleetAudit) if err != nil { progBar.Describe("Failed to uninstall agent") return fmt.Errorf("error uninstalling agent: %w", err) diff --git a/internal/pkg/agent/install/uninstall.go b/internal/pkg/agent/install/uninstall.go index ea9cfca4ca9..bd7bf50a9ba 100644 --- a/internal/pkg/agent/install/uninstall.go +++ b/internal/pkg/agent/install/uninstall.go @@ -49,7 +49,7 @@ var ( ) // Uninstall uninstalls persistently Elastic Agent on the system. -func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *progressbar.ProgressBar) error { +func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log *logp.Logger, pt *progressbar.ProgressBar, skipFleetAudit bool) error { cwd, err := os.Getwd() if err != nil { return fmt.Errorf("unable to get current working directory") @@ -146,7 +146,7 @@ func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log // Skip on Windows because of https://github.com/elastic/elastic-agent/issues/5952 // Once the root-cause is identified then this can be re-enabled on Windows. - if notifyFleet && runtime.GOOS != "windows" { + if notifyFleet && runtime.GOOS != "windows" && !skipFleetAudit { notifyFleetAuditUninstall(ctx, log, pt, cfg, ai) //nolint:errcheck // ignore the error as we can't act on it } From b5fb853a030ec8e38fca5ad83cc72ea9cd0998c6 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 10 Dec 2024 23:25:23 +0530 Subject: [PATCH 02/18] Added tests for skip-fleet-audit flag --- internal/pkg/agent/install/uninstall.go | 19 +++++++++++-------- internal/pkg/agent/install/uninstall_test.go | 18 ++++++++++++++++++ 2 files changed, 29 insertions(+), 8 deletions(-) diff --git a/internal/pkg/agent/install/uninstall.go b/internal/pkg/agent/install/uninstall.go index bd7bf50a9ba..bc2ba50a9f4 100644 --- a/internal/pkg/agent/install/uninstall.go +++ b/internal/pkg/agent/install/uninstall.go @@ -111,8 +111,6 @@ func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log } } - // will only notify fleet of the uninstall command if it can gather config and agentinfo, and is not a stand-alone install - notifyFleet := false var ai *info.AgentInfo c, err := operations.LoadFullAgentConfig(ctx, log, cfgFile, false, unprivileged) if err != nil { @@ -127,8 +125,7 @@ func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log ai, err = info.NewAgentInfo(ctx, false) if err != nil { pt.Describe(fmt.Sprintf("unable to read agent info, Fleet will not be notified of uninstall: %v", err)) - } else { - notifyFleet = true + skipFleetAudit = true } } @@ -144,15 +141,21 @@ func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log } pt.Describe("Removed install directory") + notifyFleetIfNeeded(ctx, log, pt, cfg, ai, skipFleetAudit, notifyFleetAuditUninstall) + return nil +} + +func notifyFleetIfNeeded(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo, skipFleetAudit bool, notifyFleetAuditUninstall NotifyFleetAuditUninstall) { + // will only notify fleet of the uninstall command if it can gather config and agentinfo, and is not a stand-alone install // Skip on Windows because of https://github.com/elastic/elastic-agent/issues/5952 // Once the root-cause is identified then this can be re-enabled on Windows. - if notifyFleet && runtime.GOOS != "windows" && !skipFleetAudit { - notifyFleetAuditUninstall(ctx, log, pt, cfg, ai) //nolint:errcheck // ignore the error as we can't act on it + if ai != nil && cfg != nil && !skipFleetAudit && runtime.GOOS != "windows" { + notifyFleetAuditUninstall(ctx, log, pt, cfg, ai) //nolint:errcheck // ignore the error as we can't act on it) } - - return nil } +type NotifyFleetAuditUninstall func(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo) error + // notifyFleetAuditUninstall will attempt to notify fleet-server of the agent's uninstall. // // There are retries for the attempt after a 10s wait, but it is a best-effort approach. diff --git a/internal/pkg/agent/install/uninstall_test.go b/internal/pkg/agent/install/uninstall_test.go index c07c4e3ade6..52c250a4eee 100644 --- a/internal/pkg/agent/install/uninstall_test.go +++ b/internal/pkg/agent/install/uninstall_test.go @@ -227,3 +227,21 @@ func TestNotifyFleetAuditUnenroll(t *testing.T) { }) } + +type MockNotifyFleetAuditUninstall struct { + Called bool +} + +func (m *MockNotifyFleetAuditUninstall) Call(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo) { + m.Called = true +} +func TestSkipFleetAuditUnenroll(t *testing.T) { + log, _ := logp.NewInMemory("test", zap.NewDevelopmentEncoderConfig()) + pt := progressbar.NewOptions(-1, progressbar.OptionSetWriter(io.Discard)) + ai := &info.AgentInfo{} + cfg := &configuration.Configuration{} + + mockNotify := &MockNotifyFleetAuditUninstall{} + notifyFleetIfNeeded(context.Background(), log, pt, cfg, ai, true, notifyFleetAuditUninstall) + assert.False(t, mockNotify.Called, "NotifyFleetAuditUninstall should not be called when skipFleetAudit is true") +} From b659cc2a83fa4f607fb262a622f912d485797f2c Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 10 Dec 2024 23:36:57 +0530 Subject: [PATCH 03/18] Added tests for skip-fleet-audit flag --- internal/pkg/agent/install/uninstall.go | 12 +++++++----- internal/pkg/agent/install/uninstall_test.go | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/internal/pkg/agent/install/uninstall.go b/internal/pkg/agent/install/uninstall.go index bc2ba50a9f4..ea4b0d4d4bd 100644 --- a/internal/pkg/agent/install/uninstall.go +++ b/internal/pkg/agent/install/uninstall.go @@ -111,6 +111,8 @@ func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log } } + // will only notify fleet of the uninstall command if it can gather config and agentinfo, and is not a stand-alone install + notifyFleet := false var ai *info.AgentInfo c, err := operations.LoadFullAgentConfig(ctx, log, cfgFile, false, unprivileged) if err != nil { @@ -125,7 +127,8 @@ func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log ai, err = info.NewAgentInfo(ctx, false) if err != nil { pt.Describe(fmt.Sprintf("unable to read agent info, Fleet will not be notified of uninstall: %v", err)) - skipFleetAudit = true + } else { + notifyFleet = true } } @@ -141,15 +144,14 @@ func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log } pt.Describe("Removed install directory") - notifyFleetIfNeeded(ctx, log, pt, cfg, ai, skipFleetAudit, notifyFleetAuditUninstall) + notifyFleetIfNeeded(ctx, log, pt, cfg, ai, notifyFleet, skipFleetAudit, notifyFleetAuditUninstall) return nil } -func notifyFleetIfNeeded(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo, skipFleetAudit bool, notifyFleetAuditUninstall NotifyFleetAuditUninstall) { - // will only notify fleet of the uninstall command if it can gather config and agentinfo, and is not a stand-alone install +func notifyFleetIfNeeded(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo, notifyFleet, skipFleetAudit bool, notifyFleetAuditUninstall NotifyFleetAuditUninstall) { // Skip on Windows because of https://github.com/elastic/elastic-agent/issues/5952 // Once the root-cause is identified then this can be re-enabled on Windows. - if ai != nil && cfg != nil && !skipFleetAudit && runtime.GOOS != "windows" { + if notifyFleet && runtime.GOOS != "windows" && !skipFleetAudit { notifyFleetAuditUninstall(ctx, log, pt, cfg, ai) //nolint:errcheck // ignore the error as we can't act on it) } } diff --git a/internal/pkg/agent/install/uninstall_test.go b/internal/pkg/agent/install/uninstall_test.go index 52c250a4eee..b88e27c154d 100644 --- a/internal/pkg/agent/install/uninstall_test.go +++ b/internal/pkg/agent/install/uninstall_test.go @@ -242,6 +242,6 @@ func TestSkipFleetAuditUnenroll(t *testing.T) { cfg := &configuration.Configuration{} mockNotify := &MockNotifyFleetAuditUninstall{} - notifyFleetIfNeeded(context.Background(), log, pt, cfg, ai, true, notifyFleetAuditUninstall) + notifyFleetIfNeeded(context.Background(), log, pt, cfg, ai, true, true, notifyFleetAuditUninstall) assert.False(t, mockNotify.Called, "NotifyFleetAuditUninstall should not be called when skipFleetAudit is true") } From 17bd84d8fde39984aafebc81599c280a12865b41 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 10 Dec 2024 23:42:59 +0530 Subject: [PATCH 04/18] Added tests for skip-fleet-audit flag --- internal/pkg/agent/install/uninstall_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/pkg/agent/install/uninstall_test.go b/internal/pkg/agent/install/uninstall_test.go index b88e27c154d..862f75fe402 100644 --- a/internal/pkg/agent/install/uninstall_test.go +++ b/internal/pkg/agent/install/uninstall_test.go @@ -236,8 +236,8 @@ func (m *MockNotifyFleetAuditUninstall) Call(ctx context.Context, log *logp.Logg m.Called = true } func TestSkipFleetAuditUnenroll(t *testing.T) { - log, _ := logp.NewInMemory("test", zap.NewDevelopmentEncoderConfig()) - pt := progressbar.NewOptions(-1, progressbar.OptionSetWriter(io.Discard)) + log := &logp.Logger{} + pt := &progressbar.ProgressBar{} ai := &info.AgentInfo{} cfg := &configuration.Configuration{} From 0e120522df9badd5575ea3eeda67386f341cfa49 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sat, 14 Dec 2024 09:00:34 +0530 Subject: [PATCH 05/18] ran mage fmt --- internal/pkg/agent/install/uninstall.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/pkg/agent/install/uninstall.go b/internal/pkg/agent/install/uninstall.go index cf81491ebaa..f6af241c7ec 100644 --- a/internal/pkg/agent/install/uninstall.go +++ b/internal/pkg/agent/install/uninstall.go @@ -143,12 +143,12 @@ func Uninstall(ctx context.Context, cfgFile, topPath, uninstallToken string, log aerrors.M("directory", paths.Top())) } pt.Describe("Removed install directory") - + notifyFleetIfNeeded(ctx, log, pt, cfg, ai, notifyFleet, skipFleetAudit, notifyFleetAuditUninstall) return nil } -//Injecting notifyFleetAuditUninstall for easier unit testing +// Injecting notifyFleetAuditUninstall for easier unit testing func notifyFleetIfNeeded(ctx context.Context, log *logp.Logger, pt *progressbar.ProgressBar, cfg *configuration.Configuration, ai *info.AgentInfo, notifyFleet, skipFleetAudit bool, notifyFleetAuditUninstall NotifyFleetAuditUninstall) { if notifyFleet && !skipFleetAudit { notifyFleetAuditUninstall(ctx, log, pt, cfg, ai) //nolint:errcheck // ignore the error as we can't act on it) From 9d83f969ae89b3fb6f355697f7b170cc0524c99b Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Thu, 19 Dec 2024 22:52:00 +0530 Subject: [PATCH 06/18] Test all combinations for which fleet audit/unenroll will be skipped --- internal/pkg/agent/install/uninstall_test.go | 28 +++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/internal/pkg/agent/install/uninstall_test.go b/internal/pkg/agent/install/uninstall_test.go index 52685112cc8..d2465fa1d87 100644 --- a/internal/pkg/agent/install/uninstall_test.go +++ b/internal/pkg/agent/install/uninstall_test.go @@ -242,7 +242,29 @@ func TestSkipFleetAuditUnenroll(t *testing.T) { cfg := &configuration.Configuration{} var agentID agentInfo = "testID" - mockNotify := &MockNotifyFleetAuditUninstall{} - notifyFleetIfNeeded(context.Background(), log, pt, cfg, agentID, true, false, true, notifyFleetAuditUninstall) - assert.False(t, mockNotify.Called, "NotifyFleetAuditUninstall should not be called when skipFleetAudit is true") + testCases := []struct { + notifyFleet bool + localFleet bool + skipFleetAudit bool + }{ + {true, true, true}, + {true, true, false}, + {true, false, true}, + {false, true, true}, + {false, true, false}, + {false, false, true}, + {false, false, false}, + } + + for i, tc := range testCases { + t.Run( + fmt.Sprintf("test case #%d: %t:%t:%t", i, tc.notifyFleet, tc.localFleet, tc.skipFleetAudit), + func(t *testing.T) { + mockNotify := &MockNotifyFleetAuditUninstall{} + notifyFleetIfNeeded(context.Background(), log, pt, cfg, agentID, tc.notifyFleet, tc.localFleet, tc.skipFleetAudit, notifyFleetAuditUninstall) + assert.False(t, mockNotify.Called, "NotifyFleetAuditUninstall should not be invoked when notifyFleet: %t - localFleet: %t - skipFleetAudit: %t", tc.notifyFleet, tc.localFleet, tc.skipFleetAudit) + }, + ) + } + } From bee2d6d32594db09a46ef9bff909f87c60f39e51 Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Sat, 4 Jan 2025 20:01:58 +0530 Subject: [PATCH 07/18] Added status logger --- internal/pkg/agent/cmd/enroll_cmd.go | 2 ++ internal/pkg/fleetapi/enroll_cmd.go | 12 ++++++------ 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 6c3df8e8998..2e461965091 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -524,6 +524,8 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ } c.log.Infof("1st enrollment attempt failed, retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax) + c.log.Warn(err.Error()) + signal := make(chan struct{}) defer close(signal) backExp := backoff.NewExpBackoff(signal, enrollBackoffInit, enrollBackoffMax) diff --git a/internal/pkg/fleetapi/enroll_cmd.go b/internal/pkg/fleetapi/enroll_cmd.go index 768b76cddfd..49ef33208fe 100644 --- a/internal/pkg/fleetapi/enroll_cmd.go +++ b/internal/pkg/fleetapi/enroll_cmd.go @@ -33,10 +33,10 @@ var ErrConnRefused = errors.New("connection refused") var ErrTemporaryServerError = errors.New("temporary server error, please retry later") // temporaryServerErrorCodes defines status codes that allow clients to retry their request. -var temporaryServerErrorCodes = map[int]struct{}{ - http.StatusBadGateway: {}, - http.StatusServiceUnavailable: {}, - http.StatusGatewayTimeout: {}, +var temporaryServerErrorCodes = map[int]string{ + http.StatusBadGateway: "BadGateway", + http.StatusServiceUnavailable: "ServiceUnavailable", + http.StatusGatewayTimeout: "GatewayTimeout", } const ( @@ -223,8 +223,8 @@ func (e *EnrollCmd) Execute(ctx context.Context, r *EnrollRequest) (*EnrollRespo return nil, ErrTooManyRequests } - if _, temporary := temporaryServerErrorCodes[resp.StatusCode]; temporary { - return nil, fmt.Errorf("received code %d: %w", resp.StatusCode, ErrTemporaryServerError) + if status, temporary := temporaryServerErrorCodes[resp.StatusCode]; temporary { + return nil, fmt.Errorf("received status code %d (%s): %w", resp.StatusCode, status, ErrTemporaryServerError) } if resp.StatusCode != http.StatusOK { From f8098ce3a5e5e4f37fe553c89787c24825138aea Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Sat, 4 Jan 2025 20:23:22 +0530 Subject: [PATCH 08/18] Added status logger --- ...-log-fleet-enroll-failure-status-code.yaml | 32 +++++++++++++++++++ 1 file changed, 32 insertions(+) create mode 100644 changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml diff --git a/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml b/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml new file mode 100644 index 00000000000..54e04d76d2f --- /dev/null +++ b/changelog/fragments/1736002257-log-fleet-enroll-failure-status-code.yaml @@ -0,0 +1,32 @@ +# Kind can be one of: +# - breaking-change: a change to previously-documented behavior +# - deprecation: functionality that is being removed in a later release +# - bug-fix: fixes a problem in a previous version +# - enhancement: extends functionality but does not break or fix existing behavior +# - feature: new functionality +# - known-issue: problems that we are aware of in a given version +# - security: impacts on the security of a product or a user’s deployment. +# - upgrade: important information for someone upgrading from a prior version +# - other: does not fit into any of the other categories +kind: enhancement + +# Change summary; a 80ish characters long description of the change. +summary: Added logger to print the status and code when enrollment call to fleet failed. + +# Long description; in case the summary is not enough to describe the change +# this field accommodate a description without length limits. +# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment. +#description: + +# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc. +component: elastic-agent + +# PR URL; optional; the PR number that added the changeset. +# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added. +# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number. +# Please provide it if you are adding a fragment for a different PR. +#pr: https://github.com/owner/repo/1234 + +# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of). +# If not present is automatically filled by the tooling with the issue linked to the PR number. +issue: https://github.com/elastic/elastic-agent/issues/6287 From 733ebaeb442bfbe9cf364a4dcd5e27744e0dbb87 Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Mon, 6 Jan 2025 08:22:55 +0530 Subject: [PATCH 09/18] Added loggers for fleet enrollment failure --- internal/pkg/agent/cmd/enroll_cmd.go | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 2e461965091..1692890db4b 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -523,8 +523,8 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ return nil } - c.log.Infof("1st enrollment attempt failed, retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax) - c.log.Warn(err.Error()) + c.log.Infof("1st enrollment attempt failed: %s", err.Error()) + c.log.Infof("Retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax) signal := make(chan struct{}) defer close(signal) @@ -532,13 +532,14 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ for { retry := false - if errors.Is(err, fleetapi.ErrTooManyRequests) { + switch err.Error() { + case fleetapi.ErrTooManyRequests.Error(): c.log.Warn("Too many requests on the remote server, will retry in a moment.") retry = true - } else if errors.Is(err, fleetapi.ErrConnRefused) { + case fleetapi.ErrConnRefused.Error(): c.log.Warn("Remote server is not ready to accept connections, will retry in a moment.") retry = true - } else if errors.Is(err, fleetapi.ErrTemporaryServerError) { + case fleetapi.ErrTemporaryServerError.Error(): c.log.Warn("Remote server failed to handle the request, will retry in a moment.") retry = true } @@ -548,6 +549,9 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ backExp.Wait() c.log.Infof("Retrying enrollment to URL: %s", c.client.URI()) err = c.enroll(ctx, persistentConfig) + if err != nil { + c.log.Infof("Enrollment attempt failed: %s", err.Error()) + } } return err From 23879b24cf3938023c53a3e518ba8a214d68dbec Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Mon, 6 Jan 2025 09:10:15 +0530 Subject: [PATCH 10/18] Added loggers for fleet enrollment failure --- internal/pkg/agent/cmd/enroll_cmd.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 1692890db4b..39cef61af3d 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -523,7 +523,7 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ return nil } - c.log.Infof("1st enrollment attempt failed: %s", err.Error()) + c.log.Warnf("1st enrollment attempt failed: %s", err.Error()) c.log.Infof("Retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax) signal := make(chan struct{}) @@ -532,14 +532,14 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ for { retry := false - switch err.Error() { - case fleetapi.ErrTooManyRequests.Error(): + switch { + case errors.Is(err, fleetapi.ErrTooManyRequests): c.log.Warn("Too many requests on the remote server, will retry in a moment.") retry = true - case fleetapi.ErrConnRefused.Error(): + case errors.Is(err, fleetapi.ErrConnRefused): c.log.Warn("Remote server is not ready to accept connections, will retry in a moment.") retry = true - case fleetapi.ErrTemporaryServerError.Error(): + case errors.Is(err, fleetapi.ErrTemporaryServerError): c.log.Warn("Remote server failed to handle the request, will retry in a moment.") retry = true } @@ -550,7 +550,7 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ c.log.Infof("Retrying enrollment to URL: %s", c.client.URI()) err = c.enroll(ctx, persistentConfig) if err != nil { - c.log.Infof("Enrollment attempt failed: %s", err.Error()) + c.log.Warnf("Enrollment attempt failed: %s", err.Error()) } } From 7029b4430fe12209d783fbeff37a71970d3ff560 Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Mon, 6 Jan 2025 09:21:10 +0530 Subject: [PATCH 11/18] Skip retry if there is no error --- internal/pkg/agent/cmd/enroll_cmd.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 39cef61af3d..043b9443f1d 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -551,6 +551,8 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ err = c.enroll(ctx, persistentConfig) if err != nil { c.log.Warnf("Enrollment attempt failed: %s", err.Error()) + } else { + break } } From 42597cbc7acaa3d9d673a4403f6b87440e378966 Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Mon, 6 Jan 2025 20:55:23 +0530 Subject: [PATCH 12/18] Added logger for fleet enrollment failures --- internal/pkg/agent/cmd/enroll_cmd.go | 12 +++++++----- internal/pkg/fleetapi/enroll_cmd.go | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 043b9443f1d..db833a096ee 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -523,8 +523,7 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ return nil } - c.log.Warnf("1st enrollment attempt failed: %s", err.Error()) - c.log.Infof("Retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax) + c.log.Infof("1st enrollment attempt failed, retrying enrolling to URL: %s with exponential backoff (init %s, max %s)", c.client.URI(), enrollBackoffInit, enrollBackoffMax) signal := make(chan struct{}) defer close(signal) @@ -534,14 +533,17 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ retry := false switch { case errors.Is(err, fleetapi.ErrTooManyRequests): - c.log.Warn("Too many requests on the remote server, will retry in a moment.") + c.log.Warnf("%s. Too many requests on the remote server, will retry in a moment.", err.Error()) retry = true case errors.Is(err, fleetapi.ErrConnRefused): - c.log.Warn("Remote server is not ready to accept connections, will retry in a moment.") + c.log.Warnf("%s. Remote server is not ready to accept connections, will retry in a moment.", err.Error()) retry = true case errors.Is(err, fleetapi.ErrTemporaryServerError): - c.log.Warn("Remote server failed to handle the request, will retry in a moment.") + c.log.Warnf("%s. Remote server failed to handle the request, will retry in a moment.", err.Error()) retry = true + case err != nil: + c.log.Warnf("Enrollment failed: %s", err.Error()) + retry = false } if !retry { break diff --git a/internal/pkg/fleetapi/enroll_cmd.go b/internal/pkg/fleetapi/enroll_cmd.go index 49ef33208fe..984b4713ec1 100644 --- a/internal/pkg/fleetapi/enroll_cmd.go +++ b/internal/pkg/fleetapi/enroll_cmd.go @@ -24,10 +24,10 @@ import ( type EnrollType string // ErrTooManyRequests is received when the remote server is overloaded. -var ErrTooManyRequests = errors.New("too many requests received (429)") +var ErrTooManyRequests = errors.New("Received status 429(TooManyRequest)") // ErrConnRefused is returned when the connection to the server is refused. -var ErrConnRefused = errors.New("connection refused") +var ErrConnRefused = errors.New("Connection refused") // ErrTemporaryServerError is returned when the request caused a temporary server error var ErrTemporaryServerError = errors.New("temporary server error, please retry later") From c7f5fe22a9f238ef6082126ab9f105076c5782cf Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Mon, 6 Jan 2025 20:57:33 +0530 Subject: [PATCH 13/18] Added logger for fleet enrollment failures --- internal/pkg/agent/cmd/enroll_cmd.go | 5 ----- 1 file changed, 5 deletions(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index db833a096ee..4f051a36acd 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -551,11 +551,6 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ backExp.Wait() c.log.Infof("Retrying enrollment to URL: %s", c.client.URI()) err = c.enroll(ctx, persistentConfig) - if err != nil { - c.log.Warnf("Enrollment attempt failed: %s", err.Error()) - } else { - break - } } return err From 238239c2e3ed6a967334d92ea6650e5fbe7e8499 Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Mon, 6 Jan 2025 20:58:01 +0530 Subject: [PATCH 14/18] Added logger for fleet enrollment failures --- internal/pkg/agent/cmd/enroll_cmd.go | 1 - 1 file changed, 1 deletion(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 4f051a36acd..8d30d4684a3 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -543,7 +543,6 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ retry = true case err != nil: c.log.Warnf("Enrollment failed: %s", err.Error()) - retry = false } if !retry { break From 916eb0849e43b1d95057f88403a16275e6430c4d Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Tue, 7 Jan 2025 20:30:48 +0530 Subject: [PATCH 15/18] Added logger for fleet enrollment failures --- internal/pkg/agent/cmd/enroll_cmd.go | 9 ++++----- internal/pkg/fleetapi/enroll_cmd.go | 4 ++-- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 8d30d4684a3..274370390b5 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -533,21 +533,20 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ retry := false switch { case errors.Is(err, fleetapi.ErrTooManyRequests): - c.log.Warnf("%s. Too many requests on the remote server, will retry in a moment.", err.Error()) + c.log.Warnf("Too many requests on the remote server, will retry in a moment.") retry = true case errors.Is(err, fleetapi.ErrConnRefused): - c.log.Warnf("%s. Remote server is not ready to accept connections, will retry in a moment.", err.Error()) + c.log.Warnf("Remote server is not ready to accept connections(Connection Refused), will retry in a moment.", err.Error()) retry = true case errors.Is(err, fleetapi.ErrTemporaryServerError): - c.log.Warnf("%s. Remote server failed to handle the request, will retry in a moment.", err.Error()) + c.log.Warnf("Remote server failed to handle the request(%s), will retry in a moment.", err.Error()) retry = true case err != nil: c.log.Warnf("Enrollment failed: %s", err.Error()) } - if !retry { + if !retry || !backExp.Wait() { break } - backExp.Wait() c.log.Infof("Retrying enrollment to URL: %s", c.client.URI()) err = c.enroll(ctx, persistentConfig) } diff --git a/internal/pkg/fleetapi/enroll_cmd.go b/internal/pkg/fleetapi/enroll_cmd.go index 984b4713ec1..49ef33208fe 100644 --- a/internal/pkg/fleetapi/enroll_cmd.go +++ b/internal/pkg/fleetapi/enroll_cmd.go @@ -24,10 +24,10 @@ import ( type EnrollType string // ErrTooManyRequests is received when the remote server is overloaded. -var ErrTooManyRequests = errors.New("Received status 429(TooManyRequest)") +var ErrTooManyRequests = errors.New("too many requests received (429)") // ErrConnRefused is returned when the connection to the server is refused. -var ErrConnRefused = errors.New("Connection refused") +var ErrConnRefused = errors.New("connection refused") // ErrTemporaryServerError is returned when the request caused a temporary server error var ErrTemporaryServerError = errors.New("temporary server error, please retry later") From dd47311d8f7c24d85c4898503fcf569a04bab5c0 Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Tue, 7 Jan 2025 20:34:13 +0530 Subject: [PATCH 16/18] Added logger for fleet enrollment failures --- internal/pkg/agent/cmd/enroll_cmd.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 274370390b5..848bc380c28 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -533,10 +533,10 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ retry := false switch { case errors.Is(err, fleetapi.ErrTooManyRequests): - c.log.Warnf("Too many requests on the remote server, will retry in a moment.") + c.log.Warn("Too many requests on the remote server, will retry in a moment.") retry = true case errors.Is(err, fleetapi.ErrConnRefused): - c.log.Warnf("Remote server is not ready to accept connections(Connection Refused), will retry in a moment.", err.Error()) + c.log.Warn("Remote server is not ready to accept connections(Connection Refused), will retry in a moment.") retry = true case errors.Is(err, fleetapi.ErrTemporaryServerError): c.log.Warnf("Remote server failed to handle the request(%s), will retry in a moment.", err.Error()) From 3a043786f99284647b1530735386a73d8413cc75 Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Tue, 7 Jan 2025 20:35:02 +0530 Subject: [PATCH 17/18] Added logger for fleet enrollment failures --- internal/pkg/agent/cmd/enroll_cmd.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 848bc380c28..6d188ca35b5 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -544,7 +544,7 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ case err != nil: c.log.Warnf("Enrollment failed: %s", err.Error()) } - if !retry || !backExp.Wait() { + if !retry { break } c.log.Infof("Retrying enrollment to URL: %s", c.client.URI()) From 45220c2235510f5e39db26c11f9335a2584250f8 Mon Sep 17 00:00:00 2001 From: "rohit.ms" Date: Tue, 7 Jan 2025 20:35:29 +0530 Subject: [PATCH 18/18] Added logger for fleet enrollment failures --- internal/pkg/agent/cmd/enroll_cmd.go | 1 + 1 file changed, 1 insertion(+) diff --git a/internal/pkg/agent/cmd/enroll_cmd.go b/internal/pkg/agent/cmd/enroll_cmd.go index 6d188ca35b5..28f5c135794 100644 --- a/internal/pkg/agent/cmd/enroll_cmd.go +++ b/internal/pkg/agent/cmd/enroll_cmd.go @@ -547,6 +547,7 @@ func (c *enrollCmd) enrollWithBackoff(ctx context.Context, persistentConfig map[ if !retry { break } + backExp.Wait() c.log.Infof("Retrying enrollment to URL: %s", c.client.URI()) err = c.enroll(ctx, persistentConfig) }