From 1293ea20b0c1ca674828461380d48071ac85ff98 Mon Sep 17 00:00:00 2001 From: lubedacht <132355999+lubedacht@users.noreply.github.com> Date: Tue, 14 May 2024 14:17:00 +0200 Subject: [PATCH] :bug: Prevent volume deletion during node replacement (#119) **What is the purpose of this pull request/Why do we need it?** When using a CSI the volumes, which are additionally attached to a server, are not handled by CAPIC. When nodes are replaced, the current implementation will force delete all volumes, which have been attached to the server. When a cluster deletion is not requested, we need to make sure to only delete the volumes which are explicitly managed by CAPIC **Description of changes:** * Adds a check that will ensure to only delete the volumes that are managed by CAPIC when a cluster was not flagged for deletion. **Checklist:** - [x] Unit Tests added - [x] Includes [emojis](https://github.com/kubernetes-sigs/kubebuilder-release-tools?tab=readme-ov-file#kubebuilder-project-versioning) --- docs/quickstart.md | 4 +- internal/ionoscloud/client.go | 4 +- internal/ionoscloud/client/client.go | 26 +++++- internal/ionoscloud/client/errors.go | 1 + internal/ionoscloud/clienttest/mock_client.go | 87 ++++++++++++++++--- internal/service/cloud/server.go | 27 +++++- internal/service/cloud/server_test.go | 70 +++++++++++++-- internal/service/cloud/suite_test.go | 1 + scope/cluster.go | 5 ++ scope/cluster_test.go | 57 ++++++++++++ 10 files changed, 254 insertions(+), 28 deletions(-) diff --git a/docs/quickstart.md b/docs/quickstart.md index c6ffb8d0..fd043414 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -139,7 +139,9 @@ KUBECONFIG=ionos-quickstart.kubeconfig kubectl get nodes TODO(gfariasalves): Add instructions about installing a CNI or available flavours -### Cleaning a cluster +### Cleanup + +**Note: Deleting a cluster will also delete any associated volumes that have been attached to the servers** ```sh kubectl delete cluster ionos-quickstart diff --git a/internal/ionoscloud/client.go b/internal/ionoscloud/client.go index 370c0c1f..49f1fe24 100644 --- a/internal/ionoscloud/client.go +++ b/internal/ionoscloud/client.go @@ -33,10 +33,12 @@ type Client interface { // GetServer returns the server that matches the provided serverID in the specified data center. GetServer(ctx context.Context, datacenterID, serverID string) (*sdk.Server, error) // DeleteServer deletes the server that matches the provided serverID in the specified data center. - DeleteServer(ctx context.Context, datacenterID, serverID string) (string, error) + DeleteServer(ctx context.Context, datacenterID, serverID string, deleteVolumes bool) (string, error) // StartServer starts the server that matches the provided serverID in the specified data center. // Returning the location and an error if starting the server fails. StartServer(ctx context.Context, datacenterID, serverID string) (string, error) + // DeleteVolume deletes the volume that matches the provided volumeID in the specified data center. + DeleteVolume(ctx context.Context, datacenterID, volumeID string) (string, error) // CreateLAN creates a new LAN with the provided properties in the specified data center, // returning the request path. CreateLAN(ctx context.Context, datacenterID string, properties sdk.LanPropertiesPost) (string, error) diff --git a/internal/ionoscloud/client/client.go b/internal/ionoscloud/client/client.go index a2efd007..d6e0657a 100644 --- a/internal/ionoscloud/client/client.go +++ b/internal/ionoscloud/client/client.go @@ -152,7 +152,7 @@ func (c *IonosCloudClient) GetServer(ctx context.Context, datacenterID, serverID } // DeleteServer deletes the server that matches the provided serverID in the specified data center. -func (c *IonosCloudClient) DeleteServer(ctx context.Context, datacenterID, serverID string) (string, error) { +func (c *IonosCloudClient) DeleteServer(ctx context.Context, datacenterID, serverID string, deleteVolumes bool) (string, error) { if datacenterID == "" { return "", errDatacenterIDIsEmpty } @@ -161,7 +161,7 @@ func (c *IonosCloudClient) DeleteServer(ctx context.Context, datacenterID, serve } req, err := c.API.ServersApi. DatacentersServersDelete(ctx, datacenterID, serverID). - DeleteVolumes(true). + DeleteVolumes(deleteVolumes). Execute() if err != nil { return "", fmt.Errorf(apiCallErrWrapper, err) @@ -195,6 +195,28 @@ func (c *IonosCloudClient) StartServer(ctx context.Context, datacenterID, server return "", errLocationHeaderEmpty } +// DeleteVolume deletes the volume that matches the provided volumeID in the specified data center. +func (c *IonosCloudClient) DeleteVolume(ctx context.Context, datacenterID, volumeID string) (string, error) { + if datacenterID == "" { + return "", errDatacenterIDIsEmpty + } + + if volumeID == "" { + return "", errVolumeIDIsEmpty + } + + resp, err := c.API.VolumesApi.DatacentersVolumesDelete(ctx, datacenterID, volumeID).Execute() + if err != nil { + return "", fmt.Errorf(apiCallErrWrapper, err) + } + + if location := resp.Header.Get(locationHeaderKey); location != "" { + return location, nil + } + + return "", errLocationHeaderEmpty +} + // CreateLAN creates a new LAN with the provided properties in the specified data center, // returning the request location. func (c *IonosCloudClient) CreateLAN(ctx context.Context, datacenterID string, properties sdk.LanPropertiesPost, diff --git a/internal/ionoscloud/client/errors.go b/internal/ionoscloud/client/errors.go index 22669f8e..e882d662 100644 --- a/internal/ionoscloud/client/errors.go +++ b/internal/ionoscloud/client/errors.go @@ -21,6 +21,7 @@ import "errors" var ( errDatacenterIDIsEmpty = errors.New("error parsing data center ID: value cannot be empty") errServerIDIsEmpty = errors.New("error parsing server ID: value cannot be empty") + errVolumeIDIsEmpty = errors.New("error parsing volume ID: value cannot be empty") errLANIDIsEmpty = errors.New("error parsing LAN ID: value cannot be empty") errNICIDIsEmpty = errors.New("error parsing NIC ID: value cannot be empty") errIPBlockIDIsEmpty = errors.New("error parsing IP block ID: value cannot be empty") diff --git a/internal/ionoscloud/clienttest/mock_client.go b/internal/ionoscloud/clienttest/mock_client.go index 878cbfdd..48eb3f4b 100644 --- a/internal/ionoscloud/clienttest/mock_client.go +++ b/internal/ionoscloud/clienttest/mock_client.go @@ -338,9 +338,9 @@ func (_c *MockClient_DeleteLAN_Call) RunAndReturn(run func(context.Context, stri return _c } -// DeleteServer provides a mock function with given fields: ctx, datacenterID, serverID -func (_m *MockClient) DeleteServer(ctx context.Context, datacenterID string, serverID string) (string, error) { - ret := _m.Called(ctx, datacenterID, serverID) +// DeleteServer provides a mock function with given fields: ctx, datacenterID, serverID, deleteVolumes +func (_m *MockClient) DeleteServer(ctx context.Context, datacenterID string, serverID string, deleteVolumes bool) (string, error) { + ret := _m.Called(ctx, datacenterID, serverID, deleteVolumes) if len(ret) == 0 { panic("no return value specified for DeleteServer") @@ -348,17 +348,17 @@ func (_m *MockClient) DeleteServer(ctx context.Context, datacenterID string, ser var r0 string var r1 error - if rf, ok := ret.Get(0).(func(context.Context, string, string) (string, error)); ok { - return rf(ctx, datacenterID, serverID) + if rf, ok := ret.Get(0).(func(context.Context, string, string, bool) (string, error)); ok { + return rf(ctx, datacenterID, serverID, deleteVolumes) } - if rf, ok := ret.Get(0).(func(context.Context, string, string) string); ok { - r0 = rf(ctx, datacenterID, serverID) + if rf, ok := ret.Get(0).(func(context.Context, string, string, bool) string); ok { + r0 = rf(ctx, datacenterID, serverID, deleteVolumes) } else { r0 = ret.Get(0).(string) } - if rf, ok := ret.Get(1).(func(context.Context, string, string) error); ok { - r1 = rf(ctx, datacenterID, serverID) + if rf, ok := ret.Get(1).(func(context.Context, string, string, bool) error); ok { + r1 = rf(ctx, datacenterID, serverID, deleteVolumes) } else { r1 = ret.Error(1) } @@ -375,13 +375,14 @@ type MockClient_DeleteServer_Call struct { // - ctx context.Context // - datacenterID string // - serverID string -func (_e *MockClient_Expecter) DeleteServer(ctx interface{}, datacenterID interface{}, serverID interface{}) *MockClient_DeleteServer_Call { - return &MockClient_DeleteServer_Call{Call: _e.mock.On("DeleteServer", ctx, datacenterID, serverID)} +// - deleteVolumes bool +func (_e *MockClient_Expecter) DeleteServer(ctx interface{}, datacenterID interface{}, serverID interface{}, deleteVolumes interface{}) *MockClient_DeleteServer_Call { + return &MockClient_DeleteServer_Call{Call: _e.mock.On("DeleteServer", ctx, datacenterID, serverID, deleteVolumes)} } -func (_c *MockClient_DeleteServer_Call) Run(run func(ctx context.Context, datacenterID string, serverID string)) *MockClient_DeleteServer_Call { +func (_c *MockClient_DeleteServer_Call) Run(run func(ctx context.Context, datacenterID string, serverID string, deleteVolumes bool)) *MockClient_DeleteServer_Call { _c.Call.Run(func(args mock.Arguments) { - run(args[0].(context.Context), args[1].(string), args[2].(string)) + run(args[0].(context.Context), args[1].(string), args[2].(string), args[3].(bool)) }) return _c } @@ -391,7 +392,65 @@ func (_c *MockClient_DeleteServer_Call) Return(_a0 string, _a1 error) *MockClien return _c } -func (_c *MockClient_DeleteServer_Call) RunAndReturn(run func(context.Context, string, string) (string, error)) *MockClient_DeleteServer_Call { +func (_c *MockClient_DeleteServer_Call) RunAndReturn(run func(context.Context, string, string, bool) (string, error)) *MockClient_DeleteServer_Call { + _c.Call.Return(run) + return _c +} + +// DeleteVolume provides a mock function with given fields: ctx, datacenterID, volumeID +func (_m *MockClient) DeleteVolume(ctx context.Context, datacenterID string, volumeID string) (string, error) { + ret := _m.Called(ctx, datacenterID, volumeID) + + if len(ret) == 0 { + panic("no return value specified for DeleteVolume") + } + + var r0 string + var r1 error + if rf, ok := ret.Get(0).(func(context.Context, string, string) (string, error)); ok { + return rf(ctx, datacenterID, volumeID) + } + if rf, ok := ret.Get(0).(func(context.Context, string, string) string); ok { + r0 = rf(ctx, datacenterID, volumeID) + } else { + r0 = ret.Get(0).(string) + } + + if rf, ok := ret.Get(1).(func(context.Context, string, string) error); ok { + r1 = rf(ctx, datacenterID, volumeID) + } else { + r1 = ret.Error(1) + } + + return r0, r1 +} + +// MockClient_DeleteVolume_Call is a *mock.Call that shadows Run/Return methods with type explicit version for method 'DeleteVolume' +type MockClient_DeleteVolume_Call struct { + *mock.Call +} + +// DeleteVolume is a helper method to define mock.On call +// - ctx context.Context +// - datacenterID string +// - volumeID string +func (_e *MockClient_Expecter) DeleteVolume(ctx interface{}, datacenterID interface{}, volumeID interface{}) *MockClient_DeleteVolume_Call { + return &MockClient_DeleteVolume_Call{Call: _e.mock.On("DeleteVolume", ctx, datacenterID, volumeID)} +} + +func (_c *MockClient_DeleteVolume_Call) Run(run func(ctx context.Context, datacenterID string, volumeID string)) *MockClient_DeleteVolume_Call { + _c.Call.Run(func(args mock.Arguments) { + run(args[0].(context.Context), args[1].(string), args[2].(string)) + }) + return _c +} + +func (_c *MockClient_DeleteVolume_Call) Return(_a0 string, _a1 error) *MockClient_DeleteVolume_Call { + _c.Call.Return(_a0, _a1) + return _c +} + +func (_c *MockClient_DeleteVolume_Call) RunAndReturn(run func(context.Context, string, string) (string, error)) *MockClient_DeleteVolume_Call { _c.Call.Return(run) return _c } diff --git a/internal/service/cloud/server.go b/internal/service/cloud/server.go index 8b88ba33..10718cb3 100644 --- a/internal/service/cloud/server.go +++ b/internal/service/cloud/server.go @@ -138,7 +138,7 @@ func (s *Service) ReconcileServerDeletion(ctx context.Context, ms *scope.Machine } } - err = s.deleteServer(ctx, ms, *server.Id) + err = s.deleteServer(ctx, ms, server) return err == nil, err } @@ -239,11 +239,30 @@ func (s *Service) getServer(ctx context.Context, ms *scope.Machine) (*sdk.Server return nil, err } -func (s *Service) deleteServer(ctx context.Context, ms *scope.Machine, serverID string) error { +func (s *Service) deleteServer(ctx context.Context, ms *scope.Machine, server *sdk.Server) error { log := s.logger.WithName("deleteServer") - log.V(4).Info("Deleting server", "serverID", serverID) - requestLocation, err := s.ionosClient.DeleteServer(ctx, ms.DatacenterID(), serverID) + serverID := ptr.Deref(server.GetId(), "") + + deleteVolumes := ms.ClusterScope.IsDeleted() + bootVolumeID := server.GetProperties().GetBootVolume().GetId() + if !deleteVolumes && bootVolumeID != nil { + // We need to make sure to only delete volumes if the cluster is being deleted. + // If a node is being replaced, we only delete the boot volume and keep all other volumes. + // The CSI will take care of re-attaching the existing volumes to the new node. + + requestLocation, err := s.ionosClient.DeleteVolume(ctx, ms.DatacenterID(), *bootVolumeID) + if err != nil { + return fmt.Errorf("failed to request boot volume deletion: %w", err) + } + + ms.IonosMachine.SetCurrentRequest(http.MethodDelete, sdk.RequestStatusQueued, requestLocation) + log.V(4).Info("Successfully requested for boot volume deletion", "location", requestLocation) + return nil + } + + log.V(4).Info("Deleting server", "serverID", serverID, "deleteVolumes", deleteVolumes) + requestLocation, err := s.ionosClient.DeleteServer(ctx, ms.DatacenterID(), serverID, deleteVolumes) if err != nil { return fmt.Errorf("failed to request server deletion: %w", err) } diff --git a/internal/service/cloud/server_test.go b/internal/service/cloud/server_test.go index f0284ace..45dc4069 100644 --- a/internal/service/cloud/server_test.go +++ b/internal/service/cloud/server_test.go @@ -193,14 +193,68 @@ func (s *serverSuite) TestReconcileServerDeletion() { reqLocation := "delete/location" s.mockGetServerDeletionRequestCall(exampleServerID).Return(nil, nil) - s.mockDeleteServerCall(exampleServerID).Return(reqLocation, nil) + s.mockDeleteServerCall(exampleServerID, false).Return(reqLocation, nil) res, err := s.service.ReconcileServerDeletion(s.ctx, s.machineScope) + s.validateSuccessfulDeletionResponse(res, err, reqLocation) +} + +func (s *serverSuite) TestReconcileServerDeletionDeleteBootVolume() { + s.mockGetServerCall(exampleServerID).Return(&sdk.Server{ + Id: ptr.To(exampleServerID), + Properties: &sdk.ServerProperties{ + BootVolume: &sdk.ResourceReference{ + Id: ptr.To(exampleBootVolumeID), + }, + }, + }, nil).Once() + + s.mockGetServerCall(exampleServerID).Return(&sdk.Server{ + Id: ptr.To(exampleServerID), + }, nil).Once() + + reqLocationVolume := "delete/location/volume" + reqLocationServer := "delete/location/server" + + s.mockDeleteVolumeCall(exampleBootVolumeID).Return(reqLocationVolume, nil).Once() + + s.mockGetServerDeletionRequestCall(exampleServerID).Return(nil, nil) + s.mockDeleteServerCall(exampleServerID, false).Return(reqLocationServer, nil) + + res, err := s.service.ReconcileServerDeletion(s.ctx, s.machineScope) + s.validateSuccessfulDeletionResponse(res, err, reqLocationVolume) + + res, err = s.service.ReconcileServerDeletion(s.ctx, s.machineScope) + s.validateSuccessfulDeletionResponse(res, err, reqLocationServer) +} + +func (s *serverSuite) TestReconcileServerDeletionDeleteAllVolumes() { + s.clusterScope.Cluster.DeletionTimestamp = ptr.To(metav1.Now()) + s.mockGetServerCall(exampleServerID).Return(&sdk.Server{ + Id: ptr.To(exampleServerID), + Properties: &sdk.ServerProperties{ + BootVolume: &sdk.ResourceReference{ + Id: ptr.To(exampleBootVolumeID), + }, + }, + }, nil).Once() + + reqLocation := "delete/location" + s.mockGetServerDeletionRequestCall(exampleServerID).Return(nil, nil) + s.mockDeleteServerCall(exampleServerID, true).Return(reqLocation, nil) + + res, err := s.service.ReconcileServerDeletion(s.ctx, s.machineScope) + s.validateSuccessfulDeletionResponse(res, err, reqLocation) +} + +func (s *serverSuite) validateSuccessfulDeletionResponse(success bool, err error, requestLocation string) { + s.T().Helper() + s.NoError(err) - s.True(res) + s.True(success) s.NotNil(s.machineScope.IonosMachine.Status.CurrentRequest) s.Equal(http.MethodDelete, s.machineScope.IonosMachine.Status.CurrentRequest.Method) - s.Equal(s.machineScope.IonosMachine.Status.CurrentRequest.RequestPath, reqLocation) + s.Equal(s.machineScope.IonosMachine.Status.CurrentRequest.RequestPath, requestLocation) } func (s *serverSuite) TestReconcileServerDeletionServerNotFound() { @@ -281,7 +335,7 @@ func (s *serverSuite) TestReconcileServerDeletionRequestFailed() { exampleRequest := s.exampleDeleteRequest(sdk.RequestStatusFailed, exampleServerID) s.mockGetServerDeletionRequestCall(exampleServerID).Return([]sdk.Request{exampleRequest}, nil) - s.mockDeleteServerCall(exampleServerID).Return("delete/triggered", nil) + s.mockDeleteServerCall(exampleServerID, false).Return("delete/triggered", nil) res, err := s.service.ReconcileServerDeletion(s.ctx, s.machineScope) s.NoError(err) @@ -352,8 +406,12 @@ func (s *serverSuite) mockListServersCall() *clienttest.MockClient_ListServers_C return s.ionosClient.EXPECT().ListServers(s.ctx, s.machineScope.DatacenterID()) } -func (s *serverSuite) mockDeleteServerCall(serverID string) *clienttest.MockClient_DeleteServer_Call { - return s.ionosClient.EXPECT().DeleteServer(s.ctx, s.machineScope.DatacenterID(), serverID) +func (s *serverSuite) mockDeleteVolumeCall(volumeID string) *clienttest.MockClient_DeleteVolume_Call { + return s.ionosClient.EXPECT().DeleteVolume(s.ctx, s.machineScope.DatacenterID(), volumeID) +} + +func (s *serverSuite) mockDeleteServerCall(serverID string, deleteVolumes bool) *clienttest.MockClient_DeleteServer_Call { + return s.ionosClient.EXPECT().DeleteServer(s.ctx, s.machineScope.DatacenterID(), serverID, deleteVolumes) } func (s *serverSuite) mockGetServerCreationRequestCall() *clienttest.MockClient_GetRequests_Call { diff --git a/internal/service/cloud/suite_test.go b/internal/service/cloud/suite_test.go index c5cc8d53..08f0f627 100644 --- a/internal/service/cloud/suite_test.go +++ b/internal/service/cloud/suite_test.go @@ -67,6 +67,7 @@ const ( exampleSecondaryNICID = "f3b3f8e4-3b6d-4b6d-8f1d-3e3e6e3e3e3d" exampleIPBlockID = "f882d597-4ee2-4b89-b01a-cbecd0f513d8" exampleServerID = "dd426c63-cd1d-4c02-aca3-13b4a27c2ebf" + exampleBootVolumeID = "dd426c63-cd1d-4c02-aca3-13b4a27c2ebf" exampleSecondaryServerID = "dd426c63-cd1d-4c02-aca3-13b4a27c2ebd" exampleRequestPath = "/test" exampleLocation = "de/txl" diff --git a/scope/cluster.go b/scope/cluster.go index daf6a16e..22f46f22 100644 --- a/scope/cluster.go +++ b/scope/cluster.go @@ -148,6 +148,11 @@ func (c *Cluster) Location() string { return c.IonosCluster.Spec.Location } +// IsDeleted checks if the cluster was requested for deletion. +func (c *Cluster) IsDeleted() bool { + return !c.Cluster.DeletionTimestamp.IsZero() || !c.IonosCluster.DeletionTimestamp.IsZero() +} + // PatchObject will apply all changes from the IonosCloudCluster. // It will also make sure to patch the status subresource. func (c *Cluster) PatchObject() error { diff --git a/scope/cluster_test.go b/scope/cluster_test.go index 45699c32..f573d52f 100644 --- a/scope/cluster_test.go +++ b/scope/cluster_test.go @@ -260,6 +260,63 @@ func TestClusterListMachines(t *testing.T) { } } +func TestClusterIsDeleted(t *testing.T) { + scheme := runtime.NewScheme() + require.NoError(t, infrav1.AddToScheme(scheme)) + cl := fake.NewClientBuilder().WithScheme(scheme).Build() + + now := metav1.Now() + + tests := []struct { + name string + cluster *clusterv1.Cluster + ionosCluster *infrav1.IonosCloudCluster + want bool + }{ + { + name: "cluster is not deleted", + cluster: &clusterv1.Cluster{}, + ionosCluster: &infrav1.IonosCloudCluster{}, + want: false, + }, + { + name: "cluster is deleted with only cluster deletion timestamp", + cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}}, + ionosCluster: &infrav1.IonosCloudCluster{}, + want: true, + }, + { + name: "cluster is deleted with only ionos cluster deletion timestamp", + cluster: &clusterv1.Cluster{}, + ionosCluster: &infrav1.IonosCloudCluster{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}}, + want: true, + }, + { + name: "cluster is deleted with both deletion timestamps", + cluster: &clusterv1.Cluster{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}}, + ionosCluster: &infrav1.IonosCloudCluster{ObjectMeta: metav1.ObjectMeta{DeletionTimestamp: &now}}, + want: true, + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + params := ClusterParams{ + Client: cl, + Cluster: test.cluster, + IonosCluster: test.ionosCluster, + } + + c, err := NewCluster(params) + require.NoError(t, err) + require.NotNil(t, c) + + got := c.IsDeleted() + require.Equal(t, test.want, got) + }) + } +} + func buildMachineWithLabel(name string, labels map[string]string) *infrav1.IonosCloudMachine { return &infrav1.IonosCloudMachine{ ObjectMeta: metav1.ObjectMeta{