From 860890046da2f529875c673ea85a66c73e561f42 Mon Sep 17 00:00:00 2001 From: Matt Hook Date: Tue, 24 Oct 2023 09:24:09 +1300 Subject: [PATCH] fix(registry): remove k8s registry secrets when registries are removed [EE-5768] (#10369) --- .../endpoints/endpoint_registry_access.go | 2 +- api/http/handler/registries/handler.go | 12 +-- .../handler/registries/registry_delete.go | 64 +++++++++++++-- .../handler/registries/registry_update.go | 2 +- api/internal/registryutils/ecr_kube_secret.go | 2 +- api/internal/snapshot/snapshot.go | 5 +- api/kubernetes/cli/registries.go | 10 +-- api/pendingactions/delete_registry_secrets.go | 76 ++++++++++++++++++ api/pendingactions/pendingactions.go | 78 ++++++++++++------- api/portainer.go | 2 +- 10 files changed, 206 insertions(+), 47 deletions(-) create mode 100644 api/pendingactions/delete_registry_secrets.go diff --git a/api/http/handler/endpoints/endpoint_registry_access.go b/api/http/handler/endpoints/endpoint_registry_access.go index ce6a62d15..aaad283c6 100644 --- a/api/http/handler/endpoints/endpoint_registry_access.go +++ b/api/http/handler/endpoints/endpoint_registry_access.go @@ -140,7 +140,7 @@ func (handler *Handler) updateKubeAccess(endpoint *portainer.Endpoint, registry } for namespace := range namespacesToRemove { - err := cli.DeleteRegistrySecret(registry, namespace) + err := cli.DeleteRegistrySecret(registry.ID, namespace) if err != nil { return err } diff --git a/api/http/handler/registries/handler.go b/api/http/handler/registries/handler.go index 3d9faec6a..089853afe 100644 --- a/api/http/handler/registries/handler.go +++ b/api/http/handler/registries/handler.go @@ -8,6 +8,7 @@ import ( "github.com/portainer/portainer/api/http/proxy" "github.com/portainer/portainer/api/http/security" "github.com/portainer/portainer/api/kubernetes/cli" + "github.com/portainer/portainer/api/pendingactions" httperror "github.com/portainer/portainer/pkg/libhttp/error" "github.com/portainer/portainer/pkg/libhttp/request" @@ -25,11 +26,12 @@ func hideFields(registry *portainer.Registry, hideAccesses bool) { // Handler is the HTTP handler used to handle registry operations. type Handler struct { *mux.Router - requestBouncer security.BouncerService - DataStore dataservices.DataStore - FileService portainer.FileService - ProxyManager *proxy.Manager - K8sClientFactory *cli.ClientFactory + requestBouncer security.BouncerService + DataStore dataservices.DataStore + FileService portainer.FileService + ProxyManager *proxy.Manager + K8sClientFactory *cli.ClientFactory + PendingActionsService *pendingactions.PendingActionsService } // NewHandler creates a handler to manage registry operations. diff --git a/api/http/handler/registries/registry_delete.go b/api/http/handler/registries/registry_delete.go index 5c5263f12..c483977f1 100644 --- a/api/http/handler/registries/registry_delete.go +++ b/api/http/handler/registries/registry_delete.go @@ -1,14 +1,18 @@ package registries import ( + "fmt" "net/http" portainer "github.com/portainer/portainer/api" httperrors "github.com/portainer/portainer/api/http/errors" "github.com/portainer/portainer/api/http/security" + "github.com/portainer/portainer/api/pendingactions" httperror "github.com/portainer/portainer/pkg/libhttp/error" "github.com/portainer/portainer/pkg/libhttp/request" "github.com/portainer/portainer/pkg/libhttp/response" + + "github.com/rs/zerolog/log" ) // @id RegistryDelete @@ -38,11 +42,9 @@ func (handler *Handler) registryDelete(w http.ResponseWriter, r *http.Request) * return httperror.BadRequest("Invalid registry identifier route variable", err) } - _, err = handler.DataStore.Registry().Read(portainer.RegistryID(registryID)) - if handler.DataStore.IsErrObjectNotFound(err) { - return httperror.NotFound("Unable to find a registry with the specified identifier inside the database", err) - } else if err != nil { - return httperror.InternalServerError("Unable to find a registry with the specified identifier inside the database", err) + registry, err := handler.DataStore.Registry().Read(portainer.RegistryID(registryID)) + if err != nil { + return httperror.InternalServerError(fmt.Sprintf("Unable to load registry %q from the database", registry.Name), err) } err = handler.DataStore.Registry().Delete(portainer.RegistryID(registryID)) @@ -50,5 +52,57 @@ func (handler *Handler) registryDelete(w http.ResponseWriter, r *http.Request) * return httperror.InternalServerError("Unable to remove the registry from the database", err) } + err = handler.deleteKubernetesSecrets(registry) + if err != nil { + return httperror.InternalServerError("Unable to delete registry secrets", err) + } + return response.Empty(w) } + +func (handler *Handler) deleteKubernetesSecrets(registry *portainer.Registry) error { + + for endpointId, access := range registry.RegistryAccesses { + if access.Namespaces != nil { + // Obtain a kubeclient for the endpoint + endpoint, err := handler.DataStore.Endpoint().Endpoint(endpointId) + if err != nil { + // Skip environments that can't be loaded from the DB + log.Warn().Err(err).Msgf("Unable to load the environment with id %d from the database", endpointId) + continue + } + + cli, err := handler.K8sClientFactory.GetKubeClient(endpoint) + if err != nil { + // Skip environments that can't get a kubeclient from + log.Warn().Err(err).Msgf("Unable to get kubernetes client for environment %d", endpointId) + continue + } + + failedNamespaces := make([]string, 0) + for _, ns := range access.Namespaces { + err = cli.DeleteRegistrySecret(registry.ID, ns) + if err != nil { + failedNamespaces = append(failedNamespaces, ns) + log.Warn().Err(err).Msgf("Unable to delete registry secret %q from namespace %q for environment %d. Retrying offline", cli.RegistrySecretName(registry.ID), ns, endpointId) + } + } + + if len(failedNamespaces) > 0 { + handler.PendingActionsService.Create(portainer.PendingActions{ + EndpointID: endpointId, + Action: pendingactions.DeletePortainerK8sRegistrySecrets, + + // When extracting the data, this is the type we need to pull out + // i.e. pendingactions.DeletePortainerK8sRegistrySecretsData + ActionData: pendingactions.DeletePortainerK8sRegistrySecretsData{ + RegistryID: registry.ID, + Namespaces: failedNamespaces, + }, + }) + } + } + } + + return nil +} diff --git a/api/http/handler/registries/registry_update.go b/api/http/handler/registries/registry_update.go index c98b52dae..7b6b1967c 100644 --- a/api/http/handler/registries/registry_update.go +++ b/api/http/handler/registries/registry_update.go @@ -207,7 +207,7 @@ func (handler *Handler) updateEndpointRegistryAccess(endpoint *portainer.Endpoin } for _, namespace := range endpointAccess.Namespaces { - err := cli.DeleteRegistrySecret(registry, namespace) + err := cli.DeleteRegistrySecret(registry.ID, namespace) if err != nil { return err } diff --git a/api/internal/registryutils/ecr_kube_secret.go b/api/internal/registryutils/ecr_kube_secret.go index 9f1f3c644..2a7f5a868 100644 --- a/api/internal/registryutils/ecr_kube_secret.go +++ b/api/internal/registryutils/ecr_kube_secret.go @@ -35,7 +35,7 @@ func RefreshEcrSecret(cli portainer.KubeClient, endpoint *portainer.Endpoint, da return } - err = cli.DeleteRegistrySecret(®istry, namespace) + err = cli.DeleteRegistrySecret(registry.ID, namespace) if err != nil { return } diff --git a/api/internal/snapshot/snapshot.go b/api/internal/snapshot/snapshot.go index 6d57abbcc..11cfe85b8 100644 --- a/api/internal/snapshot/snapshot.go +++ b/api/internal/snapshot/snapshot.go @@ -312,7 +312,10 @@ func updateEndpointStatus(tx dataservices.DataStoreTx, endpoint *portainer.Endpo // Run the pending actions if latestEndpointReference.Status == portainer.EndpointStatusUp { - pendingActionsService.Execute(endpoint.ID) + err = pendingActionsService.Execute(endpoint.ID) + if err != nil { + log.Error().Err(err).Msg("background schedule error (environment snapshot), unable to execute pending actions") + } } } diff --git a/api/kubernetes/cli/registries.go b/api/kubernetes/cli/registries.go index ed6fb736c..f58c65eba 100644 --- a/api/kubernetes/cli/registries.go +++ b/api/kubernetes/cli/registries.go @@ -32,8 +32,8 @@ type ( } ) -func (kcl *KubeClient) DeleteRegistrySecret(registry *portainer.Registry, namespace string) error { - err := kcl.cli.CoreV1().Secrets(namespace).Delete(context.TODO(), registrySecretName(registry), metav1.DeleteOptions{}) +func (kcl *KubeClient) DeleteRegistrySecret(registry portainer.RegistryID, namespace string) error { + err := kcl.cli.CoreV1().Secrets(namespace).Delete(context.TODO(), kcl.RegistrySecretName(registry), metav1.DeleteOptions{}) if err != nil && !k8serrors.IsNotFound(err) { return errors.Wrap(err, "failed removing secret") } @@ -64,7 +64,7 @@ func (kcl *KubeClient) CreateRegistrySecret(registry *portainer.Registry, namesp secret := &v1.Secret{ TypeMeta: metav1.TypeMeta{}, ObjectMeta: metav1.ObjectMeta{ - Name: registrySecretName(registry), + Name: kcl.RegistrySecretName(registry.ID), Labels: map[string]string{ labelRegistryType: strconv.Itoa(int(registry.Type)), }, @@ -103,6 +103,6 @@ func (cli *KubeClient) IsRegistrySecret(namespace, secretName string) (bool, err } -func registrySecretName(registry *portainer.Registry) string { - return fmt.Sprintf("registry-%d", registry.ID) +func (*KubeClient) RegistrySecretName(registryID portainer.RegistryID) string { + return fmt.Sprintf("registry-%d", registryID) } diff --git a/api/pendingactions/delete_registry_secrets.go b/api/pendingactions/delete_registry_secrets.go new file mode 100644 index 000000000..bb7f618ae --- /dev/null +++ b/api/pendingactions/delete_registry_secrets.go @@ -0,0 +1,76 @@ +package pendingactions + +import ( + "fmt" + + portainer "github.com/portainer/portainer/api" + "github.com/rs/zerolog/log" +) + +type DeletePortainerK8sRegistrySecretsData struct { + RegistryID portainer.RegistryID `json:"RegistryID"` + Namespaces []string `json:"Namespaces"` +} + +func (service *PendingActionsService) DeleteKubernetesRegistrySecrets(endpoint *portainer.Endpoint, registryData *DeletePortainerK8sRegistrySecretsData) error { + if endpoint == nil || registryData == nil { + return nil + } + + kubeClient, err := service.clientFactory.GetKubeClient(endpoint) + if err != nil { + return err + } + + for _, namespace := range registryData.Namespaces { + err = kubeClient.DeleteRegistrySecret(registryData.RegistryID, namespace) + if err != nil { + return err + } + } + + return nil +} + +// Failure in this code is basically a bug. So if we get one we should log it and continue. +func convertToDeletePortainerK8sRegistrySecretsData(actionData interface{}) (*DeletePortainerK8sRegistrySecretsData, error) { + var registryData DeletePortainerK8sRegistrySecretsData + + // Due to the way data is stored and subsequently read from the database, we can't directly type assert the actionData to + // the type DeletePortainerK8sRegistrySecretsData. It's stored as a map[string]interface{} and we need to extract the + // data from that map. + if data, ok := actionData.(map[string]interface{}); ok { + for key, value := range data { + switch key { + case "Namespaces": + if namespaces, ok := value.([]interface{}); ok { + registryData.Namespaces = make([]string, len(namespaces)) + for i, ns := range namespaces { + if namespace, ok := ns.(string); ok { + registryData.Namespaces[i] = namespace + } + } + } else { + // we shouldn't ever see this. It's a bug if we do. + log.Debug().Msgf("DeletePortainerK8sRegistrySecrets: Failed to convert Namespaces to []interface{}") + } + case "RegistryID": + if registryID, ok := value.(float64); ok { + registryData.RegistryID = portainer.RegistryID(registryID) + } else { + // we shouldn't ever see this. It's a bug if we do. + log.Debug().Msgf("DeletePortainerK8sRegistrySecrets: Failed to convert RegistryID to float64") + } + } + } + + log.Debug().Msgf("DeletePortainerK8sRegistrySecrets: %+v", registryData) + } else { + // this should not happen. It's a bug if it does. As the actionData is defined + // by what portainer puts in it. It never comes from a user or external source so it shouldn't fail. + // Nevertheless we should check it in case of db corruption or developer mistake down the road + return nil, fmt.Errorf("type assertion failed in convertToDeletePortainerK8sRegistrySecretsData") + } + + return ®istryData, nil +} diff --git a/api/pendingactions/pendingactions.go b/api/pendingactions/pendingactions.go index 5a12ba41e..4487a0966 100644 --- a/api/pendingactions/pendingactions.go +++ b/api/pendingactions/pendingactions.go @@ -12,6 +12,11 @@ import ( "github.com/rs/zerolog/log" ) +const ( + CleanNAPWithOverridePolicies = "CleanNAPWithOverridePolicies" + DeletePortainerK8sRegistrySecrets = "DeletePortainerK8sRegistrySecrets" +) + type ( PendingActionsService struct { authorizationService *authorization.Service @@ -49,33 +54,32 @@ func (service *PendingActionsService) Execute(id portainer.EndpointID) error { endpoint, err := service.dataStore.Endpoint().Endpoint(id) if err != nil { - return fmt.Errorf("failed to retrieve endpoint %d: %w", id, err) + return fmt.Errorf("failed to retrieve environment %d: %w", id, err) } if endpoint.Status != portainer.EndpointStatusUp { - log.Debug().Msgf("Endpoint %d is not up", id) - return fmt.Errorf("endpoint %d is not up: %w", id, err) + log.Debug().Msgf("Environment %q (id: %d) is not up", endpoint.Name, id) + return fmt.Errorf("environment %q (id: %d) is not up: %w", endpoint.Name, id, err) } pendingActions, err := service.dataStore.PendingActions().ReadAll() if err != nil { log.Error().Err(err).Msgf("failed to retrieve pending actions") - return fmt.Errorf("failed to retrieve pending actions for endpoint %d: %w", id, err) + return fmt.Errorf("failed to retrieve pending actions for environment %d: %w", id, err) } for _, endpointPendingAction := range pendingActions { if endpointPendingAction.EndpointID == id { err := service.executePendingAction(endpointPendingAction, endpoint) if err != nil { - log.Error().Err(err).Msgf("failed to execute pending action") + log.Warn().Err(err).Msgf("failed to execute pending action") return fmt.Errorf("failed to execute pending action: %w", err) - } else { - // delete the pending action - err := service.dataStore.PendingActions().Delete(endpointPendingAction.ID) - if err != nil { - log.Error().Err(err).Msgf("failed to delete pending action") - return fmt.Errorf("failed to delete pending action: %w", err) - } + } + + err = service.dataStore.PendingActions().Delete(endpointPendingAction.ID) + if err != nil { + log.Error().Err(err).Msgf("failed to delete pending action") + return fmt.Errorf("failed to delete pending action: %w", err) } } } @@ -84,30 +88,50 @@ func (service *PendingActionsService) Execute(id portainer.EndpointID) error { } func (service *PendingActionsService) executePendingAction(pendingAction portainer.PendingActions, endpoint *portainer.Endpoint) error { - log.Debug().Msgf("Executing pending action %s for endpoint %d", pendingAction.Action, pendingAction.EndpointID) + log.Debug().Msgf("Executing pending action %s for environment %d", pendingAction.Action, pendingAction.EndpointID) defer func() { - log.Debug().Msgf("End executing pending action %s for endpoint %d", pendingAction.Action, pendingAction.EndpointID) + log.Debug().Msgf("End executing pending action %s for environment %d", pendingAction.Action, pendingAction.EndpointID) }() switch pendingAction.Action { - case "CleanNAPWithOverridePolicies": + case CleanNAPWithOverridePolicies: if (pendingAction.ActionData == nil) || (pendingAction.ActionData.(portainer.EndpointGroupID) == 0) { service.authorizationService.CleanNAPWithOverridePolicies(service.dataStore, endpoint, nil) - } else { - endpointGroupID := pendingAction.ActionData.(portainer.EndpointGroupID) - endpointGroup, err := service.dataStore.EndpointGroup().Read(portainer.EndpointGroupID(endpointGroupID)) - if err != nil { - log.Error().Err(err).Msgf("Error reading endpoint group to clean NAP with override policies for endpoint %d and endpoint group %d", endpoint.ID, endpointGroup.ID) - return fmt.Errorf("failed to retrieve endpoint group %d: %w", endpointGroupID, err) - } - err = service.authorizationService.CleanNAPWithOverridePolicies(service.dataStore, endpoint, endpointGroup) - if err != nil { - log.Error().Err(err).Msgf("Error cleaning NAP with override policies for endpoint %d and endpoint group %d", endpoint.ID, endpointGroup.ID) - return fmt.Errorf("failed to clean NAP with override policies for endpoint %d and endpoint group %d: %w", endpoint.ID, endpointGroup.ID, err) - } + return nil } + + endpointGroupID := pendingAction.ActionData.(portainer.EndpointGroupID) + endpointGroup, err := service.dataStore.EndpointGroup().Read(portainer.EndpointGroupID(endpointGroupID)) + if err != nil { + log.Error().Err(err).Msgf("Error reading environment group to clean NAP with override policies for environment %d and environment group %d", endpoint.ID, endpointGroup.ID) + return fmt.Errorf("failed to retrieve environment group %d: %w", endpointGroupID, err) + } + err = service.authorizationService.CleanNAPWithOverridePolicies(service.dataStore, endpoint, endpointGroup) + if err != nil { + log.Error().Err(err).Msgf("Error cleaning NAP with override policies for environment %d and environment group %d", endpoint.ID, endpointGroup.ID) + return fmt.Errorf("failed to clean NAP with override policies for environment %d and environment group %d: %w", endpoint.ID, endpointGroup.ID, err) + } + + return nil + case DeletePortainerK8sRegistrySecrets: + if pendingAction.ActionData == nil { + return nil + } + + registryData, err := convertToDeletePortainerK8sRegistrySecretsData(pendingAction.ActionData) + if err != nil { + return fmt.Errorf("failed to parse pendingActionData: %w", err) + } + + err = service.DeleteKubernetesRegistrySecrets(endpoint, registryData) + if err != nil { + log.Warn().Err(err).Int("endpoint_id", int(endpoint.ID)).Msgf("Unable to delete kubernetes registry secrets") + return fmt.Errorf("failed to delete kubernetes registry secrets for environment %d: %w", endpoint.ID, err) + } + return nil } + return nil } diff --git a/api/portainer.go b/api/portainer.go index 1e2950e5d..dfc7ac369 100644 --- a/api/portainer.go +++ b/api/portainer.go @@ -1508,7 +1508,7 @@ type ( GetMaxResourceLimits(name string, overCommitEnabled bool, resourceOverCommitPercent int) (K8sNodeLimits, error) GetNamespaceAccessPolicies() (map[string]K8sNamespaceAccessPolicy, error) UpdateNamespaceAccessPolicies(accessPolicies map[string]K8sNamespaceAccessPolicy) error - DeleteRegistrySecret(registry *Registry, namespace string) error + DeleteRegistrySecret(registry RegistryID, namespace string) error CreateRegistrySecret(registry *Registry, namespace string) error IsRegistrySecret(namespace, secretName string) (bool, error) ToggleSystemState(namespace string, isSystem bool) error