diff --git a/pkg/controller/controller_utils.go b/pkg/controller/controller_utils.go index 0a5cf14876..4da92b9590 100644 --- a/pkg/controller/controller_utils.go +++ b/pkg/controller/controller_utils.go @@ -768,6 +768,9 @@ func (s ActiveMachines) Less(i, j int) bool { } } + machineDeletionTimeI, perri := time.Parse(time.RFC3339, s[i].Annotations[machineutils.MarkedForDeletionTime]) + machineDeletionTimeJ, perrj := time.Parse(time.RFC3339, s[j].Annotations[machineutils.MarkedForDeletionTime]) + // Map containing machinePhase priority // the lower the priority, the more likely // it is to be deleted @@ -781,28 +784,23 @@ func (s ActiveMachines) Less(i, j int) bool { v1alpha1.MachineRunning: 6, } + switch { // Case-1: Initially we try to prioritize machine deletion based on machinePriority annotation. + case machineIPriority != machineJPriority: + return machineIPriority < machineJPriority // Case-2: If both priorities are equal, then we pick the one with older MarkedForDeletionTime annotation + case perri == nil && perrj != nil: + return true + case perri != nil && perrj == nil: + return false + case perri == nil && perrj == nil && !machineDeletionTimeI.Equal(machineDeletionTimeJ): + return machineDeletionTimeI.Before(machineDeletionTimeJ) // Case-3: If both don't have the MarkedForDeletionTime annotation, then we look at their machinePhase // and prioritize as mentioned in the above map - // Case-4: If all above cases are false, we prioritize based on creation time - if machineIPriority != machineJPriority { - return machineIPriority < machineJPriority - } else if s[i].Annotations[machineutils.MarkedForDeletionTime] != s[j].Annotations[machineutils.MarkedForDeletionTime] { - machineIDeletionTime, machineJDeletionTime := s[i].Annotations[machineutils.MarkedForDeletionTime], s[j].Annotations[machineutils.MarkedForDeletionTime] - if machineIDeletionTime != "" && machineJDeletionTime == "" { - return true - } else if machineIDeletionTime == "" && machineJDeletionTime != "" { - return false - } else { - // TODO: have error handling for parsing time - timeI, _ := time.Parse(time.RFC3339, machineIDeletionTime) - timeJ, _ := time.Parse(time.RFC3339, machineJDeletionTime) - return timeI.Before(timeJ) - } - } else if m[s[i].Status.CurrentStatus.Phase] != m[s[j].Status.CurrentStatus.Phase] { + case m[s[i].Status.CurrentStatus.Phase] != m[s[j].Status.CurrentStatus.Phase]: return m[s[i].Status.CurrentStatus.Phase] < m[s[j].Status.CurrentStatus.Phase] - } else if s[i].CreationTimestamp != s[j].CreationTimestamp { + // Case-4: If all above cases are false, we prioritize based on creation time + case s[i].CreationTimestamp != s[j].CreationTimestamp: return s[i].CreationTimestamp.Before(&s[j].CreationTimestamp) } diff --git a/pkg/controller/deployment.go b/pkg/controller/deployment.go index b8137f4a2d..0cdce7f5ab 100644 --- a/pkg/controller/deployment.go +++ b/pkg/controller/deployment.go @@ -47,8 +47,9 @@ import ( "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" ) -// triggerDeletionData contains the annotation to be put on machineDeployment, a boolean to indicate whether annotation is changed and -// map of machine and its markedDeletionTime +// triggerDeletionData is used to store the data related to the machines for which deletion +// should be triggered by MCM and the time when scaler decided to scale-down those machines. +// This data is stored in the TriggerDeletionByMCM annotation on the MachineDeployment. type triggerDeletionData struct { markedMachines []*v1alpha1.Machine markedMachineDeletionTimes []string @@ -707,7 +708,7 @@ func (dc *controller) updateMachineAndMachineDeploymentDeletionAnnotations(ctx c // computeMachineTriggerDeletionData computes the data related to machines that are triggered for deletion based on the annotation on the MachineDeployment. func (dc *controller) computeMachineTriggerDeletionData(mcd *v1alpha1.MachineDeployment) *triggerDeletionData { - oldTriggerDeletionAnnotationList := annotations.GetMachineNamesTriggeredForDeletion(mcd) + oldTriggerDeletionAnnotationList := annotations.GetMachineNamesWithDeletionTimesTriggeredForDeletion(mcd) newTriggerDeletionAnnotationList := make([]string, 0) markedMachines := make([]*v1alpha1.Machine, 0) markedMachineDeletionTimes := make([]string, 0) @@ -755,6 +756,7 @@ func (dc *controller) computeMachineTriggerDeletionData(mcd *v1alpha1.MachineDep } } +// TODO: separate the logic of adjusting the annotation value and updating the annotation on the MachineDeployment into two functions, and add unit tests for the function that computes the new annotation value. func (dc *controller) adjustingMachineDeploymentDeletionAnnotations(ctx context.Context, mcd *v1alpha1.MachineDeployment) (*v1alpha1.MachineDeployment, error) { if mcd.Annotations[machineutils.TriggerDeletionByMCM] == "" { return mcd, nil diff --git a/pkg/controller/deployment_sync.go b/pkg/controller/deployment_sync.go index 6a1436a4a2..8984542755 100644 --- a/pkg/controller/deployment_sync.go +++ b/pkg/controller/deployment_sync.go @@ -31,7 +31,6 @@ import ( "github.com/gardener/machine-controller-manager/pkg/apis/machine/v1alpha1" labelsutil "github.com/gardener/machine-controller-manager/pkg/util/labels" - "github.com/gardener/machine-controller-manager/pkg/util/provider/machineutils" v1 "k8s.io/api/core/v1" "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -550,15 +549,12 @@ func (dc *controller) scaleMachineSet(ctx context.Context, is *v1alpha1.MachineS // TODO: Do not mutate the machine set here, instead simply compare the annotation and if they mismatch // call SetReplicasAnnotations inside the following if clause. Then we can also move the deep-copy from // above inside the if too. - annotationsNeedUpdate := SetReplicasAnnotations(isCopy, (deployment.Spec.Replicas), (deployment.Spec.Replicas)+MaxSurge(*deployment)) - // We keep the LDRCBST annotation of MCS always in sync with MCD, regardless of whether any replica change was observed. - LDRCBSTchanged := deployment.Annotations[machineutils.LastDeploymentReplicaChangeByScalerTime] != is.Annotations[machineutils.LastDeploymentReplicaChangeByScalerTime] + replicaAnnotationNeedsUpdate := SetReplicasAnnotations(isCopy, (deployment.Spec.Replicas), (deployment.Spec.Replicas)+MaxSurge(*deployment)) scaled := false var err error - if sizeNeedsUpdate || annotationsNeedUpdate || LDRCBSTchanged { + if sizeNeedsUpdate || replicaAnnotationNeedsUpdate { isCopy.Spec.Replicas = newScale - isCopy.Annotations[machineutils.LastDeploymentReplicaChangeByScalerTime] = deployment.Annotations[machineutils.LastDeploymentReplicaChangeByScalerTime] is, err = dc.controlMachineClient.MachineSets(isCopy.Namespace).Update(ctx, isCopy, metav1.UpdateOptions{}) if err == nil && sizeNeedsUpdate { scaled = true diff --git a/pkg/controller/deployment_test.go b/pkg/controller/deployment_test.go index e6e7540382..9d78e19f88 100644 --- a/pkg/controller/deployment_test.go +++ b/pkg/controller/deployment_test.go @@ -1567,9 +1567,11 @@ var _ = Describe("machineDeployment", func() { testMachine *machinev1.Machine testNode *corev1.Node ptrBool bool + ts string ) BeforeEach(func() { ptrBool = true + ts = time.Now().Format(time.RFC3339) testMachineDeployment = &machinev1.MachineDeployment{ ObjectMeta: metav1.ObjectMeta{ Name: "MachineDeployment-test", @@ -1753,6 +1755,7 @@ var _ = Describe("machineDeployment", func() { waitForCacheSync(stop, c) actualMachineDeployment, _ := c.controlMachineClient.MachineDeployments(testNamespace).Get(context.Background(), testMachineDeployment.Name, metav1.GetOptions{}) + waitForCacheSync(stop, c) actualMachineSets, _ := c.controlMachineClient.MachineSets(testNamespace).List(context.Background(), metav1.ListOptions{}) waitForCacheSync(stop, c) actualMachines, _ := c.controlMachineClient.Machines(testNamespace).List(context.Background(), metav1.ListOptions{}) @@ -1973,6 +1976,18 @@ var _ = Describe("machineDeployment", func() { return nil }, ), + Entry("set LDRCBST annotation on the machineSet and TriggerDeletionByMCM annotation is not set on the machineSet", + func(testMachineDeployment *machinev1.MachineDeployment, _ *machinev1.MachineSet) { + testMachineDeployment.Annotations[machineutils.LastDeploymentReplicaChangeByScalerTime] = ts + testMachineDeployment.Annotations[machineutils.TriggerDeletionByMCM] = fmt.Sprintf("%s~%s", testMachine.Name, time.Now().Format(time.RFC3339)) + }, + func(_ *machinev1.MachineDeployment, mcs []machinev1.MachineSet, _ []machinev1.Machine, _ *corev1.Node) error { + Expect(mcs[0].Annotations[machineutils.LastDeploymentReplicaChangeByScalerTime]).To(Equal(ts)) + _, exists := mcs[0].Annotations[machineutils.TriggerDeletionByMCM] + Expect(exists).To(BeFalse()) + return nil + }, + ), ) }) diff --git a/pkg/controller/deployment_util.go b/pkg/controller/deployment_util.go index 002fd2a7ff..64f3653510 100644 --- a/pkg/controller/deployment_util.go +++ b/pkg/controller/deployment_util.go @@ -25,7 +25,6 @@ package controller import ( "context" "fmt" - "github.com/gardener/machine-controller-manager/pkg/util/nodeops" "maps" "reflect" "sort" @@ -33,6 +32,8 @@ import ( "strings" "time" + "github.com/gardener/machine-controller-manager/pkg/util/nodeops" + v1 "k8s.io/api/core/v1" apiequality "k8s.io/apimachinery/pkg/api/equality" "k8s.io/apimachinery/pkg/api/meta" @@ -452,13 +453,14 @@ func UpdateMachineSetClassKind(deployment *v1alpha1.MachineDeployment, newIS *v1 } var annotationsToSkip = map[string]bool{ - v1.LastAppliedConfigAnnotation: true, - RevisionAnnotation: true, - RevisionHistoryAnnotation: true, - DesiredReplicasAnnotation: true, - MaxReplicasAnnotation: true, - PreferNoScheduleKey: true, - UnfreezeAnnotation: true, + v1.LastAppliedConfigAnnotation: true, + RevisionAnnotation: true, + RevisionHistoryAnnotation: true, + DesiredReplicasAnnotation: true, + MaxReplicasAnnotation: true, + PreferNoScheduleKey: true, + UnfreezeAnnotation: true, + machineutils.TriggerDeletionByMCM: true, } // skipCopyAnnotation returns true if we should skip copying the annotation with the given annotation key diff --git a/pkg/controller/machineset.go b/pkg/controller/machineset.go index cc8eed7696..79ca7ab26c 100644 --- a/pkg/controller/machineset.go +++ b/pkg/controller/machineset.go @@ -437,7 +437,6 @@ func (c *controller) manageReplicas(ctx context.Context, allMachines []*v1alpha1 if machinesWithoutUpdateSuccessfulLabelDiff > BurstReplicas { machinesWithoutUpdateSuccessfulLabelDiff = BurstReplicas } - logMachinesPriorityAndMarkedDeletionTime(machinesWithoutUpdateSuccessfulLabel) staleMachines = append(staleMachines, getMachinesToDelete(machinesWithoutUpdateSuccessfulLabel, machinesWithoutUpdateSuccessfulLabelDiff)...) } @@ -825,7 +824,11 @@ func (c *controller) updateMachineSetFinalizers(ctx context.Context, machineSet // that machine is added to the staleMachine list to be deleted. // This is done to have consistency between machineDeployment replica change and the machines marked for deletion. func getMachinesMarkedForDeletion(machineList []*v1alpha1.Machine, machineSet *v1alpha1.MachineSet) (staleMachines []*v1alpha1.Machine) { - machineSetLRCA, perr := time.Parse(time.RFC3339, machineSet.Annotations[machineutils.LastDeploymentReplicaChangeByScalerTime]) + LDRCBST := machineSet.Annotations[machineutils.LastDeploymentReplicaChangeByScalerTime] + if LDRCBST == "" { + return + } + machineSetLDRCBST, perr := time.Parse(time.RFC3339, LDRCBST) if perr != nil { klog.Warningf("Unable to parse %q of machineset %q: %v", machineutils.LastDeploymentReplicaChangeByScalerTime, machineSet.Name, perr) return @@ -841,7 +844,7 @@ func getMachinesMarkedForDeletion(machineList []*v1alpha1.Machine, machineSet *v klog.Infof("Error parsing time from annotation %q=%q on machine %q: %v", machineutils.MarkedForDeletionTime, markedMachineDeletionTime, machine.Name, err) continue } - if machineLRCA.Before(machineSetLRCA) || machineLRCA.Equal(machineSetLRCA) { + if machineLRCA.Before(machineSetLDRCBST) || machineLRCA.Equal(machineSetLDRCBST) { staleMachines = append(staleMachines, machine) } } diff --git a/pkg/controller/machineset_util.go b/pkg/controller/machineset_util.go index 2183e2c97b..a7173bed30 100644 --- a/pkg/controller/machineset_util.go +++ b/pkg/controller/machineset_util.go @@ -203,22 +203,18 @@ func copyMachineSetClassKindToMachines(machineset *v1alpha1.MachineSet, machine return false } -func logMachinesPriorityAndMarkedDeletionTime(machines []*v1alpha1.Machine) { +func logMachinesToDelete(machines []*v1alpha1.Machine) { for _, m := range machines { priority := m.Annotations[machineutils.MachinePriority] markedDeletionTime := m.Annotations[machineutils.MarkedForDeletionTime] if priority == "1" { - klog.V(3).Infof("Machine %q has %s annotation set to 1 and %s set to %q", m.Name, machineutils.MachinePriority, machineutils.MarkedForDeletionTime, markedDeletionTime) + klog.V(3).Infof("Machine %q needs to be deleted, since it has %s annotation set to 1 and %s set to %q", m.Name, machineutils.MachinePriority, machineutils.MarkedForDeletionTime, markedDeletionTime) + } else { + klog.V(3).Infof("Machine %q needs to be deleted", m.Name) } } } -func logMachinesToDelete(machines []*v1alpha1.Machine) { - for _, m := range machines { - klog.V(3).Infof("Machine %q needs to be deleted", m.Name) - } -} - // uniqueMachines returns the input slice with duplicates removed (by MachineKey), // preserving the first occurrence order. func uniqueMachines(machines []*v1alpha1.Machine) []*v1alpha1.Machine { diff --git a/pkg/util/annotations/annotations.go b/pkg/util/annotations/annotations.go index 901a0cdacc..5fc817d0c5 100644 --- a/pkg/util/annotations/annotations.go +++ b/pkg/util/annotations/annotations.go @@ -74,9 +74,8 @@ func DeleteAnnotation(nodeAnnotations map[string]string, annotations map[string] return newAnnotations, deleted } -// GetMachineNamesTriggeredForDeletion returns the set of machine names contained within the machineutils.TriggerDeletionByMCM annotation on the given MachineDeployment -// TODO: function name is not accurate. Have another look at any scope of improvements -func GetMachineNamesTriggeredForDeletion(mcd *v1alpha1.MachineDeployment) []string { +// GetMachineNamesWithDeletionTimesTriggeredForDeletion returns the set of machine names with their marked for deletion times contained within the machineutils.TriggerDeletionByMCM annotation on the given MachineDeployment +func GetMachineNamesWithDeletionTimesTriggeredForDeletion(mcd *v1alpha1.MachineDeployment) []string { if mcd.Annotations == nil || mcd.Annotations[machineutils.TriggerDeletionByMCM] == "" { return nil }