diff --git a/.golangci.yml b/.golangci.yml
index cffdca26..9e5d938c 100644
--- a/.golangci.yml
+++ b/.golangci.yml
@@ -71,6 +71,7 @@ linters-settings:
       - github.com/go-logr/logr
       - github.com/coredns/corefile-migration/migration
       - github.com/pkg/errors
+      - github.com/davecgh/go-spew/spew
       - k8s.io/api
       - k8s.io/apimachinery/pkg

diff --git a/controlplane/controllers/kthreescontrolplane_controller.go b/controlplane/controllers/kthreescontrolplane_controller.go
index 4819aa1f..5526cf58 100644
--- a/controlplane/controllers/kthreescontrolplane_controller.go
+++ b/controlplane/controllers/kthreescontrolplane_controller.go
@@ -520,6 +520,10 @@ func (r *KThreesControlPlaneReconciler) reconcile(ctx context.Context, cluster *
 		return reconcile.Result{}, err
 	}

+	if err := r.syncMachines(ctx, controlPlane); err != nil {
+		return ctrl.Result{}, errors.Wrap(err, "failed to sync Machines")
+	}
+
 	// Aggregate the operational state of all the machines; while aggregating we are adding the
 	// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
 	conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false))
diff --git a/controlplane/controllers/scale.go b/controlplane/controllers/scale.go
index 462836aa..1fe1afd4 100644
--- a/controlplane/controllers/scale.go
+++ b/controlplane/controllers/scale.go
@@ -418,7 +418,7 @@ func (r *KThreesControlPlaneReconciler) updateMachine(ctx context.Context, machi
 // There are small differences in how we calculate the Machine depending on if it
 // is a create or update. Example: for a new Machine we have to calculate a new name,
 // while for an existing Machine we have to use the name of the existing Machine.
-// Also, for an existing Machine, we will not copy its labels, as they are not managed by the KThreesControlPlane controller
+// Also, for an existing Machine, we will not copy its labels, as they are not managed by the KThreesControlPlane controller.
 func (r *KThreesControlPlaneReconciler) computeDesiredMachine(kcp *controlplanev1.KThreesControlPlane, cluster *clusterv1.Cluster, failureDomain *string, existingMachine *clusterv1.Machine) (*clusterv1.Machine, error) {
 	var machineName string
 	var machineUID types.UID
diff --git a/go.mod b/go.mod
index 34123939..3777f483 100644
--- a/go.mod
+++ b/go.mod
@@ -4,6 +4,7 @@ go 1.21

 require (
 	github.com/coredns/corefile-migration v1.0.21
+	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc
 	github.com/go-logr/logr v1.4.1
 	github.com/google/uuid v1.4.0
 	github.com/onsi/ginkgo v1.16.5
@@ -12,7 +13,9 @@
 	github.com/pkg/errors v0.9.1
 	go.etcd.io/etcd/api/v3 v3.5.13
 	go.etcd.io/etcd/client/v3 v3.5.13
+	golang.org/x/exp v0.0.0-20230905200255-921286631fa9
 	google.golang.org/grpc v1.60.1
+	google.golang.org/protobuf v1.33.0
 	k8s.io/api v0.29.3
 	k8s.io/apimachinery v0.29.3
 	k8s.io/apiserver v0.29.3
@@ -44,7 +47,6 @@ require (
 	github.com/coredns/caddy v1.1.0 // indirect
 	github.com/coreos/go-semver v0.3.1 // indirect
 	github.com/coreos/go-systemd/v22 v22.5.0 // indirect
-	github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
 	github.com/distribution/reference v0.5.0 // indirect
 	github.com/docker/docker v25.0.5+incompatible // indirect
 	github.com/docker/go-connections v0.5.0 // indirect
@@ -120,7 +122,6 @@ require (
 	go.uber.org/multierr v1.11.0 // indirect
 	go.uber.org/zap v1.26.0 // indirect
 	golang.org/x/crypto v0.21.0 // indirect
-	golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect
 	golang.org/x/net v0.23.0 // indirect
 	golang.org/x/oauth2 v0.18.0 // indirect
 	golang.org/x/sync v0.6.0 // indirect
@@ -134,7 +135,6 @@ require (
 	google.golang.org/genproto v0.0.0-20231106174013-bbf56f31fb17 // indirect
 	google.golang.org/genproto/googleapis/api v0.0.0-20231106174013-bbf56f31fb17 // indirect
 	google.golang.org/genproto/googleapis/rpc v0.0.0-20231120223509-83a465c0220f // indirect
-	google.golang.org/protobuf v1.33.0 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
 	gopkg.in/ini.v1 v1.67.0 // indirect
 	gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect
diff --git a/test/e2e/config/k3s-docker.yaml b/test/e2e/config/k3s-docker.yaml
index 71bdf58d..a2dbed2d 100644
--- a/test/e2e/config/k3s-docker.yaml
+++ b/test/e2e/config/k3s-docker.yaml
@@ -65,7 +65,7 @@ providers:
       # ${ProjectRoot}/metadata.yaml to init the management cluster
       # this version should be updated when ${ProjectRoot}/metadata.yaml
       # is modified
-      - name: v0.1.99 # next; use manifest from source files
+      - name: v0.2.99 # next; use manifest from source files
         value: "../../../bootstrap/config/default"
         files:
           - sourcePath: "../../../metadata.yaml"
@@ -73,7 +73,7 @@ providers:
   - name: k3s
     type: ControlPlaneProvider
     versions:
-      - name: v0.1.99 # next; use manifest from source files
+      - name: v0.2.99 # next; use manifest from source files
         value: "../../../controlplane/config/default"
         files:
           - sourcePath: "../../../metadata.yaml"
diff --git a/test/e2e/inplace_rollout_test.go b/test/e2e/inplace_rollout_test.go
new file mode 100644
index 00000000..84c4c648
--- /dev/null
+++ b/test/e2e/inplace_rollout_test.go
@@ -0,0 +1,267 @@
+//go:build e2e
+// +build e2e
+
+/*
+Copyright 2021 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package e2e
+
+import (
+	"context"
+	"fmt"
+	"path/filepath"
+	"time"
+
+	. "github.com/onsi/ginkgo/v2"
+	. "github.com/onsi/gomega"
+	"golang.org/x/exp/rand"
+
+	corev1 "k8s.io/api/core/v1"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/util/sets"
+	"k8s.io/klog/v2"
+	"k8s.io/utils/pointer"
+
+	controlplanev1 "github.com/k3s-io/cluster-api-k3s/controlplane/api/v1beta2"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/test/framework"
+	"sigs.k8s.io/cluster-api/test/framework/clusterctl"
+	"sigs.k8s.io/cluster-api/util"
+	"sigs.k8s.io/cluster-api/util/labels"
+	"sigs.k8s.io/cluster-api/util/patch"
+	"sigs.k8s.io/controller-runtime/pkg/client"
+)
+
+// This test follows CAPI's clusterclass_rollout test to verify that in-place mutable fields
+// set on the ControlPlane object are rolled out to the underlying machines.
+// The original test does not apply to k3s clusters as it modifies ControlPlane fields specific to KubeadmControlPlane.
+// Link to CAPI clusterclass_rollout test: https://github.com/kubernetes-sigs/cluster-api/blob/main/test/e2e/clusterclass_rollout.go
+var _ = Describe("Inplace mutable fields rollout test", func() {
+	var (
+		ctx                    = context.TODO()
+		specName               = "inplace-rollout"
+		namespace              *corev1.Namespace
+		cancelWatches          context.CancelFunc
+		result                 *ApplyClusterTemplateAndWaitResult
+		clusterName            string
+		clusterctlLogFolder    string
+		infrastructureProvider string
+	)
+
+	BeforeEach(func() {
+		Expect(e2eConfig.Variables).To(HaveKey(KubernetesVersion))
+
+		clusterName = fmt.Sprintf("capik3s-inplace-%s", util.RandomString(6))
+		infrastructureProvider = "docker"
+
+		// Set up a Namespace to host objects for this spec and create a watcher for the namespace events.
+		namespace, cancelWatches = setupSpecNamespace(ctx, specName, bootstrapClusterProxy, artifactFolder)
+
+		result = new(ApplyClusterTemplateAndWaitResult)
+
+		clusterctlLogFolder = filepath.Join(artifactFolder, "clusters", bootstrapClusterProxy.GetName())
+	})
+
+	AfterEach(func() {
+		cleanInput := cleanupInput{
+			SpecName: specName,
+			Cluster: result.Cluster,
+			ClusterProxy: bootstrapClusterProxy,
+			Namespace: namespace,
+			CancelWatches: cancelWatches,
+			IntervalsGetter: e2eConfig.GetIntervals,
+			SkipCleanup: skipCleanup,
+			ArtifactFolder: artifactFolder,
+		}
+
+		dumpSpecResourcesAndCleanup(ctx, cleanInput)
+	})
+
+	Context("Modifying inplace mutable fields", func() {
+		It("Should apply new value without triggering rollout", func() {
+			By("Creating a workload cluster with v1beta1 controlplane")
+			ApplyClusterTemplateAndWait(ctx, ApplyClusterTemplateAndWaitInput{
+				ClusterProxy: bootstrapClusterProxy,
+				ConfigCluster: clusterctl.ConfigClusterInput{
+					LogFolder: clusterctlLogFolder,
+					ClusterctlConfigPath: clusterctlConfigPath,
+					KubeconfigPath: bootstrapClusterProxy.GetKubeconfigPath(),
+					InfrastructureProvider: infrastructureProvider,
+					Flavor: "topology",
+					Namespace: namespace.Name,
+					ClusterName: clusterName,
+					KubernetesVersion: e2eConfig.GetVariable(KubernetesVersion),
+					ControlPlaneMachineCount: pointer.Int64Ptr(1),
+					WorkerMachineCount: pointer.Int64Ptr(1),
+				},
+				WaitForClusterIntervals: e2eConfig.GetIntervals(specName, "wait-cluster"),
+				WaitForControlPlaneIntervals: e2eConfig.GetIntervals(specName, "wait-control-plane"),
+				WaitForMachineDeployments: e2eConfig.GetIntervals(specName, "wait-worker-nodes"),
+			}, result)
+
+			By("Rolling out changes to the control plane (in-place)")
+			machinesBeforeUpgrade := getMachinesByCluster(ctx, bootstrapClusterProxy.GetClient(), result.Cluster)
+			By("Modifying the control plane configuration via Cluster topology and waiting for changes to be applied to the control plane object (in-place)")
+			modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+				ClusterProxy: bootstrapClusterProxy,
+				Cluster: result.Cluster,
+				ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+					// Drop existing labels and annotations and set new ones.
+					topology.Metadata.Labels = map[string]string{
+						"Cluster.topology.controlPlane.newLabel": "Cluster.topology.controlPlane.newLabelValue",
+					}
+					topology.Metadata.Annotations = map[string]string{
+						"Cluster.topology.controlPlane.newAnnotation": "Cluster.topology.controlPlane.newAnnotationValue",
+					}
+					topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(rand.Intn(20)) * time.Second}        //nolint:gosec
+					topology.NodeDeletionTimeout = &metav1.Duration{Duration: time.Duration(rand.Intn(20)) * time.Second}     //nolint:gosec
+					topology.NodeVolumeDetachTimeout = &metav1.Duration{Duration: time.Duration(rand.Intn(20)) * time.Second} //nolint:gosec
+				},
+				WaitForControlPlane: e2eConfig.GetIntervals(specName, "wait-control-plane"),
+			})
+
+			By("Verifying there are no unexpected rollouts through in-place rollout")
+			Consistently(func(g Gomega) {
+				machinesAfterUpgrade := getMachinesByCluster(ctx, bootstrapClusterProxy.GetClient(), result.Cluster)
+				g.Expect(machinesAfterUpgrade.Equal(machinesBeforeUpgrade)).To(BeTrue(), "Machines must not be replaced through in-place rollout")
+			}, 30*time.Second, 1*time.Second).Should(Succeed())
+		})
+	})
+})
+
+// modifyControlPlaneViaClusterAndWaitInput is the input type for modifyControlPlaneViaClusterAndWait.
+type modifyControlPlaneViaClusterAndWaitInput struct {
+	ClusterProxy               framework.ClusterProxy
+	Cluster                    *clusterv1.Cluster
+	ModifyControlPlaneTopology func(topology *clusterv1.ControlPlaneTopology)
+	WaitForControlPlane        []interface{}
+}
+
+// modifyControlPlaneViaClusterAndWait modifies the ControlPlaneTopology of a Cluster topology via ModifyControlPlaneTopology.
+// It then waits until the changes are rolled out to the ControlPlane of the Cluster.
+func modifyControlPlaneViaClusterAndWait(ctx context.Context, input modifyControlPlaneViaClusterAndWaitInput) {
+	Expect(ctx).NotTo(BeNil(), "ctx is required for modifyControlPlaneViaClusterAndWait")
+	Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling modifyControlPlaneViaClusterAndWait")
+	Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling modifyControlPlaneViaClusterAndWait")
+
+	mgmtClient := input.ClusterProxy.GetClient()
+
+	Byf("Modifying the control plane topology of Cluster %s", klog.KObj(input.Cluster))
+
+	// Patch the control plane topology in the Cluster.
+	patchHelper, err := patch.NewHelper(input.Cluster, mgmtClient)
+	Expect(err).ToNot(HaveOccurred())
+	input.ModifyControlPlaneTopology(&input.Cluster.Spec.Topology.ControlPlane)
+	Expect(patchHelper.Patch(ctx, input.Cluster)).To(Succeed())
+
+	// NOTE: We only wait until the change is rolled out to the control plane object and not to the control plane machines.
+	Byf("Waiting for control plane rollout to complete.")
+	Eventually(func(g Gomega) {
+		// Get the ControlPlane.
+		controlPlaneRef := input.Cluster.Spec.ControlPlaneRef
+		controlPlaneTopology := input.Cluster.Spec.Topology.ControlPlane
+
+		// Get KThreesControlPlane object.
+		cpObj := &controlplanev1.KThreesControlPlane{}
+		kcpObjKey := client.ObjectKey{
+			Namespace: input.Cluster.Namespace,
+			Name: controlPlaneRef.Name,
+		}
+		err = mgmtClient.Get(ctx, kcpObjKey, cpObj)
+		g.Expect(err).ToNot(HaveOccurred())
+
+		// Verify that the fields from Cluster topology are set on the control plane.
+		assertControlPlaneTopologyFields(g, cpObj, controlPlaneTopology)
+
+		// Verify that the control plane machines have the required fields.
+		cluster := input.Cluster
+		controlPlaneMachineList := &clusterv1.MachineList{}
+		g.Expect(mgmtClient.List(ctx, controlPlaneMachineList, client.InNamespace(cluster.Namespace), client.MatchingLabels{
+			clusterv1.MachineControlPlaneLabel: "",
+			clusterv1.ClusterNameLabel: cluster.Name,
+		})).To(Succeed())
+		for _, m := range controlPlaneMachineList.Items {
+			metadata := m.ObjectMeta
+			for k, v := range controlPlaneTopology.Metadata.Labels {
+				g.Expect(metadata.Labels).To(HaveKeyWithValue(k, v))
+			}
+			for k, v := range controlPlaneTopology.Metadata.Annotations {
+				g.Expect(metadata.Annotations).To(HaveKeyWithValue(k, v))
+			}
+
+			if controlPlaneTopology.NodeDrainTimeout != nil {
+				nodeDrainTimeout := m.Spec.NodeDrainTimeout
+				g.Expect(nodeDrainTimeout).To(Equal(controlPlaneTopology.NodeDrainTimeout))
+			}
+
+			if controlPlaneTopology.NodeDeletionTimeout != nil {
+				nodeDeletionTimeout := m.Spec.NodeDeletionTimeout
+				g.Expect(nodeDeletionTimeout).To(Equal(controlPlaneTopology.NodeDeletionTimeout))
+			}
+
+			if controlPlaneTopology.NodeVolumeDetachTimeout != nil {
+				nodeVolumeDetachTimeout := m.Spec.NodeVolumeDetachTimeout
+				g.Expect(nodeVolumeDetachTimeout).To(Equal(controlPlaneTopology.NodeVolumeDetachTimeout))
+			}
+		}
+	}, input.WaitForControlPlane...).Should(Succeed())
+}
+
+// assertControlPlaneTopologyFields asserts that all fields set in the ControlPlaneTopology have been set on the ControlPlane.
+// Note: We intentionally focus on the fields set in the ControlPlaneTopology and ignore the ones set through ClusterClass or
+// ControlPlane template as we want to validate that the fields of the ControlPlaneTopology have been propagated correctly.
+func assertControlPlaneTopologyFields(g Gomega, controlPlane *controlplanev1.KThreesControlPlane, controlPlaneTopology clusterv1.ControlPlaneTopology) {
+	metadata := controlPlane.ObjectMeta
+	for k, v := range controlPlaneTopology.Metadata.Labels {
+		g.Expect(metadata.Labels).To(HaveKeyWithValue(k, v))
+	}
+	for k, v := range controlPlaneTopology.Metadata.Annotations {
+		g.Expect(metadata.Annotations).To(HaveKeyWithValue(k, v))
+	}
+
+	if controlPlaneTopology.NodeDrainTimeout != nil {
+		nodeDrainTimeout := controlPlane.Spec.MachineTemplate.NodeDrainTimeout
+		g.Expect(nodeDrainTimeout).To(Equal(controlPlaneTopology.NodeDrainTimeout))
+	}
+
+	if controlPlaneTopology.NodeDeletionTimeout != nil {
+		nodeDeletionTimeout := controlPlane.Spec.MachineTemplate.NodeDeletionTimeout
+		g.Expect(nodeDeletionTimeout).To(Equal(controlPlaneTopology.NodeDeletionTimeout))
+	}
+
+	if controlPlaneTopology.NodeVolumeDetachTimeout != nil {
+		nodeVolumeDetachTimeout := controlPlane.Spec.MachineTemplate.NodeVolumeDetachTimeout
+		g.Expect(nodeVolumeDetachTimeout).To(Equal(controlPlaneTopology.NodeVolumeDetachTimeout))
+	}
+}
+
+// getMachinesByCluster gets the Machines of a Cluster and returns them as a Set of Machine names.
+// Note: This excludes MachinePool Machines as they are not replaced by rollout yet.
+func getMachinesByCluster(ctx context.Context, client client.Client, cluster *clusterv1.Cluster) sets.Set[string] {
+	machines := sets.Set[string]{}
+	machinesByCluster := framework.GetMachinesByCluster(ctx, framework.GetMachinesByClusterInput{
+		Lister: client,
+		ClusterName: cluster.Name,
+		Namespace: cluster.Namespace,
+	})
+	for i := range machinesByCluster {
+		m := machinesByCluster[i]
+		if !labels.IsMachinePoolOwned(&m) {
+			machines.Insert(m.Name)
+		}
+	}
+	return machines
+}
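For context on the `r.syncMachines` call added to the reconcile loop above: its body is not part of this diff, and the e2e test only observes its effect (in-place mutable fields reach the Machines without any Machine being replaced). The sketch below is an illustration of that expected behavior, not the actual implementation; only the `syncMachines` name and the Machine/MachineTemplate timeout fields come from this diff, everything else (the helper name `syncMachinesSketch`, its signature, and the patching flow) is assumed.

```go
// Illustrative sketch only: one possible shape for propagating in-place
// mutable fields from a KThreesControlPlane to its owned Machines by
// patching them, so no Machine is recreated (no rollout).
package controllers

import (
	"context"

	"github.com/pkg/errors"
	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/util/patch"
	"sigs.k8s.io/controller-runtime/pkg/client"

	controlplanev1 "github.com/k3s-io/cluster-api-k3s/controlplane/api/v1beta2"
)

// syncMachinesSketch is a hypothetical helper, not the controller's real syncMachines.
func syncMachinesSketch(ctx context.Context, c client.Client, kcp *controlplanev1.KThreesControlPlane, machines []*clusterv1.Machine) error {
	for _, m := range machines {
		helper, err := patch.NewHelper(m, c)
		if err != nil {
			return errors.Wrap(err, "failed to create patch helper")
		}
		// Node timeouts are in-place mutable: copying them onto the existing
		// Machine and patching it must not trigger a replacement. Labels and
		// annotations from the machine template would be handled the same way.
		m.Spec.NodeDrainTimeout = kcp.Spec.MachineTemplate.NodeDrainTimeout
		m.Spec.NodeDeletionTimeout = kcp.Spec.MachineTemplate.NodeDeletionTimeout
		m.Spec.NodeVolumeDetachTimeout = kcp.Spec.MachineTemplate.NodeVolumeDetachTimeout
		if err := helper.Patch(ctx, m); err != nil {
			return errors.Wrapf(err, "failed to patch Machine %s", m.Name)
		}
	}
	return nil
}
```

This is also the behavior the new test pins down: `getMachinesByCluster` snapshots the Machine names before the topology change, and `Consistently` asserts the set is unchanged afterwards, so any accidental rollout shows up as a test failure.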