From 9ac14aefdd20455a5a4c4396d7e5b2692cf0d6b8 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 17 Jul 2024 11:48:36 +0200 Subject: [PATCH 01/32] rollout tests --- templates/aws/ccm.yaml | 168 ++++++++++++++++++ test/e2e/cluster_upgrade.go | 4 +- test/e2e/cluster_upgrade_test.go | 11 +- test/e2e/config/ck8s-docker.yaml | 7 +- .../cluster-template-upgrades.yaml | 136 ++++++++++++++ test/e2e/helpers.go | 92 +++++++++- 6 files changed, 405 insertions(+), 13 deletions(-) create mode 100644 templates/aws/ccm.yaml create mode 100644 test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml diff --git a/templates/aws/ccm.yaml b/templates/aws/ccm.yaml new file mode 100644 index 00000000..bca6a22f --- /dev/null +++ b/templates/aws/ccm.yaml @@ -0,0 +1,168 @@ +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: aws-cloud-controller-manager + namespace: kube-system + labels: + k8s-app: aws-cloud-controller-manager +spec: + selector: + matchLabels: + k8s-app: aws-cloud-controller-manager + updateStrategy: + type: RollingUpdate + template: + metadata: + labels: + k8s-app: aws-cloud-controller-manager + spec: + tolerations: + - key: node.cloudprovider.kubernetes.io/uninitialized + value: "true" + effect: NoSchedule + - key: node-role.kubernetes.io/master + effect: NoSchedule + - effect: NoSchedule + key: node-role.kubernetes.io/control-plane + affinity: + nodeAffinity: + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: node-role.kubernetes.io/control-plane + operator: Exists + - matchExpressions: + - key: node-role.kubernetes.io/master + operator: Exists + serviceAccountName: cloud-controller-manager + containers: + - name: aws-cloud-controller-manager + image: gcr.io/k8s-staging-provider-aws/cloud-controller-manager:v1.30.2 + args: + - --v=2 + resources: + requests: + cpu: 200m + hostNetwork: true +--- +apiVersion: v1 +kind: ServiceAccount +metadata: + name: cloud-controller-manager + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: cloud-controller-manager:apiserver-authentication-reader + namespace: kube-system +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: extension-apiserver-authentication-reader +subjects: + - apiGroup: "" + kind: ServiceAccount + name: cloud-controller-manager + namespace: kube-system +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: system:cloud-controller-manager +rules: + - apiGroups: + - "" + resources: + - events + verbs: + - create + - patch + - update + - apiGroups: + - "" + resources: + - nodes + verbs: + - '*' + - apiGroups: + - "" + resources: + - nodes/status + verbs: + - patch + - apiGroups: + - "" + resources: + - services + verbs: + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - services/status + verbs: + - list + - patch + - update + - watch + - apiGroups: + - "" + resources: + - serviceaccounts + verbs: + - create + - apiGroups: + - "" + resources: + - persistentvolumes + verbs: + - get + - list + - update + - watch + - apiGroups: + - "" + resources: + - configmaps + verbs: + - list + - watch + - apiGroups: + - "" + resources: + - endpoints + verbs: + - create + - get + - list + - watch + - update + - apiGroups: + - coordination.k8s.io + resources: + - leases + verbs: + - create + - get + - list + - watch + - update +--- +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: system:cloud-controller-manager +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:cloud-controller-manager +subjects: + - apiGroup: "" + kind: ServiceAccount + name: cloud-controller-manager + namespace: kube-system diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index 0a9ce02b..ac3d4061 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -139,7 +139,7 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp ClusterctlConfigPath: input.ClusterctlConfigPath, KubeconfigPath: input.BootstrapClusterProxy.GetKubeconfigPath(), InfrastructureProvider: *input.InfrastructureProvider, - Flavor: ptr.Deref(input.Flavor, ""), + Flavor: ptr.Deref(input.Flavor, "upgrades"), Namespace: namespace.Name, ClusterName: clusterName, KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersion), @@ -157,6 +157,7 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp ClusterProxy: input.BootstrapClusterProxy, Cluster: result.Cluster, ControlPlane: result.ControlPlane, + UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-1.30", clusterName)), KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), }) @@ -167,6 +168,7 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp Cluster: result.Cluster, UpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), MachineDeployments: result.MachineDeployments, + UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-md-1.30-0", clusterName)), WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), }) diff --git a/test/e2e/cluster_upgrade_test.go b/test/e2e/cluster_upgrade_test.go index a058b65c..1d775d4a 100644 --- a/test/e2e/cluster_upgrade_test.go +++ b/test/e2e/cluster_upgrade_test.go @@ -25,12 +25,7 @@ import ( ) var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { - BeforeEach(func() { - // TODO(bschimke): Remove once we find a way to run e2e tests with other infrastructure providers that support snap. - Skip("Skipping the upgrade tests as snap does not work on CAPD.") - }) - - Context("Upgrading a cluster with 1 control plane", func() { + /* Context("Upgrading a cluster with 1 control plane", func() { ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput { return ClusterUpgradeSpecInput{ E2EConfig: e2eConfig, @@ -43,7 +38,7 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { WorkerMachineCount: ptr.To[int64](2), } }) - }) + }) */ Context("Upgrading a cluster with HA control plane", func() { ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput { @@ -54,7 +49,7 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { ArtifactFolder: artifactFolder, SkipCleanup: skipCleanup, InfrastructureProvider: ptr.To("docker"), - ControlPlaneMachineCount: ptr.To[int64](3), + ControlPlaneMachineCount: ptr.To[int64](4), WorkerMachineCount: ptr.To[int64](1), } }) diff --git a/test/e2e/config/ck8s-docker.yaml b/test/e2e/config/ck8s-docker.yaml index a622c8c1..38ccb7b4 100644 --- a/test/e2e/config/ck8s-docker.yaml +++ b/test/e2e/config/ck8s-docker.yaml @@ -51,9 +51,10 @@ providers: - old: "imagePullPolicy: Always" new: "imagePullPolicy: IfNotPresent" files: - - sourcePath: "../data/infrastructure-docker/cluster-template.yaml" - sourcePath: "../data/infrastructure-docker/cluster-template-kcp-remediation.yaml" - sourcePath: "../data/infrastructure-docker/cluster-template-md-remediation.yaml" + - sourcePath: "../data/infrastructure-docker/cluster-template-upgrades.yaml" + - sourcePath: "../data/infrastructure-docker/cluster-template.yaml" - name: ck8s type: BootstrapProvider versions: @@ -84,8 +85,8 @@ providers: variables: KUBERNETES_VERSION_MANAGEMENT: "v1.28.0" - KUBERNETES_VERSION: "v1.30.0" - KUBERNETES_VERSION_UPGRADE_TO: "v1.30.1" + KUBERNETES_VERSION: "v1.29.6" + KUBERNETES_VERSION_UPGRADE_TO: "v1.30.2" IP_FAMILY: "IPv4" KIND_IMAGE_VERSION: "v1.28.0" diff --git a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml new file mode 100644 index 00000000..9ce48302 --- /dev/null +++ b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml @@ -0,0 +1,136 @@ +# TODO: copied and modified from https://github.com/k3s-io/cluster-api-k3s/pull/93/files#diff-c4a336ec56832a2ff7aed26c94d0d67ae3a0e6139d30701cc53c0f0962fe8cca +# should be the same as samples/docker/quickstart.yaml in the future +# for testing the quickstart scenario +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: ${CLUSTER_NAME} + namespace: ${NAMESPACE} +spec: + clusterNetwork: + pods: + cidrBlocks: + - 10.1.0.0/16 + services: + cidrBlocks: + - 10.152.0.0/16 + serviceDomain: cluster.local + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: CK8sControlPlane + name: ${CLUSTER_NAME}-control-plane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerCluster + name: ${CLUSTER_NAME} +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerCluster +metadata: + name: ${CLUSTER_NAME} +spec: {} +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta2 +kind: CK8sControlPlane +metadata: + name: ${CLUSTER_NAME}-control-plane + namespace: ${NAMESPACE} +spec: + machineTemplate: + infrastructureTemplate: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachineTemplate + name: ${CLUSTER_NAME}-control-plane-1.29 + spec: + airGapped: true + controlPlane: + extraKubeAPIServerArgs: + --anonymous-auth: "true" + replicas: ${CONTROL_PLANE_MACHINE_COUNT} + version: ${KUBERNETES_VERSION} +# Initial template for the machine deployment +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: ${CLUSTER_NAME}-control-plane-1.29 + namespace: ${NAMESPACE} +spec: + template: + spec: + customImage: k8s-snap:dev-1.29 + +# After upgrade template for the machine deployment +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: ${CLUSTER_NAME}-control-plane-1.30 + namespace: ${NAMESPACE} +spec: + template: + spec: + customImage: k8s-snap:dev-1.30 +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + name: worker-md-0 + namespace: ${NAMESPACE} +spec: + clusterName: ${CLUSTER_NAME} + replicas: ${WORKER_MACHINE_COUNT} + selector: + matchLabels: + cluster.x-k8s.io/cluster-name: ${CLUSTER_NAME} + + # This label will be needed for upgrade test + # it will be used as a selector for only selecting + # machines belonging to this machine deployment + cluster.x-k8s.io/deployment-name: worker-md-0 + template: + metadata: + labels: + cluster.x-k8s.io/deployment-name: worker-md-0 + spec: + version: ${KUBERNETES_VERSION} + clusterName: ${CLUSTER_NAME} + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 + kind: CK8sConfigTemplate + name: ${CLUSTER_NAME}-md-0 + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachineTemplate + name: ${CLUSTER_NAME}-md-1.29-0 +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: ${CLUSTER_NAME}-md-1.29-0 + namespace: ${NAMESPACE} +spec: + template: + spec: + customImage: k8s-snap:dev-1.29 +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: ${CLUSTER_NAME}-md-1.30-0 + namespace: ${NAMESPACE} +spec: + template: + spec: + customImage: k8s-snap:dev-1.30 +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 +kind: CK8sConfigTemplate +metadata: + name: ${CLUSTER_NAME}-md-0 + namespace: ${NAMESPACE} +spec: + template: + spec: + airGapped: true diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 5c0cc56a..24c0b96a 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -37,6 +37,7 @@ import ( expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" + "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" @@ -558,6 +559,7 @@ type UpgradeControlPlaneAndWaitForUpgradeInput struct { Cluster *clusterv1.Cluster ControlPlane *controlplanev1.CK8sControlPlane KubernetesUpgradeVersion string + UpgradeMachineTemplate *string WaitForMachinesToBeUpgraded []interface{} } @@ -577,12 +579,23 @@ func UpgradeControlPlaneAndWaitForUpgrade(ctx context.Context, input UpgradeCont input.ControlPlane.Spec.Version = input.KubernetesUpgradeVersion + // Create a new ObjectReference for the infrastructure provider + newInfrastructureRef := corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", // Adjust based on your infrastructure API version + Kind: "DockerMachineTemplate", + Name: fmt.Sprintf("%s-control-plane-1.30", input.Cluster.Name), + Namespace: input.ControlPlane.Spec.MachineTemplate.InfrastructureRef.Namespace, + } + + // Update the infrastructureRef + input.ControlPlane.Spec.MachineTemplate.InfrastructureRef = newInfrastructureRef + Eventually(func() error { return patchHelper.Patch(ctx, input.ControlPlane) }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch the new kubernetes version to KCP %s", klog.KObj(input.ControlPlane)) Byf("Waiting for control-plane machines to have the upgraded kubernetes version") - framework.WaitForControlPlaneMachinesToBeUpgraded(ctx, framework.WaitForControlPlaneMachinesToBeUpgradedInput{ + WaitForControlPlaneMachinesToBeUpgraded(ctx, framework.WaitForControlPlaneMachinesToBeUpgradedInput{ Lister: mgmtClient, Cluster: input.Cluster, MachineCount: int(*input.ControlPlane.Spec.Replicas), @@ -590,6 +603,83 @@ func UpgradeControlPlaneAndWaitForUpgrade(ctx context.Context, input UpgradeCont }, input.WaitForMachinesToBeUpgraded...) } +// WaitForControlPlaneMachinesToBeUpgraded waits until all machines are upgraded to the correct Kubernetes version. +func WaitForControlPlaneMachinesToBeUpgraded(ctx context.Context, input framework.WaitForControlPlaneMachinesToBeUpgradedInput, intervals ...interface{}) { + Expect(ctx).NotTo(BeNil(), "ctx is required for WaitForControlPlaneMachinesToBeUpgraded") + Expect(input.Lister).ToNot(BeNil(), "Invalid argument. input.Lister can't be nil when calling WaitForControlPlaneMachinesToBeUpgraded") + Expect(input.KubernetesUpgradeVersion).ToNot(BeEmpty(), "Invalid argument. input.KubernetesUpgradeVersion can't be empty when calling WaitForControlPlaneMachinesToBeUpgraded") + Expect(input.MachineCount).To(BeNumerically(">", 0), "Invalid argument. input.MachineCount can't be smaller than 1 when calling WaitForControlPlaneMachinesToBeUpgraded") + + Byf("Ensuring all control-plane machines have upgraded kubernetes version %s", input.KubernetesUpgradeVersion) + + Eventually(func() (int, error) { + machines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{ + Lister: input.Lister, + ClusterName: input.Cluster.Name, + Namespace: input.Cluster.Namespace, + }) + + upgraded := 0 + Byf("Checking %d Machines", len(machines)) + for _, machine := range machines { + m := machine + Byf("Checking Machine %s/%s", m.Namespace, m.Name) + Byf("m.Spec.Version %s == %s input.KubernetesUpgradeVersion = %v", *m.Spec.Version, input.KubernetesUpgradeVersion, *m.Spec.Version == input.KubernetesUpgradeVersion) + Byf("conditions.IsTrue(&m, clusterv1.MachineNodeHealthyCondition) %v", conditions.IsTrue(&m, clusterv1.MachineNodeHealthyCondition)) + if *m.Spec.Version == input.KubernetesUpgradeVersion && conditions.IsTrue(&m, clusterv1.MachineNodeHealthyCondition) { + upgraded++ + } + } + if len(machines) > upgraded { + Byf("old Machines remain") + return 0, errors.New("old Machines remain") + } + return upgraded, nil + }, intervals...).Should(Equal(input.MachineCount), "Timed out waiting for all control-plane machines in Cluster %s to be upgraded to kubernetes version %s", klog.KObj(input.Cluster), input.KubernetesUpgradeVersion) +} + +// UpgradeMachineDeploymentsAndWait upgrades a machine deployment and waits for its machines to be upgraded. +func UpgradeMachineDeploymentsAndWait(ctx context.Context, input framework.UpgradeMachineDeploymentsAndWaitInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for UpgradeMachineDeploymentsAndWait") + Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling UpgradeMachineDeploymentsAndWait") + Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling UpgradeMachineDeploymentsAndWait") + Expect(input.UpgradeVersion).ToNot(BeNil(), "Invalid argument. input.UpgradeVersion can't be nil when calling UpgradeMachineDeploymentsAndWait") + Expect(input.MachineDeployments).ToNot(BeEmpty(), "Invalid argument. input.MachineDeployments can't be empty when calling UpgradeMachineDeploymentsAndWait") + + mgmtClient := input.ClusterProxy.GetClient() + + for _, deployment := range input.MachineDeployments { + patchHelper, err := patch.NewHelper(deployment, mgmtClient) + Expect(err).ToNot(HaveOccurred()) + + oldVersion := deployment.Spec.Template.Spec.Version + deployment.Spec.Template.Spec.Version = &input.UpgradeVersion + // Create a new ObjectReference for the infrastructure provider + newInfrastructureRef := corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", // Adjust based on your infrastructure API version + Kind: "DockerMachineTemplate", + Name: fmt.Sprintf("%s-md-1.30-0", input.Cluster.Name), + Namespace: deployment.Spec.Template.Spec.InfrastructureRef.Namespace, + } + + // Update the infrastructureRef + deployment.Spec.Template.Spec.InfrastructureRef = newInfrastructureRef + Eventually(func() error { + return patchHelper.Patch(ctx, deployment) + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch Kubernetes version on MachineDeployment %s", klog.KObj(deployment)) + + Byf("Waiting for Kubernetes versions of machines in MachineDeployment %s to be upgraded from %s to %s", + klog.KObj(deployment), *oldVersion, input.UpgradeVersion) + framework.WaitForMachineDeploymentMachinesToBeUpgraded(ctx, framework.WaitForMachineDeploymentMachinesToBeUpgradedInput{ + Lister: mgmtClient, + Cluster: input.Cluster, + MachineCount: int(*deployment.Spec.Replicas), + KubernetesUpgradeVersion: input.UpgradeVersion, + MachineDeployment: *deployment, + }, input.WaitForMachinesToBeUpgraded...) + } +} + type WaitForNodesReadyInput struct { Lister framework.Lister KubernetesVersion string From bb0663d33f573e6201e0b236f0f78035a21f92a5 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 17 Jul 2024 13:15:29 +0200 Subject: [PATCH 02/32] reduce to 3 --- test/e2e/cluster_upgrade_test.go | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/test/e2e/cluster_upgrade_test.go b/test/e2e/cluster_upgrade_test.go index 1d775d4a..5829f75d 100644 --- a/test/e2e/cluster_upgrade_test.go +++ b/test/e2e/cluster_upgrade_test.go @@ -25,7 +25,11 @@ import ( ) var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { - /* Context("Upgrading a cluster with 1 control plane", func() { + Context("Upgrading a cluster with 1 control plane", func() { + It("Non-HA upgrades require in-place upgrades which are not supported yet.", + // TODO(ben): Enable this test once we have support for in-place upgrades. + func() { Skip("") }, + ) ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput { return ClusterUpgradeSpecInput{ E2EConfig: e2eConfig, @@ -38,7 +42,7 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { WorkerMachineCount: ptr.To[int64](2), } }) - }) */ + }) Context("Upgrading a cluster with HA control plane", func() { ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput { @@ -49,7 +53,7 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { ArtifactFolder: artifactFolder, SkipCleanup: skipCleanup, InfrastructureProvider: ptr.To("docker"), - ControlPlaneMachineCount: ptr.To[int64](4), + ControlPlaneMachineCount: ptr.To[int64](3), WorkerMachineCount: ptr.To[int64](1), } }) From 9b03bd6765bf9aa5bf55010bdbdba7836d978931 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 24 Jul 2024 12:09:50 +0200 Subject: [PATCH 03/32] Update docker build process and CI --- .github/workflows/e2e.yaml | 16 ++++++++++++---- pkg/ck8s/workload_cluster.go | 2 +- templates/docker/Dockerfile | 12 ++++++++++++ .../cluster-template-kcp-remediation.yaml | 4 ++-- .../cluster-template-md-remediation.yaml | 4 ++-- .../cluster-template-upgrades.yaml | 4 ++-- .../infrastructure-docker/cluster-template.yaml | 5 ++--- 7 files changed, 33 insertions(+), 14 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index d054c4e1..6bcd6bf8 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -25,14 +25,17 @@ jobs: - name: Build k8s-snap image run: | cd templates/docker - sudo docker build . -t k8s-snap:dev + # TODO(ben): required for the rollout upgrade test. Remove KUBERNETES_VERSION_OVERRIDE once we have a 1.31 release. + sudo docker build . -t k8s-snap:dev-1.29 --build-args BRANCH=main --build-args KUBERNETES_VERSION_OVERRIDE=v1.29.6 + sudo docker build . -t k8s-snap:dev-latest --build-args BRANCH=main - name: Save provider image run: | sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev sudo chmod 775 provider-images.tar - name: Save k8s-snap image run: | - sudo docker save -o k8s-snap-image.tar k8s-snap:dev + sudo docker save -o k8s-snap-image-1.29.tar k8s-snap:dev-1.29 + sudo docker save -o k8s-snap-image-1.30.tar k8s-snap:dev-latest sudo chmod 775 k8s-snap-image.tar - name: Upload artifacts uses: actions/upload-artifact@v4 @@ -71,8 +74,13 @@ jobs: path: . - name: Load provider image run: sudo docker load -i provider-images.tar - - name: Load k8s-snap image - run: sudo docker load -i k8s-snap-image.tar + - name: Load k8s-snap 1.29 image + if: matrix.ginkgo_focus == "Workload cluster upgrade" + run: | + sudo docker load -i k8s-snap-image-1.29.tar + - name: Load k8s-snap 1.30 image + run: | + sudo docker load -i k8s-snap-image-1.30.tar - name: Create docker network run: | sudo docker network create kind --driver=bridge -o com.docker.network.bridge.enable_ip_masquerade=true diff --git a/pkg/ck8s/workload_cluster.go b/pkg/ck8s/workload_cluster.go index 93668cc2..ebce977e 100644 --- a/pkg/ck8s/workload_cluster.go +++ b/pkg/ck8s/workload_cluster.go @@ -290,7 +290,7 @@ func (w *Workload) doK8sdRequest(ctx context.Context, method, endpoint string, r return fmt.Errorf("k8sd request failed: %s", responseBody.Error) } if responseBody.Metadata == nil || response == nil { - // Nothing to decode + // No response expected. return nil } if err := json.Unmarshal(responseBody.Metadata, response); err != nil { diff --git a/templates/docker/Dockerfile b/templates/docker/Dockerfile index 1a4c6e38..afb237ff 100644 --- a/templates/docker/Dockerfile +++ b/templates/docker/Dockerfile @@ -36,6 +36,12 @@ FROM $BUILD_BASE AS builder ARG REPO=https://github.com/canonical/k8s-snap ARG BRANCH=main +## Override the Kubernetes version from the branch. +## Note(ben): We only have 1.30-release branches for k8s-snap right now. +## For the rollout upgrades, we need to have a different minor version. +## This is a temporary solution until we have a 1.31 release branch. +ARG KUBERNETES_VERSION_OVERRIDE="" + ## NOTE(neoaggelos): install dependencies needed to build the tools ## !!!IMPORTANT!!! Keep up to date with "snapcraft.yaml:parts.build-deps.build-packages" RUN apt-get update \ @@ -86,6 +92,12 @@ RUN /src/k8s-snap/build-scripts/build-component.sh helm ## kubernetes build FROM builder AS build-kubernetes +ENV KUBERNETES_VERSION_OVERRIDE=${KUBERNETES_VERSION_OVERRIDE} +RUN if [ -n "$KUBERNETES_VERSION_OVERRIDE" ]; then \ + echo "Overwriting Kubernetes version with $KUBERNETES_VERSION_OVERRIDE"; \ + echo "$KUBERNETES_VERSION_OVERRIDE" > /src/k8s-snap/build-scripts/components/kubernetes/version; \ + cat /src/k8s-snap/build-scripts/components/kubernetes/version; \ + fi RUN /src/k8s-snap/build-scripts/build-component.sh kubernetes ## runc build diff --git a/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml b/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml index 8c8cb43b..36043cc0 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml @@ -84,7 +84,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev + customImage: k8s-snap:dev-latest --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -127,7 +127,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev + customImage: k8s-snap:dev-latest --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml b/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml index 7b2bfe64..d4d8ac0a 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml @@ -57,7 +57,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev + customImage: k8s-snap:dev-latest --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -101,7 +101,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev + customImage: k8s-snap:dev-latest --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml index 9ce48302..9f0c2fb9 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml @@ -70,7 +70,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-latest --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -123,7 +123,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-latest --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template.yaml b/test/e2e/data/infrastructure-docker/cluster-template.yaml index b33ac49d..c2df62ad 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template.yaml @@ -57,8 +57,7 @@ metadata: spec: template: spec: - # TODO: make this customable - customImage: k8s-snap:dev + customImage: k8s-snap:dev-latest --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -101,7 +100,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev + customImage: k8s-snap:dev-latest --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate From ef2c035b79cac956425898c47f66cc367fe1fb8e Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 24 Jul 2024 12:18:05 +0200 Subject: [PATCH 04/32] Pin upgrade version to 1.30 --- .github/workflows/e2e.yaml | 4 ++-- .../cluster-template-kcp-remediation.yaml | 4 ++-- .../cluster-template-md-remediation.yaml | 4 ++-- .../data/infrastructure-docker/cluster-template-upgrades.yaml | 4 ++-- test/e2e/data/infrastructure-docker/cluster-template.yaml | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 6bcd6bf8..04a48f3d 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -27,7 +27,7 @@ jobs: cd templates/docker # TODO(ben): required for the rollout upgrade test. Remove KUBERNETES_VERSION_OVERRIDE once we have a 1.31 release. sudo docker build . -t k8s-snap:dev-1.29 --build-args BRANCH=main --build-args KUBERNETES_VERSION_OVERRIDE=v1.29.6 - sudo docker build . -t k8s-snap:dev-latest --build-args BRANCH=main + sudo docker build . -t k8s-snap:dev-1.30 --build-args BRANCH=release-1.30 - name: Save provider image run: | sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev @@ -35,7 +35,7 @@ jobs: - name: Save k8s-snap image run: | sudo docker save -o k8s-snap-image-1.29.tar k8s-snap:dev-1.29 - sudo docker save -o k8s-snap-image-1.30.tar k8s-snap:dev-latest + sudo docker save -o k8s-snap-image-1.30.tar k8s-snap:dev-1.30 sudo chmod 775 k8s-snap-image.tar - name: Upload artifacts uses: actions/upload-artifact@v4 diff --git a/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml b/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml index 36043cc0..6b0d0f1f 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml @@ -84,7 +84,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-latest + customImage: k8s-snap:dev-1.30 --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -127,7 +127,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-latest + customImage: k8s-snap:dev-1.30 --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml b/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml index d4d8ac0a..ce06f5ad 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml @@ -57,7 +57,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-latest + customImage: k8s-snap:dev-1.30 --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -101,7 +101,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-latest + customImage: k8s-snap:dev-1.30 --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml index 9f0c2fb9..9ce48302 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml @@ -70,7 +70,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-latest + customImage: k8s-snap:dev-1.30 --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -123,7 +123,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-latest + customImage: k8s-snap:dev-1.30 --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template.yaml b/test/e2e/data/infrastructure-docker/cluster-template.yaml index c2df62ad..59ed9950 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template.yaml @@ -57,7 +57,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-latest + customImage: k8s-snap:dev-1.30 --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -100,7 +100,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-latest + customImage: k8s-snap:dev-1.30 --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate From c3dcbdfc0a4755116d1f56ccc486f69482296eee Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 24 Jul 2024 12:29:58 +0200 Subject: [PATCH 05/32] fix e2e workflow --- .github/workflows/e2e.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 04a48f3d..a2486cce 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -75,7 +75,7 @@ jobs: - name: Load provider image run: sudo docker load -i provider-images.tar - name: Load k8s-snap 1.29 image - if: matrix.ginkgo_focus == "Workload cluster upgrade" + if: matrix.ginkgo_focus == 'Workload cluster upgrade' run: | sudo docker load -i k8s-snap-image-1.29.tar - name: Load k8s-snap 1.30 image From 53b36e09ba9430eb45344f4a4c6f19701d22808d Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 24 Jul 2024 13:47:46 +0200 Subject: [PATCH 06/32] build args flag --- .github/workflows/e2e.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index a2486cce..4f95492a 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -26,8 +26,8 @@ jobs: run: | cd templates/docker # TODO(ben): required for the rollout upgrade test. Remove KUBERNETES_VERSION_OVERRIDE once we have a 1.31 release. - sudo docker build . -t k8s-snap:dev-1.29 --build-args BRANCH=main --build-args KUBERNETES_VERSION_OVERRIDE=v1.29.6 - sudo docker build . -t k8s-snap:dev-1.30 --build-args BRANCH=release-1.30 + sudo docker build . -t k8s-snap:dev-1.29 --build-arg BRANCH=main --build-arg KUBERNETES_VERSION_OVERRIDE=v1.29.6 + sudo docker build . -t k8s-snap:dev-1.30 --build-arg BRANCH=release-1.30 - name: Save provider image run: | sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev From 8fcfba238e832c9b4922aef24e69db5a8907489f Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 24 Jul 2024 15:20:57 +0200 Subject: [PATCH 07/32] fix naming --- .github/workflows/e2e.yaml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 4f95492a..6d23f84f 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -36,14 +36,16 @@ jobs: run: | sudo docker save -o k8s-snap-image-1.29.tar k8s-snap:dev-1.29 sudo docker save -o k8s-snap-image-1.30.tar k8s-snap:dev-1.30 - sudo chmod 775 k8s-snap-image.tar + sudo chmod 775 k8s-snap-image-1.29.tar + sudo chmod 775 k8s-snap-image-1.30.tar - name: Upload artifacts uses: actions/upload-artifact@v4 with: name: e2e-images path: | provider-images.tar - k8s-snap-image.tar + k8s-snap-image-1.29.tar + k8s-snap-image-1.30.tar run-e2e-tests: name: Run E2E Tests From 1ff682d710750eb781340b6dc715dfb416e49168 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 24 Jul 2024 17:26:27 +0200 Subject: [PATCH 08/32] login to Github --- .github/workflows/e2e.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 6d23f84f..f31fd935 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -11,6 +11,14 @@ jobs: name: Build & Run E2E Images runs-on: [self-hosted, linux, X64, jammy, large] steps: + - + name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + # We run into rate limiting issues if we don't authenticate + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - name: Check out repo uses: actions/checkout@v4 - name: Install requirements @@ -60,6 +68,14 @@ jobs: - "Workload cluster scaling" - "Workload cluster upgrade" steps: + - + name: Login to GitHub Container Registry + uses: docker/login-action@v3 + with: + # We run into rate limiting issues if we don't authenticate + registry: ghcr.io + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} - name: Check out repo uses: actions/checkout@v4 - name: Install requirements From fb7a237651ddd4f42b03def7b21e20bbdeb50e59 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 25 Jul 2024 11:19:33 +0200 Subject: [PATCH 09/32] add tmate debug --- .github/workflows/e2e.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index f31fd935..4a50da65 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -110,3 +110,6 @@ jobs: - name: Run e2e tests run: | sudo GINKGO_FOCUS="${{ matrix.ginkgo_focus }}" SKIP_RESOURCE_CLEANUP=true make test-e2e + - name: Setup tmate session + if: ${{ failure() }} + uses: canonical/action-tmate@main From 7b5101ddd0df164f57c40b9cc95b7288c750dd4e Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 25 Jul 2024 12:12:16 +0200 Subject: [PATCH 10/32] do not fail fast --- .github/workflows/e2e.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 4a50da65..791ad9b4 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -67,6 +67,7 @@ jobs: - "Workload cluster creation" - "Workload cluster scaling" - "Workload cluster upgrade" + fail-fast: false steps: - name: Login to GitHub Container Registry From f7e7e41688a38f2f3a62a1587b175d3acd3c60e0 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 25 Jul 2024 13:51:05 +0200 Subject: [PATCH 11/32] use main branch --- .github/workflows/e2e.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 791ad9b4..29fa48fe 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -35,7 +35,8 @@ jobs: cd templates/docker # TODO(ben): required for the rollout upgrade test. Remove KUBERNETES_VERSION_OVERRIDE once we have a 1.31 release. sudo docker build . -t k8s-snap:dev-1.29 --build-arg BRANCH=main --build-arg KUBERNETES_VERSION_OVERRIDE=v1.29.6 - sudo docker build . -t k8s-snap:dev-1.30 --build-arg BRANCH=release-1.30 + # TODO(ben): release-1.30 is missing extra-node-args hence we need to use main branch for now. + sudo docker build . -t k8s-snap:dev-1.30 --build-arg BRANCH=main - name: Save provider image run: | sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev From 481b788480355927ca980d3f8910cb1d0b2a2e7b Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 31 Jul 2024 12:16:38 +0200 Subject: [PATCH 12/32] use bigger runner --- .github/workflows/e2e.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 29fa48fe..3ec0a279 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -58,7 +58,7 @@ jobs: run-e2e-tests: name: Run E2E Tests - runs-on: [self-hosted, linux, X64, jammy, large] + runs-on: [self-hosted, linux, X64, jammy, xlarge] needs: build-e2e-images strategy: matrix: From b534742c8a2fa379ef7bb1eb2ad74f41667b4f60 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 1 Aug 2024 10:03:28 +0200 Subject: [PATCH 13/32] remove logging function --- test/e2e/helpers.go | 40 ++-------------------------------------- 1 file changed, 2 insertions(+), 38 deletions(-) diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 24c0b96a..bae7c2d7 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -37,7 +37,6 @@ import ( expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" - "sigs.k8s.io/cluster-api/util/conditions" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" @@ -595,7 +594,7 @@ func UpgradeControlPlaneAndWaitForUpgrade(ctx context.Context, input UpgradeCont }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch the new kubernetes version to KCP %s", klog.KObj(input.ControlPlane)) Byf("Waiting for control-plane machines to have the upgraded kubernetes version") - WaitForControlPlaneMachinesToBeUpgraded(ctx, framework.WaitForControlPlaneMachinesToBeUpgradedInput{ + framework.WaitForControlPlaneMachinesToBeUpgraded(ctx, framework.WaitForControlPlaneMachinesToBeUpgradedInput{ Lister: mgmtClient, Cluster: input.Cluster, MachineCount: int(*input.ControlPlane.Spec.Replicas), @@ -603,41 +602,6 @@ func UpgradeControlPlaneAndWaitForUpgrade(ctx context.Context, input UpgradeCont }, input.WaitForMachinesToBeUpgraded...) } -// WaitForControlPlaneMachinesToBeUpgraded waits until all machines are upgraded to the correct Kubernetes version. -func WaitForControlPlaneMachinesToBeUpgraded(ctx context.Context, input framework.WaitForControlPlaneMachinesToBeUpgradedInput, intervals ...interface{}) { - Expect(ctx).NotTo(BeNil(), "ctx is required for WaitForControlPlaneMachinesToBeUpgraded") - Expect(input.Lister).ToNot(BeNil(), "Invalid argument. input.Lister can't be nil when calling WaitForControlPlaneMachinesToBeUpgraded") - Expect(input.KubernetesUpgradeVersion).ToNot(BeEmpty(), "Invalid argument. input.KubernetesUpgradeVersion can't be empty when calling WaitForControlPlaneMachinesToBeUpgraded") - Expect(input.MachineCount).To(BeNumerically(">", 0), "Invalid argument. input.MachineCount can't be smaller than 1 when calling WaitForControlPlaneMachinesToBeUpgraded") - - Byf("Ensuring all control-plane machines have upgraded kubernetes version %s", input.KubernetesUpgradeVersion) - - Eventually(func() (int, error) { - machines := framework.GetControlPlaneMachinesByCluster(ctx, framework.GetControlPlaneMachinesByClusterInput{ - Lister: input.Lister, - ClusterName: input.Cluster.Name, - Namespace: input.Cluster.Namespace, - }) - - upgraded := 0 - Byf("Checking %d Machines", len(machines)) - for _, machine := range machines { - m := machine - Byf("Checking Machine %s/%s", m.Namespace, m.Name) - Byf("m.Spec.Version %s == %s input.KubernetesUpgradeVersion = %v", *m.Spec.Version, input.KubernetesUpgradeVersion, *m.Spec.Version == input.KubernetesUpgradeVersion) - Byf("conditions.IsTrue(&m, clusterv1.MachineNodeHealthyCondition) %v", conditions.IsTrue(&m, clusterv1.MachineNodeHealthyCondition)) - if *m.Spec.Version == input.KubernetesUpgradeVersion && conditions.IsTrue(&m, clusterv1.MachineNodeHealthyCondition) { - upgraded++ - } - } - if len(machines) > upgraded { - Byf("old Machines remain") - return 0, errors.New("old Machines remain") - } - return upgraded, nil - }, intervals...).Should(Equal(input.MachineCount), "Timed out waiting for all control-plane machines in Cluster %s to be upgraded to kubernetes version %s", klog.KObj(input.Cluster), input.KubernetesUpgradeVersion) -} - // UpgradeMachineDeploymentsAndWait upgrades a machine deployment and waits for its machines to be upgraded. func UpgradeMachineDeploymentsAndWait(ctx context.Context, input framework.UpgradeMachineDeploymentsAndWaitInput) { Expect(ctx).NotTo(BeNil(), "ctx is required for UpgradeMachineDeploymentsAndWait") @@ -656,7 +620,7 @@ func UpgradeMachineDeploymentsAndWait(ctx context.Context, input framework.Upgra deployment.Spec.Template.Spec.Version = &input.UpgradeVersion // Create a new ObjectReference for the infrastructure provider newInfrastructureRef := corev1.ObjectReference{ - APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", // Adjust based on your infrastructure API version + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", Kind: "DockerMachineTemplate", Name: fmt.Sprintf("%s-md-1.30-0", input.Cluster.Name), Namespace: deployment.Spec.Template.Spec.InfrastructureRef.Namespace, From f9b4a995ca95297f422ea41a4d56d4249f0910bc Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 1 Aug 2024 14:03:08 +0200 Subject: [PATCH 14/32] update DockerMachineTemplate instead of replacing it --- templates/aws/ccm.yaml | 168 ------------------ templates/docker/Dockerfile | 14 +- test/e2e/cluster_upgrade.go | 8 +- .../cluster-template-upgrades.yaml | 11 -- test/e2e/helpers.go | 51 +++--- 5 files changed, 39 insertions(+), 213 deletions(-) delete mode 100644 templates/aws/ccm.yaml diff --git a/templates/aws/ccm.yaml b/templates/aws/ccm.yaml deleted file mode 100644 index bca6a22f..00000000 --- a/templates/aws/ccm.yaml +++ /dev/null @@ -1,168 +0,0 @@ ---- -apiVersion: apps/v1 -kind: DaemonSet -metadata: - name: aws-cloud-controller-manager - namespace: kube-system - labels: - k8s-app: aws-cloud-controller-manager -spec: - selector: - matchLabels: - k8s-app: aws-cloud-controller-manager - updateStrategy: - type: RollingUpdate - template: - metadata: - labels: - k8s-app: aws-cloud-controller-manager - spec: - tolerations: - - key: node.cloudprovider.kubernetes.io/uninitialized - value: "true" - effect: NoSchedule - - key: node-role.kubernetes.io/master - effect: NoSchedule - - effect: NoSchedule - key: node-role.kubernetes.io/control-plane - affinity: - nodeAffinity: - requiredDuringSchedulingIgnoredDuringExecution: - nodeSelectorTerms: - - matchExpressions: - - key: node-role.kubernetes.io/control-plane - operator: Exists - - matchExpressions: - - key: node-role.kubernetes.io/master - operator: Exists - serviceAccountName: cloud-controller-manager - containers: - - name: aws-cloud-controller-manager - image: gcr.io/k8s-staging-provider-aws/cloud-controller-manager:v1.30.2 - args: - - --v=2 - resources: - requests: - cpu: 200m - hostNetwork: true ---- -apiVersion: v1 -kind: ServiceAccount -metadata: - name: cloud-controller-manager - namespace: kube-system ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: RoleBinding -metadata: - name: cloud-controller-manager:apiserver-authentication-reader - namespace: kube-system -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: Role - name: extension-apiserver-authentication-reader -subjects: - - apiGroup: "" - kind: ServiceAccount - name: cloud-controller-manager - namespace: kube-system ---- -apiVersion: rbac.authorization.k8s.io/v1 -kind: ClusterRole -metadata: - name: system:cloud-controller-manager -rules: - - apiGroups: - - "" - resources: - - events - verbs: - - create - - patch - - update - - apiGroups: - - "" - resources: - - nodes - verbs: - - '*' - - apiGroups: - - "" - resources: - - nodes/status - verbs: - - patch - - apiGroups: - - "" - resources: - - services - verbs: - - list - - patch - - update - - watch - - apiGroups: - - "" - resources: - - services/status - verbs: - - list - - patch - - update - - watch - - apiGroups: - - "" - resources: - - serviceaccounts - verbs: - - create - - apiGroups: - - "" - resources: - - persistentvolumes - verbs: - - get - - list - - update - - watch - - apiGroups: - - "" - resources: - - configmaps - verbs: - - list - - watch - - apiGroups: - - "" - resources: - - endpoints - verbs: - - create - - get - - list - - watch - - update - - apiGroups: - - coordination.k8s.io - resources: - - leases - verbs: - - create - - get - - list - - watch - - update ---- -kind: ClusterRoleBinding -apiVersion: rbac.authorization.k8s.io/v1 -metadata: - name: system:cloud-controller-manager -roleRef: - apiGroup: rbac.authorization.k8s.io - kind: ClusterRole - name: system:cloud-controller-manager -subjects: - - apiGroup: "" - kind: ServiceAccount - name: cloud-controller-manager - namespace: kube-system diff --git a/templates/docker/Dockerfile b/templates/docker/Dockerfile index afb237ff..f12ea402 100644 --- a/templates/docker/Dockerfile +++ b/templates/docker/Dockerfile @@ -36,11 +36,7 @@ FROM $BUILD_BASE AS builder ARG REPO=https://github.com/canonical/k8s-snap ARG BRANCH=main -## Override the Kubernetes version from the branch. -## Note(ben): We only have 1.30-release branches for k8s-snap right now. -## For the rollout upgrades, we need to have a different minor version. -## This is a temporary solution until we have a 1.31 release branch. -ARG KUBERNETES_VERSION_OVERRIDE="" +ARG KUBERNETES_VERSION="" ## NOTE(neoaggelos): install dependencies needed to build the tools ## !!!IMPORTANT!!! Keep up to date with "snapcraft.yaml:parts.build-deps.build-packages" @@ -92,12 +88,8 @@ RUN /src/k8s-snap/build-scripts/build-component.sh helm ## kubernetes build FROM builder AS build-kubernetes -ENV KUBERNETES_VERSION_OVERRIDE=${KUBERNETES_VERSION_OVERRIDE} -RUN if [ -n "$KUBERNETES_VERSION_OVERRIDE" ]; then \ - echo "Overwriting Kubernetes version with $KUBERNETES_VERSION_OVERRIDE"; \ - echo "$KUBERNETES_VERSION_OVERRIDE" > /src/k8s-snap/build-scripts/components/kubernetes/version; \ - cat /src/k8s-snap/build-scripts/components/kubernetes/version; \ - fi +ENV KUBERNETES_VERSION=${KUBERNETES_VERSION} +RUN [ -n "$KUBERNETES_VERSION" ] && echo "$KUBERNETES_VERSION" > /src/k8s-snap/build-scripts/components/kubernetes/version RUN /src/k8s-snap/build-scripts/build-component.sh kubernetes ## runc build diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index ac3d4061..4d5b2acd 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -152,12 +152,18 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), }, result) + By("Upgrading the DockerMachineTemplate") + UpgradeDockerMachineTemplateAndWaitForUpgrade(ctx, UpgradeDockerMachineTemplateAndWaitForUpgradeInput{ + ClusterProxy: input.BootstrapClusterProxy, + ControlPlane: result.ControlPlane, + CustomImage: "k8s-snap:dev-1.30", + }) + By("Upgrading the Kubernetes control-plane") UpgradeControlPlaneAndWaitForUpgrade(ctx, UpgradeControlPlaneAndWaitForUpgradeInput{ ClusterProxy: input.BootstrapClusterProxy, Cluster: result.Cluster, ControlPlane: result.ControlPlane, - UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-1.30", clusterName)), KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), }) diff --git a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml index 9ce48302..c6eefc94 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml @@ -60,17 +60,6 @@ spec: spec: customImage: k8s-snap:dev-1.29 -# After upgrade template for the machine deployment ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 -kind: DockerMachineTemplate -metadata: - name: ${CLUSTER_NAME}-control-plane-1.30 - namespace: ${NAMESPACE} -spec: - template: - spec: - customImage: k8s-snap:dev-1.30 --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index bae7c2d7..035d820f 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -37,6 +37,7 @@ import ( expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" + dockerv1beta1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" @@ -552,13 +553,40 @@ func WaitForControlPlaneAndMachinesReady(ctx context.Context, input WaitForContr }) } +// UpgradeDockerMachineAndWaitForUpgradeInput is the input type for UpgradeDockerMachineAndWaitForUpgrade. +type UpgradeDockerMachineTemplateAndWaitForUpgradeInput struct { + ClusterProxy framework.ClusterProxy + ControlPlane *controlplanev1.CK8sControlPlane + CustomImage string +} + +// UpgradeDockerMachineTemplateAndWaitForUpgrade upgrades a DockerMachineTemplate custom image and waits for it to be upgraded. +func UpgradeDockerMachineTemplateAndWaitForUpgrade(ctx context.Context, input UpgradeDockerMachineTemplateAndWaitForUpgradeInput) { + Byf("Patching the DockerMachineTemplate image to use the updated custom image") + mgmtClient := input.ClusterProxy.GetClient() + + dockerMachineTemplate := &dockerv1beta1.DockerMachineTemplate{} + err := mgmtClient.Get(ctx, client.ObjectKey{Name: input.ControlPlane.Spec.MachineTemplate.InfrastructureRef.Name, Namespace: input.ControlPlane.Namespace}, dockerMachineTemplate) + Expect(err).ToNot(HaveOccurred()) + + patchHelperDocker, err := patch.NewHelper(dockerMachineTemplate, mgmtClient) + Expect(err).ToNot(HaveOccurred()) + dockerMachineTemplate.Spec.Template.Spec.CustomImage = input.CustomImage + Eventually(func() error { + err := patchHelperDocker.Patch(ctx, dockerMachineTemplate) + if err != nil { + Byf("Failed to patch the DockerMachineTemplate: %v", err) + } + return err + }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch the DockerMachineTemplate") +} + // UpgradeControlPlaneAndWaitForUpgradeInput is the input type for UpgradeControlPlaneAndWaitForUpgrade. type UpgradeControlPlaneAndWaitForUpgradeInput struct { ClusterProxy framework.ClusterProxy Cluster *clusterv1.Cluster ControlPlane *controlplanev1.CK8sControlPlane KubernetesUpgradeVersion string - UpgradeMachineTemplate *string WaitForMachinesToBeUpgraded []interface{} } @@ -578,17 +606,6 @@ func UpgradeControlPlaneAndWaitForUpgrade(ctx context.Context, input UpgradeCont input.ControlPlane.Spec.Version = input.KubernetesUpgradeVersion - // Create a new ObjectReference for the infrastructure provider - newInfrastructureRef := corev1.ObjectReference{ - APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", // Adjust based on your infrastructure API version - Kind: "DockerMachineTemplate", - Name: fmt.Sprintf("%s-control-plane-1.30", input.Cluster.Name), - Namespace: input.ControlPlane.Spec.MachineTemplate.InfrastructureRef.Namespace, - } - - // Update the infrastructureRef - input.ControlPlane.Spec.MachineTemplate.InfrastructureRef = newInfrastructureRef - Eventually(func() error { return patchHelper.Patch(ctx, input.ControlPlane) }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch the new kubernetes version to KCP %s", klog.KObj(input.ControlPlane)) @@ -618,16 +635,6 @@ func UpgradeMachineDeploymentsAndWait(ctx context.Context, input framework.Upgra oldVersion := deployment.Spec.Template.Spec.Version deployment.Spec.Template.Spec.Version = &input.UpgradeVersion - // Create a new ObjectReference for the infrastructure provider - newInfrastructureRef := corev1.ObjectReference{ - APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", - Kind: "DockerMachineTemplate", - Name: fmt.Sprintf("%s-md-1.30-0", input.Cluster.Name), - Namespace: deployment.Spec.Template.Spec.InfrastructureRef.Namespace, - } - - // Update the infrastructureRef - deployment.Spec.Template.Spec.InfrastructureRef = newInfrastructureRef Eventually(func() error { return patchHelper.Patch(ctx, deployment) }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch Kubernetes version on MachineDeployment %s", klog.KObj(deployment)) From 37353c33b6d64f0245481105bfd78d230995db4c Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 1 Aug 2024 15:44:19 +0200 Subject: [PATCH 15/32] rename docker tags to old/new --- .github/workflows/e2e.yaml | 24 ++++----- test/e2e/cluster_upgrade.go | 8 +-- .../cluster-template-upgrades.yaml | 28 ++++++---- test/e2e/helpers.go | 51 ++++++++----------- 4 files changed, 54 insertions(+), 57 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 3ec0a279..2e271af5 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -34,27 +34,27 @@ jobs: run: | cd templates/docker # TODO(ben): required for the rollout upgrade test. Remove KUBERNETES_VERSION_OVERRIDE once we have a 1.31 release. - sudo docker build . -t k8s-snap:dev-1.29 --build-arg BRANCH=main --build-arg KUBERNETES_VERSION_OVERRIDE=v1.29.6 + sudo docker build . -t k8s-snap:dev-old --build-arg BRANCH=main --build-arg KUBERNETES_VERSION_OVERRIDE=v1.29.6 # TODO(ben): release-1.30 is missing extra-node-args hence we need to use main branch for now. - sudo docker build . -t k8s-snap:dev-1.30 --build-arg BRANCH=main + sudo docker build . -t k8s-snap:dev-new --build-arg BRANCH=main - name: Save provider image run: | sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev sudo chmod 775 provider-images.tar - name: Save k8s-snap image run: | - sudo docker save -o k8s-snap-image-1.29.tar k8s-snap:dev-1.29 - sudo docker save -o k8s-snap-image-1.30.tar k8s-snap:dev-1.30 - sudo chmod 775 k8s-snap-image-1.29.tar - sudo chmod 775 k8s-snap-image-1.30.tar + sudo docker save -o k8s-snap-image-old.tar k8s-snap:dev-old + sudo docker save -o k8s-snap-image-new.tar k8s-snap:dev-new + sudo chmod 775 k8s-snap-image-old.tar + sudo chmod 775 k8s-snap-image-new.tar - name: Upload artifacts uses: actions/upload-artifact@v4 with: name: e2e-images path: | provider-images.tar - k8s-snap-image-1.29.tar - k8s-snap-image-1.30.tar + k8s-snap-image-old.tar + k8s-snap-image-new.tar run-e2e-tests: name: Run E2E Tests @@ -94,13 +94,13 @@ jobs: path: . - name: Load provider image run: sudo docker load -i provider-images.tar - - name: Load k8s-snap 1.29 image + - name: Load k8s-snap old image if: matrix.ginkgo_focus == 'Workload cluster upgrade' run: | - sudo docker load -i k8s-snap-image-1.29.tar - - name: Load k8s-snap 1.30 image + sudo docker load -i k8s-snap-image-old.tar + - name: Load k8s-snap new image run: | - sudo docker load -i k8s-snap-image-1.30.tar + sudo docker load -i k8s-snap-image-new.tar - name: Create docker network run: | sudo docker network create kind --driver=bridge -o com.docker.network.bridge.enable_ip_masquerade=true diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index 4d5b2acd..dd54f95a 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -152,19 +152,13 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), }, result) - By("Upgrading the DockerMachineTemplate") - UpgradeDockerMachineTemplateAndWaitForUpgrade(ctx, UpgradeDockerMachineTemplateAndWaitForUpgradeInput{ - ClusterProxy: input.BootstrapClusterProxy, - ControlPlane: result.ControlPlane, - CustomImage: "k8s-snap:dev-1.30", - }) - By("Upgrading the Kubernetes control-plane") UpgradeControlPlaneAndWaitForUpgrade(ctx, UpgradeControlPlaneAndWaitForUpgradeInput{ ClusterProxy: input.BootstrapClusterProxy, Cluster: result.Cluster, ControlPlane: result.ControlPlane, KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), + UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-old", clusterName)), WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), }) diff --git a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml index c6eefc94..90357d99 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml @@ -40,7 +40,7 @@ spec: infrastructureTemplate: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate - name: ${CLUSTER_NAME}-control-plane-1.29 + name: ${CLUSTER_NAME}-control-plane-old spec: airGapped: true controlPlane: @@ -53,13 +53,23 @@ spec: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate metadata: - name: ${CLUSTER_NAME}-control-plane-1.29 + name: ${CLUSTER_NAME}-control-plane-old namespace: ${NAMESPACE} spec: template: spec: - customImage: k8s-snap:dev-1.29 - + customImage: k8s-snap:dev-old +# After upgrade template for the machine deployment +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: ${CLUSTER_NAME}-control-plane-new + namespace: ${NAMESPACE} +spec: + template: + spec: + customImage: k8s-snap:dev-new --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -92,27 +102,27 @@ spec: infrastructureRef: apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate - name: ${CLUSTER_NAME}-md-1.29-0 + name: ${CLUSTER_NAME}-md-old-0 --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate metadata: - name: ${CLUSTER_NAME}-md-1.29-0 + name: ${CLUSTER_NAME}-md-old-0 namespace: ${NAMESPACE} spec: template: spec: - customImage: k8s-snap:dev-1.29 + customImage: k8s-snap:dev-old --- apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate metadata: - name: ${CLUSTER_NAME}-md-1.30-0 + name: ${CLUSTER_NAME}-md-new-0 namespace: ${NAMESPACE} spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-new --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 035d820f..3f8ebb8c 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -37,7 +37,6 @@ import ( expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1" "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" - dockerv1beta1 "sigs.k8s.io/cluster-api/test/infrastructure/docker/api/v1beta1" "sigs.k8s.io/cluster-api/util/patch" "sigs.k8s.io/controller-runtime/pkg/client" @@ -553,40 +552,13 @@ func WaitForControlPlaneAndMachinesReady(ctx context.Context, input WaitForContr }) } -// UpgradeDockerMachineAndWaitForUpgradeInput is the input type for UpgradeDockerMachineAndWaitForUpgrade. -type UpgradeDockerMachineTemplateAndWaitForUpgradeInput struct { - ClusterProxy framework.ClusterProxy - ControlPlane *controlplanev1.CK8sControlPlane - CustomImage string -} - -// UpgradeDockerMachineTemplateAndWaitForUpgrade upgrades a DockerMachineTemplate custom image and waits for it to be upgraded. -func UpgradeDockerMachineTemplateAndWaitForUpgrade(ctx context.Context, input UpgradeDockerMachineTemplateAndWaitForUpgradeInput) { - Byf("Patching the DockerMachineTemplate image to use the updated custom image") - mgmtClient := input.ClusterProxy.GetClient() - - dockerMachineTemplate := &dockerv1beta1.DockerMachineTemplate{} - err := mgmtClient.Get(ctx, client.ObjectKey{Name: input.ControlPlane.Spec.MachineTemplate.InfrastructureRef.Name, Namespace: input.ControlPlane.Namespace}, dockerMachineTemplate) - Expect(err).ToNot(HaveOccurred()) - - patchHelperDocker, err := patch.NewHelper(dockerMachineTemplate, mgmtClient) - Expect(err).ToNot(HaveOccurred()) - dockerMachineTemplate.Spec.Template.Spec.CustomImage = input.CustomImage - Eventually(func() error { - err := patchHelperDocker.Patch(ctx, dockerMachineTemplate) - if err != nil { - Byf("Failed to patch the DockerMachineTemplate: %v", err) - } - return err - }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch the DockerMachineTemplate") -} - // UpgradeControlPlaneAndWaitForUpgradeInput is the input type for UpgradeControlPlaneAndWaitForUpgrade. type UpgradeControlPlaneAndWaitForUpgradeInput struct { ClusterProxy framework.ClusterProxy Cluster *clusterv1.Cluster ControlPlane *controlplanev1.CK8sControlPlane KubernetesUpgradeVersion string + UpgradeMachineTemplate *string WaitForMachinesToBeUpgraded []interface{} } @@ -606,6 +578,17 @@ func UpgradeControlPlaneAndWaitForUpgrade(ctx context.Context, input UpgradeCont input.ControlPlane.Spec.Version = input.KubernetesUpgradeVersion + // Create a new ObjectReference for the infrastructure provider + newInfrastructureRef := corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "DockerMachineTemplate", + Name: fmt.Sprintf("%s-control-plane-new", input.Cluster.Name), + Namespace: input.ControlPlane.Spec.MachineTemplate.InfrastructureRef.Namespace, + } + + // Update the infrastructureRef + input.ControlPlane.Spec.MachineTemplate.InfrastructureRef = newInfrastructureRef + Eventually(func() error { return patchHelper.Patch(ctx, input.ControlPlane) }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch the new kubernetes version to KCP %s", klog.KObj(input.ControlPlane)) @@ -635,6 +618,16 @@ func UpgradeMachineDeploymentsAndWait(ctx context.Context, input framework.Upgra oldVersion := deployment.Spec.Template.Spec.Version deployment.Spec.Template.Spec.Version = &input.UpgradeVersion + // Create a new ObjectReference for the infrastructure provider + newInfrastructureRef := corev1.ObjectReference{ + APIVersion: "infrastructure.cluster.x-k8s.io/v1beta1", + Kind: "DockerMachineTemplate", + Name: fmt.Sprintf("%s-md-new-0", input.Cluster.Name), + Namespace: deployment.Spec.Template.Spec.InfrastructureRef.Namespace, + } + + // Update the infrastructureRef + deployment.Spec.Template.Spec.InfrastructureRef = newInfrastructureRef Eventually(func() error { return patchHelper.Patch(ctx, deployment) }, retryableOperationTimeout, retryableOperationInterval).Should(Succeed(), "Failed to patch Kubernetes version on MachineDeployment %s", klog.KObj(deployment)) From 46fc91adc575f90ca74b1375db2a1bad7caa9e7b Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Fri, 2 Aug 2024 08:45:56 +0200 Subject: [PATCH 16/32] fix docker build script --- .github/workflows/e2e.yaml | 4 +--- test/e2e/config/ck8s-docker.yaml | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 2e271af5..495a3d77 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -33,9 +33,7 @@ jobs: - name: Build k8s-snap image run: | cd templates/docker - # TODO(ben): required for the rollout upgrade test. Remove KUBERNETES_VERSION_OVERRIDE once we have a 1.31 release. - sudo docker build . -t k8s-snap:dev-old --build-arg BRANCH=main --build-arg KUBERNETES_VERSION_OVERRIDE=v1.29.6 - # TODO(ben): release-1.30 is missing extra-node-args hence we need to use main branch for now. + sudo docker build . -t k8s-snap:dev-old --build-arg BRANCH=main --build-arg KUBERNETES_VERSION=v1.29.6 sudo docker build . -t k8s-snap:dev-new --build-arg BRANCH=main - name: Save provider image run: | diff --git a/test/e2e/config/ck8s-docker.yaml b/test/e2e/config/ck8s-docker.yaml index 38ccb7b4..06134e12 100644 --- a/test/e2e/config/ck8s-docker.yaml +++ b/test/e2e/config/ck8s-docker.yaml @@ -86,7 +86,7 @@ providers: variables: KUBERNETES_VERSION_MANAGEMENT: "v1.28.0" KUBERNETES_VERSION: "v1.29.6" - KUBERNETES_VERSION_UPGRADE_TO: "v1.30.2" + KUBERNETES_VERSION_UPGRADE_TO: "v1.30.3" IP_FAMILY: "IPv4" KIND_IMAGE_VERSION: "v1.28.0" From c9a90c53bc741e56d5ef2d3d286d6e39e6cd0d7e Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Fri, 2 Aug 2024 11:30:58 +0200 Subject: [PATCH 17/32] fix build error --- templates/docker/Dockerfile | 3 +++ 1 file changed, 3 insertions(+) diff --git a/templates/docker/Dockerfile b/templates/docker/Dockerfile index f12ea402..1a4c2a20 100644 --- a/templates/docker/Dockerfile +++ b/templates/docker/Dockerfile @@ -90,6 +90,9 @@ RUN /src/k8s-snap/build-scripts/build-component.sh helm FROM builder AS build-kubernetes ENV KUBERNETES_VERSION=${KUBERNETES_VERSION} RUN [ -n "$KUBERNETES_VERSION" ] && echo "$KUBERNETES_VERSION" > /src/k8s-snap/build-scripts/components/kubernetes/version +RUN if [ -n "$KUBERNETES_VERSION" ]; then \ + echo "$KUBERNETES_VERSION" > /src/k8s-snap/build-scripts/components/kubernetes/version; \ + fi RUN /src/k8s-snap/build-scripts/build-component.sh kubernetes ## runc build From f06be1095fb6dcd7d92824238a8ae96992c1e08f Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Fri, 2 Aug 2024 12:26:37 +0200 Subject: [PATCH 18/32] docker fix --- templates/docker/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/templates/docker/Dockerfile b/templates/docker/Dockerfile index 1a4c2a20..0aa0ef84 100644 --- a/templates/docker/Dockerfile +++ b/templates/docker/Dockerfile @@ -89,7 +89,6 @@ RUN /src/k8s-snap/build-scripts/build-component.sh helm ## kubernetes build FROM builder AS build-kubernetes ENV KUBERNETES_VERSION=${KUBERNETES_VERSION} -RUN [ -n "$KUBERNETES_VERSION" ] && echo "$KUBERNETES_VERSION" > /src/k8s-snap/build-scripts/components/kubernetes/version RUN if [ -n "$KUBERNETES_VERSION" ]; then \ echo "$KUBERNETES_VERSION" > /src/k8s-snap/build-scripts/components/kubernetes/version; \ fi From 4c74334ae50040e552601d2645ff4cf4418679c9 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 7 Aug 2024 09:44:45 +0200 Subject: [PATCH 19/32] update image tags --- .../cluster-template-kcp-remediation.yaml | 4 ++-- .../cluster-template-md-remediation.yaml | 4 ++-- test/e2e/data/infrastructure-docker/cluster-template.yaml | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml b/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml index 6b0d0f1f..91e3496c 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml @@ -84,7 +84,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-new --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -127,7 +127,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-new --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml b/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml index ce06f5ad..e54be632 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml @@ -57,7 +57,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-new --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -101,7 +101,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-new --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template.yaml b/test/e2e/data/infrastructure-docker/cluster-template.yaml index 59ed9950..91b3113c 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template.yaml @@ -57,7 +57,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-new --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -100,7 +100,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-1.30 + customImage: k8s-snap:dev-new --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate From 8394c4b473fd8608462230047974edf4c21aa354 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 8 Aug 2024 11:03:41 +0200 Subject: [PATCH 20/32] build script --- .github/workflows/e2e.yaml | 7 +++---- hack/build-e2e-images.sh | 3 +++ 2 files changed, 6 insertions(+), 4 deletions(-) create mode 100755 hack/build-e2e-images.sh diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 495a3d77..27d27903 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -30,11 +30,10 @@ jobs: sudo snap install kubectl --classic --channel=1.30/stable - name: Build provider images run: sudo make docker-build-e2e - - name: Build k8s-snap image + - name: Build k8s-snap images + working-directory: hack/ run: | - cd templates/docker - sudo docker build . -t k8s-snap:dev-old --build-arg BRANCH=main --build-arg KUBERNETES_VERSION=v1.29.6 - sudo docker build . -t k8s-snap:dev-new --build-arg BRANCH=main + ./build-e2e-images.sh - name: Save provider image run: | sudo docker save -o provider-images.tar ghcr.io/canonical/cluster-api-k8s/controlplane-controller:dev ghcr.io/canonical/cluster-api-k8s/bootstrap-controller:dev diff --git a/hack/build-e2e-images.sh b/hack/build-e2e-images.sh new file mode 100755 index 00000000..072abe89 --- /dev/null +++ b/hack/build-e2e-images.sh @@ -0,0 +1,3 @@ +cd ../templates/docker +sudo docker build -t k8s-snap:dev-old --build-arg BRANCH=main --build-arg KUBERNETES_VERSION=v1.29.6 +sudo docker build . -t k8s-snap:dev-new --build-arg BRANCH=main From b5edec79d7b3efc01f259827e75eb6ef40cdfcb1 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 8 Aug 2024 15:33:47 +0200 Subject: [PATCH 21/32] add wrapper scripts --- hack/build-e2e-images.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/hack/build-e2e-images.sh b/hack/build-e2e-images.sh index 072abe89..c96a7e38 100755 --- a/hack/build-e2e-images.sh +++ b/hack/build-e2e-images.sh @@ -1,3 +1,14 @@ -cd ../templates/docker -sudo docker build -t k8s-snap:dev-old --build-arg BRANCH=main --build-arg KUBERNETES_VERSION=v1.29.6 +#!/bin/bash + +# Description: +# Build k8s-snap docker images required for e2e tests. +# +# Usage: +# ./build-e2e-images.sh + +DIR="$(realpath "$(dirname "${0}")")" + +cd "${DIR}/../templates/docker" +sudo docker build . -t k8s-snap:dev-old --build-arg BRANCH=main --build-arg KUBERNETES_VERSION=v1.29.6 sudo docker build . -t k8s-snap:dev-new --build-arg BRANCH=main +cd - From 383a574982169cc5bb8e9a22fa0bca5cfe7b0fe7 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Thu, 29 Aug 2024 15:21:04 +0200 Subject: [PATCH 22/32] fix remediation --- .../cluster-template-md-remediation.yaml | 4 +- test/e2e/helpers.go | 3 + test/e2e/md_remediation_test.go | 113 ++++++++++++++++++ 3 files changed, 118 insertions(+), 2 deletions(-) diff --git a/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml b/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml index e54be632..b2bf8e7e 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-md-remediation.yaml @@ -57,7 +57,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-new + customImage: k8s-snap:dev-old --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -101,7 +101,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-new + customImage: k8s-snap:dev-old --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 3f8ebb8c..39dcbfe3 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -661,14 +661,17 @@ func WaitForNodesReady(ctx context.Context, input WaitForNodesReadyInput) { } nodeReadyCount := 0 for _, node := range nodeList.Items { + fmt.Fprintf(GinkgoWriter, "versions: %s %s\n", semver.MajorMinor(node.Status.NodeInfo.KubeletVersion), semver.MajorMinor(input.KubernetesVersion)) if !(semver.MajorMinor(node.Status.NodeInfo.KubeletVersion) == semver.MajorMinor(input.KubernetesVersion)) { return false, nil } + fmt.Fprintf(GinkgoWriter, "node %s is ready: %t\n", node.Name, noderefutil.IsNodeReady(&node)) if !noderefutil.IsNodeReady(&node) { return false, nil } nodeReadyCount++ } + fmt.Fprintf(GinkgoWriter, "nodeReadyCount: %d, expected count: %d\n", nodeReadyCount, input.Count) return input.Count == nodeReadyCount, nil }, input.WaitForNodesReady...).Should(BeTrue()) } diff --git a/test/e2e/md_remediation_test.go b/test/e2e/md_remediation_test.go index 4f707ba2..1390f51c 100644 --- a/test/e2e/md_remediation_test.go +++ b/test/e2e/md_remediation_test.go @@ -23,6 +23,7 @@ import ( "context" "fmt" "path/filepath" + "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -31,6 +32,10 @@ import ( "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" "sigs.k8s.io/cluster-api/util" + + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ) var _ = Describe("When testing MachineDeployment remediation", func() { @@ -120,3 +125,111 @@ var _ = Describe("When testing MachineDeployment remediation", func() { }) }) }) + +// DiscoverMachineHealthChecksAndWaitForRemediation patches an unhealthy node condition to one node observed by the Machine Health Check and then wait for remediation. +func DiscoverMachineHealthChecksAndWaitForRemediation(ctx context.Context, input framework.DiscoverMachineHealthCheckAndWaitForRemediationInput) { + Expect(ctx).NotTo(BeNil(), "ctx is required for DiscoverMachineHealthChecksAndWaitForRemediation") + Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling DiscoverMachineHealthChecksAndWaitForRemediation") + Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling DiscoverMachineHealthChecksAndWaitForRemediation") + + mgmtClient := input.ClusterProxy.GetClient() + fmt.Fprintln(GinkgoWriter, "Discovering machine health check resources") + machineHealthChecks := framework.GetMachineHealthChecksForCluster(ctx, framework.GetMachineHealthChecksForClusterInput{ + Lister: mgmtClient, + ClusterName: input.Cluster.Name, + Namespace: input.Cluster.Namespace, + }) + + Expect(machineHealthChecks).NotTo(BeEmpty()) + + for _, mhc := range machineHealthChecks { + Expect(mhc.Spec.UnhealthyConditions).NotTo(BeEmpty()) + + fmt.Fprintln(GinkgoWriter, "Ensuring there is at least 1 Machine that MachineHealthCheck is matching") + machines := framework.GetMachinesByMachineHealthCheck(ctx, framework.GetMachinesByMachineHealthCheckInput{ + Lister: mgmtClient, + ClusterName: input.Cluster.Name, + MachineHealthCheck: mhc, + }) + + Expect(machines).NotTo(BeEmpty()) + + fmt.Fprintln(GinkgoWriter, "Patching MachineHealthCheck unhealthy condition to one of the nodes") + unhealthyNodeCondition := corev1.NodeCondition{ + Type: mhc.Spec.UnhealthyConditions[0].Type, + Status: mhc.Spec.UnhealthyConditions[0].Status, + LastTransitionTime: metav1.Time{Time: time.Now()}, + } + framework.PatchNodeCondition(ctx, framework.PatchNodeConditionInput{ + ClusterProxy: input.ClusterProxy, + Cluster: input.Cluster, + NodeCondition: unhealthyNodeCondition, + Machine: machines[0], + }) + + fmt.Fprintln(GinkgoWriter, "Waiting for remediation x") + framework.WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition(ctx, framework.WaitForMachineHealthCheckToRemediateUnhealthyNodeConditionInput{ + ClusterProxy: input.ClusterProxy, + Cluster: input.Cluster, + MachineHealthCheck: mhc, + MachinesCount: len(machines), + }, input.WaitForMachineRemediation...) + } +} + +// WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition patches a node condition to any one of the machines with a node ref. +func WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition(ctx context.Context, input framework.WaitForMachineHealthCheckToRemediateUnhealthyNodeConditionInput, intervals ...interface{}) { + Expect(ctx).NotTo(BeNil(), "ctx is required for WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") + Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") + Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") + Expect(input.MachineHealthCheck).NotTo(BeNil(), "Invalid argument. input.MachineHealthCheck can't be nil when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") + Expect(input.MachinesCount).NotTo(BeZero(), "Invalid argument. input.MachinesCount can't be zero when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") + + fmt.Fprintln(GinkgoWriter, "Waiting until the node with unhealthy node condition is remediated") + Eventually(func() bool { + machines := framework.GetMachinesByMachineHealthCheck(ctx, framework.GetMachinesByMachineHealthCheckInput{ + Lister: input.ClusterProxy.GetClient(), + ClusterName: input.Cluster.Name, + MachineHealthCheck: input.MachineHealthCheck, + }) + // Wait for all the machines to exist. + // NOTE: this is required given that this helper is called after a remediation + // and we want to make sure all the machine are back in place before testing for unhealthyCondition being fixed. + fmt.Fprintf(GinkgoWriter, "waiting for all machines to exist, current count: %d, expected count: %d\n", len(machines), input.MachinesCount) + if len(machines) < input.MachinesCount { + return false + } + + for _, machine := range machines { + if machine.Status.NodeRef == nil { + fmt.Fprintf(GinkgoWriter, "machine %s no node ref", machine.Name) + return false + } + node := &corev1.Node{} + // This should not be an Expect(), because it may return error during machine deletion. + err := input.ClusterProxy.GetWorkloadCluster(ctx, input.Cluster.Namespace, input.Cluster.Name).GetClient().Get(ctx, types.NamespacedName{Name: machine.Status.NodeRef.Name, Namespace: machine.Status.NodeRef.Namespace}, node) + if err != nil { + fmt.Fprintf(GinkgoWriter, "failed to get node from ref: %v", err) + return false + } + if hasMatchingUnhealthyConditions(input.MachineHealthCheck, node.Status.Conditions) { + fmt.Fprintf(GinkgoWriter, "%s has not matching unhealthy condiditon", machine.Name) + return false + } + } + return true + }, intervals...).Should(BeTrue()) +} + +// hasMatchingUnhealthyConditions returns true if any node condition matches with machine health check unhealthy conditions. +func hasMatchingUnhealthyConditions(machineHealthCheck *clusterv1.MachineHealthCheck, nodeConditions []corev1.NodeCondition) bool { + fmt.Fprintf(GinkgoWriter, "checking for matching unhealthy conditions, machine health check: %v, node conditions: %v\n", machineHealthCheck.Spec.UnhealthyConditions, nodeConditions) + for _, unhealthyCondition := range machineHealthCheck.Spec.UnhealthyConditions { + for _, nodeCondition := range nodeConditions { + if nodeCondition.Type == unhealthyCondition.Type && nodeCondition.Status == unhealthyCondition.Status { + return true + } + } + } + return false +} From a1d4fe426783a88f428f136894243d9518f863e3 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Sun, 1 Sep 2024 08:11:58 +0200 Subject: [PATCH 23/32] debug --- .../controllers/ck8sconfig_controller.go | 8 ++ c1.yaml | 103 ++++++++++++++++++ templates/docker/Dockerfile | 15 ++- test/e2e/cluster_upgrade.go | 58 +++++----- test/e2e/cluster_upgrade_test.go | 6 +- 5 files changed, 157 insertions(+), 33 deletions(-) create mode 100644 c1.yaml diff --git a/bootstrap/controllers/ck8sconfig_controller.go b/bootstrap/controllers/ck8sconfig_controller.go index bdd70113..a1884e7a 100644 --- a/bootstrap/controllers/ck8sconfig_controller.go +++ b/bootstrap/controllers/ck8sconfig_controller.go @@ -213,6 +213,9 @@ func (r *CK8sConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) } func (r *CK8sConfigReconciler) joinControlplane(ctx context.Context, scope *Scope) error { + + log := r.Log.WithValues("scope.Config", scope.Config) + machine := &clusterv1.Machine{} if err := runtime.DefaultUnstructuredConverter.FromUnstructured(scope.ConfigOwner.Object, machine); err != nil { return fmt.Errorf("cannot convert %s to Machine: %w", scope.ConfigOwner.GetKind(), err) @@ -242,6 +245,11 @@ func (r *CK8sConfigReconciler) joinControlplane(ctx context.Context, scope *Scop ControlPlaneEndpoint: scope.Cluster.Spec.ControlPlaneEndpoint.Host, ControlPlaneConfig: controlPlaneConfig, }) + log.Info("-----------------------------------------") + log.Info("Config.Name: %v\n", scope.Config.Name) + log.Info("extraSANs: %v\n", controlPlaneConfig.ExtraSANs) + log.Info("-----------------------------------------") + joinConfig, err := kubeyaml.Marshal(configStruct) if err != nil { return err diff --git a/c1.yaml b/c1.yaml new file mode 100644 index 00000000..6d3863e9 --- /dev/null +++ b/c1.yaml @@ -0,0 +1,103 @@ +apiVersion: cluster.x-k8s.io/v1beta1 +kind: Cluster +metadata: + name: c1 + namespace: default +spec: + clusterNetwork: + pods: + cidrBlocks: + - 10.1.0.0/16 + serviceDomain: cluster.local + services: + cidrBlocks: + - 10.152.0.0/16 + controlPlaneRef: + apiVersion: controlplane.cluster.x-k8s.io/v1beta2 + kind: CK8sControlPlane + name: c1-control-plane + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerCluster + name: c1 +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerCluster +metadata: + name: c1 + namespace: default +spec: {} +--- +apiVersion: controlplane.cluster.x-k8s.io/v1beta2 +kind: CK8sControlPlane +metadata: + name: c1-control-plane + namespace: default +spec: + machineTemplate: + infrastructureTemplate: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachineTemplate + name: c1-control-plane + replicas: 3 + spec: + airGapped: true + controlPlane: + extraKubeAPIServerArgs: + --anonymous-auth: "true" + version: v1.29.6 +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: c1-control-plane + namespace: default +spec: + template: + spec: + customImage: k8s-snap:dev-old +--- +apiVersion: cluster.x-k8s.io/v1beta1 +kind: MachineDeployment +metadata: + name: c1-worker-md-0 + namespace: default +spec: + clusterName: c1 + replicas: 3 + selector: + matchLabels: + cluster.x-k8s.io/cluster-name: c1 + template: + spec: + bootstrap: + configRef: + apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 + kind: CK8sConfigTemplate + name: c1-md-0 + clusterName: c1 + infrastructureRef: + apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 + kind: DockerMachineTemplate + name: c1-md-0 + version: v1.29.6 +--- +apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 +kind: DockerMachineTemplate +metadata: + name: c1-md-0 + namespace: default +spec: + template: + spec: + customImage: k8s-snap:dev-old +--- +apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 +kind: CK8sConfigTemplate +metadata: + name: c1-md-0 + namespace: default +spec: + template: + spec: + airGapped: true diff --git a/templates/docker/Dockerfile b/templates/docker/Dockerfile index 0aa0ef84..e13b60d9 100644 --- a/templates/docker/Dockerfile +++ b/templates/docker/Dockerfile @@ -87,12 +87,21 @@ FROM builder AS build-helm RUN /src/k8s-snap/build-scripts/build-component.sh helm ## kubernetes build +RUN cat /src/k8s-snap/build-scripts/components/kubernetes/version FROM builder AS build-kubernetes ENV KUBERNETES_VERSION=${KUBERNETES_VERSION} RUN if [ -n "$KUBERNETES_VERSION" ]; then \ + echo "Overwriting Kubernetes version with $KUBERNETES_VERSION"; \ echo "$KUBERNETES_VERSION" > /src/k8s-snap/build-scripts/components/kubernetes/version; \ + cat /src/k8s-snap/build-scripts/components/kubernetes/version; \ fi -RUN /src/k8s-snap/build-scripts/build-component.sh kubernetes +RUN cat /src/k8s-snap/build-scripts/components/kubernetes/version +RUN echo "Kubernetes version: $KUBERNETES_VERSION" \ +&& cat /src/k8s-snap/build-scripts/components/kubernetes/version \ +&& /src/k8s-snap/build-scripts/build-component.sh kubernetes +RUN ls /out +RUN /out/bin/kubectl version --client +RUN /out/bin/kubelet --version ## runc build FROM builder AS build-runc @@ -162,3 +171,7 @@ ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/k8s/ ## NOTE(neoaggelos): Required for containerd to properly set up overlayfs for pods VOLUME ["/var/snap/k8s/common/var/lib/containerd"] + +## NOTE(ben): Remove exisitng kind image kubectl and kubelet binaries +# to avoid version confusion. +RUN rm -f /usr/bin/kubectl /usr/bin/kubelet diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index dd54f95a..cc0593cb 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -152,34 +152,34 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), }, result) - By("Upgrading the Kubernetes control-plane") - UpgradeControlPlaneAndWaitForUpgrade(ctx, UpgradeControlPlaneAndWaitForUpgradeInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: result.Cluster, - ControlPlane: result.ControlPlane, - KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), - UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-old", clusterName)), - WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - }) - - By("Upgrading the machine deployment") - framework.UpgradeMachineDeploymentsAndWait(ctx, framework.UpgradeMachineDeploymentsAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: result.Cluster, - UpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), - MachineDeployments: result.MachineDeployments, - UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-md-1.30-0", clusterName)), - WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), - }) - - By("Waiting until nodes are ready") - workloadProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, namespace.Name, result.Cluster.Name) - workloadClient := workloadProxy.GetClient() - framework.WaitForNodesReady(ctx, framework.WaitForNodesReadyInput{ - Lister: workloadClient, - KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), - Count: int(result.ExpectedTotalNodes()), - WaitForNodesReady: input.E2EConfig.GetIntervals(specName, "wait-nodes-ready"), - }) + /* By("Upgrading the Kubernetes control-plane") + UpgradeControlPlaneAndWaitForUpgrade(ctx, UpgradeControlPlaneAndWaitForUpgradeInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: result.Cluster, + ControlPlane: result.ControlPlane, + KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), + UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-old", clusterName)), + WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + }) + + By("Upgrading the machine deployment") + framework.UpgradeMachineDeploymentsAndWait(ctx, framework.UpgradeMachineDeploymentsAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: result.Cluster, + UpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), + MachineDeployments: result.MachineDeployments, + UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-md-1.30-0", clusterName)), + WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }) */ + + /* By("Waiting until nodes are ready") + workloadProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, namespace.Name, result.Cluster.Name) + workloadClient := workloadProxy.GetClient() + framework.WaitForNodesReady(ctx, framework.WaitForNodesReadyInput{ + Lister: workloadClient, + KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), + Count: int(result.ExpectedTotalNodes()), + WaitForNodesReady: input.E2EConfig.GetIntervals(specName, "wait-nodes-ready"), + }) */ }) } diff --git a/test/e2e/cluster_upgrade_test.go b/test/e2e/cluster_upgrade_test.go index 5829f75d..4de856e6 100644 --- a/test/e2e/cluster_upgrade_test.go +++ b/test/e2e/cluster_upgrade_test.go @@ -25,7 +25,7 @@ import ( ) var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { - Context("Upgrading a cluster with 1 control plane", func() { + /* Context("Upgrading a cluster with 1 control plane", func() { It("Non-HA upgrades require in-place upgrades which are not supported yet.", // TODO(ben): Enable this test once we have support for in-place upgrades. func() { Skip("") }, @@ -42,7 +42,7 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { WorkerMachineCount: ptr.To[int64](2), } }) - }) + }) */ Context("Upgrading a cluster with HA control plane", func() { ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput { @@ -54,7 +54,7 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { SkipCleanup: skipCleanup, InfrastructureProvider: ptr.To("docker"), ControlPlaneMachineCount: ptr.To[int64](3), - WorkerMachineCount: ptr.To[int64](1), + WorkerMachineCount: ptr.To[int64](0), } }) }) From 060a2160f0ee33ccb451f0f353b468e22bc6f3c7 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Mon, 2 Sep 2024 09:18:24 +0200 Subject: [PATCH 24/32] make linter happy --- .../controllers/ck8sconfig_controller.go | 1 - c1.yaml | 103 ------------------ templates/docker/Dockerfile | 2 +- 3 files changed, 1 insertion(+), 105 deletions(-) delete mode 100644 c1.yaml diff --git a/bootstrap/controllers/ck8sconfig_controller.go b/bootstrap/controllers/ck8sconfig_controller.go index a1884e7a..2a519f4c 100644 --- a/bootstrap/controllers/ck8sconfig_controller.go +++ b/bootstrap/controllers/ck8sconfig_controller.go @@ -213,7 +213,6 @@ func (r *CK8sConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) } func (r *CK8sConfigReconciler) joinControlplane(ctx context.Context, scope *Scope) error { - log := r.Log.WithValues("scope.Config", scope.Config) machine := &clusterv1.Machine{} diff --git a/c1.yaml b/c1.yaml deleted file mode 100644 index 6d3863e9..00000000 --- a/c1.yaml +++ /dev/null @@ -1,103 +0,0 @@ -apiVersion: cluster.x-k8s.io/v1beta1 -kind: Cluster -metadata: - name: c1 - namespace: default -spec: - clusterNetwork: - pods: - cidrBlocks: - - 10.1.0.0/16 - serviceDomain: cluster.local - services: - cidrBlocks: - - 10.152.0.0/16 - controlPlaneRef: - apiVersion: controlplane.cluster.x-k8s.io/v1beta2 - kind: CK8sControlPlane - name: c1-control-plane - infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 - kind: DockerCluster - name: c1 ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 -kind: DockerCluster -metadata: - name: c1 - namespace: default -spec: {} ---- -apiVersion: controlplane.cluster.x-k8s.io/v1beta2 -kind: CK8sControlPlane -metadata: - name: c1-control-plane - namespace: default -spec: - machineTemplate: - infrastructureTemplate: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 - kind: DockerMachineTemplate - name: c1-control-plane - replicas: 3 - spec: - airGapped: true - controlPlane: - extraKubeAPIServerArgs: - --anonymous-auth: "true" - version: v1.29.6 ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 -kind: DockerMachineTemplate -metadata: - name: c1-control-plane - namespace: default -spec: - template: - spec: - customImage: k8s-snap:dev-old ---- -apiVersion: cluster.x-k8s.io/v1beta1 -kind: MachineDeployment -metadata: - name: c1-worker-md-0 - namespace: default -spec: - clusterName: c1 - replicas: 3 - selector: - matchLabels: - cluster.x-k8s.io/cluster-name: c1 - template: - spec: - bootstrap: - configRef: - apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 - kind: CK8sConfigTemplate - name: c1-md-0 - clusterName: c1 - infrastructureRef: - apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 - kind: DockerMachineTemplate - name: c1-md-0 - version: v1.29.6 ---- -apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 -kind: DockerMachineTemplate -metadata: - name: c1-md-0 - namespace: default -spec: - template: - spec: - customImage: k8s-snap:dev-old ---- -apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 -kind: CK8sConfigTemplate -metadata: - name: c1-md-0 - namespace: default -spec: - template: - spec: - airGapped: true diff --git a/templates/docker/Dockerfile b/templates/docker/Dockerfile index e13b60d9..9a264b7c 100644 --- a/templates/docker/Dockerfile +++ b/templates/docker/Dockerfile @@ -172,6 +172,6 @@ ENV PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/snap/k8s/ ## NOTE(neoaggelos): Required for containerd to properly set up overlayfs for pods VOLUME ["/var/snap/k8s/common/var/lib/containerd"] -## NOTE(ben): Remove exisitng kind image kubectl and kubelet binaries +## NOTE(ben): Remove existing kind image kubectl and kubelet binaries # to avoid version confusion. RUN rm -f /usr/bin/kubectl /usr/bin/kubelet From 98bfbf94a63333a7821aa3f1387207db009acc87 Mon Sep 17 00:00:00 2001 From: "Homayoon (Hue) Alimohammadi" Date: Tue, 3 Sep 2024 10:51:23 +0400 Subject: [PATCH 25/32] Add key to ck8sconfig_controller logs --- bootstrap/controllers/ck8sconfig_controller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstrap/controllers/ck8sconfig_controller.go b/bootstrap/controllers/ck8sconfig_controller.go index 2a519f4c..17356d80 100644 --- a/bootstrap/controllers/ck8sconfig_controller.go +++ b/bootstrap/controllers/ck8sconfig_controller.go @@ -245,8 +245,8 @@ func (r *CK8sConfigReconciler) joinControlplane(ctx context.Context, scope *Scop ControlPlaneConfig: controlPlaneConfig, }) log.Info("-----------------------------------------") - log.Info("Config.Name: %v\n", scope.Config.Name) - log.Info("extraSANs: %v\n", controlPlaneConfig.ExtraSANs) + log.Info("Config.Name: %v\n", "scope.Config.Name", scope.Config.Name) + log.Info("extraSANs: %v\n", "controlPlaneConfig.ExtraSANs", controlPlaneConfig.ExtraSANs) log.Info("-----------------------------------------") joinConfig, err := kubeyaml.Marshal(configStruct) From 63bb3719a0c47a735ea1f763b51e90d7875a4929 Mon Sep 17 00:00:00 2001 From: "Homayoon (Hue) Alimohammadi" Date: Tue, 3 Sep 2024 10:54:11 +0400 Subject: [PATCH 26/32] improve logs --- bootstrap/controllers/ck8sconfig_controller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/bootstrap/controllers/ck8sconfig_controller.go b/bootstrap/controllers/ck8sconfig_controller.go index 17356d80..f40edcb3 100644 --- a/bootstrap/controllers/ck8sconfig_controller.go +++ b/bootstrap/controllers/ck8sconfig_controller.go @@ -245,8 +245,8 @@ func (r *CK8sConfigReconciler) joinControlplane(ctx context.Context, scope *Scop ControlPlaneConfig: controlPlaneConfig, }) log.Info("-----------------------------------------") - log.Info("Config.Name: %v\n", "scope.Config.Name", scope.Config.Name) - log.Info("extraSANs: %v\n", "controlPlaneConfig.ExtraSANs", controlPlaneConfig.ExtraSANs) + log.Info(fmt.Sprintf("Config.Name: %v\n", scope.Config.Name)) + log.Info(fmt.Sprintf("extraSANs: %v\n", controlPlaneConfig.ExtraSANs)) log.Info("-----------------------------------------") joinConfig, err := kubeyaml.Marshal(configStruct) From 3cefdb31dfe1a84f9f88b57b132558488004590b Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Tue, 3 Sep 2024 09:39:45 +0200 Subject: [PATCH 27/32] cleanup tests --- .github/workflows/e2e.yaml | 4 +- .../controllers/ck8sconfig_controller.go | 7 -- templates/docker/Dockerfile | 10 +- test/e2e/cluster_upgrade.go | 58 ++++----- test/e2e/cluster_upgrade_test.go | 14 +-- test/e2e/md_remediation_test.go | 113 ------------------ 6 files changed, 37 insertions(+), 169 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 27d27903..e2bce00e 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -65,6 +65,7 @@ jobs: - "Workload cluster creation" - "Workload cluster scaling" - "Workload cluster upgrade" + # TODO(ben): Remove once all tests are running stable. fail-fast: false steps: - @@ -109,6 +110,3 @@ jobs: - name: Run e2e tests run: | sudo GINKGO_FOCUS="${{ matrix.ginkgo_focus }}" SKIP_RESOURCE_CLEANUP=true make test-e2e - - name: Setup tmate session - if: ${{ failure() }} - uses: canonical/action-tmate@main diff --git a/bootstrap/controllers/ck8sconfig_controller.go b/bootstrap/controllers/ck8sconfig_controller.go index f40edcb3..bdd70113 100644 --- a/bootstrap/controllers/ck8sconfig_controller.go +++ b/bootstrap/controllers/ck8sconfig_controller.go @@ -213,8 +213,6 @@ func (r *CK8sConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) } func (r *CK8sConfigReconciler) joinControlplane(ctx context.Context, scope *Scope) error { - log := r.Log.WithValues("scope.Config", scope.Config) - machine := &clusterv1.Machine{} if err := runtime.DefaultUnstructuredConverter.FromUnstructured(scope.ConfigOwner.Object, machine); err != nil { return fmt.Errorf("cannot convert %s to Machine: %w", scope.ConfigOwner.GetKind(), err) @@ -244,11 +242,6 @@ func (r *CK8sConfigReconciler) joinControlplane(ctx context.Context, scope *Scop ControlPlaneEndpoint: scope.Cluster.Spec.ControlPlaneEndpoint.Host, ControlPlaneConfig: controlPlaneConfig, }) - log.Info("-----------------------------------------") - log.Info(fmt.Sprintf("Config.Name: %v\n", scope.Config.Name)) - log.Info(fmt.Sprintf("extraSANs: %v\n", controlPlaneConfig.ExtraSANs)) - log.Info("-----------------------------------------") - joinConfig, err := kubeyaml.Marshal(configStruct) if err != nil { return err diff --git a/templates/docker/Dockerfile b/templates/docker/Dockerfile index 9a264b7c..b6a3b08f 100644 --- a/templates/docker/Dockerfile +++ b/templates/docker/Dockerfile @@ -87,21 +87,13 @@ FROM builder AS build-helm RUN /src/k8s-snap/build-scripts/build-component.sh helm ## kubernetes build -RUN cat /src/k8s-snap/build-scripts/components/kubernetes/version FROM builder AS build-kubernetes ENV KUBERNETES_VERSION=${KUBERNETES_VERSION} RUN if [ -n "$KUBERNETES_VERSION" ]; then \ echo "Overwriting Kubernetes version with $KUBERNETES_VERSION"; \ echo "$KUBERNETES_VERSION" > /src/k8s-snap/build-scripts/components/kubernetes/version; \ - cat /src/k8s-snap/build-scripts/components/kubernetes/version; \ fi -RUN cat /src/k8s-snap/build-scripts/components/kubernetes/version -RUN echo "Kubernetes version: $KUBERNETES_VERSION" \ -&& cat /src/k8s-snap/build-scripts/components/kubernetes/version \ -&& /src/k8s-snap/build-scripts/build-component.sh kubernetes -RUN ls /out -RUN /out/bin/kubectl version --client -RUN /out/bin/kubelet --version +RUN /src/k8s-snap/build-scripts/build-component.sh kubernetes ## runc build FROM builder AS build-runc diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index cc0593cb..dd54f95a 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -152,34 +152,34 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), }, result) - /* By("Upgrading the Kubernetes control-plane") - UpgradeControlPlaneAndWaitForUpgrade(ctx, UpgradeControlPlaneAndWaitForUpgradeInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: result.Cluster, - ControlPlane: result.ControlPlane, - KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), - UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-old", clusterName)), - WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), - }) - - By("Upgrading the machine deployment") - framework.UpgradeMachineDeploymentsAndWait(ctx, framework.UpgradeMachineDeploymentsAndWaitInput{ - ClusterProxy: input.BootstrapClusterProxy, - Cluster: result.Cluster, - UpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), - MachineDeployments: result.MachineDeployments, - UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-md-1.30-0", clusterName)), - WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), - }) */ - - /* By("Waiting until nodes are ready") - workloadProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, namespace.Name, result.Cluster.Name) - workloadClient := workloadProxy.GetClient() - framework.WaitForNodesReady(ctx, framework.WaitForNodesReadyInput{ - Lister: workloadClient, - KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), - Count: int(result.ExpectedTotalNodes()), - WaitForNodesReady: input.E2EConfig.GetIntervals(specName, "wait-nodes-ready"), - }) */ + By("Upgrading the Kubernetes control-plane") + UpgradeControlPlaneAndWaitForUpgrade(ctx, UpgradeControlPlaneAndWaitForUpgradeInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: result.Cluster, + ControlPlane: result.ControlPlane, + KubernetesUpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), + UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-control-plane-old", clusterName)), + WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-machine-upgrade"), + }) + + By("Upgrading the machine deployment") + framework.UpgradeMachineDeploymentsAndWait(ctx, framework.UpgradeMachineDeploymentsAndWaitInput{ + ClusterProxy: input.BootstrapClusterProxy, + Cluster: result.Cluster, + UpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), + MachineDeployments: result.MachineDeployments, + UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-md-1.30-0", clusterName)), + WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), + }) + + By("Waiting until nodes are ready") + workloadProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, namespace.Name, result.Cluster.Name) + workloadClient := workloadProxy.GetClient() + framework.WaitForNodesReady(ctx, framework.WaitForNodesReadyInput{ + Lister: workloadClient, + KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), + Count: int(result.ExpectedTotalNodes()), + WaitForNodesReady: input.E2EConfig.GetIntervals(specName, "wait-nodes-ready"), + }) }) } diff --git a/test/e2e/cluster_upgrade_test.go b/test/e2e/cluster_upgrade_test.go index 4de856e6..f0f1984d 100644 --- a/test/e2e/cluster_upgrade_test.go +++ b/test/e2e/cluster_upgrade_test.go @@ -25,12 +25,10 @@ import ( ) var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { - /* Context("Upgrading a cluster with 1 control plane", func() { - It("Non-HA upgrades require in-place upgrades which are not supported yet.", - // TODO(ben): Enable this test once we have support for in-place upgrades. - func() { Skip("") }, - ) - ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput { + // Skipping this test as in-place upgrades are not supported yet. + // TODO(ben): Remove this skip when in-place upgrades are supported. + //Context("Upgrading a cluster with 1 control plane", func() { + /* ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput { return ClusterUpgradeSpecInput{ E2EConfig: e2eConfig, ClusterctlConfigPath: clusterctlConfigPath, @@ -41,8 +39,8 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { ControlPlaneMachineCount: ptr.To[int64](1), WorkerMachineCount: ptr.To[int64](2), } - }) }) */ + //}) Context("Upgrading a cluster with HA control plane", func() { ClusterUpgradeSpec(ctx, func() ClusterUpgradeSpecInput { @@ -54,7 +52,7 @@ var _ = Describe("Workload cluster upgrade [CK8s-Upgrade]", func() { SkipCleanup: skipCleanup, InfrastructureProvider: ptr.To("docker"), ControlPlaneMachineCount: ptr.To[int64](3), - WorkerMachineCount: ptr.To[int64](0), + WorkerMachineCount: ptr.To[int64](1), } }) }) diff --git a/test/e2e/md_remediation_test.go b/test/e2e/md_remediation_test.go index 1390f51c..4f707ba2 100644 --- a/test/e2e/md_remediation_test.go +++ b/test/e2e/md_remediation_test.go @@ -23,7 +23,6 @@ import ( "context" "fmt" "path/filepath" - "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -32,10 +31,6 @@ import ( "sigs.k8s.io/cluster-api/test/framework" "sigs.k8s.io/cluster-api/test/framework/clusterctl" "sigs.k8s.io/cluster-api/util" - - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" ) var _ = Describe("When testing MachineDeployment remediation", func() { @@ -125,111 +120,3 @@ var _ = Describe("When testing MachineDeployment remediation", func() { }) }) }) - -// DiscoverMachineHealthChecksAndWaitForRemediation patches an unhealthy node condition to one node observed by the Machine Health Check and then wait for remediation. -func DiscoverMachineHealthChecksAndWaitForRemediation(ctx context.Context, input framework.DiscoverMachineHealthCheckAndWaitForRemediationInput) { - Expect(ctx).NotTo(BeNil(), "ctx is required for DiscoverMachineHealthChecksAndWaitForRemediation") - Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling DiscoverMachineHealthChecksAndWaitForRemediation") - Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling DiscoverMachineHealthChecksAndWaitForRemediation") - - mgmtClient := input.ClusterProxy.GetClient() - fmt.Fprintln(GinkgoWriter, "Discovering machine health check resources") - machineHealthChecks := framework.GetMachineHealthChecksForCluster(ctx, framework.GetMachineHealthChecksForClusterInput{ - Lister: mgmtClient, - ClusterName: input.Cluster.Name, - Namespace: input.Cluster.Namespace, - }) - - Expect(machineHealthChecks).NotTo(BeEmpty()) - - for _, mhc := range machineHealthChecks { - Expect(mhc.Spec.UnhealthyConditions).NotTo(BeEmpty()) - - fmt.Fprintln(GinkgoWriter, "Ensuring there is at least 1 Machine that MachineHealthCheck is matching") - machines := framework.GetMachinesByMachineHealthCheck(ctx, framework.GetMachinesByMachineHealthCheckInput{ - Lister: mgmtClient, - ClusterName: input.Cluster.Name, - MachineHealthCheck: mhc, - }) - - Expect(machines).NotTo(BeEmpty()) - - fmt.Fprintln(GinkgoWriter, "Patching MachineHealthCheck unhealthy condition to one of the nodes") - unhealthyNodeCondition := corev1.NodeCondition{ - Type: mhc.Spec.UnhealthyConditions[0].Type, - Status: mhc.Spec.UnhealthyConditions[0].Status, - LastTransitionTime: metav1.Time{Time: time.Now()}, - } - framework.PatchNodeCondition(ctx, framework.PatchNodeConditionInput{ - ClusterProxy: input.ClusterProxy, - Cluster: input.Cluster, - NodeCondition: unhealthyNodeCondition, - Machine: machines[0], - }) - - fmt.Fprintln(GinkgoWriter, "Waiting for remediation x") - framework.WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition(ctx, framework.WaitForMachineHealthCheckToRemediateUnhealthyNodeConditionInput{ - ClusterProxy: input.ClusterProxy, - Cluster: input.Cluster, - MachineHealthCheck: mhc, - MachinesCount: len(machines), - }, input.WaitForMachineRemediation...) - } -} - -// WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition patches a node condition to any one of the machines with a node ref. -func WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition(ctx context.Context, input framework.WaitForMachineHealthCheckToRemediateUnhealthyNodeConditionInput, intervals ...interface{}) { - Expect(ctx).NotTo(BeNil(), "ctx is required for WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") - Expect(input.ClusterProxy).ToNot(BeNil(), "Invalid argument. input.ClusterProxy can't be nil when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") - Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") - Expect(input.MachineHealthCheck).NotTo(BeNil(), "Invalid argument. input.MachineHealthCheck can't be nil when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") - Expect(input.MachinesCount).NotTo(BeZero(), "Invalid argument. input.MachinesCount can't be zero when calling WaitForMachineHealthCheckToRemediateUnhealthyNodeCondition") - - fmt.Fprintln(GinkgoWriter, "Waiting until the node with unhealthy node condition is remediated") - Eventually(func() bool { - machines := framework.GetMachinesByMachineHealthCheck(ctx, framework.GetMachinesByMachineHealthCheckInput{ - Lister: input.ClusterProxy.GetClient(), - ClusterName: input.Cluster.Name, - MachineHealthCheck: input.MachineHealthCheck, - }) - // Wait for all the machines to exist. - // NOTE: this is required given that this helper is called after a remediation - // and we want to make sure all the machine are back in place before testing for unhealthyCondition being fixed. - fmt.Fprintf(GinkgoWriter, "waiting for all machines to exist, current count: %d, expected count: %d\n", len(machines), input.MachinesCount) - if len(machines) < input.MachinesCount { - return false - } - - for _, machine := range machines { - if machine.Status.NodeRef == nil { - fmt.Fprintf(GinkgoWriter, "machine %s no node ref", machine.Name) - return false - } - node := &corev1.Node{} - // This should not be an Expect(), because it may return error during machine deletion. - err := input.ClusterProxy.GetWorkloadCluster(ctx, input.Cluster.Namespace, input.Cluster.Name).GetClient().Get(ctx, types.NamespacedName{Name: machine.Status.NodeRef.Name, Namespace: machine.Status.NodeRef.Namespace}, node) - if err != nil { - fmt.Fprintf(GinkgoWriter, "failed to get node from ref: %v", err) - return false - } - if hasMatchingUnhealthyConditions(input.MachineHealthCheck, node.Status.Conditions) { - fmt.Fprintf(GinkgoWriter, "%s has not matching unhealthy condiditon", machine.Name) - return false - } - } - return true - }, intervals...).Should(BeTrue()) -} - -// hasMatchingUnhealthyConditions returns true if any node condition matches with machine health check unhealthy conditions. -func hasMatchingUnhealthyConditions(machineHealthCheck *clusterv1.MachineHealthCheck, nodeConditions []corev1.NodeCondition) bool { - fmt.Fprintf(GinkgoWriter, "checking for matching unhealthy conditions, machine health check: %v, node conditions: %v\n", machineHealthCheck.Spec.UnhealthyConditions, nodeConditions) - for _, unhealthyCondition := range machineHealthCheck.Spec.UnhealthyConditions { - for _, nodeCondition := range nodeConditions { - if nodeCondition.Type == unhealthyCondition.Type && nodeCondition.Status == unhealthyCondition.Status { - return true - } - } - } - return false -} From 15b2fe79a507e6314711623d4f030774512ed4ac Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Tue, 3 Sep 2024 12:39:52 +0200 Subject: [PATCH 28/32] address comments --- .../cluster-template-kcp-remediation.yaml | 4 ++-- .../infrastructure-docker/cluster-template-upgrades.yaml | 6 ++++-- test/e2e/data/infrastructure-docker/cluster-template.yaml | 4 ++-- test/e2e/helpers.go | 2 +- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml b/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml index 91e3496c..cbd88799 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-kcp-remediation.yaml @@ -84,7 +84,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-new + customImage: k8s-snap:dev-old --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -127,7 +127,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-new + customImage: k8s-snap:dev-old --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml index 90357d99..4a028727 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template-upgrades.yaml @@ -48,8 +48,8 @@ spec: --anonymous-auth: "true" replicas: ${CONTROL_PLANE_MACHINE_COUNT} version: ${KUBERNETES_VERSION} -# Initial template for the machine deployment --- +# Initial template for the control-plane deployment apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate metadata: @@ -59,8 +59,8 @@ spec: template: spec: customImage: k8s-snap:dev-old -# After upgrade template for the machine deployment --- +# After upgrade template for the control plane deployment apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate metadata: @@ -104,6 +104,7 @@ spec: kind: DockerMachineTemplate name: ${CLUSTER_NAME}-md-old-0 --- +# Initial template for the machine deployment apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate metadata: @@ -114,6 +115,7 @@ spec: spec: customImage: k8s-snap:dev-old --- +# After upgrade template for the machine deployment apiVersion: infrastructure.cluster.x-k8s.io/v1beta1 kind: DockerMachineTemplate metadata: diff --git a/test/e2e/data/infrastructure-docker/cluster-template.yaml b/test/e2e/data/infrastructure-docker/cluster-template.yaml index 91b3113c..dbb3136e 100644 --- a/test/e2e/data/infrastructure-docker/cluster-template.yaml +++ b/test/e2e/data/infrastructure-docker/cluster-template.yaml @@ -57,7 +57,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-new + customImage: k8s-snap:dev-old --- apiVersion: cluster.x-k8s.io/v1beta1 kind: MachineDeployment @@ -100,7 +100,7 @@ metadata: spec: template: spec: - customImage: k8s-snap:dev-new + customImage: k8s-snap:dev-old --- apiVersion: bootstrap.cluster.x-k8s.io/v1beta2 kind: CK8sConfigTemplate diff --git a/test/e2e/helpers.go b/test/e2e/helpers.go index 39dcbfe3..8c64ad84 100644 --- a/test/e2e/helpers.go +++ b/test/e2e/helpers.go @@ -661,7 +661,7 @@ func WaitForNodesReady(ctx context.Context, input WaitForNodesReadyInput) { } nodeReadyCount := 0 for _, node := range nodeList.Items { - fmt.Fprintf(GinkgoWriter, "versions: %s %s\n", semver.MajorMinor(node.Status.NodeInfo.KubeletVersion), semver.MajorMinor(input.KubernetesVersion)) + fmt.Fprintf(GinkgoWriter, "KubeletVersions: %s, KubernetesVersion: %s\n", semver.MajorMinor(node.Status.NodeInfo.KubeletVersion), semver.MajorMinor(input.KubernetesVersion)) if !(semver.MajorMinor(node.Status.NodeInfo.KubeletVersion) == semver.MajorMinor(input.KubernetesVersion)) { return false, nil } From acb7383c735ee585b8609ed8808513ddec3edf11 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 4 Sep 2024 08:54:03 +0200 Subject: [PATCH 29/32] readd tmate for debugging --- .github/workflows/e2e.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index e2bce00e..191f9e9d 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -110,3 +110,6 @@ jobs: - name: Run e2e tests run: | sudo GINKGO_FOCUS="${{ matrix.ginkgo_focus }}" SKIP_RESOURCE_CLEANUP=true make test-e2e + - name: Setup tmate session + if: ${{ failure() }} + uses: canonical/action-tmate@main From 4a1686304033911db3f4870b85c18c5340c4b762 Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 4 Sep 2024 12:15:03 +0200 Subject: [PATCH 30/32] fix worker image --- test/e2e/cluster_upgrade.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index dd54f95a..a4676c6e 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -168,7 +168,7 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp Cluster: result.Cluster, UpgradeVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), MachineDeployments: result.MachineDeployments, - UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-md-1.30-0", clusterName)), + UpgradeMachineTemplate: ptr.To(fmt.Sprintf("%s-md-new-0", clusterName)), WaitForMachinesToBeUpgraded: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"), }) From 32dfa3541dba2d06be0c8e3e201c730a7e0e198e Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 4 Sep 2024 13:35:55 +0200 Subject: [PATCH 31/32] only validate major minor version --- .github/workflows/e2e.yaml | 3 --- test/e2e/cluster_upgrade.go | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/e2e.yaml b/.github/workflows/e2e.yaml index 191f9e9d..e2bce00e 100644 --- a/.github/workflows/e2e.yaml +++ b/.github/workflows/e2e.yaml @@ -110,6 +110,3 @@ jobs: - name: Run e2e tests run: | sudo GINKGO_FOCUS="${{ matrix.ginkgo_focus }}" SKIP_RESOURCE_CLEANUP=true make test-e2e - - name: Setup tmate session - if: ${{ failure() }} - uses: canonical/action-tmate@main diff --git a/test/e2e/cluster_upgrade.go b/test/e2e/cluster_upgrade.go index a4676c6e..05ffd068 100644 --- a/test/e2e/cluster_upgrade.go +++ b/test/e2e/cluster_upgrade.go @@ -175,7 +175,7 @@ func ClusterUpgradeSpec(ctx context.Context, inputGetter func() ClusterUpgradeSp By("Waiting until nodes are ready") workloadProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, namespace.Name, result.Cluster.Name) workloadClient := workloadProxy.GetClient() - framework.WaitForNodesReady(ctx, framework.WaitForNodesReadyInput{ + WaitForNodesReady(ctx, WaitForNodesReadyInput{ Lister: workloadClient, KubernetesVersion: input.E2EConfig.GetVariable(KubernetesVersionUpgradeTo), Count: int(result.ExpectedTotalNodes()), From 1728168027c24bb0366f9c556458d32747f144ec Mon Sep 17 00:00:00 2001 From: Benjamin Schimke Date: Wed, 4 Sep 2024 16:06:13 +0200 Subject: [PATCH 32/32] pin kubernetes version for new image --- hack/build-e2e-images.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hack/build-e2e-images.sh b/hack/build-e2e-images.sh index c96a7e38..85f12de0 100755 --- a/hack/build-e2e-images.sh +++ b/hack/build-e2e-images.sh @@ -10,5 +10,5 @@ DIR="$(realpath "$(dirname "${0}")")" cd "${DIR}/../templates/docker" sudo docker build . -t k8s-snap:dev-old --build-arg BRANCH=main --build-arg KUBERNETES_VERSION=v1.29.6 -sudo docker build . -t k8s-snap:dev-new --build-arg BRANCH=main +sudo docker build . -t k8s-snap:dev-new --build-arg BRANCH=main --build-arg KUBERNETES_VERSION=v1.30.4 cd -