Skip to content

Commit

Permalink
Fix KOps Integration Test (#3140)
Browse files Browse the repository at this point in the history
* scripts lib integration: add more logging steps

* scripts lib cluster: increase kops control plane node size
  • Loading branch information
dshehbaj authored and orsenthil committed Dec 10, 2024
1 parent 5303211 commit c4642c7
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 19 deletions.
3 changes: 3 additions & 0 deletions scripts/lib/cluster.sh
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,9 @@ function up-kops-cluster {
--networking amazonvpc \
--node-count 2 \
--node-size c5.xlarge \
--control-plane-count 3 \
--control-plane-size c5.xlarge \
--control-plane-zones ${AWS_DEFAULT_REGION}a,${AWS_DEFAULT_REGION}b \
--ssh-public-key=~/.ssh/devopsinuse.pub \
--kubernetes-version ${K8S_VERSION} \
--image ${HOST_IMAGE_SSM_PARAMETER} \
Expand Down
85 changes: 66 additions & 19 deletions scripts/lib/integration.sh
Original file line number Diff line number Diff line change
@@ -1,32 +1,79 @@
#######################################
# Prints a snapshot of the cluster: nodes, kube-system pods and the aws-node
# DaemonSet description.
# Arguments: $1 - label inserted in the section banner ("Before" / "After")
# Outputs:   kubectl output on stdout
#######################################
function _kops_print_cluster_state() {
  echo "=== Cluster State $1 Tests ==="
  echo "Nodes:"
  kubectl get nodes -o wide
  echo "Pods in kube-system:"
  kubectl get pods -n kube-system
  echo "CNI DaemonSet status:"
  kubectl describe ds aws-node -n=kube-system
}

#######################################
# Runs the Kubernetes conformance e2e suites against the cluster referenced by
# ~/.kube/config: first the non-serial conformance tests, then the [Serial]
# ones.
# Globals:
#   K8S_VERSION        (read)    version used to build the e2e download URL
#   TEST_CONFIG_PATH   (read)    manifest applied with `kubectl apply -f`
#   KUBECONFIG         (written) exported as ~/.kube/config
#   START, TEST_START  (written) $SECONDS timestamps
#   TEST_EXIT_CODE     (written) first non-zero suite status, or 0
#   TEST_DURATION, KOPS_TEST_DURATION (written) elapsed seconds
# Side effects:
#   Enables `pipefail` in the calling shell; extracts /tmp/e2e.test.
# Returns:
#   0 when both suites pass, otherwise the exit status of the first suite
#   that failed.
#######################################
function run_kops_conformance() {
  START=$SECONDS

  export KUBECONFIG=~/.kube/config

  echo "=== Setting up test environment ==="
  echo "Current directory: $(pwd)"
  echo "KUBECONFIG path: $KUBECONFIG"
  echo "K8S_VERSION: $K8S_VERSION"

  # Download the e2e test binary once, before anything needs it.
  echo "=== Downloading e2e test binary ==="
  wget -qO- "https://dl.k8s.io/v${K8S_VERSION}/kubernetes-test-linux-amd64.tar.gz" \
    | tar -zxvf - --strip-components=3 -C /tmp kubernetes/test/bin/e2e.test

  # Apply the CNI config and wait until the DaemonSet stops reporting
  # "Available Pods: 0".
  echo "=== Applying CNI configuration ==="
  kubectl apply -f "$TEST_CONFIG_PATH"
  echo "Waiting for aws-node daemonset to be ready..."
  sleep 5
  while [[ -n "$(kubectl describe ds aws-node -n=kube-system | grep "Available Pods: 0")" ]]; do
    sleep 5
    echo "Still waiting for daemonset update..."
    kubectl get ds aws-node -n kube-system
  done
  echo "CNI DaemonSet is ready!"

  _kops_print_cluster_state "Before"

  # Run the focused set of tests with detailed logging.
  TEST_START=$SECONDS
  set -o pipefail

  # Each suite's status is captured with `|| var=$?` so that (a) a failure is
  # never overwritten by a later command and (b) a caller running under
  # `set -e` still reaches the reporting and ENI-wait steps below.
  local non_serial_status=0
  local serial_status=0

  # NOTE(review): "[StatefulSetBasic]" below is not backslash-escaped, so the
  # regex engine presumably treats it as a character class rather than the
  # literal tag - confirm whether "\[StatefulSetBasic\]" was intended.
  /tmp/e2e.test --ginkgo.focus="Conformance" --ginkgo.timeout=120m --kubeconfig="$KUBECONFIG" --ginkgo.v --ginkgo.trace --ginkgo.flake-attempts 8 \
    --ginkgo.skip="(works for CRD with validation schema)|(ServiceAccountIssuerDiscovery should support OIDC discovery of service account issuer)|(should support remote command execution over websockets)|(should support retrieving logs from the container over websockets)|(Basic StatefulSet functionality [StatefulSetBasic])|\[Slow\]|\[Serial\]" \
    || non_serial_status=$?

  /tmp/e2e.test --ginkgo.focus="\[Serial\].*Conformance" --ginkgo.timeout=120m --kubeconfig="$KUBECONFIG" --ginkgo.v --ginkgo.trace --ginkgo.flake-attempts 8 \
    --ginkgo.skip="(ServiceAccountIssuerDiscovery should support OIDC discovery of service account issuer)|(should support remote command execution over websockets)|(should support retrieving logs from the container over websockets)|\[Slow\]" \
    || serial_status=$?

  # Report the first failing suite's status; 0 only when both passed.
  TEST_EXIT_CODE=$non_serial_status
  if (( TEST_EXIT_CODE == 0 )); then
    TEST_EXIT_CODE=$serial_status
  fi
  TEST_DURATION=$((SECONDS - TEST_START))

  echo "=== Test Results ==="
  echo "Test duration: $TEST_DURATION seconds"
  echo "Exit code: $TEST_EXIT_CODE"
  if (( TEST_EXIT_CODE == 0 )); then
    echo "Kops conformance tests ran successfully!"
  else
    echo "Kops conformance tests failed (non-serial exit code: $non_serial_status, serial exit code: $serial_status)" >&2
  fi

  _kops_print_cluster_state "After"

  KOPS_TEST_DURATION=$((SECONDS - START))
  echo "=== Test Run Complete ==="
  echo "TIMELINE: KOPS tests took $KOPS_TEST_DURATION seconds"

  # Workaround to avoid ENI leakage during cluster deletion
  # See: https://github.com/aws/amazon-vpc-cni-k8s/issues/1223
  echo "Waiting for 240 seconds to avoid ENI leakage..."
  sleep 240

  # Propagate the test result to the caller.
  return "$TEST_EXIT_CODE"
}

function build_and_push_image(){
function build_and_push_image() {
command=$1
args=$2
START=$SECONDS
Expand Down

0 comments on commit c4642c7

Please sign in to comment.