Skip to content

Commit

Permalink
more debugging
Browse files Browse the repository at this point in the history
  • Loading branch information
bschimke95 committed Jul 4, 2024
1 parent c19dd3f commit ed20544
Show file tree
Hide file tree
Showing 3 changed files with 197 additions and 4 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/e2e.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,9 +47,7 @@ jobs:
pwd
- name: Run e2e tests
run: |
ls
pwd
sudo make test-e2e
sudo GINKGO_FOCUS="Workload cluster scaling" SKIP_RESOURCE_CLEANUP=true make test-e2e
- name: Setup tmate session
if: ${{ failure() }}
uses: canonical/action-tmate@main
195 changes: 195 additions & 0 deletions test/e2e/docker_logcollector.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,195 @@
// Copied from CAPI e2e framework and modified to add pebble logs.

/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package e2e

import (
"bytes"
"context"
"fmt"
"os"
osExec "os/exec"
"path/filepath"
"strings"

kerrors "k8s.io/apimachinery/pkg/util/errors"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/kind/pkg/errors"

Check failure on line 32 in test/e2e/docker_logcollector.go

View workflow job for this annotation

GitHub Actions / Unit Tests & Code Quality

import 'sigs.k8s.io/kind/pkg/errors' is not allowed from list 'main' (depguard)

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
"sigs.k8s.io/cluster-api/test/infrastructure/container"
)

// DockerLogCollector collects logs from a CAPD workload cluster.
// It is an empty struct: it carries no state of its own.
type DockerLogCollector struct{}

// machineContainerName returns the container name for a machine, following the
// same rule CAPD uses: "<cluster>-<machine>".
// NOTE: when the machine name already starts with the cluster name, the machine
// name is returned unchanged; this avoids "sethostname: invalid argument" errors
// caused by container names that are too long.
func machineContainerName(cluster, machine string) string {
	name := machine
	if !strings.HasPrefix(machine, cluster) {
		name = fmt.Sprintf("%s-%s", cluster, machine)
	}
	return name
}

// CollectMachineLog collects the logs of the container backing Machine m and
// writes them below outputPath. The container name is derived from the
// machine's cluster name and its own name.
func (k DockerLogCollector) CollectMachineLog(ctx context.Context, _ client.Client, m *clusterv1.Machine, outputPath string) error {
	nodeContainer := machineContainerName(m.Spec.ClusterName, m.Name)

	dockerClient, err := container.NewDockerClient()
	if err != nil {
		return err
	}

	// The runtime travels in the context so collectLogsFromNode can retrieve it.
	return k.collectLogsFromNode(container.RuntimeInto(ctx, dockerClient), outputPath, nodeContainer)
}

// CollectMachinePoolLog collects the logs of every node referenced in
// m.Status.NodeRefs, writing each node's logs into its own sub-directory of
// outputPath (named after the node). Failures for individual nodes do not stop
// the collection; they are returned together as an aggregate error.
func (k DockerLogCollector) CollectMachinePoolLog(ctx context.Context, _ client.Client, m *expv1.MachinePool, outputPath string) error {
	dockerClient, err := container.NewDockerClient()
	if err != nil {
		return err
	}
	ctx = container.RuntimeInto(ctx, dockerClient)

	var failures []error
	for i := range m.Status.NodeRefs {
		nodeName := m.Status.NodeRefs[i].Name
		nodeContainer := machineContainerName(m.Spec.ClusterName, nodeName)
		nodeErr := k.collectLogsFromNode(ctx, filepath.Join(outputPath, nodeName), nodeContainer)
		if nodeErr == nil {
			continue
		}
		// Log collection is best effort: remember the failure and move on to the next node.
		failures = append(failures, nodeErr)
	}

	return kerrors.NewAggregate(failures)
}

// CollectInfrastructureLogs writes the debug information of the cluster's load
// balancer container ("<cluster name>-lb") to "<cluster name>-lb.log" below
// outputPath.
func (k DockerLogCollector) CollectInfrastructureLogs(ctx context.Context, _ client.Client, c *clusterv1.Cluster, outputPath string) error {
	dockerClient, err := container.NewDockerClient()
	if err != nil {
		return err
	}
	ctx = container.RuntimeInto(ctx, dockerClient)

	lbName := fmt.Sprintf("%s-lb", c.GetName())

	logFile, err := fileOnHost(filepath.Join(outputPath, lbName+".log"))
	if err != nil {
		return err
	}
	defer logFile.Close()

	return dockerClient.ContainerDebugInfo(ctx, lbName, logFile)
}

func (k DockerLogCollector) collectLogsFromNode(ctx context.Context, outputPath string, containerName string) error {
containerRuntime, err := container.RuntimeFrom(ctx)
if err != nil {
return errors.Wrap(err, "Failed to collect logs from node")
}

execToPathFn := func(outputFileName, command string, args ...string) func() error {
return func() error {
f, err := fileOnHost(filepath.Join(outputPath, outputFileName))
if err != nil {
return err
}
defer f.Close()
execConfig := container.ExecContainerInput{
OutputBuffer: f,
}
return containerRuntime.ExecContainer(ctx, containerName, &execConfig, command, args...)
}
}
copyDirFn := func(containerDir, dirName string) func() error {
return func() error {
f, err := os.CreateTemp("", containerName)
if err != nil {
return err
}

tempfileName := f.Name()
outputDir := filepath.Join(outputPath, dirName)

defer os.Remove(tempfileName)

var execErr string
execConfig := container.ExecContainerInput{
OutputBuffer: f,
ErrorBuffer: bytes.NewBufferString(execErr),
}
err = containerRuntime.ExecContainer(
ctx,
containerName,
&execConfig,
"tar", "--hard-dereference", "--dereference", "--directory", containerDir, "--create", "--file", "-", ".",
)
if err != nil {
return errors.Wrapf(err, execErr)
}

err = os.MkdirAll(outputDir, os.ModePerm)
if err != nil {
return err
}

return osExec.Command("tar", "--extract", "--file", tempfileName, "--directory", outputDir).Run() //nolint:gosec // We don't care about command injection here.

Check failure on line 151 in test/e2e/docker_logcollector.go

View workflow job for this annotation

GitHub Actions / Unit Tests & Code Quality

directive `//nolint:gosec // We don't care about command injection here.` is unused for linter "gosec" (nolintlint)
}
}
return errors.AggregateConcurrent([]func() error{
execToPathFn(
"journal.log",
"journalctl", "--no-pager", "--output=short-precise",
),
execToPathFn(
"kern.log",
"journalctl", "--no-pager", "--output=short-precise", "-k",
),
execToPathFn(
"kubelet-version.txt",
"kubelet", "--version",
),
execToPathFn(
"kubelet.log",
"journalctl", "--no-pager", "--output=short-precise", "-u", "kubelet.service",
),
execToPathFn(
"containerd-info.txt",
"crictl", "info",
),
execToPathFn(
"containerd.log",
"journalctl", "--no-pager", "--output=short-precise", "-u", "containerd.service",
),
execToPathFn(
"pebble.log",
"pebble", "logs", "-n", "all",
),
copyDirFn("/var/log/pods", "pods"),
})
}

// fileOnHost creates (or truncates) the file at path and returns it opened for
// writing. Missing parent directories are created first with os.ModePerm.
// On failure the returned file is nil and the underlying error is returned.
func fileOnHost(path string) (*os.File, error) {
	if err := os.MkdirAll(filepath.Dir(path), os.ModePerm); err != nil {
		return nil, err
	}
	return os.Create(path)
}
2 changes: 1 addition & 1 deletion test/e2e/e2e_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -170,7 +170,7 @@ var _ = SynchronizedBeforeSuite(func() []byte {
kubeconfigPath := parts[3]

e2eConfig = loadE2EConfig(configPath)
bootstrapClusterProxy = framework.NewClusterProxy("bootstrap", kubeconfigPath, initScheme(), framework.WithMachineLogCollector(framework.DockerLogCollector{}))
bootstrapClusterProxy = framework.NewClusterProxy("bootstrap", kubeconfigPath, initScheme(), framework.WithMachineLogCollector(DockerLogCollector{}))
})

// Using a SynchronizedAfterSuite for controlling how to delete resources shared across ParallelNodes (~ginkgo threads).
Expand Down

0 comments on commit ed20544

Please sign in to comment.