From 85dc2146f133cb96cf0bae76797d724a12ddc64c Mon Sep 17 00:00:00 2001 From: Julija Milos Date: Tue, 13 Jun 2023 12:36:07 +0000 Subject: [PATCH 1/3] resctrl fix --- resctrl/collector.go | 8 +++- resctrl/utils.go | 108 ++++++++++++++++++++++++------------------ resctrl/utils_test.go | 20 ++------ 3 files changed, 73 insertions(+), 63 deletions(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index e5e71a7e48..4a89604cd9 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -20,6 +20,7 @@ package resctrl import ( "fmt" + "math/rand" "os" "path/filepath" "strings" @@ -62,12 +63,14 @@ func (c *collector) setup() error { c.running = true } go func() { + time.Sleep(time.Duration(rand.Float64() * float64(1*c.interval))) + var tStart time.Time for { - time.Sleep(c.interval) c.mu.Lock() if c.destroyed { break } + tStart = time.Now() klog.V(5).Infof("Trying to check %q containers control group.", c.id) if c.running { err = c.checkMonitoringGroup() @@ -80,9 +83,12 @@ func (c *collector) setup() error { if err != nil { c.running = false klog.Errorf("Failed to setup container %q resctrl collector: %s \n Trying again in next intervals.", c.id, err) + } else { + c.running = true } } c.mu.Unlock() + time.Sleep(c.interval - time.Now().Sub(tStart)) } }() } else { diff --git a/resctrl/utils.go b/resctrl/utils.go index e26b0a8a27..224e3d54fa 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -77,6 +77,9 @@ var ( modeFileName: {}, sizeFileName: {}, } + + errNotEnoughPIDs = fmt.Errorf("there should be all pids in group") + errTooManyPIDs = fmt.Errorf("group should have container pids only") ) func Setup() error { @@ -105,7 +108,7 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str return rootResctrl, nil } - pids, err := getContainerPids() + pids, err := getPids(containerName) if err != nil { return "", err } @@ -114,60 +117,79 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str return "", fmt.Errorf("couldn't obtain %q container pids: there is no pids in cgroup", containerName) } + if !inHostNamespace { + processPath = "/rootfs/proc" + } + var processThreads []string + for _, pid := range pids { + processThreads, err = getAllProcessThreads(filepath.Join(processPath, strconv.Itoa(pid), processTask)) + if err != nil { + return "", err + } + } + // Firstly, find the control group to which the container belongs. // Consider the root group. - controlGroupPath, err := findGroup(rootResctrl, pids, true, false) - if err != nil { + controlGroupPath, err := findGroup(rootResctrl, processThreads, true, false) + if err != nil && err != errNotEnoughPIDs && err != errTooManyPIDs { return "", fmt.Errorf("%q %q: %q", noControlGroupFoundError, containerName, err) } if controlGroupPath == "" { return "", fmt.Errorf("%q %q", noControlGroupFoundError, containerName) } - // Check if there is any monitoring group. - monGroupPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), pids, false, true) - if err != nil { - return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err) + // Remove leading prefix. + // e.g. /my/container -> my/container + if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { + containerName = containerName[1:] } + // Add own prefix and use `-` instead `/`. + // e.g. my/container -> cadvisor-my-container + properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1)) + monGroupPath := filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName) - // Prepare new one if not exists. - if monGroupPath == "" { - // Remove leading prefix. - // e.g. /my/container -> my/container - if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { - containerName = containerName[1:] + createNew := false + + // Check if there is any monitoring group. + existingPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), processThreads, false, true) + if err != nil { + if err != errNotEnoughPIDs && err != errTooManyPIDs { + return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err) } - // Add own prefix and use `-` instead `/`. - // e.g. my/container -> cadvisor-my-container - properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1)) - monGroupPath = filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName) + rmErr := os.Remove(monGroupPath) + if rmErr != nil && !os.IsNotExist(rmErr) { + return "", fmt.Errorf("couldn't clean up monitoring group matching %q container: %v", containerName, rmErr) + } + if existingPath != monGroupPath { + rmErr = os.Remove(existingPath) + if rmErr != nil && !os.IsNotExist(rmErr) { + return "", fmt.Errorf("couldn't clean up monitoring group matching %q container: %v", containerName, rmErr) + } + } + createNew = true + } + // Prepare new one if not exists. + if createNew || existingPath == "" { err = os.MkdirAll(monGroupPath, os.ModePerm) if err != nil { return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err) } - - if !inHostNamespace { - processPath = "/rootfs/proc" - } - - for _, pid := range pids { - processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask)) + for _, thread := range processThreads { + treadInt, err := strconv.Atoi(thread) if err != nil { - return "", err + return "", fmt.Errorf("couldn't parse %q: %w", thread, err) } - for _, thread := range processThreads { - err = intelrdt.WriteIntelRdtTasks(monGroupPath, thread) - if err != nil { - secondError := os.Remove(monGroupPath) - if secondError != nil { - return "", fmt.Errorf( - "coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v", - containerName, err, containerName, secondError) - } - return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err) + err = intelrdt.WriteIntelRdtTasks(monGroupPath, treadInt) + if err != nil { + secondError := os.Remove(monGroupPath) + if secondError != nil { + return "", fmt.Errorf( + "coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v", + containerName, err, containerName, secondError) } + return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err) } } } @@ -190,8 +212,8 @@ func getPids(containerName string) ([]int, error) { // getAllProcessThreads obtains all available processes from directory. // e.g. ls /proc/4215/task/ -> 4215, 4216, 4217, 4218 // func will return [4215, 4216, 4217, 4218]. -func getAllProcessThreads(path string) ([]int, error) { - processThreads := make([]int, 0) +func getAllProcessThreads(path string) ([]string, error) { + processThreads := make([]string, 0) threadDirs, err := ioutil.ReadDir(path) if err != nil { @@ -199,11 +221,7 @@ func getAllProcessThreads(path string) ([]int, error) { } for _, dir := range threadDirs { - pid, err := strconv.Atoi(dir.Name()) - if err != nil { - return nil, fmt.Errorf("couldn't parse %q dir: %v", dir.Name(), err) - } - processThreads = append(processThreads, pid) + processThreads = append(processThreads, dir.Name()) } return processThreads, nil @@ -233,7 +251,7 @@ func findGroup(group string, pids []string, includeGroup bool, exclusive bool) ( for _, path := range availablePaths { groupFound, err := arePIDsInGroup(path, pids, exclusive) if err != nil { - return "", err + return path, err } if groupFound { return path, nil @@ -260,7 +278,7 @@ func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) { if !ok { // There are missing pids within group. if any { - return false, fmt.Errorf("there should be all pids in group") + return false, errNotEnoughPIDs } return false, nil } @@ -270,7 +288,7 @@ func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) { // Check if there should be only passed pids in group. if exclusive { if len(tasks) != len(pids) { - return false, fmt.Errorf("group should have container pids only") + return false, errTooManyPIDs } } diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index f9deaf83d8..2e123e2ee0 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -406,15 +406,6 @@ func TestGetAllProcessThreads(t *testing.T) { filepath.Join(path, "4215", processTask, "4218"), touchDir, }, - // invalid - { - filepath.Join(path, "301", processTask, "301"), - touchDir, - }, - { - filepath.Join(path, "301", processTask, "incorrect"), - touchDir, - }, } for _, file := range files { @@ -429,19 +420,14 @@ func TestGetAllProcessThreads(t *testing.T) { var testCases = []struct { path string - expected []int + expected []string err string }{ { filepath.Join(mockedProcFs, "4215", processTask), - []int{4215, 4216, 4217, 4218}, + []string{"4215", "4216", "4217", "4218"}, "", }, - { - filepath.Join(mockedProcFs, "301", processTask), - nil, - "couldn't parse \"incorrect\" dir: strconv.Atoi: parsing \"incorrect\": invalid syntax", - }, } for _, test := range testCases { @@ -516,7 +502,7 @@ func TestFindGroup(t *testing.T) { []string{"7"}, false, true, - "", + filepath.Join(rootResctrl, "m1", monGroupsDirName, "test"), "group should have container pids only", }, } From 65012c38d30e20fe2424834bd8b3902caabead0f Mon Sep 17 00:00:00 2001 From: Julija Milos Date: Tue, 13 Jun 2023 12:56:44 +0000 Subject: [PATCH 2/3] use --- resctrl/collector.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resctrl/collector.go b/resctrl/collector.go index 4a89604cd9..7ad4231bd4 100644 --- a/resctrl/collector.go +++ b/resctrl/collector.go @@ -88,7 +88,7 @@ func (c *collector) setup() error { } } c.mu.Unlock() - time.Sleep(c.interval - time.Now().Sub(tStart)) + time.Sleep(c.interval - time.Since(tStart)) } }() } else { From 24a1cbfb3b03c5e4bb7d20d274bc56b9b2b6b192 Mon Sep 17 00:00:00 2001 From: Julija Milos Date: Wed, 28 Jun 2023 11:58:48 +0000 Subject: [PATCH 3/3] wip --- resctrl/utils.go | 3 ++- resctrl/utils_test.go | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/resctrl/utils.go b/resctrl/utils.go index 224e3d54fa..467be94114 100644 --- a/resctrl/utils.go +++ b/resctrl/utils.go @@ -122,10 +122,11 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str } var processThreads []string for _, pid := range pids { - processThreads, err = getAllProcessThreads(filepath.Join(processPath, strconv.Itoa(pid), processTask)) + pt, err := getAllProcessThreads(filepath.Join(processPath, strconv.Itoa(pid), processTask)) if err != nil { return "", err } + processThreads = append(processThreads, pt...) } // Firstly, find the control group to which the container belongs. diff --git a/resctrl/utils_test.go b/resctrl/utils_test.go index 2e123e2ee0..b41da4316d 100644 --- a/resctrl/utils_test.go +++ b/resctrl/utils_test.go @@ -406,6 +406,11 @@ func TestGetAllProcessThreads(t *testing.T) { filepath.Join(path, "4215", processTask, "4218"), touchDir, }, + // invalid + { + filepath.Join(path, "301", processTask, "301"), + touchDir, + }, } for _, file := range files {