-
Notifications
You must be signed in to change notification settings - Fork 2.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Resctrl collector fixes #3326
base: master
Are you sure you want to change the base?
Resctrl collector fixes #3326
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -76,6 +76,9 @@ var ( | |||||
modeFileName: {}, | ||||||
sizeFileName: {}, | ||||||
} | ||||||
|
||||||
errNotEnoughPIDs = fmt.Errorf("there should be all pids in group") | ||||||
errTooManyPIDs = fmt.Errorf("group should have container pids only") | ||||||
) | ||||||
|
||||||
func Setup() error { | ||||||
|
@@ -104,7 +107,7 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str | |||||
return rootResctrl, nil | ||||||
} | ||||||
|
||||||
pids, err := getContainerPids() | ||||||
pids, err := getPids(containerName) | ||||||
if err != nil { | ||||||
return "", err | ||||||
} | ||||||
|
@@ -113,60 +116,80 @@ func prepareMonitoringGroup(containerName string, getContainerPids func() ([]str | |||||
return "", fmt.Errorf("couldn't obtain %q container pids: there is no pids in cgroup", containerName) | ||||||
} | ||||||
|
||||||
if !inHostNamespace { | ||||||
processPath = "/rootfs/proc" | ||||||
} | ||||||
var processThreads []string | ||||||
for _, pid := range pids { | ||||||
pt, err := getAllProcessThreads(filepath.Join(processPath, strconv.Itoa(pid), processTask)) | ||||||
if err != nil { | ||||||
return "", err | ||||||
} | ||||||
processThreads = append(processThreads, pt...) | ||||||
} | ||||||
|
||||||
// Firstly, find the control group to which the container belongs. | ||||||
// Consider the root group. | ||||||
controlGroupPath, err := findGroup(rootResctrl, pids, true, false) | ||||||
if err != nil { | ||||||
controlGroupPath, err := findGroup(rootResctrl, processThreads, true, false) | ||||||
if err != nil && err != errNotEnoughPIDs && err != errTooManyPIDs { | ||||||
return "", fmt.Errorf("%q %q: %q", noControlGroupFoundError, containerName, err) | ||||||
} | ||||||
if controlGroupPath == "" { | ||||||
return "", fmt.Errorf("%q %q", noControlGroupFoundError, containerName) | ||||||
} | ||||||
|
||||||
// Check if there is any monitoring group. | ||||||
monGroupPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), pids, false, true) | ||||||
if err != nil { | ||||||
return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err) | ||||||
// Remove leading prefix. | ||||||
// e.g. /my/container -> my/container | ||||||
if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { | ||||||
containerName = containerName[1:] | ||||||
} | ||||||
// Add own prefix and use `-` instead `/`. | ||||||
// e.g. my/container -> cadvisor-my-container | ||||||
properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1)) | ||||||
monGroupPath := filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName) | ||||||
|
||||||
// Prepare new one if not exists. | ||||||
if monGroupPath == "" { | ||||||
// Remove leading prefix. | ||||||
// e.g. /my/container -> my/container | ||||||
if len(containerName) >= minContainerNameLen && containerName[0] == containerPrefix { | ||||||
containerName = containerName[1:] | ||||||
createNew := false | ||||||
|
||||||
// Check if there is any monitoring group. | ||||||
existingPath, err := findGroup(filepath.Join(controlGroupPath, monGroupsDirName), processThreads, false, true) | ||||||
if err != nil { | ||||||
if err != errNotEnoughPIDs && err != errTooManyPIDs { | ||||||
return "", fmt.Errorf("couldn't find monitoring group matching %q container: %v", containerName, err) | ||||||
} | ||||||
|
||||||
// Add own prefix and use `-` instead `/`. | ||||||
// e.g. my/container -> cadvisor-my-container | ||||||
properContainerName := fmt.Sprintf("%s-%s", monGroupPrefix, strings.Replace(containerName, "/", "-", -1)) | ||||||
monGroupPath = filepath.Join(controlGroupPath, monitoringGroupDir, properContainerName) | ||||||
rmErr := os.Remove(monGroupPath) | ||||||
if rmErr != nil && !os.IsNotExist(rmErr) { | ||||||
return "", fmt.Errorf("couldn't clean up monitoring group matching %q container: %v", containerName, rmErr) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Wrap the error, please. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
if existingPath != monGroupPath { | ||||||
rmErr = os.Remove(existingPath) | ||||||
if rmErr != nil && !os.IsNotExist(rmErr) { | ||||||
return "", fmt.Errorf("couldn't clean up monitoring group matching %q container: %v", containerName, rmErr) | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Wrap the error, please. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Do you have a specific error type in mind? Please let me know how to improve this error. There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
} | ||||||
} | ||||||
createNew = true | ||||||
} | ||||||
|
||||||
// Prepare new one if not exists. | ||||||
if createNew || existingPath == "" { | ||||||
err = os.MkdirAll(monGroupPath, os.ModePerm) | ||||||
if err != nil { | ||||||
return "", fmt.Errorf("couldn't create monitoring group directory for %q container: %w", containerName, err) | ||||||
} | ||||||
|
||||||
if !inHostNamespace { | ||||||
processPath = "/rootfs/proc" | ||||||
} | ||||||
|
||||||
for _, pid := range pids { | ||||||
processThreads, err := getAllProcessThreads(filepath.Join(processPath, pid, processTask)) | ||||||
for _, thread := range processThreads { | ||||||
treadInt, err := strconv.Atoi(thread) | ||||||
if err != nil { | ||||||
return "", err | ||||||
return "", fmt.Errorf("couldn't parse %q: %w", thread, err) | ||||||
} | ||||||
for _, thread := range processThreads { | ||||||
err = intelrdt.WriteIntelRdtTasks(monGroupPath, thread) | ||||||
if err != nil { | ||||||
secondError := os.Remove(monGroupPath) | ||||||
if secondError != nil { | ||||||
return "", fmt.Errorf( | ||||||
"coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v", | ||||||
containerName, err, containerName, secondError) | ||||||
} | ||||||
return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err) | ||||||
err = intelrdt.WriteIntelRdtTasks(monGroupPath, treadInt) | ||||||
if err != nil { | ||||||
secondError := os.Remove(monGroupPath) | ||||||
if secondError != nil { | ||||||
return "", fmt.Errorf( | ||||||
"coudn't assign pids to %q container monitoring group: %w \n couldn't clear %q monitoring group: %v", | ||||||
containerName, err, containerName, secondError) | ||||||
} | ||||||
return "", fmt.Errorf("coudn't assign pids to %q container monitoring group: %w", containerName, err) | ||||||
Comment on lines
-149
to
+192
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Correct me if I'm wrong but:
Functionally these two approaches are identical. In the PR description you wrote:
, but threads ( I might be missing something, but I can't understand where the bug that you are trying to fix is. I would appreciate a more detailed explanation that will help me understand your reasoning. A test case failing with The Old Way and passing with The New Way would be perfect! |
||||||
} | ||||||
} | ||||||
} | ||||||
|
@@ -189,20 +212,16 @@ func getPids(containerName string) ([]int, error) { | |||||
// getAllProcessThreads obtains all available processes from directory. | ||||||
// e.g. ls /proc/4215/task/ -> 4215, 4216, 4217, 4218 | ||||||
// func will return [4215, 4216, 4217, 4218]. | ||||||
func getAllProcessThreads(path string) ([]int, error) { | ||||||
processThreads := make([]int, 0) | ||||||
func getAllProcessThreads(path string) ([]string, error) { | ||||||
processThreads := make([]string, 0) | ||||||
|
||||||
threadDirs, err := os.ReadDir(path) | ||||||
if err != nil { | ||||||
return processThreads, err | ||||||
} | ||||||
|
||||||
for _, dir := range threadDirs { | ||||||
pid, err := strconv.Atoi(dir.Name()) | ||||||
if err != nil { | ||||||
return nil, fmt.Errorf("couldn't parse %q dir: %v", dir.Name(), err) | ||||||
} | ||||||
processThreads = append(processThreads, pid) | ||||||
processThreads = append(processThreads, dir.Name()) | ||||||
iwankgb marked this conversation as resolved.
Show resolved
Hide resolved
JulSenko marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
|
||||||
return processThreads, nil | ||||||
|
@@ -232,7 +251,7 @@ func findGroup(group string, pids []string, includeGroup bool, exclusive bool) ( | |||||
for _, path := range availablePaths { | ||||||
groupFound, err := arePIDsInGroup(path, pids, exclusive) | ||||||
if err != nil { | ||||||
return "", err | ||||||
return path, err | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. Why do you want to return a real value instead of the zero value when you return an error? There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. This is not ideal, but we need the path value for an additional check later if error type is There was a problem hiding this comment. Choose a reason for hiding this comment The reason will be displayed to describe this comment to others. Learn more. If this is supposed to be information about an error, then it should be part of the error struct rather than a value returned from a function. I would expect you to check if
JulSenko marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
} | ||||||
if groupFound { | ||||||
return path, nil | ||||||
|
@@ -259,7 +278,7 @@ func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) { | |||||
if !ok { | ||||||
// There are missing pids within group. | ||||||
if any { | ||||||
return false, fmt.Errorf("there should be all pids in group") | ||||||
return false, errNotEnoughPIDs | ||||||
} | ||||||
return false, nil | ||||||
} | ||||||
|
@@ -269,7 +288,7 @@ func arePIDsInGroup(path string, pids []string, exclusive bool) (bool, error) { | |||||
// Check if there should be only passed pids in group. | ||||||
if exclusive { | ||||||
if len(tasks) != len(pids) { | ||||||
return false, fmt.Errorf("group should have container pids only") | ||||||
return false, errTooManyPIDs | ||||||
} | ||||||
} | ||||||
|
||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is
getContainerPids()
used (it does not seem to be)? If not, would it be possible to drop the argument? There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It should be, but it touches quite a bunch of definitions. I'll follow up on this if that works for you:)