From 2d1efd62e40756f8a0cc20208a2dbc160561762b Mon Sep 17 00:00:00 2001 From: Rouzip <1226015390@qq.com> Date: Tue, 14 May 2024 12:10:56 +0800 Subject: [PATCH] fix comments Signed-off-by: Rouzip <1226015390@qq.com> --- pkg/features/koordlet_features.go | 7 ++++ pkg/koordlet/metrics/resctrl.go | 8 ++--- .../collectors/resctrl/resctrl_collector.go | 23 +++++++++---- .../resctrl/resctrl_collector_test.go | 4 --- .../metricsadvisor/framework/options.go | 1 - .../metricsadvisor/metrics_advisor.go | 1 - pkg/koordlet/resourceexecutor/resctrl.go | 6 ++-- pkg/koordlet/util/system/resctrl.go | 34 +++++++++++-------- 8 files changed, 50 insertions(+), 34 deletions(-) diff --git a/pkg/features/koordlet_features.go b/pkg/features/koordlet_features.go index cb76621ac3..d60cb600c1 100644 --- a/pkg/features/koordlet_features.go +++ b/pkg/features/koordlet_features.go @@ -116,6 +116,12 @@ const ( // Libpfm4 enables libpfm4 feature of koordlet. Libpfm4 featuregate.Feature = "Libpfm4" + // owner: @Rouzip + // alpha: v0.1 + // + // ResctrlCollector enables resctrl collector feature of koordlet. + ResctrlCollector featuregate.Feature = "ResctrlCollector" + // owner: @songtao98 @zwzhang0107 // alpha: v1.0 // @@ -161,6 +167,7 @@ var ( CPUBurst: {Default: true, PreRelease: featuregate.Beta}, SystemConfig: {Default: false, PreRelease: featuregate.Alpha}, RdtResctrl: {Default: true, PreRelease: featuregate.Beta}, + ResctrlCollector: {Default: false, PreRelease: featuregate.Alpha}, CgroupReconcile: {Default: false, PreRelease: featuregate.Alpha}, NodeTopologyReport: {Default: true, PreRelease: featuregate.Beta}, Accelerators: {Default: false, PreRelease: featuregate.Alpha}, diff --git a/pkg/koordlet/metrics/resctrl.go b/pkg/koordlet/metrics/resctrl.go index 202d77a85e..f340c25fe5 100644 --- a/pkg/koordlet/metrics/resctrl.go +++ b/pkg/koordlet/metrics/resctrl.go @@ -26,10 +26,10 @@ const ( ResourceTypeLLC = "llc" ResourceTypeMB = "mb" - ResctrlResourceType = "resctrl_resource_type" - ResctrlCacheId = "resctrl_cacheid" - ResctrlQos = "resctrl_qos" - ResctrlMbType = "resctrl_mb_type" + ResctrlResourceType = "resource_type" + ResctrlCacheId = "cache_id" + ResctrlQos = "qos" + ResctrlMbType = "mb_type" ) var ( diff --git a/pkg/koordlet/metricsadvisor/collectors/resctrl/resctrl_collector.go b/pkg/koordlet/metricsadvisor/collectors/resctrl/resctrl_collector.go index aebb2c01d3..c7b8ef82b8 100644 --- a/pkg/koordlet/metricsadvisor/collectors/resctrl/resctrl_collector.go +++ b/pkg/koordlet/metricsadvisor/collectors/resctrl/resctrl_collector.go @@ -23,6 +23,7 @@ import ( "k8s.io/apimachinery/pkg/util/wait" "k8s.io/klog/v2" + "github.com/koordinator-sh/koordinator/pkg/features" "github.com/koordinator-sh/koordinator/pkg/koordlet/metriccache" "github.com/koordinator-sh/koordinator/pkg/koordlet/metrics" "github.com/koordinator-sh/koordinator/pkg/koordlet/metricsadvisor/framework" @@ -47,16 +48,26 @@ type resctrlCollector struct { func New(opt *framework.Options) framework.Collector { return &resctrlCollector{ - collectInterval: opt.Config.ResctrlCollectorInterval, - statesInformer: opt.StatesInformer, - metricCache: opt.MetricCache, - resctrlReader: opt.ResctrlReader, - started: atomic.NewBool(false), + collectInterval: opt.Config.ResctrlCollectorInterval, + statesInformer: opt.StatesInformer, + metricCache: opt.MetricCache, + resctrlReader: resourceexecutor.NewResctrlReader(), + resctrlCollectorGate: opt.Config.EnableResctrlCollector, + started: atomic.NewBool(false), } } +// 1. config enable resctrl collector +// 2. cmdline, os, cpuid enable resctrl collector +// 3. check CPU vendor(Intel&AMD) +// 4. check resctrl collector feature gate func (r *resctrlCollector) Enabled() bool { - return r.resctrlCollectorGate + isResctrlEnabled, _ := system.IsSupportResctrl() + isResctrlCollectorEnabled, _ := system.IsResctrlCollectorAvailableByCpuInfo() + return r.resctrlCollectorGate && + isResctrlEnabled && isResctrlCollectorEnabled && + system.IsVendorSupportResctrl() && + features.DefaultKoordletFeatureGate.Enabled(features.ResctrlCollector) } func (r *resctrlCollector) Setup(c *framework.Context) {} diff --git a/pkg/koordlet/metricsadvisor/collectors/resctrl/resctrl_collector_test.go b/pkg/koordlet/metricsadvisor/collectors/resctrl/resctrl_collector_test.go index d241e4013c..506fd5bb17 100644 --- a/pkg/koordlet/metricsadvisor/collectors/resctrl/resctrl_collector_test.go +++ b/pkg/koordlet/metricsadvisor/collectors/resctrl/resctrl_collector_test.go @@ -36,7 +36,6 @@ func TestNewResctrlCollector(t *testing.T) { cfg *framework.Config statesInformer statesinformer.StatesInformer metricCache metriccache.MetricCache - resctrlReader resourceexecutor.ResctrlReader } tests := []struct { name string @@ -48,7 +47,6 @@ func TestNewResctrlCollector(t *testing.T) { cfg: framework.NewDefaultConfig(), statesInformer: nil, metricCache: nil, - resctrlReader: resourceexecutor.NewResctrlReader(), }, }, } @@ -58,7 +56,6 @@ func TestNewResctrlCollector(t *testing.T) { Config: tt.args.cfg, StatesInformer: tt.args.statesInformer, MetricCache: tt.args.metricCache, - ResctrlReader: tt.args.resctrlReader, } if got := New(opt); got == nil { t.Errorf("NewResctrlCollector() = %v", got) @@ -152,7 +149,6 @@ func Test_collectQosResctrlStatNoErr(t *testing.T) { Config: framework.NewDefaultConfig(), StatesInformer: mockStatesInformer, MetricCache: mockMetricCache, - ResctrlReader: resourceexecutor.NewResctrlReader(), CgroupReader: resourceexecutor.NewCgroupReader(), }) diff --git a/pkg/koordlet/metricsadvisor/framework/options.go b/pkg/koordlet/metricsadvisor/framework/options.go index e77a0ddb8a..25529b4416 100644 --- a/pkg/koordlet/metricsadvisor/framework/options.go +++ b/pkg/koordlet/metricsadvisor/framework/options.go @@ -27,6 +27,5 @@ type Options struct { StatesInformer statesinformer.StatesInformer MetricCache metriccache.MetricCache CgroupReader resourceexecutor.CgroupReader - ResctrlReader resourceexecutor.ResctrlReader PodFilters map[string]PodFilter } diff --git a/pkg/koordlet/metricsadvisor/metrics_advisor.go b/pkg/koordlet/metricsadvisor/metrics_advisor.go index 834ebb4341..c804387b3a 100644 --- a/pkg/koordlet/metricsadvisor/metrics_advisor.go +++ b/pkg/koordlet/metricsadvisor/metrics_advisor.go @@ -45,7 +45,6 @@ func NewMetricAdvisor(cfg *framework.Config, statesInformer statesinformer.State MetricCache: metricCache, CgroupReader: resourceexecutor.NewCgroupReader(), PodFilters: podFilters, - ResctrlReader: resourceexecutor.NewResctrlReader(), } ctx := &framework.Context{ DeviceCollectors: make(map[string]framework.DeviceCollector, len(devicePlugins)), diff --git a/pkg/koordlet/resourceexecutor/resctrl.go b/pkg/koordlet/resourceexecutor/resctrl.go index 45228161cc..50ef83c24b 100644 --- a/pkg/koordlet/resourceexecutor/resctrl.go +++ b/pkg/koordlet/resourceexecutor/resctrl.go @@ -34,9 +34,8 @@ const CacheIdIndex = 2 func NewResctrlReader() ResctrlReader { if vendorId, err := system.GetVendorIDByCPUInfo(system.GetCPUInfoPath()); err != nil { - // FIXME: should we panic there? - klog.V(0).ErrorS(err, "get cpu vendor error") - return nil + klog.V(0).ErrorS(err, "get cpu vendor error, stop start resctrl collector") + return &fakeReader{} } else { switch vendorId { case system.INTEL_VENDOR_ID: @@ -84,6 +83,7 @@ func (rr *ResctrlRDTReader) ReadResctrlL3Stat(parent string) (map[CacheId]uint64 if err != nil { return nil, fmt.Errorf(ErrResctrlDir) } + defer fd.Close() // read all l3-memory domains domains, err := fd.ReadDir(-1) if err != nil { diff --git a/pkg/koordlet/util/system/resctrl.go b/pkg/koordlet/util/system/resctrl.go index f0c8919faf..43a7ecfbc3 100644 --- a/pkg/koordlet/util/system/resctrl.go +++ b/pkg/koordlet/util/system/resctrl.go @@ -29,6 +29,7 @@ import ( "strings" "sync" + "go.uber.org/multierr" "k8s.io/klog/v2" "github.com/koordinator-sh/koordinator/pkg/util" @@ -96,6 +97,15 @@ func isKernelSupportResctrl() (bool, error) { return isCatFlagSet && isMbaFlagSet, nil } +func IsVendorSupportResctrl() bool { + vendorID, err := GetVendorIDByCPUInfo(GetCPUInfoPath()) + if err != nil { + klog.V(4).Infof("GetVendorIDByCPUInfo error: %v", err) + return false + } + return vendorID == AMD_VENDOR_ID || vendorID == INTEL_VENDOR_ID +} + func IsSupportResctrl() (bool, error) { initLock.Lock() defer initLock.Unlock() @@ -117,6 +127,15 @@ func IsSupportResctrl() (bool, error) { return isSupportResctrl, nil } +func IsResctrlCollectorAvailableByCpuInfo() (bool, error) { + initLock.Lock() + defer initLock.Unlock() + path := GetCPUInfoPath() + mbm, err1 := isResctrlMBMAvailableByCpuInfo(path) + cqm, err2 := isResctrlCQMAvailableByCpuInfo(path) + return mbm && cqm, multierr.Append(err1, err2) +} + var ( ResctrlSchemata = NewCommonResctrlResource(ResctrlSchemataName, "") ResctrlTasks = NewCommonResctrlResource(ResctrlTasksName, "") @@ -575,21 +594,6 @@ func CheckAndTryEnableResctrlCat() error { return nil } -func InitCatGroupIfNotExist(group string) error { - path := GetResctrlGroupRootDirPath(group) - _, err := os.Stat(path) - if err == nil { - return nil - } else if !os.IsNotExist(err) { - return fmt.Errorf("check dir %v for group %s but got unexpected err: %v", path, group, err) - } - err = os.Mkdir(path, 0755) - if err != nil { - return fmt.Errorf("create dir %v failed for group %s, err: %v", path, group, err) - } - return nil -} - func CheckResctrlSchemataValid() error { schemataPath := GetResctrlSchemataFilePath("") schemataRaw, err := ReadResctrlSchemataRaw(schemataPath, -1)