diff --git a/config/manager/manager.yaml b/config/manager/manager.yaml index 55d5f46e..fdc929d6 100644 --- a/config/manager/manager.yaml +++ b/config/manager/manager.yaml @@ -59,6 +59,18 @@ spec: name: auth-refs key: AUTHORINO_LABEL optional: true + - name: CONTROL_PLANE_NAME + valueFrom: + configMapKeyRef: + name: service-mesh-refs + key: CONTROL_PLANE_NAME + optional: true + - name: MESH_NAMESPACE + valueFrom: + configMapKeyRef: + name: service-mesh-refs + key: MESH_NAMESPACE + optional: true livenessProbe: httpGet: path: /healthz diff --git a/config/rbac/role.yaml b/config/rbac/role.yaml index 0f8330b3..5e235fe2 100644 --- a/config/rbac/role.yaml +++ b/config/rbac/role.yaml @@ -25,9 +25,20 @@ rules: - endpoints - namespaces - pods + verbs: + - create + - get + - list + - patch + - update + - watch +- apiGroups: + - "" + resources: - services verbs: - create + - delete - get - list - patch diff --git a/controllers/constants/constants.go b/controllers/constants/constants.go index 2b71a48a..d1dadd0f 100644 --- a/controllers/constants/constants.go +++ b/controllers/constants/constants.go @@ -19,6 +19,7 @@ const ( InferenceServiceKind = "InferenceService" IstioNamespace = "istio-system" + IstioControlPlaneName = "data-science-smcp" ServiceMeshMemberRollName = "default" IstioIngressService = "istio-ingressgateway" IstioIngressServiceHTTPPortName = "http2" diff --git a/controllers/reconcilers/kserve_istio_peerauthentication_reconciler.go b/controllers/reconcilers/kserve_istio_peerauthentication_reconciler.go index 53c4f9a2..0ab851da 100644 --- a/controllers/reconcilers/kserve_istio_peerauthentication_reconciler.go +++ b/controllers/reconcilers/kserve_istio_peerauthentication_reconciler.go @@ -22,10 +22,7 @@ import ( "github.com/opendatahub-io/odh-model-controller/controllers/comparators" "github.com/opendatahub-io/odh-model-controller/controllers/processors" "github.com/opendatahub-io/odh-model-controller/controllers/resources" - "istio.io/api/security/v1beta1" - istiotypes "istio.io/api/type/v1beta1" istiosecv1beta1 "istio.io/client-go/pkg/apis/security/v1beta1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -51,9 +48,9 @@ func NewKServeIstioPeerAuthenticationReconciler(client client.Client) *KserveIst } } +// TODO remove this reconcile loop in future versions func (r *KserveIstioPeerAuthenticationReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error { - log.V(1).Info("Reconciling PeerAuthentication for target namespace") - + log.V(1).Info("Reconciling PeerAuthentication for target namespace, checking if there are resources for deletion") // Create Desired resource desiredResource, err := r.createDesiredResource(isvc) if err != nil { @@ -79,25 +76,7 @@ func (r *KserveIstioPeerAuthenticationReconciler) Cleanup(ctx context.Context, l } func (r *KserveIstioPeerAuthenticationReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*istiosecv1beta1.PeerAuthentication, error) { - desiredPeerAuthentication := &istiosecv1beta1.PeerAuthentication{ - ObjectMeta: metav1.ObjectMeta{ - Name: peerAuthenticationName, - Namespace: isvc.Namespace, - }, - Spec: v1beta1.PeerAuthentication{ - Selector: &istiotypes.WorkloadSelector{ - MatchLabels: map[string]string{ - "component": "predictor", - }, - }, - Mtls: &v1beta1.PeerAuthentication_MutualTLS{Mode: 3}, - PortLevelMtls: map[uint32]*v1beta1.PeerAuthentication_MutualTLS{ - 8086: {Mode: 2}, - 3000: {Mode: 2}, - }, - }, - } - return desiredPeerAuthentication, nil + return nil, nil } func (r *KserveIstioPeerAuthenticationReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*istiosecv1beta1.PeerAuthentication, error) { diff --git a/controllers/reconcilers/kserve_istio_podmonitor_reconciler.go b/controllers/reconcilers/kserve_istio_podmonitor_reconciler.go index 66e3e22d..bbcd3ac4 100644 --- a/controllers/reconcilers/kserve_istio_podmonitor_reconciler.go +++ b/controllers/reconcilers/kserve_istio_podmonitor_reconciler.go @@ -17,11 +17,13 @@ package reconcilers import ( "context" + "fmt" "github.com/go-logr/logr" kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1" "github.com/opendatahub-io/odh-model-controller/controllers/comparators" "github.com/opendatahub-io/odh-model-controller/controllers/processors" "github.com/opendatahub-io/odh-model-controller/controllers/resources" + "github.com/opendatahub-io/odh-model-controller/controllers/utils" v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -53,7 +55,7 @@ func (r *KserveIstioPodMonitorReconciler) Reconcile(ctx context.Context, log log log.V(1).Info("Creating Istio PodMonitor for target namespace") // Create Desired resource - desiredResource, err := r.createDesiredResource(isvc) + desiredResource, err := r.createDesiredResource(ctx, isvc) if err != nil { return err } @@ -76,7 +78,9 @@ func (r *KserveIstioPodMonitorReconciler) Cleanup(ctx context.Context, log logr. return r.podMonitorHandler.DeletePodMonitor(ctx, types.NamespacedName{Name: istioPodMonitorName, Namespace: isvcNs}) } -func (r *KserveIstioPodMonitorReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*v1.PodMonitor, error) { +func (r *KserveIstioPodMonitorReconciler) createDesiredResource(ctx context.Context, isvc *kservev1beta1.InferenceService) (*v1.PodMonitor, error) { + istioControlPlaneName, meshNamespace := utils.GetIstioControlPlaneName(ctx, r.client) + desiredPodMonitor := &v1.PodMonitor{ ObjectMeta: metav1.ObjectMeta{ Name: istioPodMonitorName, @@ -86,8 +90,9 @@ func (r *KserveIstioPodMonitorReconciler) createDesiredResource(isvc *kservev1be Selector: metav1.LabelSelector{ MatchExpressions: []metav1.LabelSelectorRequirement{ { - Key: "istio-prometheus-ignore", - Operator: metav1.LabelSelectorOpDoesNotExist, + Key: "component", + Operator: metav1.LabelSelectorOpIn, + Values: []string{"predictor", "explainer", "transformer"}, }, }, }, @@ -95,6 +100,50 @@ func (r *KserveIstioPodMonitorReconciler) createDesiredResource(isvc *kservev1be { Path: "/stats/prometheus", Interval: "30s", + RelabelConfigs: []*v1.RelabelConfig{ + { + Action: "keep", + SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_container_name"}, + Regex: "istio-proxy", + }, + { + Action: "keep", + SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_annotationpresent_prometheus_io_scrape"}, + }, + { + Action: "replace", + Regex: "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})", + Replacement: "[$2]:$1", + SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"}, + TargetLabel: "__address__", + }, + { + Action: "replace", + Regex: "(\\d+);((([0-9]+?)(\\.|$)){4})", + Replacement: "$2:$1", + SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"}, + TargetLabel: "__address__", + }, + { + Action: "labeldrop", + Regex: "__meta_kubernetes_pod_label_(.+)", + }, + { + Action: "replace", + SourceLabels: []v1.LabelName{"__meta_kubernetes_namespace"}, + TargetLabel: "namespace", + }, + { + Action: "replace", + SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_name"}, + TargetLabel: "pod_name", + }, + { + Action: "replace", + Replacement: fmt.Sprintf("%s-%s", istioControlPlaneName, meshNamespace), + TargetLabel: "mesh_id", + }, + }, }, }, }, diff --git a/controllers/reconcilers/kserve_istio_servicemonitor_reconciler.go b/controllers/reconcilers/kserve_istio_servicemonitor_reconciler.go index a010a7c8..c484d33a 100644 --- a/controllers/reconcilers/kserve_istio_servicemonitor_reconciler.go +++ b/controllers/reconcilers/kserve_istio_servicemonitor_reconciler.go @@ -23,7 +23,6 @@ import ( "github.com/opendatahub-io/odh-model-controller/controllers/processors" "github.com/opendatahub-io/odh-model-controller/controllers/resources" v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -76,28 +75,9 @@ func (r *KserveIstioServiceMonitorReconciler) Cleanup(ctx context.Context, log l return r.serviceMonitorHandler.DeleteServiceMonitor(ctx, types.NamespacedName{Name: istioServiceMonitorName, Namespace: isvcNs}) } +// TODO remove this reconcile loop in future versions func (r *KserveIstioServiceMonitorReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) { - desiredServiceMonitor := &v1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: istioServiceMonitorName, - Namespace: isvc.Namespace, - }, - Spec: v1.ServiceMonitorSpec{ - Selector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "istio": "pilot", - }, - }, - TargetLabels: []string{"app"}, - Endpoints: []v1.Endpoint{ - { - Port: "http-monitoring", - Interval: "30s", - }, - }, - }, - } - return desiredServiceMonitor, nil + return nil, nil } func (r *KserveIstioServiceMonitorReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) { diff --git a/controllers/reconcilers/kserve_metrics_service_reconciler.go b/controllers/reconcilers/kserve_metrics_service_reconciler.go index 8d30c70c..a006263c 100644 --- a/controllers/reconcilers/kserve_metrics_service_reconciler.go +++ b/controllers/reconcilers/kserve_metrics_service_reconciler.go @@ -23,10 +23,7 @@ import ( "github.com/opendatahub-io/odh-model-controller/controllers/processors" "github.com/opendatahub-io/odh-model-controller/controllers/resources" v1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - "k8s.io/apimachinery/pkg/util/intstr" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -51,8 +48,9 @@ func NewKServeMetricsServiceReconciler(client client.Client) *KserveMetricsServi } } +// TODO remove this reconcile loop in future versions func (r *KserveMetricsServiceReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error { - log.V(1).Info("Reconciling Metrics Service for InferenceService") + log.V(1).Info("Reconciling Metrics Service for InferenceService, checking if there are resource for deletion") // Create Desired resource desiredResource, err := r.createDesiredResource(log, isvc) @@ -74,40 +72,7 @@ func (r *KserveMetricsServiceReconciler) Reconcile(ctx context.Context, log logr } func (r *KserveMetricsServiceReconciler) createDesiredResource(log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.Service, error) { - metricsService := &v1.Service{ - ObjectMeta: metav1.ObjectMeta{ - Name: getMetricsServiceName(isvc), - Namespace: isvc.Namespace, - Labels: map[string]string{ - "name": getMetricsServiceName(isvc), - }, - }, - Spec: v1.ServiceSpec{ - Ports: []v1.ServicePort{ - { - Name: "caikit-metrics", - Protocol: v1.ProtocolTCP, - Port: 8086, - TargetPort: intstr.FromInt(8086), - }, - { - Name: "tgis-metrics", - Protocol: v1.ProtocolTCP, - Port: 3000, - TargetPort: intstr.FromInt(3000), - }, - }, - Type: v1.ServiceTypeClusterIP, - Selector: map[string]string{ - inferenceServiceLabelName: isvc.Name, - }, - }, - } - if err := ctrl.SetControllerReference(isvc, metricsService, r.client.Scheme()); err != nil { - log.Error(err, "Unable to add OwnerReference to the Metrics Service") - return nil, err - } - return metricsService, nil + return nil, nil } func (r *KserveMetricsServiceReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.Service, error) { diff --git a/controllers/reconcilers/kserve_metrics_servicemonitor_reconciler.go b/controllers/reconcilers/kserve_metrics_servicemonitor_reconciler.go index 5017e21d..9350eaa2 100644 --- a/controllers/reconcilers/kserve_metrics_servicemonitor_reconciler.go +++ b/controllers/reconcilers/kserve_metrics_servicemonitor_reconciler.go @@ -23,9 +23,7 @@ import ( "github.com/opendatahub-io/odh-model-controller/controllers/processors" "github.com/opendatahub-io/odh-model-controller/controllers/resources" v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" ) @@ -46,6 +44,7 @@ func NewKServeMetricsServiceMonitorReconciler(client client.Client) *KserveMetri } } +// TODO remove this reconcile loop in future versions func (r *KserveMetricsServiceMonitorReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error { log.V(1).Info("Reconciling Metrics ServiceMonitor for InferenceService") @@ -68,35 +67,9 @@ func (r *KserveMetricsServiceMonitorReconciler) Reconcile(ctx context.Context, l return nil } +// TODO remove this reconcile loop in future versions func (r *KserveMetricsServiceMonitorReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) { - desiredServiceMonitor := &v1.ServiceMonitor{ - ObjectMeta: metav1.ObjectMeta{ - Name: getMetricsServiceMonitorName(isvc), - Namespace: isvc.Namespace, - }, - Spec: v1.ServiceMonitorSpec{ - Endpoints: []v1.Endpoint{ - { - Port: "caikit-metrics", - Scheme: "http", - }, - { - Port: "tgis-metrics", - Scheme: "http", - }, - }, - NamespaceSelector: v1.NamespaceSelector{}, - Selector: metav1.LabelSelector{ - MatchLabels: map[string]string{ - "name": getMetricsServiceMonitorName(isvc), - }, - }, - }, - } - if err := ctrl.SetControllerReference(isvc, desiredServiceMonitor, r.client.Scheme()); err != nil { - return nil, err - } - return desiredServiceMonitor, nil + return nil, nil } func (r *KserveMetricsServiceMonitorReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) { diff --git a/controllers/utils/utils.go b/controllers/utils/utils.go index 0e371108..4a8b3f3c 100644 --- a/controllers/utils/utils.go +++ b/controllers/utils/utils.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "fmt" + "github.com/kuadrant/authorino/pkg/log" "os" "reflect" @@ -133,11 +134,9 @@ func AuthorinoEnabledWhenOperatorNotMissing(_, reason string) bool { // VerifyIfCapabilityIsEnabled checks if given DSCI capability is enabled. It only fails if client call to fetch DSCI fails. // In other cases it assumes capability is not enabled. func VerifyIfCapabilityIsEnabled(ctx context.Context, cli client.Client, capabilityName string, enabledWhen func(status, reason string) bool) (bool, error) { - objectList := &unstructured.UnstructuredList{} - objectList.SetAPIVersion(GVK.DataScienceClusterInitialization.GroupVersion().String()) - objectList.SetKind(GVK.DataScienceClusterInitialization.Kind) + objectList, err := getDSCIObject(ctx, cli) - if err := cli.List(ctx, objectList); err != nil { + if err != nil { return false, fmt.Errorf("not able to read %s: %w", objectList, err) } @@ -167,7 +166,50 @@ func VerifyIfCapabilityIsEnabled(ctx context.Context, cli client.Client, capabil } return false, nil +} + +// GetIstioControlPlaneName return the name of the Istio Control Plane and the mesh namespace. +// It will first try to read the environment variables, if not found will then try to read the DSCI +// If the required value is not available in the DSCI, it will return the default values +func GetIstioControlPlaneName(ctx context.Context, cli client.Client) (istioControlPlane string, meshNamespace string) { + // first try to retrieve it from the envs, it should be available through the service-mesh-refs ConfigMap + istioControlPlane = os.Getenv("CONTROL_PLANE_NAME") + meshNamespace = os.Getenv("MESH_NAMESPACE") + if len(istioControlPlane) == 0 || len(meshNamespace) == 0 { + log.V(1).Info("Trying to read Istio Control Plane name and namespace from DSCI") + objectList, err := getDSCIObject(ctx, cli) + if err != nil { + log.V(0).Error(err, "Failed to fetch the DSCI object, using default values") + return constants.IstioControlPlaneName, constants.IstioNamespace + } + for _, item := range objectList.Items { + if len(istioControlPlane) == 0 { + name, _, _ := unstructured.NestedString(item.Object, "spec", "serviceMesh", "controlPlane", "name") + if len(name) > 0 { + istioControlPlane = name + } else { + log.V(1).Info("Istio Control Plane name is not set in DSCI") + // at this point, it is not set anywhere, lets just use the default + istioControlPlane = constants.IstioControlPlaneName + } + } + + if len(meshNamespace) == 0 { + namespace, _, _ := unstructured.NestedString(item.Object, "spec", "serviceMesh", "controlPlane", "namespace") + if len(namespace) > 0 { + meshNamespace = namespace + } else { + log.V(1).Info("Mesh Namespace is not set in DSCI") + // at this point, it is not set anywhere, lets just use the default + meshNamespace = constants.IstioNamespace + } + } + } + } else { + log.V(1).Info("Istio Control Plane name and namespace read from environment variables") + } + return istioControlPlane, meshNamespace } // VerifyIfMeshAuthorizationIsEnabled func checks if Authorization has been configured for @@ -216,3 +258,16 @@ func IsNil(i any) bool { func IsNotNil(i any) bool { return !IsNil(i) } + +// Query the DSCI from the cluster +func getDSCIObject(ctx context.Context, cli client.Client) (*unstructured.UnstructuredList, error) { + objectList := &unstructured.UnstructuredList{} + objectList.SetAPIVersion(GVK.DataScienceClusterInitialization.GroupVersion().String()) + objectList.SetKind(GVK.DataScienceClusterInitialization.Kind) + + if err := cli.List(ctx, objectList); err != nil { + return objectList, fmt.Errorf("not able to read %s: %w", objectList, err) + } + + return objectList, nil +} diff --git a/main.go b/main.go index bb89f344..a13819d8 100644 --- a/main.go +++ b/main.go @@ -75,7 +75,8 @@ func init() { //nolint:gochecknoinits //reason this way we ensure schemes are al // +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch // +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;podmonitors,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=extensions,resources=ingresses,verbs=get;list;watch -// +kubebuilder:rbac:groups="",resources=namespaces;pods;services;endpoints,verbs=get;list;watch;create;update;patch +// +kubebuilder:rbac:groups="",resources=namespaces;pods;endpoints,verbs=get;list;watch;create;update;patch +// +kubebuilder:rbac:groups="",resources=services,verbs=get;list;watch;create;update;delete;patch // +kubebuilder:rbac:groups="",resources=secrets;configmaps;serviceaccounts,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=authorino.kuadrant.io,resources=authconfigs,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=datasciencecluster.opendatahub.io,resources=datascienceclusters,verbs=get;list;watch