Skip to content

Commit

Permalink
Merge pull request #200 from spolti/RHOAIENG-6264
Browse files Browse the repository at this point in the history
Refactor kserve metrics solution to work with prometheus annotations
  • Loading branch information
openshift-merge-bot[bot] authored May 10, 2024
2 parents c5de0a4 + 7231fda commit a3df072
Show file tree
Hide file tree
Showing 10 changed files with 149 additions and 123 deletions.
12 changes: 12 additions & 0 deletions config/manager/manager.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,18 @@ spec:
name: auth-refs
key: AUTHORINO_LABEL
optional: true
- name: CONTROL_PLANE_NAME
valueFrom:
configMapKeyRef:
name: service-mesh-refs
key: CONTROL_PLANE_NAME
optional: true
- name: MESH_NAMESPACE
valueFrom:
configMapKeyRef:
name: service-mesh-refs
key: MESH_NAMESPACE
optional: true
livenessProbe:
httpGet:
path: /healthz
Expand Down
11 changes: 11 additions & 0 deletions config/rbac/role.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,20 @@ rules:
- endpoints
- namespaces
- pods
verbs:
- create
- get
- list
- patch
- update
- watch
- apiGroups:
- ""
resources:
- services
verbs:
- create
- delete
- get
- list
- patch
Expand Down
1 change: 1 addition & 0 deletions controllers/constants/constants.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ const (
InferenceServiceKind = "InferenceService"

IstioNamespace = "istio-system"
IstioControlPlaneName = "data-science-smcp"
ServiceMeshMemberRollName = "default"
IstioIngressService = "istio-ingressgateway"
IstioIngressServiceHTTPPortName = "http2"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,7 @@ import (
"github.com/opendatahub-io/odh-model-controller/controllers/comparators"
"github.com/opendatahub-io/odh-model-controller/controllers/processors"
"github.com/opendatahub-io/odh-model-controller/controllers/resources"
"istio.io/api/security/v1beta1"
istiotypes "istio.io/api/type/v1beta1"
istiosecv1beta1 "istio.io/client-go/pkg/apis/security/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand All @@ -51,9 +48,9 @@ func NewKServeIstioPeerAuthenticationReconciler(client client.Client) *KserveIst
}
}

// TODO: remove this reconciler in a future version; per its log message, Reconcile now only checks for resources to delete.
func (r *KserveIstioPeerAuthenticationReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error {
log.V(1).Info("Reconciling PeerAuthentication for target namespace")

log.V(1).Info("Reconciling PeerAuthentication for target namespace, checking if there are resources for deletion")
// Create Desired resource
desiredResource, err := r.createDesiredResource(isvc)
if err != nil {
Expand All @@ -79,25 +76,7 @@ func (r *KserveIstioPeerAuthenticationReconciler) Cleanup(ctx context.Context, l
}

// createDesiredResource returns the PeerAuthentication this reconciler wants in
// the namespace of isvc, together with an error that is always nil here.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The hunk
// header (-79,25 +76,7) indicates that the struct literal and its
// `return desiredPeerAuthentication, nil` are the removed lines and that the
// trailing `return nil, nil` is the whole body after the change. Read literally
// as Go, that trailing return is unreachable. Confirm against the committed file.
// Presumably a nil desired resource makes Reconcile delete any existing object;
// the delta handling is not visible here, so verify against the caller.
func (r *KserveIstioPeerAuthenticationReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*istiosecv1beta1.PeerAuthentication, error) {
desiredPeerAuthentication := &istiosecv1beta1.PeerAuthentication{
ObjectMeta: metav1.ObjectMeta{
Name: peerAuthenticationName,
Namespace: isvc.Namespace,
},
Spec: v1beta1.PeerAuthentication{
// Applies only to workloads labelled component=predictor.
Selector: &istiotypes.WorkloadSelector{
MatchLabels: map[string]string{
"component": "predictor",
},
},
// NOTE(review): numeric enum values; presumably 3 = STRICT and 2 = PERMISSIVE
// in the Istio MutualTLS mode enum — TODO confirm and prefer named constants.
Mtls: &v1beta1.PeerAuthentication_MutualTLS{Mode: 3},
// Per-port overrides for 8086 and 3000.
PortLevelMtls: map[uint32]*v1beta1.PeerAuthentication_MutualTLS{
8086: {Mode: 2},
3000: {Mode: 2},
},
},
}
return desiredPeerAuthentication, nil
return nil, nil
}

func (r *KserveIstioPeerAuthenticationReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*istiosecv1beta1.PeerAuthentication, error) {
Expand Down
57 changes: 53 additions & 4 deletions controllers/reconcilers/kserve_istio_podmonitor_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,13 @@ package reconcilers

import (
"context"
"fmt"
"github.com/go-logr/logr"
kservev1beta1 "github.com/kserve/kserve/pkg/apis/serving/v1beta1"
"github.com/opendatahub-io/odh-model-controller/controllers/comparators"
"github.com/opendatahub-io/odh-model-controller/controllers/processors"
"github.com/opendatahub-io/odh-model-controller/controllers/resources"
"github.com/opendatahub-io/odh-model-controller/controllers/utils"
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
Expand Down Expand Up @@ -53,7 +55,7 @@ func (r *KserveIstioPodMonitorReconciler) Reconcile(ctx context.Context, log log
log.V(1).Info("Creating Istio PodMonitor for target namespace")

// Create Desired resource
desiredResource, err := r.createDesiredResource(isvc)
desiredResource, err := r.createDesiredResource(ctx, isvc)
if err != nil {
return err
}
Expand All @@ -76,7 +78,9 @@ func (r *KserveIstioPodMonitorReconciler) Cleanup(ctx context.Context, log logr.
return r.podMonitorHandler.DeletePodMonitor(ctx, types.NamespacedName{Name: istioPodMonitorName, Namespace: isvcNs})
}

func (r *KserveIstioPodMonitorReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*v1.PodMonitor, error) {
func (r *KserveIstioPodMonitorReconciler) createDesiredResource(ctx context.Context, isvc *kservev1beta1.InferenceService) (*v1.PodMonitor, error) {
istioControlPlaneName, meshNamespace := utils.GetIstioControlPlaneName(ctx, r.client)

desiredPodMonitor := &v1.PodMonitor{
ObjectMeta: metav1.ObjectMeta{
Name: istioPodMonitorName,
Expand All @@ -86,15 +90,60 @@ func (r *KserveIstioPodMonitorReconciler) createDesiredResource(isvc *kservev1be
Selector: metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "istio-prometheus-ignore",
Operator: metav1.LabelSelectorOpDoesNotExist,
Key: "component",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"predictor", "explainer", "transformer"},
},
},
},
PodMetricsEndpoints: []v1.PodMetricsEndpoint{
{
Path: "/stats/prometheus",
Interval: "30s",
RelabelConfigs: []*v1.RelabelConfig{
{
Action: "keep",
SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_container_name"},
Regex: "istio-proxy",
},
{
Action: "keep",
SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_annotationpresent_prometheus_io_scrape"},
},
{
Action: "replace",
Regex: "(\\d+);(([A-Fa-f0-9]{1,4}::?){1,7}[A-Fa-f0-9]{1,4})",
Replacement: "[$2]:$1",
SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"},
TargetLabel: "__address__",
},
{
Action: "replace",
Regex: "(\\d+);((([0-9]+?)(\\.|$)){4})",
Replacement: "$2:$1",
SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_annotation_prometheus_io_port", "__meta_kubernetes_pod_ip"},
TargetLabel: "__address__",
},
{
Action: "labeldrop",
Regex: "__meta_kubernetes_pod_label_(.+)",
},
{
Action: "replace",
SourceLabels: []v1.LabelName{"__meta_kubernetes_namespace"},
TargetLabel: "namespace",
},
{
Action: "replace",
SourceLabels: []v1.LabelName{"__meta_kubernetes_pod_name"},
TargetLabel: "pod_name",
},
{
Action: "replace",
Replacement: fmt.Sprintf("%s-%s", istioControlPlaneName, meshNamespace),
TargetLabel: "mesh_id",
},
},
},
},
},
Expand Down
24 changes: 2 additions & 22 deletions controllers/reconcilers/kserve_istio_servicemonitor_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ import (
"github.com/opendatahub-io/odh-model-controller/controllers/processors"
"github.com/opendatahub-io/odh-model-controller/controllers/resources"
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
)
Expand Down Expand Up @@ -76,28 +75,9 @@ func (r *KserveIstioServiceMonitorReconciler) Cleanup(ctx context.Context, log l
return r.serviceMonitorHandler.DeleteServiceMonitor(ctx, types.NamespacedName{Name: istioServiceMonitorName, Namespace: isvcNs})
}

// TODO: remove this reconciler in a future version.
//
// createDesiredResource returns the Istio ServiceMonitor this reconciler wants
// in the namespace of isvc, together with an error that is always nil here.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The hunk
// header (-76,28 +75,9) indicates that the struct literal and its
// `return desiredServiceMonitor, nil` are the removed lines and that the
// trailing `return nil, nil` is the whole body after the change. Read literally
// as Go, that trailing return is unreachable. Confirm against the committed file.
func (r *KserveIstioServiceMonitorReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) {
desiredServiceMonitor := &v1.ServiceMonitor{
ObjectMeta: metav1.ObjectMeta{
Name: istioServiceMonitorName,
Namespace: isvc.Namespace,
},
Spec: v1.ServiceMonitorSpec{
// Selects Services labelled istio=pilot.
Selector: metav1.LabelSelector{
MatchLabels: map[string]string{
"istio": "pilot",
},
},
TargetLabels: []string{"app"},
// Scrapes the port named "http-monitoring" every 30s.
Endpoints: []v1.Endpoint{
{
Port: "http-monitoring",
Interval: "30s",
},
},
},
}
return desiredServiceMonitor, nil
return nil, nil
}

func (r *KserveIstioServiceMonitorReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) {
Expand Down
41 changes: 3 additions & 38 deletions controllers/reconcilers/kserve_metrics_service_reconciler.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@ import (
"github.com/opendatahub-io/odh-model-controller/controllers/processors"
"github.com/opendatahub-io/odh-model-controller/controllers/resources"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"k8s.io/apimachinery/pkg/util/intstr"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand All @@ -51,8 +48,9 @@ func NewKServeMetricsServiceReconciler(client client.Client) *KserveMetricsServi
}
}

// TODO: remove this reconciler in a future version; per its log message, Reconcile now only checks for resources to delete.
func (r *KserveMetricsServiceReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error {
log.V(1).Info("Reconciling Metrics Service for InferenceService")
log.V(1).Info("Reconciling Metrics Service for InferenceService, checking if there are resource for deletion")

// Create Desired resource
desiredResource, err := r.createDesiredResource(log, isvc)
Expand All @@ -74,40 +72,7 @@ func (r *KserveMetricsServiceReconciler) Reconcile(ctx context.Context, log logr
}

// createDesiredResource returns the metrics Service this reconciler wants for
// isvc: a ClusterIP Service exposing the caikit (8086) and tgis (3000) metrics
// ports, with isvc set as its controller owner. It returns an error if the
// owner reference cannot be set.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The hunk
// header (-74,40 +72,7) indicates that everything from the struct literal
// through `return metricsService, nil` is removed and that the trailing
// `return nil, nil` is the whole body after the change. Read literally as Go,
// that trailing return is unreachable. Confirm against the committed file.
func (r *KserveMetricsServiceReconciler) createDesiredResource(log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.Service, error) {
metricsService := &v1.Service{
ObjectMeta: metav1.ObjectMeta{
Name: getMetricsServiceName(isvc),
Namespace: isvc.Namespace,
// The "name" label carries the Service's own name.
Labels: map[string]string{
"name": getMetricsServiceName(isvc),
},
},
Spec: v1.ServiceSpec{
Ports: []v1.ServicePort{
{
Name: "caikit-metrics",
Protocol: v1.ProtocolTCP,
Port: 8086,
TargetPort: intstr.FromInt(8086),
},
{
Name: "tgis-metrics",
Protocol: v1.ProtocolTCP,
Port: 3000,
TargetPort: intstr.FromInt(3000),
},
},
Type: v1.ServiceTypeClusterIP,
// Routes to pods whose inferenceServiceLabelName label equals the isvc name.
Selector: map[string]string{
inferenceServiceLabelName: isvc.Name,
},
},
}
// Sets isvc as the controller owner of the Service.
if err := ctrl.SetControllerReference(isvc, metricsService, r.client.Scheme()); err != nil {
log.Error(err, "Unable to add OwnerReference to the Metrics Service")
return nil, err
}
return metricsService, nil
return nil, nil
}

func (r *KserveMetricsServiceReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.Service, error) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,7 @@ import (
"github.com/opendatahub-io/odh-model-controller/controllers/processors"
"github.com/opendatahub-io/odh-model-controller/controllers/resources"
v1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
)

Expand All @@ -46,6 +44,7 @@ func NewKServeMetricsServiceMonitorReconciler(client client.Client) *KserveMetri
}
}

// TODO remove this reconcile loop in future versions
func (r *KserveMetricsServiceMonitorReconciler) Reconcile(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) error {
log.V(1).Info("Reconciling Metrics ServiceMonitor for InferenceService")

Expand All @@ -68,35 +67,9 @@ func (r *KserveMetricsServiceMonitorReconciler) Reconcile(ctx context.Context, l
return nil
}

// TODO: remove this reconciler in a future version.
//
// createDesiredResource returns the metrics ServiceMonitor this reconciler
// wants for isvc, with isvc set as its controller owner. It returns an error if
// the owner reference cannot be set.
//
// NOTE(review): this listing is a rendered diff without +/- markers. The hunk
// header (-68,35 +67,9) indicates that everything from the struct literal
// through `return desiredServiceMonitor, nil` is removed and that the trailing
// `return nil, nil` is the whole body after the change. Read literally as Go,
// that trailing return is unreachable. Confirm against the committed file.
func (r *KserveMetricsServiceMonitorReconciler) createDesiredResource(isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) {
desiredServiceMonitor := &v1.ServiceMonitor{
ObjectMeta: metav1.ObjectMeta{
Name: getMetricsServiceMonitorName(isvc),
Namespace: isvc.Namespace,
},
Spec: v1.ServiceMonitorSpec{
// Scrapes the ports named "caikit-metrics" and "tgis-metrics" over plain HTTP.
Endpoints: []v1.Endpoint{
{
Port: "caikit-metrics",
Scheme: "http",
},
{
Port: "tgis-metrics",
Scheme: "http",
},
},
NamespaceSelector: v1.NamespaceSelector{},
// Matches Services whose "name" label equals the ServiceMonitor's name.
// NOTE(review): presumably getMetricsServiceMonitorName and the metrics
// Service's name label yield the same string — verify.
Selector: metav1.LabelSelector{
MatchLabels: map[string]string{
"name": getMetricsServiceMonitorName(isvc),
},
},
},
}
// Sets isvc as the controller owner of the ServiceMonitor.
if err := ctrl.SetControllerReference(isvc, desiredServiceMonitor, r.client.Scheme()); err != nil {
return nil, err
}
return desiredServiceMonitor, nil
return nil, nil
}

func (r *KserveMetricsServiceMonitorReconciler) getExistingResource(ctx context.Context, log logr.Logger, isvc *kservev1beta1.InferenceService) (*v1.ServiceMonitor, error) {
Expand Down
Loading

0 comments on commit a3df072

Please sign in to comment.