Skip to content

Commit

Permalink
Merge pull request #105 from pyrra-dev/promql-grouping
Browse files Browse the repository at this point in the history
slo: Fix PromQL grouping
  • Loading branch information
metalmatze authored Feb 4, 2022
2 parents 25d86c3 + 5091f2d commit 3325adf
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
13 changes: 10 additions & 3 deletions slo/promql.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,21 +11,24 @@ import (

// QueryTotal returns a PromQL query to get the total amount of requests served during the window.
func (o Objective) QueryTotal(window model.Duration) string {
expr, err := parser.ParseExpr(`sum(metric{})`)
expr, err := parser.ParseExpr(`sum by (grouping) (metric{})`)
if err != nil {
return ""
}

var metric string
var matchers []*labels.Matcher
var grouping []string

if o.Indicator.Ratio != nil && o.Indicator.Ratio.Total.Name != "" {
metric = increaseName(o.Indicator.Ratio.Total.Name, window)
matchers = o.Indicator.Ratio.Total.LabelMatchers
grouping = o.Indicator.Ratio.Grouping
}
if o.Indicator.Latency != nil && o.Indicator.Latency.Total.Name != "" {
metric = increaseName(o.Indicator.Latency.Total.Name, window)
matchers = o.Indicator.Latency.Total.LabelMatchers
grouping = o.Indicator.Latency.Grouping
}

matchers = append(matchers, &labels.Matcher{
Expand All @@ -43,6 +46,7 @@ func (o Objective) QueryTotal(window model.Duration) string {
objectiveReplacer{
metric: metric,
matchers: matchers,
grouping: grouping,
}.replace(expr)

return expr.String()
Expand All @@ -51,13 +55,14 @@ func (o Objective) QueryTotal(window model.Duration) string {
// QueryErrors returns a PromQL query to get the amount of request errors during the window.
func (o Objective) QueryErrors(window model.Duration) string {
if o.Indicator.Ratio != nil && o.Indicator.Ratio.Total.Name != "" {
expr, err := parser.ParseExpr(`sum(metric{})`)
expr, err := parser.ParseExpr(`sum by (grouping) (metric{})`)
if err != nil {
return ""
}

metric := increaseName(o.Indicator.Ratio.Errors.Name, window)
matchers := o.Indicator.Ratio.Errors.LabelMatchers

for _, m := range matchers {
if m.Name == labels.MetricName {
m.Value = metric
Expand All @@ -73,13 +78,14 @@ func (o Objective) QueryErrors(window model.Duration) string {
objectiveReplacer{
metric: metric,
matchers: matchers,
grouping: o.Indicator.Ratio.Grouping,
}.replace(expr)

return expr.String()
}

if o.Indicator.Latency != nil && o.Indicator.Latency.Total.Name != "" {
expr, err := parser.ParseExpr(`sum(metric{matchers="total"}) - sum(errorMetric{matchers="errors"})`)
expr, err := parser.ParseExpr(`sum by(grouping) (metric{matchers="total"}) - sum by(grouping) (errorMetric{matchers="errors"})`)
if err != nil {
return ""
}
Expand Down Expand Up @@ -119,6 +125,7 @@ func (o Objective) QueryErrors(window model.Duration) string {
matchers: matchers,
errorMetric: errorMetric,
errorMatchers: errorMatchers,
grouping: o.Indicator.Latency.Grouping,
}.replace(expr)

return expr.String()
Expand Down
28 changes: 14 additions & 14 deletions slo/promql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -264,47 +264,47 @@ func TestObjective_QueryTotal(t *testing.T) {
}, {
name: "http-ratio-grouping",
objective: objectiveHTTPRatioGrouping(),
expected: `sum(http_requests:increase4w{job="thanos-receive-default",slo="monitoring-http-errors"})`,
expected: `sum by(job, handler) (http_requests:increase4w{job="thanos-receive-default",slo="monitoring-http-errors"})`,
}, {
name: "http-ratio-grouping-regex",
objective: objectiveHTTPRatioGroupingRegex(),
expected: `sum(http_requests:increase4w{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"})`,
expected: `sum by(job, handler) (http_requests:increase4w{handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"})`,
}, {
name: "grpc-ratio",
objective: objectiveGRPCRatio(),
expected: `sum(grpc_server_handled:increase4w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"})`,
}, {
name: "grpc-ratio-grouping",
objective: objectiveGRPCRatioGrouping(),
expected: `sum(grpc_server_handled:increase4w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"})`,
expected: `sum by(job, handler) (grpc_server_handled:increase4w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"})`,
}, {
name: "http-latency",
objective: objectiveHTTPLatency(),
expected: `sum(http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"})`,
}, {
name: "http-latency-grouping",
objective: objectiveHTTPLatencyGrouping(),
expected: `sum(http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"})`,
expected: `sum by(job, handler) (http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"})`,
}, {
name: "http-latency-grouping-regex",
objective: objectiveHTTPLatencyGroupingRegex(),
expected: `sum(http_request_duration_seconds:increase4w{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"})`,
expected: `sum by(job, handler) (http_request_duration_seconds:increase4w{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",slo="monitoring-http-latency"})`,
}, {
name: "grpc-latency",
objective: objectiveGRPCLatency(),
expected: `sum(grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"})`,
}, {
name: "grpc-latency-grouping",
objective: objectiveGRPCLatencyGrouping(),
expected: `sum(grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"})`,
expected: `sum by(job, handler) (grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-latency"})`,
}, {
name: "operator-ratio",
objective: objectiveOperator(),
expected: `sum(prometheus_operator_reconcile_operations:increase2w{slo="monitoring-prometheus-operator-errors"})`,
}, {
name: "operator-ratio-grouping",
objective: objectiveOperatorGrouping(),
expected: `sum(prometheus_operator_reconcile_operations:increase2w{slo="monitoring-prometheus-operator-errors"})`,
expected: `sum by(namespace) (prometheus_operator_reconcile_operations:increase2w{slo="monitoring-prometheus-operator-errors"})`,
}, {
name: "apiserver-write-response-errors",
objective: objectiveAPIServerRatio(),
Expand Down Expand Up @@ -333,47 +333,47 @@ func TestObjective_QueryErrors(t *testing.T) {
}, {
name: "http-ratio-grouping",
objective: objectiveHTTPRatioGrouping(),
expected: `sum(http_requests:increase4w{code=~"5..",job="thanos-receive-default",slo="monitoring-http-errors"})`,
expected: `sum by(job, handler) (http_requests:increase4w{code=~"5..",job="thanos-receive-default",slo="monitoring-http-errors"})`,
}, {
name: "http-ratio-grouping-regex",
objective: objectiveHTTPRatioGroupingRegex(),
expected: `sum(http_requests:increase4w{code=~"5..",handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"})`,
expected: `sum by(job, handler) (http_requests:increase4w{code=~"5..",handler=~"/api.*",job="thanos-receive-default",slo="monitoring-http-errors"})`,
}, {
name: "grpc-ratio",
objective: objectiveGRPCRatio(),
expected: `sum(grpc_server_handled:increase4w{grpc_code=~"Aborted|Unavailable|Internal|Unknown|Unimplemented|DataLoss",grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"})`,
}, {
name: "grpc-ratio-grouping",
objective: objectiveGRPCRatioGrouping(),
expected: `sum(grpc_server_handled:increase4w{grpc_code=~"Aborted|Unavailable|Internal|Unknown|Unimplemented|DataLoss",grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"})`,
expected: `sum by(job, handler) (grpc_server_handled:increase4w{grpc_code=~"Aborted|Unavailable|Internal|Unknown|Unimplemented|DataLoss",grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",slo="monitoring-grpc-errors"})`,
}, {
name: "http-latency",
objective: objectiveHTTPLatency(),
expected: `sum(http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",le="",slo="monitoring-http-latency"}) - sum(http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",le="1",slo="monitoring-http-latency"})`,
}, {
name: "http-latency-grouping",
objective: objectiveHTTPLatencyGrouping(),
expected: `sum(http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",le="",slo="monitoring-http-latency"}) - sum(http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",le="1",slo="monitoring-http-latency"})`,
expected: `sum by(job, handler) (http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",le="",slo="monitoring-http-latency"}) - sum by(job, handler) (http_request_duration_seconds:increase4w{code=~"2..",job="metrics-service-thanos-receive-default",le="1",slo="monitoring-http-latency"})`,
}, {
name: "http-latency-grouping-regex",
objective: objectiveHTTPLatencyGroupingRegex(),
expected: `sum(http_request_duration_seconds:increase4w{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="",slo="monitoring-http-latency"}) - sum(http_request_duration_seconds:increase4w{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="1",slo="monitoring-http-latency"})`,
expected: `sum by(job, handler) (http_request_duration_seconds:increase4w{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="",slo="monitoring-http-latency"}) - sum by(job, handler) (http_request_duration_seconds:increase4w{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="1",slo="monitoring-http-latency"})`,
}, {
name: "grpc-latency",
objective: objectiveGRPCLatency(),
expected: `sum(grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="",slo="monitoring-grpc-latency"}) - sum(grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6",slo="monitoring-grpc-latency"})`,
}, {
name: "grpc-latency-grouping",
objective: objectiveGRPCLatencyGrouping(),
expected: `sum(grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="",slo="monitoring-grpc-latency"}) - sum(grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6",slo="monitoring-grpc-latency"})`,
expected: `sum by(job, handler) (grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="",slo="monitoring-grpc-latency"}) - sum by(job, handler) (grpc_server_handling_seconds:increase1w{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6",slo="monitoring-grpc-latency"})`,
}, {
name: "operator-ratio",
objective: objectiveOperator(),
expected: `sum(prometheus_operator_reconcile_errors:increase2w{slo="monitoring-prometheus-operator-errors"})`,
}, {
name: "operator-ratio-grouping",
objective: objectiveOperatorGrouping(),
expected: `sum(prometheus_operator_reconcile_errors:increase2w{slo="monitoring-prometheus-operator-errors"})`,
expected: `sum by(namespace) (prometheus_operator_reconcile_errors:increase2w{slo="monitoring-prometheus-operator-errors"})`,
}, {
name: "apiserver-write-response-errors",
objective: objectiveAPIServerRatio(),
Expand Down

0 comments on commit 3325adf

Please sign in to comment.