Skip to content

Commit

Permalink
Merge pull request #117 from pyrra-dev/fix-latency-burnrates
Browse files: browse the repository at this point in the history
slo: Fix latency burnrate recording rules
  • Loading branch information
metalmatze authored Feb 10, 2022
2 parents c45329f + 073b4bf commit bb9544b
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 50 deletions.
2 changes: 1 addition & 1 deletion slo/promql.go
Original file line number Diff line number Diff line change
Expand Up @@ -377,7 +377,7 @@ func (o Objective) ErrorsRange(timerange time.Duration) string {
return expr.String()
}
if o.Indicator.Latency != nil && o.Indicator.Latency.Total.Name != "" {
expr, err := parser.ParseExpr(`sum(rate(metric{matchers="total"}[1s])) - sum(rate(errorMetric{matchers="errors"}[1s]))`)
expr, err := parser.ParseExpr(`(sum(rate(metric{matchers="total"}[1s])) - sum(rate(errorMetric{matchers="errors"}[1s]))) / sum(rate(metric{matchers="total"}[1s]))`)
if err != nil {
return err.Error()
}
Expand Down
12 changes: 6 additions & 6 deletions slo/promql_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -578,27 +578,27 @@ func TestObjective_ErrorsRange(t *testing.T) {
name: "http-latency",
objective: objectiveHTTPLatency(),
timerange: time.Hour,
expected: `sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",job="metrics-service-thanos-receive-default",le="1"}[1h]))`,
expected: `(sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",job="metrics-service-thanos-receive-default",le="1"}[1h]))) / sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h]))`,
}, {
name: "http-latency-grouping",
objective: objectiveHTTPLatencyGrouping(),
timerange: time.Hour,
expected: `sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",job="metrics-service-thanos-receive-default",le="1"}[1h]))`,
expected: `(sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",job="metrics-service-thanos-receive-default",le="1"}[1h]))) / sum(rate(http_request_duration_seconds_count{code=~"2..",job="metrics-service-thanos-receive-default"}[1h]))`,
}, {
name: "http-latency-grouping-regex",
objective: objectiveHTTPLatencyGroupingRegex(),
timerange: time.Hour,
expected: `sum(rate(http_request_duration_seconds_count{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="1"}[1h]))`,
expected: `(sum(rate(http_request_duration_seconds_count{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default"}[1h])) - sum(rate(http_request_duration_seconds_bucket{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default",le="1"}[1h]))) / sum(rate(http_request_duration_seconds_count{code=~"2..",handler=~"/api.*",job="metrics-service-thanos-receive-default"}[1h]))`,
}, {
name: "grpc-latency",
objective: objectiveGRPCLatency(),
timerange: time.Hour,
expected: `sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h])) - sum(rate(grpc_server_handling_seconds_bucket{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6"}[1h]))`,
expected: `(sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h])) - sum(rate(grpc_server_handling_seconds_bucket{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6"}[1h]))) / sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h]))`,
}, {
name: "grpc-latency-grouping",
objective: objectiveGRPCLatencyGrouping(),
timerange: time.Hour,
expected: `sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h])) - sum(rate(grpc_server_handling_seconds_bucket{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6"}[1h]))`,
expected: `(sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h])) - sum(rate(grpc_server_handling_seconds_bucket{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api",le="0.6"}[1h]))) / sum(rate(grpc_server_handling_seconds_count{grpc_method="Write",grpc_service="conprof.WritableProfileStore",job="api"}[1h]))`,
}, {
name: "operator-ratio",
objective: objectiveOperator(),
Expand All @@ -618,7 +618,7 @@ func TestObjective_ErrorsRange(t *testing.T) {
name: "apiserver-read-resource-latency",
objective: objectiveAPIServerLatency(),
timerange: 2 * time.Hour,
expected: `sum(rate(apiserver_request_duration_seconds_count{job="apiserver",resource=~"resource|",verb=~"LIST|GET"}[2h])) - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",le="0.1",resource=~"resource|",verb=~"LIST|GET"}[2h]))`,
expected: `(sum(rate(apiserver_request_duration_seconds_count{job="apiserver",resource=~"resource|",verb=~"LIST|GET"}[2h])) - sum(rate(apiserver_request_duration_seconds_bucket{job="apiserver",le="0.1",resource=~"resource|",verb=~"LIST|GET"}[2h]))) / sum(rate(apiserver_request_duration_seconds_count{job="apiserver",resource=~"resource|",verb=~"LIST|GET"}[2h]))`,
}}

for _, tc := range testcases {
Expand Down
11 changes: 10 additions & 1 deletion slo/rules.go
Original file line number Diff line number Diff line change
Expand Up @@ -310,7 +310,16 @@ func (o Objective) Burnrate(timerange time.Duration) string {
return expr.String()
}
if o.Indicator.Latency != nil && o.Indicator.Latency.Total.Name != "" {
expr, err := parser.ParseExpr(`sum by(grouping) (rate(metric{matchers="total"}[1s])) - sum by(grouping) (rate(errorMetric{matchers="errors"}[1s]))`)
query := `
(
sum by(grouping) (rate(metric{matchers="total"}[1s]))
-
sum by(grouping) (rate(errorMetric{matchers="errors"}[1s]))
)
/
sum by(grouping) (rate(metric{matchers="total"}[1s]))
`
expr, err := parser.ParseExpr(query)
if err != nil {
return err.Error()
}
Expand Down
Loading

0 comments on commit bb9544b

Please sign in to comment.