Skip to content

Commit

Permalink
Introduce requests per LB pool metric & fix LB pool health status met…
Browse files Browse the repository at this point in the history
…ric (#65)

* fix: Report LB pool health properly

The `cloudflare_zone_pool_health_status` metric was introduced with the goal
of reporting the actual health status of every pool over time, regardless
of whether the pool is the selected one or not. The initial implementation
uses the `loadBalancingRequestsAdaptiveGroups` type, which returns
aggregated Load Balancing origin requests with adaptive sampling and
contains information only about the selected pool (Name, Health,
AvgRttMs, etc.) rather than about all LB pools. This leads to reporting
back the status of the selected pool only (which is normally always 1)
and not the overall status of all of the LB's pools.
According to Cloudflare's documentation about the Load Balancing GraphQL
Analytics API[1], the `loadBalancingRequestsAdaptive` schema can be
used to fetch analytics about the raw Load Balancing origin requests
with adaptive sampling. These analytics expose information about the
selected pool, like:
* Selected Pool - Name/Health/ID/Average RTT
* Session Affinity - Type/Status
But they also expose information about the LB's pools and origins regardless
of the selection decision, like:
* Pools ID/Name/Health/Average RTT/Selected
* Origins Name/FQDN/Ipv4/Ipv6/Selected
The latter metrics can be used to report the health of all pools
associated with the account's LBs.
For this reason, this commit adds the required `loadBalancingRequestsAdaptive`
schema struct to the lbResp struct and configures it as the source for the
`cloudflare_zone_pool_health_status` metric.

1. https://developers.cloudflare.com/load-balancing/reference/load-balancing-analytics/#graphql-analytics

* feat: Add poolRequestsTotal metric

This commit introduces a Prometheus counter metric for the total
number of requests per pool.
  • Loading branch information
axilleastr authored Jul 24, 2022
1 parent 3034d18 commit b6871aa
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 21 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,8 @@ Note: `ZONE_<name>` configuration is not supported as flag.
# HELP cloudflare_zone_threats_country Threats per zone per country
# HELP cloudflare_zone_threats_total Threats per zone
# HELP cloudflare_zone_uniques_total Uniques per zone
# HELP cloudflare_zone_pool_health_status Reports the health of a pool, 1 for healthy, 0 for unhealthy.
# HELP cloudflare_zone_pool_health_status Reports the health of a pool, 1 for healthy, 0 for unhealthy
# HELP cloudflare_zone_pool_requests_total Requests per pool
```

## Helm chart repository
Expand Down
68 changes: 60 additions & 8 deletions cloudflare.go
Original file line number Diff line number Diff line change
Expand Up @@ -183,15 +183,41 @@ type lbResp struct {
LoadBalancingRequestsAdaptiveGroups []struct {
Count uint64 `json:"count"`
Dimensions struct {
ColoCode string `json:"coloCode"`
LbName string `json:"lbName"`
Region string `json:"region"`
SelectedOriginName string `json:"selectedOriginName"`
SelectedPoolHealthy int `json:"selectedPoolHealthy"`
SelectedPoolName string `json:"selectedPoolName"`
SteeringPolicy string `json:"steeringPolicy"`
LbName string `json:"lbName"`
Proxied uint8 `json:"proxied"`
Region string `json:"region"`
SelectedOriginName string `json:"selectedOriginName"`
SelectedPoolAvgRttMs uint64 `json:"selectedPoolAvgRttMs"`
SelectedPoolHealthy uint8 `json:"selectedPoolHealthy"`
SelectedPoolName string `json:"selectedPoolName"`
SteeringPolicy string `json:"steeringPolicy"`
} `json:"dimensions"`
} `json:"loadBalancingRequestsAdaptiveGroups"`

LoadBalancingRequestsAdaptive []struct {
LbName string `json:"lbName"`
Proxied uint8 `json:"proxied"`
Region string `json:"region"`
SelectedPoolHealthy uint8 `json:"selectedPoolHealthy"`
SelectedPoolID string `json:"selectedPoolID"`
SelectedPoolName string `json:"selectedPoolName"`
SessionAffinityStatus string `json:"sessionAffinityStatus"`
SteeringPolicy string `json:"steeringPolicy"`
SelectedPoolAvgRttMs uint64 `json:"selectedPoolAvgRttMs"`
Pools []struct {
AvgRttMs uint64 `json:"avgRttMs"`
Healthy uint8 `json:"healthy"`
ID string `json:"id"`
PoolName string `json:"poolName"`
} `json:"pools"`
Origins []struct {
OriginName string `json:"originName"`
Health uint8 `json:"health"`
IPv4 string `json:"ipv4"`
Selected uint8 `json:"selected"`
} `json:"origins"`
} `json:"loadBalancingRequestsAdaptive"`

ZoneTag string `json:"zoneTag"`
}

Expand Down Expand Up @@ -493,15 +519,41 @@ func fetchLoadBalancerTotals(zoneIDs []string) (*cloudflareResponseLb, error) {
limit: $limit) {
count
dimensions {
coloCode
region
lbName
selectedPoolName
proxied
selectedOriginName
selectedPoolAvgRttMs
selectedPoolHealthy
steeringPolicy
}
}
loadBalancingRequestsAdaptive(
filter: { datetime_geq: $mintime, datetime_lt: $maxtime},
limit: $limit) {
lbName
proxied
region
selectedPoolHealthy
selectedPoolId
selectedPoolName
sessionAffinityStatus
steeringPolicy
selectedPoolAvgRttMs
pools {
id
poolName
healthy
avgRttMs
}
origins {
originName
health
ipv4
selected
}
}
}
}
}
Expand Down
42 changes: 30 additions & 12 deletions prometheus.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,12 +174,18 @@ var (
}, []string{"script_name", "quantile"},
)

poolHealthStatus = promauto.NewGaugeVec(
prometheus.GaugeOpts{
Name: "cloudflare_zone_pool_health_status",
Help: "Reports the health of a pool, 1 for healthy, 0 for unhealthy.",
},
[]string{"zone", "colo_code", "load_balancer_name", "origin_name", "steering_policy", "pool_name", "region"},
poolHealthStatus = promauto.NewGaugeVec(prometheus.GaugeOpts{
Name: "cloudflare_zone_pool_health_status",
Help: "Reports the health of a pool, 1 for healthy, 0 for unhealthy.",
},
[]string{"zone", "load_balancer_name", "pool_name"},
)

poolRequestsTotal = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "cloudflare_zone_pool_requests_total",
Help: "Requests per pool",
},
[]string{"zone", "load_balancer_name", "pool_name", "origin_name"},
)
)

Expand Down Expand Up @@ -384,23 +390,35 @@ func fetchLoadBalancerAnalytics(zones []cloudflare.Zone, wg *sync.WaitGroup) {
}
for _, lb := range l.Viewer.Zones {
name := findZoneName(zones, lb.ZoneTag)
addLoadBalancingRequestsAdaptive(&lb, name)
addLoadBalancingRequestsAdaptiveGroups(&lb, name)
}
}

func addLoadBalancingRequestsAdaptiveGroups(z *lbResp, name string) {

for _, g := range z.LoadBalancingRequestsAdaptiveGroups {
poolHealthStatus.With(
poolRequestsTotal.With(
prometheus.Labels{
"zone": name,
"colo_code": g.Dimensions.ColoCode,
"load_balancer_name": g.Dimensions.LbName,
"origin_name": g.Dimensions.SelectedOriginName,
"steering_policy": g.Dimensions.SteeringPolicy,
"pool_name": g.Dimensions.SelectedPoolName,
"region": g.Dimensions.Region,
}).Set(float64(g.Dimensions.SelectedPoolHealthy))
"origin_name": g.Dimensions.SelectedOriginName,
}).Add(float64(g.Count))
}
}

// addLoadBalancingRequestsAdaptive publishes the health of every pool listed
// in the zone's raw load-balancing request records.
//
// For each record in z.LoadBalancingRequestsAdaptive, and for each entry in
// that record's Pools, the poolHealthStatus gauge labelled with the zone name
// (name), the record's load balancer name and the pool name is set to the
// pool's Healthy value. When several records mention the same pool, the value
// from the record visited last is the one left on the gauge.
func addLoadBalancingRequestsAdaptive(z *lbResp, name string) {
	for i := range z.LoadBalancingRequestsAdaptive {
		// Index into the slice instead of copying each record (and its
		// nested slices' headers) on every iteration.
		req := &z.LoadBalancingRequestsAdaptive[i]

		for j := range req.Pools {
			pool := &req.Pools[j]

			labels := prometheus.Labels{
				"zone":               name,
				"load_balancer_name": req.LbName,
				"pool_name":          pool.PoolName,
			}
			poolHealthStatus.With(labels).Set(float64(pool.Healthy))
		}
	}
}

0 comments on commit b6871aa

Please sign in to comment.