Skip to content

Commit

Permalink
feat: add api gateway
Browse files Browse the repository at this point in the history
  • Loading branch information
miguelpuiggarcia committed Sep 4, 2023
1 parent b020790 commit b8e4cdf
Show file tree
Hide file tree
Showing 6 changed files with 113 additions and 230 deletions.
140 changes: 15 additions & 125 deletions gcp/cloud-alerts/main.tf
Original file line number Diff line number Diff line change
@@ -1,140 +1,30 @@
# Error Rate Alert
resource "google_monitoring_alert_policy" "error_rate_alert" {
project = var.project_id
resource "google_monitoring_alert_policy" "error" {
display_name = "Error Rate Alert for ${var.service_name}"
enabled = var.enabled
combiner = "OR"

conditions {
display_name = "5xx Errors"

condition_threshold {
filter = "metric.type=\"run.googleapis.com/request_count\" AND metric.response_code_class=\"5xx\" AND resource.type=\"cloud_run_revision\" AND resource.labels.service_name=\"${var.service_name}\""



comparison = "COMPARISON_GT"
threshold_value = var.error_rate_threshold
duration = var.error_rate_duration

aggregations {
alignment_period = "60s"
per_series_aligner = "ALIGN_RATE"
}
}
user_labels = {
api_name = var.service_name
}

notification_channels = var.alert_notification_channels
enabled = true
}

# High Latency Alert
resource "google_monitoring_alert_policy" "latency_alert" {
project = var.project_id
display_name = "High Latency Alert for ${var.service_name}"
combiner = "OR"

conditions {
display_name = "High Latency"

display_name = "Number of errors is above ${var.threshold_value} during ${var.duration}s"
condition_threshold {
filter = "resource.type=\"cloud_run_revision\" AND metric.type=\"run.googleapis.com/request_latencies\" AND resource.labels.service_name=\"${var.service_name}\""

comparison = "COMPARISON_GT"
threshold_value = var.latency_threshold
duration = var.latency_duration

aggregations {
alignment_period = "60s"
per_series_aligner = "ALIGN_PERCENTILE_95"
alignment_period = "${var.alignment_period}s"
per_series_aligner = "ALIGN_SUM"
}
}
}

notification_channels = var.alert_notification_channels
enabled = true
}


# 4xx Error Rate Alert
resource "google_monitoring_alert_policy" "client_error_rate_alert" {
project = var.project_id
display_name = "4xx Error Rate Alert for ${var.service_name}"
combiner = "OR"

conditions {
display_name = "4xx Errors"

condition_threshold {
filter = "metric.type=\"run.googleapis.com/request_count\" AND metric.response_code_class=\"4xx\" AND resource.type=\"cloud_run_revision\" AND resource.labels.service_name=\"${var.service_name}\""


comparison = "COMPARISON_GT"
threshold_value = var.client_error_rate_threshold
duration = var.client_error_rate_duration

aggregations {
alignment_period = "60s"
per_series_aligner = "ALIGN_RATE"
trigger {
percent = 100
}
}
}

notification_channels = var.alert_notification_channels
enabled = true
}


# Traffic Volume Alert
resource "google_monitoring_alert_policy" "traffic_volume_alert" {
project = var.project_id
display_name = "Traffic Volume Alert for ${var.service_name}"
combiner = "OR"

conditions {
display_name = "Traffic Volume"

condition_threshold {
filter = "resource.type=\"cloud_run_revision\" AND metric.type=\"run.googleapis.com/request_count\" AND resource.labels.service_name=\"${var.service_name}\""

duration = "${var.duration}s"
comparison = "COMPARISON_GT"
threshold_value = var.traffic_volume_threshold
duration = var.traffic_volume_duration
filter = "resource.type = \"cloud_run_revision\" AND resource.labels.service_name = \"${var.service_name}\" AND metric.type = \"logging.googleapis.com/log_entry_count\" AND metric.labels.severity = \"ERROR\""
threshold_value = var.threshold_value

aggregations {
alignment_period = "60s"
per_series_aligner = "ALIGN_RATE"
}
}
}

notification_channels = var.alert_notification_channels
enabled = true
}

# CPU Utilization Alert
resource "google_monitoring_alert_policy" "cpu_utilization_alert" {
project = var.project_id
display_name = "CPU Utilization Alert for ${var.service_name}"
combiner = "OR"

conditions {
display_name = "CPU Utilization"

condition_threshold {
filter = "metric.type=\"run.googleapis.com/container/cpu/utilization\" AND resource.type=\"cloud_run_revision\" AND resource.labels.service_name=\"${var.service_name}\""


comparison = "COMPARISON_GT"
threshold_value = var.cpu_utilization_threshold
duration = var.cpu_utilization_duration

aggregations {
alignment_period = "60s"
per_series_aligner = "ALIGN_RATE"
}
}
alert_strategy {
auto_close = "${var.auto_close}s"
}

notification_channels = var.alert_notification_channels
enabled = true
notification_channels = var.notification_channels
}
74 changes: 20 additions & 54 deletions gcp/cloud-alerts/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -2,72 +2,38 @@ variable "project_id" {
description = "The project ID to deploy to"
}

variable "alert_notification_channels" {
description = "Notification channels for the alert"
type = list(string)
default = []
}

# Name of the Cloud Run service the alert policy watches. The value is
# interpolated into the monitoring filter and the policy display name, so it
# is constrained to a string.
variable "service_name" {
  description = "Cloud Run service name"
  type        = string
}


variable "error_rate_threshold" {
description = "Threshold for the error rate alert"
default = 10.0
variable "enabled" {
description = "Whether the alert policy should be enabled"
type = bool
default = true
}

variable "error_rate_duration" {
description = "Duration for the error rate alert"
default = "300s"
}

variable "latency_threshold" {
description = "Threshold for the latency alert"
default = 1000.0
}

variable "latency_duration" {
description = "Duration for the latency alert"
default = "300s"
}

# For 4xx Error Rate
variable "client_error_rate_threshold" {
description = "Threshold for 4xx client error rate"
type = number
default = 50 # This sets a default value, adjust according to your needs
}

variable "client_error_rate_duration" {
description = "Time window for 4xx client error rate"
type = string
default = "300s" # This sets a default value of 5 minutes, adjust according to your needs
variable "threshold_value" {
description = "Threshold value for the error rate alert"
default = 10.0
}

# For Traffic Volume
variable "traffic_volume_threshold" {
description = "Threshold for traffic volume"
type = number
default = 1000 # This sets a default value, adjust according to your needs
variable "duration" {
description = "Duration for the error rate alert in seconds"
default = 300
}

variable "traffic_volume_duration" {
description = "Time window for traffic volume"
type = string
default = "300s" # This sets a default value of 5 minutes, adjust according to your needs
variable "alignment_period" {
description = "The alignment period for the time series query in seconds"
default = 60
}

# For CPU Utilization
variable "cpu_utilization_threshold" {
description = "Threshold for CPU utilization"
type = number
default = 90 # This sets a default value of 90%, adjust according to your needs
variable "auto_close" {
description = "The duration after which the alert will auto close in seconds"
default = 86400 # This sets a default value of 24 hours in seconds, adjust according to your needs
}

variable "cpu_utilization_duration" {
description = "Time window for CPU utilization"
type = string
default = "300s" # This sets a default value of 5 minutes, adjust according to your needs
variable "notification_channels" {
description = "Notification channels for the alert"
type = list(string)
default = []
}
44 changes: 34 additions & 10 deletions gcp/cloud-run-v2/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,28 @@ resource "google_compute_region_network_endpoint_group" "cloudrun_neg" {
}
}


# Cloud Armor security policy.
# Created only when var.enable_cloud_armor is true (count is 0 otherwise), so
# every reference to it must index the single instance with [0].
resource "google_compute_security_policy" "cloud_armor_policy" {
  count       = var.enable_cloud_armor ? 1 : 0
  project     = var.project_id # keep the policy in the same project as the load balancer
  name        = "${var.name}-armor-policy"
  description = "A security policy for Cloud Armor."

  # Catch-all default rule. Cloud Armor requires every policy to carry a
  # default rule at the lowest priority (2147483647) matching all sources ("*").
  # Declaring it explicitly keeps the Terraform state in line with what the API
  # stores; more specific rules should use numerically lower priorities.
  rule {
    action      = "allow"
    priority    = 2147483647
    description = "default rule"

    match {
      versioned_expr = "SRC_IPS_V1"
      config {
        src_ip_ranges = ["*"]
      }
    }
  }
}


# Load Balancer module using serverless NEGs
# View all options on https://github.com/terraform-google-modules/terraform-google-lb-http
module "lb-http" {
source = "GoogleCloudPlatform/lb-http/google//modules/serverless_negs"
project = var.project_id
Expand All @@ -141,7 +162,10 @@ module "lb-http" {
enable_cdn = false
custom_request_headers = ["X-Client-Geo-Location: {client_region_subdivision}, {client_city}"]
custom_response_headers = ["X-Cache-Hit: {cdn_cache_status}"]
security_policy = null

# Cloud Armor security
security_policy = var.enable_cloud_armor ? google_compute_security_policy.cloud_armor_policy[0].self_link : null

log_config = {
enable = false
}
Expand Down Expand Up @@ -176,14 +200,14 @@ module "trigger_provision" {
}
}


module "cloud_run_alerts" {
source = "../cloud-alerts"
project_id = var.project_id
service_name = var.name
alert_notification_channels = var.alert_config.alert_notification_channels
error_rate_threshold = var.alert_config.error_rate_threshold
error_rate_duration = var.alert_config.error_rate_duration
latency_threshold = var.alert_config.latency_threshold
latency_duration = var.alert_config.latency_duration
source = "../cloud-alerts"
project_id = var.project_id
service_name = var.name
enabled = var.alert_config.enabled
threshold_value = var.alert_config.threshold_value
duration = var.alert_config.duration
alignment_period = var.alert_config.alignment_period
auto_close = var.alert_config.auto_close
notification_channels = var.alert_config.notification_channels
}
46 changes: 18 additions & 28 deletions gcp/cloud-run-v2/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -161,38 +161,28 @@ variable "trigger_config" {
create_trigger = true
}
}

variable "alert_config" {
description = "Configuration for alerts"
type = object({
alert_notification_channels = list(string)
error_rate_threshold = number
error_rate_duration = string
latency_threshold = number
latency_duration = string

# Additional alert settings
client_error_rate_threshold = number
client_error_rate_duration = string
traffic_volume_threshold = number
traffic_volume_duration = string
cpu_utilization_threshold = number
cpu_utilization_duration = string
enabled = bool
threshold_value = number
duration = number
alignment_period = number
auto_close = number
notification_channels = list(string)
})
default = {
alert_notification_channels = []
error_rate_threshold = 10.0
error_rate_duration = "300s"
latency_threshold = 1000.0
latency_duration = "300s"

# Additional alert settings
client_error_rate_threshold = 50.0
client_error_rate_duration = "300s"
traffic_volume_threshold = 1000
traffic_volume_duration = "300s"
cpu_utilization_threshold = 90
cpu_utilization_duration = "300s"
enabled = true
threshold_value = 10.0
duration = 300
alignment_period = 60
auto_close = 86400
notification_channels = []
}
}


# Feature flag for Cloud Armor. It is off by default; callers opt in by
# setting it to true.
variable "enable_cloud_armor" {
  type        = bool
  default     = false
  description = "Enable Google Cloud Armor integration"
}
19 changes: 19 additions & 0 deletions test/gcp/assets/cloud-armor-rules.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"rules": [
{
"action": "allow",
"priority": 1000,
"src_ip_ranges": [
"192.0.2.0/24",
"198.51.100.0/24"
]
},
{
"action": "deny",
"priority": 2000,
"src_ip_ranges": [
"203.0.113.0/24"
]
}
]
}
Loading

0 comments on commit b8e4cdf

Please sign in to comment.