From 77a4882afd107e8e1a69e49e48770f5e0782719a Mon Sep 17 00:00:00 2001
From: aidenvaines-cgi
Date: Mon, 2 Mar 2026 13:27:50 +0000
Subject: [PATCH 1/3] CCM-14044 Adding anomaly alarms

---
Reviewer notes (text between "---" and the diffstat is not part of the commit):
 * variables.tf: added validation blocks to event_anomaly_evaluation_periods
   (whole number >= 1) and event_anomaly_period (>= 300 and a multiple of 60,
   enforcing the minimum its description already states). The variables.tf
   hunk header and the diffstat were updated to match (29 -> 39 added lines).
   The blob id on its "index" line still names the original post-image.
 * This series was re-flowed from a flattened copy in which whitespace and
   HTML markup had been lost. The event_target_arns README row was rebuilt in
   terraform-docs <pre>/<br/> form, the second README hunk has no trailing
   context (its header was reduced to the lines actually present), and the
   whitespace of context lines taken from pre-existing files is approximate.
   Apply with whitespace tolerance and regenerate README.md with
   terraform-docs if a README hunk is rejected.

 .tool-versions                                |  2 +-
 .../components/events/.tool-versions          |  2 +-
 .../terraform/components/events/README.md     |  8 ++++
 ...c_alarm_control_plane_ingestion_anomaly.tf | 40 +++++++++++++++++++
 ...alarm_control_plane_invocations_anomaly.tf | 40 +++++++++++++++++++
 ...tric_alarm_data_plane_ingestion_anomaly.tf | 40 +++++++++++++++++++
 ...ic_alarm_data_plane_invocations_anomaly.tf | 40 +++++++++++++++++++
 .../terraform/components/events/outputs.tf    | 32 +++++++++++++++
 .../terraform/components/events/variables.tf  | 39 ++++++++++++++++++
 9 files changed, 241 insertions(+), 2 deletions(-)
 create mode 100644 infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf
 create mode 100644 infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf
 create mode 100644 infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf
 create mode 100644 infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf

diff --git a/.tool-versions b/.tool-versions
index 14550dc..4d0c381 100644
--- a/.tool-versions
+++ b/.tool-versions
@@ -3,7 +3,7 @@ gitleaks 8.24.0
 jq 1.6
 nodejs 22.16.0
 pre-commit 3.6.0
-terraform 1.12.0
+terraform 1.14.3
 terraform-docs 0.19.0
 trivy 0.61.0
 vale 3.6.0
diff --git a/infrastructure/terraform/components/events/.tool-versions b/infrastructure/terraform/components/events/.tool-versions
index 82cdb4d..52428de 100644
--- a/infrastructure/terraform/components/events/.tool-versions
+++ b/infrastructure/terraform/components/events/.tool-versions
@@ -1 +1 @@
-terraform 1.12.0
+terraform 1.14.3
diff --git a/infrastructure/terraform/components/events/README.md b/infrastructure/terraform/components/events/README.md
index cbfaf27..b1cc46d 100644
--- a/infrastructure/terraform/components/events/README.md
+++ b/infrastructure/terraform/components/events/README.md
@@ -15,7 +15,11 @@
 | [aws\_account\_id](#input\_aws\_account\_id) | The AWS Account ID (numeric) | `string` | n/a | yes |
 | [component](#input\_component) | The variable encapsulating the name of this component | `string` | `"events"` | no |
 | [default\_tags](#input\_default\_tags) | A map of default tags to apply to all taggable resources within the component | `map(string)` | `{}` | no |
+| [enable\_event\_anomaly\_detection](#input\_enable\_event\_anomaly\_detection) | Enable CloudWatch anomaly detection alarms for event bus traffic. Applies to both data and control plane ingestion and invocations. | `bool` | `true` | no |
 | [environment](#input\_environment) | The name of the tfscaffold environment | `string` | n/a | yes |
+| [event\_anomaly\_band\_width](#input\_event\_anomaly\_band\_width) | The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Recommended: 2-4. | `number` | `3` | no |
+| [event\_anomaly\_evaluation\_periods](#input\_event\_anomaly\_evaluation\_periods) | Number of evaluation periods for the anomaly alarm. Each period is defined by event\_anomaly\_period. | `number` | `2` | no |
+| [event\_anomaly\_period](#input\_event\_anomaly\_period) | The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600. | `number` | `300` | no |
 | [event\_publisher\_account\_ids](#input\_event\_publisher\_account\_ids) | An object representing account id's of event publishers | `list(any)` | `[]` | no |
 | [event\_target\_arns](#input\_event\_target\_arns) | A map of event target ARNs keyed by name | <pre>object({<br/>    sms_nudge = string<br/>    notify_core_sns_topic = optional(string, null)<br/>    supplier_api_sns_topic = optional(string, null)<br/>    app_response = optional(string, null)<br/>    client_callbacks = optional(string, null)<br/>  })</pre> | n/a | yes |
 | [force\_lambda\_code\_deploy](#input\_force\_lambda\_code\_deploy) | If the lambda package in s3 has the same commit id tag as the terraform build branch, the lambda will not update automatically. Set to True if making changes to Lambda code from on the same commit for example during development | `bool` | `false` | no |
@@ -43,4 +47,8 @@
 | Name | Description |
 |------|-------------|
 | [control\_plane\_event\_bus](#output\_control\_plane\_event\_bus) | n/a |
+| [control\_plane\_ingestion\_anomaly\_alarm](#output\_control\_plane\_ingestion\_anomaly\_alarm) | Control plane ingestion anomaly detection alarm details |
+| [control\_plane\_invocations\_anomaly\_alarm](#output\_control\_plane\_invocations\_anomaly\_alarm) | Control plane invocations anomaly detection alarm details |
 | [data\_plane\_event\_bus](#output\_data\_plane\_event\_bus) | n/a |
+| [data\_plane\_ingestion\_anomaly\_alarm](#output\_data\_plane\_ingestion\_anomaly\_alarm) | Data plane ingestion anomaly detection alarm details |
+| [data\_plane\_invocations\_anomaly\_alarm](#output\_data\_plane\_invocations\_anomaly\_alarm) | Data plane invocations anomaly detection alarm details |
diff --git a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf
new file mode 100644
index 0000000..9cc7b7e
--- /dev/null
+++ b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf
@@ -0,0 +1,40 @@
+resource "aws_cloudwatch_metric_alarm" "control_plane_ingestion_anomaly" {
+  count = var.enable_event_anomaly_detection ? 1 : 0
+
+  alarm_name          = "${local.csi}-control-plane-ingestion-anomaly"
+  alarm_description   = "ANOMALY: Detects anomalous patterns in events ingested to the control plane event bus"
+  comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
+  evaluation_periods  = var.event_anomaly_evaluation_periods
+  threshold_metric_id = "ad1"
+  treat_missing_data  = "notBreaching"
+
+  metric_query {
+    id          = "m1"
+    return_data = true
+
+    metric {
+      metric_name = "Ingestion"
+      namespace   = "AWS/Events"
+      period      = var.event_anomaly_period
+      stat        = "Sum"
+
+      dimensions = {
+        EventBusName = aws_cloudwatch_event_bus.control_plane.name
+      }
+    }
+  }
+
+  metric_query {
+    id          = "ad1"
+    expression  = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})"
+    label       = "Ingestion (expected)"
+    return_data = true
+  }
+
+  tags = merge(
+    local.default_tags,
+    {
+      Name = "${local.csi}-control-plane-ingestion-anomaly"
+    }
+  )
+}
diff --git a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf
new file mode 100644
index 0000000..391fc91
--- /dev/null
+++ b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf
@@ -0,0 +1,40 @@
+resource "aws_cloudwatch_metric_alarm" "control_plane_invocations_anomaly" {
+  count = var.enable_event_anomaly_detection ? 1 : 0
+
+  alarm_name          = "${local.csi}-control-plane-invocations-anomaly"
+  alarm_description   = "ANOMALY: Detects anomalous patterns in events delivered from the control plane event bus to targets"
+  comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
+  evaluation_periods  = var.event_anomaly_evaluation_periods
+  threshold_metric_id = "ad1"
+  treat_missing_data  = "notBreaching"
+
+  metric_query {
+    id          = "m1"
+    return_data = true
+
+    metric {
+      metric_name = "Invocations"
+      namespace   = "AWS/Events"
+      period      = var.event_anomaly_period
+      stat        = "Sum"
+
+      dimensions = {
+        EventBusName = aws_cloudwatch_event_bus.control_plane.name
+      }
+    }
+  }
+
+  metric_query {
+    id          = "ad1"
+    expression  = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})"
+    label       = "Invocations (expected)"
+    return_data = true
+  }
+
+  tags = merge(
+    local.default_tags,
+    {
+      Name = "${local.csi}-control-plane-invocations-anomaly"
+    }
+  )
+}
diff --git a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf
new file mode 100644
index 0000000..f6fd0ca
--- /dev/null
+++ b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf
@@ -0,0 +1,40 @@
+resource "aws_cloudwatch_metric_alarm" "data_plane_ingestion_anomaly" {
+  count = var.enable_event_anomaly_detection ? 1 : 0
+
+  alarm_name          = "${local.csi}-data-plane-ingestion-anomaly"
+  alarm_description   = "ANOMALY: Detects anomalous patterns in events ingested to the data plane event bus"
+  comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
+  evaluation_periods  = var.event_anomaly_evaluation_periods
+  threshold_metric_id = "ad1"
+  treat_missing_data  = "notBreaching"
+
+  metric_query {
+    id          = "m1"
+    return_data = true
+
+    metric {
+      metric_name = "Ingestion"
+      namespace   = "AWS/Events"
+      period      = var.event_anomaly_period
+      stat        = "Sum"
+
+      dimensions = {
+        EventBusName = aws_cloudwatch_event_bus.data_plane.name
+      }
+    }
+  }
+
+  metric_query {
+    id          = "ad1"
+    expression  = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})"
+    label       = "Ingestion (expected)"
+    return_data = true
+  }
+
+  tags = merge(
+    local.default_tags,
+    {
+      Name = "${local.csi}-data-plane-ingestion-anomaly"
+    }
+  )
+}
diff --git a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf
new file mode 100644
index 0000000..bc5d1b2
--- /dev/null
+++ b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf
@@ -0,0 +1,40 @@
+resource "aws_cloudwatch_metric_alarm" "data_plane_invocations_anomaly" {
+  count = var.enable_event_anomaly_detection ? 1 : 0
+
+  alarm_name          = "${local.csi}-data-plane-invocations-anomaly"
+  alarm_description   = "ANOMALY: Detects anomalous patterns in events delivered from the data plane event bus to targets"
+  comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
+  evaluation_periods  = var.event_anomaly_evaluation_periods
+  threshold_metric_id = "ad1"
+  treat_missing_data  = "notBreaching"
+
+  metric_query {
+    id          = "m1"
+    return_data = true
+
+    metric {
+      metric_name = "Invocations"
+      namespace   = "AWS/Events"
+      period      = var.event_anomaly_period
+      stat        = "Sum"
+
+      dimensions = {
+        EventBusName = aws_cloudwatch_event_bus.data_plane.name
+      }
+    }
+  }
+
+  metric_query {
+    id          = "ad1"
+    expression  = "ANOMALY_DETECTION_BAND(m1, ${var.event_anomaly_band_width})"
+    label       = "Invocations (expected)"
+    return_data = true
+  }
+
+  tags = merge(
+    local.default_tags,
+    {
+      Name = "${local.csi}-data-plane-invocations-anomaly"
+    }
+  )
+}
diff --git a/infrastructure/terraform/components/events/outputs.tf b/infrastructure/terraform/components/events/outputs.tf
index e90c7aa..fb678ab 100644
--- a/infrastructure/terraform/components/events/outputs.tf
+++ b/infrastructure/terraform/components/events/outputs.tf
@@ -11,3 +11,35 @@ output "data_plane_event_bus" {
     arn = aws_cloudwatch_event_bus.data_plane.arn
   }
 }
+
+output "data_plane_ingestion_anomaly_alarm" {
+  description = "Data plane ingestion anomaly detection alarm details"
+  value = var.enable_event_anomaly_detection ? {
+    arn  = aws_cloudwatch_metric_alarm.data_plane_ingestion_anomaly[0].arn
+    name = aws_cloudwatch_metric_alarm.data_plane_ingestion_anomaly[0].alarm_name
+  } : null
+}
+
+output "data_plane_invocations_anomaly_alarm" {
+  description = "Data plane invocations anomaly detection alarm details"
+  value = var.enable_event_anomaly_detection ? {
+    arn  = aws_cloudwatch_metric_alarm.data_plane_invocations_anomaly[0].arn
+    name = aws_cloudwatch_metric_alarm.data_plane_invocations_anomaly[0].alarm_name
+  } : null
+}
+
+output "control_plane_ingestion_anomaly_alarm" {
+  description = "Control plane ingestion anomaly detection alarm details"
+  value = var.enable_event_anomaly_detection ? {
+    arn  = aws_cloudwatch_metric_alarm.control_plane_ingestion_anomaly[0].arn
+    name = aws_cloudwatch_metric_alarm.control_plane_ingestion_anomaly[0].alarm_name
+  } : null
+}
+
+output "control_plane_invocations_anomaly_alarm" {
+  description = "Control plane invocations anomaly detection alarm details"
+  value = var.enable_event_anomaly_detection ? {
+    arn  = aws_cloudwatch_metric_alarm.control_plane_invocations_anomaly[0].arn
+    name = aws_cloudwatch_metric_alarm.control_plane_invocations_anomaly[0].alarm_name
+  } : null
+}
diff --git a/infrastructure/terraform/components/events/variables.tf b/infrastructure/terraform/components/events/variables.tf
index d2c7eef..99863c7 100644
--- a/infrastructure/terraform/components/events/variables.tf
+++ b/infrastructure/terraform/components/events/variables.tf
@@ -130,3 +130,42 @@ variable "notify_core_sns_kms_arn" {
   type    = string
   default = null
 }
+
+variable "enable_event_anomaly_detection" {
+  type        = bool
+  description = "Enable CloudWatch anomaly detection alarms for event bus traffic. Applies to both data and control plane ingestion and invocations."
+  default     = true
+}
+
+variable "event_anomaly_evaluation_periods" {
+  type        = number
+  description = "Number of evaluation periods for the anomaly alarm. Each period is defined by event_anomaly_period."
+  default     = 2
+
+  validation {
+    condition     = var.event_anomaly_evaluation_periods >= 1 && floor(var.event_anomaly_evaluation_periods) == var.event_anomaly_evaluation_periods
+    error_message = "Evaluation periods must be a whole number greater than or equal to 1"
+  }
+}
+
+variable "event_anomaly_period" {
+  type        = number
+  description = "The period in seconds over which the specified statistic is applied for anomaly detection. Minimum 300 seconds (5 minutes). Recommended: 300-600."
+  default     = 300
+
+  validation {
+    condition     = var.event_anomaly_period >= 300 && var.event_anomaly_period % 60 == 0
+    error_message = "Period must be at least 300 seconds and a multiple of 60"
+  }
+}
+
+variable "event_anomaly_band_width" {
+  type        = number
+  description = "The width of the anomaly detection band. Higher values (e.g. 4-6) reduce sensitivity and noise, lower values (e.g. 2-3) increase sensitivity. Recommended: 2-4."
+  default     = 3
+
+  validation {
+    condition     = var.event_anomaly_band_width >= 2 && var.event_anomaly_band_width <= 10
+    error_message = "Band width must be between 2 and 10"
+  }
+}

From dcb3e9f7e1184f1f53c860cc29bb5dec79e9ed05 Mon Sep 17 00:00:00 2001
From: aidenvaines-cgi
Date: Mon, 2 Mar 2026 13:31:09 +0000
Subject: [PATCH 2/3] CCM-14044 Adding anomaly alarms

---
 .tool-versions | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.tool-versions b/.tool-versions
index 4d0c381..ef1c7b1 100644
--- a/.tool-versions
+++ b/.tool-versions
@@ -5,7 +5,7 @@ nodejs 22.16.0
 pre-commit 3.6.0
 terraform 1.14.3
 terraform-docs 0.19.0
-trivy 0.61.0
+trivy 0.69.2
 vale 3.6.0
 python 3.13.2
 

From e781a0e5575110b79242080000fa4e6f36ce2270 Mon Sep 17 00:00:00 2001
From: aidenvaines-cgi
Date: Mon, 2 Mar 2026 13:51:12 +0000
Subject: [PATCH 3/3] CCM-14044 Adding anomaly alarms

---
 .../cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf  | 2 +-
 ...cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf | 2 +-
 .../cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf     | 2 +-
 .../cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf
index 9cc7b7e..8119c8f 100644
--- a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf
+++ b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_ingestion_anomaly.tf
@@ -2,7 +2,7 @@ resource "aws_cloudwatch_metric_alarm" "control_plane_ingestion_anomaly" {
   count = var.enable_event_anomaly_detection ? 1 : 0
 
   alarm_name          = "${local.csi}-control-plane-ingestion-anomaly"
-  alarm_description   = "ANOMALY: Detects anomalous patterns in events ingested to the control plane event bus"
+  alarm_description   = "RELIABILITY: Detects anomalous patterns in events ingested to the control plane event bus"
   comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
   evaluation_periods  = var.event_anomaly_evaluation_periods
   threshold_metric_id = "ad1"
diff --git a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf
index 391fc91..b30913a 100644
--- a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf
+++ b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_control_plane_invocations_anomaly.tf
@@ -2,7 +2,7 @@ resource "aws_cloudwatch_metric_alarm" "control_plane_invocations_anomaly" {
   count = var.enable_event_anomaly_detection ? 1 : 0
 
   alarm_name          = "${local.csi}-control-plane-invocations-anomaly"
-  alarm_description   = "ANOMALY: Detects anomalous patterns in events delivered from the control plane event bus to targets"
+  alarm_description   = "RELIABILITY: Detects anomalous patterns in events delivered from the control plane event bus to targets"
   comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
   evaluation_periods  = var.event_anomaly_evaluation_periods
   threshold_metric_id = "ad1"
diff --git a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf
index f6fd0ca..5eed057 100644
--- a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf
+++ b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_ingestion_anomaly.tf
@@ -2,7 +2,7 @@ resource "aws_cloudwatch_metric_alarm" "data_plane_ingestion_anomaly" {
   count = var.enable_event_anomaly_detection ? 1 : 0
 
   alarm_name          = "${local.csi}-data-plane-ingestion-anomaly"
-  alarm_description   = "ANOMALY: Detects anomalous patterns in events ingested to the data plane event bus"
+  alarm_description   = "RELIABILITY: Detects anomalous patterns in events ingested to the data plane event bus"
   comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
   evaluation_periods  = var.event_anomaly_evaluation_periods
   threshold_metric_id = "ad1"
diff --git a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf
index bc5d1b2..d029fe6 100644
--- a/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf
+++ b/infrastructure/terraform/components/events/cloudwatch_metric_alarm_data_plane_invocations_anomaly.tf
@@ -2,7 +2,7 @@ resource "aws_cloudwatch_metric_alarm" "data_plane_invocations_anomaly" {
   count = var.enable_event_anomaly_detection ? 1 : 0
 
   alarm_name          = "${local.csi}-data-plane-invocations-anomaly"
-  alarm_description   = "ANOMALY: Detects anomalous patterns in events delivered from the data plane event bus to targets"
+  alarm_description   = "RELIABILITY: Detects anomalous patterns in events delivered from the data plane event bus to targets"
   comparison_operator = "LessThanLowerOrGreaterThanUpperThreshold"
   evaluation_periods  = var.event_anomaly_evaluation_periods
   threshold_metric_id = "ad1"