diff --git a/jobs/competitive-test.yml b/jobs/competitive-test.yml index c4872ad11c..8a3121f1a9 100644 --- a/jobs/competitive-test.yml +++ b/jobs/competitive-test.yml @@ -97,10 +97,3 @@ jobs: regions: ${{ parameters.regions }} engine_input: ${{ parameters.engine_input }} credential_type: ${{ parameters.credential_type }} - - template: /steps/cleanup-resources.yml - parameters: - cloud: ${{ parameters.cloud }} - regions: ${{ parameters.regions }} - terraform_arguments: ${{ parameters.terraform_arguments }} - retry_attempt_count: ${{ parameters.retry_attempt_count }} - credential_type: ${{ parameters.credential_type }} diff --git a/modules/python/clusterloader2/cri/cri.py b/modules/python/clusterloader2/cri/cri.py index 088b5a2564..97569529e9 100644 --- a/modules/python/clusterloader2/cri/cri.py +++ b/modules/python/clusterloader2/cri/cri.py @@ -38,7 +38,15 @@ def override_config_clusterloader2( print(f"Node {node.metadata.name} has allocatable cpu of {allocatable_cpu} and allocatable memory of {allocatable_memory}") cpu_value = int(allocatable_cpu.replace("m", "")) - memory_value = int(allocatable_memory.replace("Ki", "")) + # Bottlerocket OS SKU on EKS has allocatable_memory property in Mi. AKS and Amazon Linux (default SKUs) + # use Ki. Handling the Mi case here and converting Mi to Ki, if needed.
+ if "Mi" in allocatable_memory: + memory_value = int(allocatable_memory.replace("Mi", "")) * 1024 + elif "Ki" in allocatable_memory: + memory_value = int(allocatable_memory.replace("Ki", "")) + else: + raise Exception("Unexpected format of allocatable memory node property") + print(f"Node {node.metadata.name} has cpu value of {cpu_value} and memory value of {memory_value}") allocated_cpu, allocated_memory = _get_daemonsets_pods_allocated_resources(client, node.metadata.name) @@ -182,13 +190,13 @@ def main(): args = parser.parse_args() if args.command == "override": - override_config_clusterloader2(args.node_count, args.node_per_step, args.max_pods, args.repeats, args.operation_timeout, + override_config_clusterloader2(args.node_count, args.node_per_step, args.max_pods, args.repeats, args.operation_timeout, args.load_type, args.scale_enabled, args.pod_startup_latency_threshold, args.provider, args.cl2_override_file) elif args.command == "execute": execute_clusterloader2(args.cl2_image, args.cl2_config_dir, args.cl2_report_dir, args.kubeconfig, args.provider) elif args.command == "collect": - collect_clusterloader2(args.node_count, args.max_pods, args.repeats, args.load_type, + collect_clusterloader2(args.node_count, args.max_pods, args.repeats, args.load_type, args.cl2_report_dir, args.cloud_info, args.run_id, args.run_url, args.result_file) if __name__ == "__main__": diff --git a/modules/terraform/azure/aks-cli/main.tf b/modules/terraform/azure/aks-cli/main.tf index 99543ffdb4..d69970f074 100644 --- a/modules/terraform/azure/aks-cli/main.tf +++ b/modules/terraform/azure/aks-cli/main.tf @@ -121,6 +121,12 @@ resource "terraform_data" "aks_cli" { "--nodepool-name", var.aks_cli_config.default_node_pool.name, "--node-count", var.aks_cli_config.default_node_pool.node_count, "--node-vm-size", var.aks_cli_config.default_node_pool.vm_size, + length(var.aks_cli_config.default_node_pool.node_labels) == 0 ? 
"" : format("%s %s", + "--labels", join(" ", [ + for label_name, label_value in var.aks_cli_config.default_node_pool.node_labels : + format("%s=%s", label_name, label_value) + ]) + ), "--vm-set-type", var.aks_cli_config.default_node_pool.vm_set_type, local.optional_parameters, local.subnet_id_parameter, @@ -159,6 +165,13 @@ resource "terraform_data" "aks_nodepool_cli" { "--nodepool-name", each.value.name, "--node-count", each.value.node_count, "--node-vm-size", each.value.vm_size, + local.aks_custom_headers_flags, + length(each.value.node_labels) == 0 ? "" : format("%s %s", + "--labels", join(" ", [ + for label_name, label_value in each.value.node_labels : + format("%s=%s", label_name, label_value) + ]) + ), "--vm-set-type", each.value.vm_set_type, ]) } diff --git a/modules/terraform/azure/aks-cli/variables.tf b/modules/terraform/azure/aks-cli/variables.tf index 8a472cc19e..fcc8feb641 100644 --- a/modules/terraform/azure/aks-cli/variables.tf +++ b/modules/terraform/azure/aks-cli/variables.tf @@ -36,6 +36,7 @@ variable "aks_cli_config" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = optional(string, "VirtualMachineScaleSets") }) extra_node_pool = optional( @@ -43,6 +44,7 @@ variable "aks_cli_config" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = optional(string, "VirtualMachineScaleSets") })), []) optional_parameters = optional(list(object({ diff --git a/modules/terraform/azure/variables.tf b/modules/terraform/azure/variables.tf index 9b94319759..f8a85cd067 100644 --- a/modules/terraform/azure/variables.tf +++ b/modules/terraform/azure/variables.tf @@ -13,6 +13,7 @@ variable "json_input" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = string })) aks_cli_user_node_pool = optional( @@ -20,6 +21,7 @@ variable "json_input" { name = string node_count = number vm_size = string + node_labels 
= optional(map(string), {}) vm_set_type = string })) ) @@ -217,6 +219,7 @@ variable "aks_cli_config_list" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = optional(string, "VirtualMachineScaleSets") }) extra_node_pool = optional( @@ -224,6 +227,7 @@ variable "aks_cli_config_list" { name = string node_count = number vm_size = string + node_labels = optional(map(string), {}) vm_set_type = optional(string, "VirtualMachineScaleSets") })), []) optional_parameters = optional(list(object({ diff --git a/pipelines/perf-eval/CRI Benchmark/cri-kbench-cp-bottlerocket.yml b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml similarity index 60% rename from pipelines/perf-eval/CRI Benchmark/cri-kbench-cp-bottlerocket.yml rename to pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml index 5e98687d59..b7cdd28f05 100644 --- a/pipelines/perf-eval/CRI Benchmark/cri-kbench-cp-bottlerocket.yml +++ b/pipelines/perf-eval/CRI Benchmark/cri-clusterloader2-immut-host.yml @@ -2,22 +2,22 @@ trigger: none variables: SCENARIO_TYPE: perf-eval - SCENARIO_NAME: cri-kbench-cp-bottlerocket + SCENARIO_NAME: cri-clusterloader2-immut-host SCENARIO_VERSION: main stages: - - stage: aws_westeurope + - stage: azure_swedencentral dependsOn: [] jobs: - template: /jobs/competitive-test.yml parameters: - cloud: aws + cloud: azure regions: - - eu-west-1 + - swedencentral engine: clusterloader2 engine_input: image: "ghcr.io/azure/clusterloader2:v20241016" - topology: cri-kbench-cp + topology: cri-resource-consume matrix: n3-p300-memory: node_count: 3 @@ -25,13 +25,7 @@ stages: repeats: 1 operation_timeout: 3m load_type: memory - n3-p300-cpu: - node_count: 3 - max_pods: 9 - repeats: 1 - operation_timeout: 3m - load_type: cpu - max_parallel: 3 - timeout_in_minutes: 120 + max_parallel: 1 + timeout_in_minutes: 240 credential_type: service_connection ssh_key_enabled: false diff --git 
a/scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-inputs/aws.tfvars b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/aws.tfvars similarity index 95% rename from scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-inputs/aws.tfvars rename to scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/aws.tfvars index 951d538d5d..4f371ade78 100644 --- a/scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-inputs/aws.tfvars +++ b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/aws.tfvars @@ -1,5 +1,5 @@ scenario_type = "perf-eval" -scenario_name = "cri-kbench-cp-bottlerocket" +scenario_name = "cri-clusterloader2-immut-host" deletion_delay = "2h" owner = "aks" @@ -84,9 +84,9 @@ eks_config_list = [{ name = "userpool0" ami_type = "BOTTLEROCKET_x86_64" instance_types = ["m5.4xlarge"] - min_size = 3 - max_size = 3 - desired_size = 3 + min_size = 10 + max_size = 10 + desired_size = 10 capacity_type = "ON_DEMAND" taints = [ { diff --git a/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars new file mode 100644 index 0000000000..c5207780c2 --- /dev/null +++ b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-inputs/azure.tfvars @@ -0,0 +1,67 @@ +scenario_type = "perf-eval" +scenario_name = "cri-clusterloader2-immut-host" +deletion_delay = "2h" +owner = "aks" + +network_config_list = [ + { + role = "client" + vnet_name = "cri-vnet" + vnet_address_space = "10.0.0.0/9" + subnet = [ + { + name = "cri-subnet-1" + address_prefix = "10.0.0.0/16" + } + ] + network_security_group_name = "" + nic_public_ip_associations = [] + nsr_rules = [] + } +] + +aks_config_list = [ + { + role = "client" + aks_name = "cri-resource-consume" + dns_prefix = "cri" + subnet_name = "cri-vnet" + sku_tier = "Standard" + network_profile = { + network_plugin = "azure" + network_plugin_mode = "overlay" + pod_cidr = "10.0.0.0/9" 
+ service_cidr = "192.168.0.0/16" + dns_service_ip = "192.168.0.10" + } + default_node_pool = { + name = "default" + node_count = 2 + vm_size = "Standard_D16_v4" + os_disk_type = "Managed" + os_sku = "AzureLinux" + only_critical_addons_enabled = true + temporary_name_for_rotation = "defaulttmp" + } + extra_node_pool = [ + { + name = "prompool" + node_count = 1 + auto_scaling_enabled = false + vm_size = "Standard_D16_v4" + os_sku = "AzureLinux" + node_labels = { "prometheus" = "true" } + }, + { + name = "userpool0" + node_count = 3 + auto_scaling_enabled = false + vm_size = "Standard_D16_v4" + os_sku = "AzureLinux" + node_taints = ["cri-resource-consume=true:NoSchedule"] + node_labels = { "cri-resource-consume" = "true" } + } + ] + kubernetes_version = "1.31" + } +] diff --git a/scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-test-inputs/aws.json b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/aws.json similarity index 100% rename from scenarios/perf-eval/cri-kbench-cp-bottlerocket/terraform-test-inputs/aws.json rename to scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/aws.json diff --git a/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/azure.json b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/azure.json new file mode 100644 index 0000000000..ea27a572c6 --- /dev/null +++ b/scenarios/perf-eval/cri-clusterloader2-immut-host/terraform-test-inputs/azure.json @@ -0,0 +1,4 @@ +{ + "run_id" : "123456789", + "region" : "eastus" +} diff --git a/steps/topology/cri-kbench-cp/collect-clusterloader2.yml b/steps/topology/cri-kbench-cp/collect-clusterloader2.yml deleted file mode 100644 index ee0c8a1bb1..0000000000 --- a/steps/topology/cri-kbench-cp/collect-clusterloader2.yml +++ /dev/null @@ -1,17 +0,0 @@ -parameters: -- name: cloud - type: string - default: '' -- name: engine_input - type: object - default: {} -- name: regions - type: object - default: {} - -steps: -- 
template: /steps/engine/clusterloader2/cri/collect.yml - parameters: - cloud: ${{ parameters.cloud }} - engine_input: ${{ parameters.engine_input }} - region: ${{ parameters.regions[0] }} diff --git a/steps/topology/cri-kbench-cp/execute-clusterloader2.yml b/steps/topology/cri-kbench-cp/execute-clusterloader2.yml deleted file mode 100644 index fcdab04dbe..0000000000 --- a/steps/topology/cri-kbench-cp/execute-clusterloader2.yml +++ /dev/null @@ -1,17 +0,0 @@ -parameters: -- name: cloud - type: string - default: '' -- name: engine_input - type: object - default: {} -- name: regions - type: object - default: {} - -steps: -- template: /steps/engine/clusterloader2/cri/execute.yml - parameters: - cloud: ${{ parameters.cloud }} - engine_input: ${{ parameters.engine_input }} - region: ${{ parameters.regions[0] }} diff --git a/steps/topology/cri-kbench-cp/validate-resources.yml b/steps/topology/cri-kbench-cp/validate-resources.yml deleted file mode 100644 index 3118e1475c..0000000000 --- a/steps/topology/cri-kbench-cp/validate-resources.yml +++ /dev/null @@ -1,16 +0,0 @@ -parameters: -- name: cloud - type: string -- name: engine - type: string -- name: regions - type: object - -steps: -- template: /steps/cloud/${{ parameters.cloud }}/update-kubeconfig.yml - parameters: - role: client - region: ${{ parameters.regions[0] }} -- template: /steps/engine/clusterloader2/slo/validate.yml - parameters: - desired_nodes: 7 diff --git a/steps/topology/cri-resource-consume/validate-resources.yml b/steps/topology/cri-resource-consume/validate-resources.yml index 1efbf41ddf..b5d3808605 100644 --- a/steps/topology/cri-resource-consume/validate-resources.yml +++ b/steps/topology/cri-resource-consume/validate-resources.yml @@ -13,4 +13,4 @@ steps: region: ${{ parameters.regions[0] }} - template: /steps/engine/clusterloader2/slo/validate.yml parameters: - desired_nodes: 14 + desired_nodes: 6