diff --git a/modules/python/clusterloader2/cri/config/config.yaml b/modules/python/clusterloader2/cri/config/config.yaml index 745175fc64..6b30350f65 100644 --- a/modules/python/clusterloader2/cri/config/config.yaml +++ b/modules/python/clusterloader2/cri/config/config.yaml @@ -18,6 +18,7 @@ name: resource-consumer {{$loadType := DefaultParam .CL2_LOAD_TYPE "memory"}} {{$provider := DefaultParam .CL2_PROVIDER "aks"}} +{{$registry := DefaultParam .CL2_REGISTRY "akscritelescope" }} {{$osType := DefaultParam .CL2_OS_TYPE "linux"}} {{$scrapeKubelets := DefaultParam .CL2_SCRAPE_KUBELETS false}} {{$hostNetwork := DefaultParam .CL2_HOST_NETWORK "true"}} @@ -99,6 +100,7 @@ steps: CPURequest: {{$cpu}}m LoadType: {{$loadType}} Provider: {{$provider}} + Registry: {{$registry}} OSType: {{$osType}} HostNetwork: {{$hostNetwork}} diff --git a/modules/python/clusterloader2/cri/config/deployment_template.yaml b/modules/python/clusterloader2/cri/config/deployment_template.yaml index 1b7ab8dbd8..807ae979f0 100644 --- a/modules/python/clusterloader2/cri/config/deployment_template.yaml +++ b/modules/python/clusterloader2/cri/config/deployment_template.yaml @@ -4,6 +4,7 @@ {{$CPURequest := DefaultParam .CPURequest "100m"}} {{$LoadType := DefaultParam .LoadType "memory"}} {{$Provider := DefaultParam .Provider "aks"}} +{{$Registry := DefaultParam .Registry "akscritelescope"}} {{$OSType := DefaultParam .OSType "linux"}} {{$HostNetwork := DefaultParam .HostNetwork "true"}} @@ -32,7 +33,7 @@ spec: - name: resource-consumer-memory imagePullPolicy: IfNotPresent {{if eq $OSType "windows"}} - image: akscritelescope.azurecr.io/e2e-test-images/resource-consumer:1.13-windows-amd64-ltsc2022 + image: {{$Registry}}.azurecr.io/e2e-test-images/resource-consumer:1.13-windows-amd64-ltsc2022 command: - testlimit.exe args: @@ -46,7 +47,7 @@ spec: - "1" {{else}} {{if eq $Provider "aks"}} - image: akscritelescope.azurecr.io/e2e-test-images/resource-consumer:1.13 + image: {{$Registry}}.azurecr.io/e2e-test-images/resource-consumer:1.13 {{else}} image: registry.k8s.io/e2e-test-images/resource-consumer:1.13 {{end}} @@ -70,12 +71,12 @@ spec: - name: resource-consumer-cpu imagePullPolicy: IfNotPresent {{if eq $OSType "windows"}} - image: akscritelescope.azurecr.io/e2e-test-images/resource-consumer:1.13-windows-amd64-ltsc2022 + image: {{$Registry}}.azurecr.io/e2e-test-images/resource-consumer:1.13-windows-amd64-ltsc2022 command: - /consume-cpu/consume-cpu.exe {{else}} {{if eq $Provider "aks"}} - image: akscritelescope.azurecr.io/e2e-test-images/resource-consumer:1.13 + image: {{$Registry}}.azurecr.io/e2e-test-images/resource-consumer:1.13 {{else}} image: registry.k8s.io/e2e-test-images/resource-consumer:1.13 {{end}} diff --git a/modules/python/clusterloader2/cri/cri.py b/modules/python/clusterloader2/cri/cri.py index 9cebebf502..7b74ce523e 100644 --- a/modules/python/clusterloader2/cri/cri.py +++ b/modules/python/clusterloader2/cri/cri.py @@ -17,7 +17,7 @@ def override_config_clusterloader2( node_count, node_per_step, max_pods, repeats, operation_timeout, load_type, scale_enabled, pod_startup_latency_threshold, provider, - os_type, scrape_kubelets, host_network, override_file): + registry, os_type, scrape_kubelets, host_network, override_file): client = KubernetesClient(os.path.expanduser("~/.kube/config")) nodes = client.get_nodes(label_selector="cri-resource-consume=true") if len(nodes) == 0: @@ -88,6 +88,7 @@ def override_config_clusterloader2( file.write("CL2_PROMETHEUS_NODE_SELECTOR: \"prometheus: \\\"true\\\"\"\n") file.write(f"CL2_POD_STARTUP_LATENCY_THRESHOLD: {pod_startup_latency_threshold}\n") file.write(f"CL2_PROVIDER: {provider}\n") + file.write(f"CL2_REGISTRY: {registry}\n") file.write(f"CL2_OS_TYPE: {os_type}\n") file.write(f"CL2_SCRAPE_KUBELETS: {str(scrape_kubelets).lower()}\n") file.write(f"CL2_HOST_NETWORK: {str(host_network).lower()}\n") @@ -141,10 +142,16 @@ def collect_clusterloader2( run_id, run_url, result_file, - scrape_kubelets + scrape_kubelets, + scrape_acr_info=False, + acr_info ): if scrape_kubelets: verify_measurement() + + if scrape_acr_info: + # attach ACR info to cloud_info + # append acr info to cloud_info details = parse_xml_to_json(os.path.join(cl2_report_dir, "junit.xml"), indent = 2) json_data = json.loads(details) @@ -248,6 +255,7 @@ def main(): help="Pod startup latency threshold", ) parser_override.add_argument("--provider", type=str, help="Cloud provider name") + parser_override.add_argument("--registry", type=str, help="Container image registry") parser_override.add_argument( "--os_type", type=str, choices=["linux", "windows"], default="linux" ) @@ -342,6 +350,7 @@ def main(): args.scale_enabled, args.pod_startup_latency_threshold, args.provider, + args.registry, args.os_type, args.scrape_kubelets, args.host_network, diff --git a/modules/python/tests/test_cri.py b/modules/python/tests/test_cri.py index 4c1ab63709..f39c3788ff 100644 --- a/modules/python/tests/test_cri.py +++ b/modules/python/tests/test_cri.py @@ -60,6 +60,7 @@ def test_override_config_clusterloader2(self, mock_kubernetes_client, mock_open) scale_enabled=False, pod_startup_latency_threshold="15s", provider="aks", + registry="akscritelescope", os_type="linux", scrape_kubelets=True, host_network=True, @@ -86,6 +87,7 @@ def test_override_config_clusterloader2(self, mock_kubernetes_client, mock_open) handle.write.assert_any_call("CL2_PROMETHEUS_NODE_SELECTOR: \"prometheus: \\\"true\\\"\"\n") handle.write.assert_any_call("CL2_POD_STARTUP_LATENCY_THRESHOLD: 15s\n") handle.write.assert_any_call("CL2_PROVIDER: aks\n") + handle.write.assert_any_call("CL2_REGISTRY: akscritelescope\n") handle.write.assert_any_call("CL2_OS_TYPE: linux\n") handle.write.assert_any_call("CL2_SCRAPE_KUBELETS: true\n") handle.write.assert_any_call("CL2_HOST_NETWORK: true\n") @@ -114,6 +116,7 @@ def test_override_config_clusterloader2_host_network_false(self, mock_kubernetes scale_enabled=False, pod_startup_latency_threshold="15s", provider="aks", + registry="akscritelescope", os_type="linux", scrape_kubelets=False, host_network=False, @@ -228,6 +231,7 @@ def test_override_command(self, mock_override): "--scale_enabled", "True", "--pod_startup_latency_threshold", "10s", "--provider", "aws", + "--registry", "", "--os_type", "linux", "--scrape_kubelets", "False", "--host_network", "False", @@ -236,7 +240,7 @@ def test_override_command(self, mock_override): with patch.object(sys, 'argv', test_args): main() mock_override.assert_called_once_with( - 5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "linux", False, False, "/tmp/override.yaml" + 5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "", "linux", False, False, "/tmp/override.yaml" ) @patch("clusterloader2.cri.cri.override_config_clusterloader2") @@ -253,6 +257,7 @@ def test_override_command_default_host_network(self, mock_override): "--scale_enabled", "True", "--pod_startup_latency_threshold", "10s", "--provider", "aws", + "--registry", "", "--os_type", "linux", "--scrape_kubelets", "False", "--cl2_override_file", "/tmp/override.yaml" @@ -260,7 +265,7 @@ def test_override_command_default_host_network(self, mock_override): with patch.object(sys, 'argv', test_args): main() mock_override.assert_called_once_with( - 5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "linux", False, True, "/tmp/override.yaml" + 5, 1, 110, 3, "2m", "cpu", True, "10s", "aws", "", "linux", False, True, "/tmp/override.yaml" ) @patch("clusterloader2.cri.cri.execute_clusterloader2") diff --git a/pipelines/system/new-pipeline-test.yml b/pipelines/system/new-pipeline-test.yml index 63d55f02d9..f19d60f5f3 100644 --- a/pipelines/system/new-pipeline-test.yml +++ b/pipelines/system/new-pipeline-test.yml @@ -1,25 +1,34 @@ trigger: none variables: - SCENARIO_TYPE: - SCENARIO_NAME: + SCENARIO_TYPE: perf-eval + SCENARIO_NAME: cri-resource-consume stages: - - stage: # format: [_]+ (e.g. azure_eastus2, aws_eastus_westus) + - stage: azure_swedencentral_managed_vs_ephemeral_1_31 dependsOn: [] jobs: - - template: /jobs/competitive-test.yml # must keep as is + - template: /jobs/competitive-test.yml parameters: - cloud: # e.g. azure, aws - regions: # list of regions - - region1 # e.g. eastus2 - topology: # e.g. cluster-autoscaler - engine: # e.g. clusterloader2 - matrix: # list of test parameters to customize the provisioned resources - : - : - : - max_parallel: # required - credential_type: service_connection # required + cloud: azure + regions: + - swedencentral + engine: clusterloader2 + engine_input: + image: "ghcr.io/azure/clusterloader2:v20250513" + topology: cri-resource-consume + matrix: + n10-p300-memory-ephemeral: + node_count: 10 + max_pods: 30 + repeats: 1 + operation_timeout: 3m + load_type: memory + kubernetes_version: "1.31" + scrape_kubelets: True + scrape_acr: True + registry_endpoint: "acrtelescope.azurecr.io" + max_parallel: 3 + timeout_in_minutes: 120 + credential_type: service_connection ssh_key_enabled: false - timeout_in_minutes: 60 # if not specified, default is 60 diff --git a/steps/engine/clusterloader2/cri/collect.yml b/steps/engine/clusterloader2/cri/collect.yml index 19e47820ea..2e8cabb9fc 100644 --- a/steps/engine/clusterloader2/cri/collect.yml +++ b/steps/engine/clusterloader2/cri/collect.yml @@ -7,6 +7,9 @@ parameters: default: {} - name: region type: string + - name: registry_info + type: string + default: "registry info collected" steps: - template: /steps/cloud/${{ parameters.cloud }}/collect-cloud-info.yml @@ -25,7 +28,9 @@ steps: --run_id $RUN_ID \ --run_url $RUN_URL \ --result_file $TEST_RESULTS_FILE \ - --scrape_kubelets ${SCRAPE_KUBELETS:-False} + --scrape_kubelets ${SCRAPE_KUBELETS:-False} \ + --scrape_acr_info ${SCRAPE_ACR:-False} \ + --registry_info ${REGISTRY_INFO:-""} workingDirectory: modules/python env: CLOUD: ${{ parameters.cloud }} diff --git a/steps/engine/clusterloader2/cri/execute.yml b/steps/engine/clusterloader2/cri/execute.yml index 45456ee0b2..b682f375c5 100644 --- a/steps/engine/clusterloader2/cri/execute.yml +++ b/steps/engine/clusterloader2/cri/execute.yml @@ -22,6 +22,7 @@ steps: --scale_enabled ${SCALE_ENABLED:-False} \ --pod_startup_latency_threshold ${POD_STARTUP_LATENCY_THRESHOLD:-15s} \ --provider $CLOUD \ + --registry $REGISTRY_ENDPOINT \ --os_type ${OS_TYPE:-linux} \ --scrape_kubelets ${SCRAPE_KUBELETS:-False} \ --host_network ${HOST_NETWORK:-True} \ diff --git a/steps/topology/image-pull-acr/collect-clusterloader2.yml b/steps/topology/image-pull-acr/collect-clusterloader2.yml new file mode 100644 index 0000000000..fc13195b5a --- /dev/null +++ b/steps/topology/image-pull-acr/collect-clusterloader2.yml @@ -0,0 +1,34 @@ +parameters: +- name: cloud + type: string + default: '' +- name: engine_input + type: object + default: {} +- name: regions + type: object + default: {} + +steps: +- script: | + if [ "$SCRAPE_ACR" = "true" ]; then + echo "Scraping ACR info of $REGISTRY_ENDPOINT..." + registry_info="acr info for bla bla bla" + else + echo "SCRAPE_ACR is false — skipping" + fi + + echo "##vso[task.setvariable variable=registry_info;isOutput=true]$registry_info" + name: collectRegistry + workingDirectory: modules/python + env: + CLOUD: ${{ parameters.cloud }} + RUN_URL: $(RUN_URL) + displayName: "Collect ACR Info" +- template: /steps/engine/clusterloader2/cri/collect.yml + parameters: + cloud: ${{ parameters.cloud }} + engine_input: ${{ parameters.engine_input }} + region: ${{ parameters.regions[0] }} + registry_info: ${{ dependencies.collectRegistry.outputs['collectRegistry.registry_info'] }} + diff --git a/steps/topology/image-pull-acr/execute-clusterloader2.yml b/steps/topology/image-pull-acr/execute-clusterloader2.yml new file mode 100644 index 0000000000..600c1db006 --- /dev/null +++ b/steps/topology/image-pull-acr/execute-clusterloader2.yml @@ -0,0 +1,18 @@ +parameters: +- name: cloud + type: string + default: '' +- name: engine_input + type: object + default: {} +- name: regions + type: object + default: {} + +steps: +- template: /steps/engine/clusterloader2/cri/execute.yml + parameters: + cloud: ${{ parameters.cloud }} + engine_input: ${{ parameters.engine_input }} + region: ${{ parameters.regions[0] }} + diff --git a/steps/topology/image-pull-acr/validate-resources.yml b/steps/topology/image-pull-acr/validate-resources.yml new file mode 100644 index 0000000000..000578974b --- /dev/null +++ b/steps/topology/image-pull-acr/validate-resources.yml @@ -0,0 +1,16 @@ +parameters: +- name: cloud + type: string +- name: engine + type: string +- name: regions + type: object + +steps: +- template: /steps/cloud/${{ parameters.cloud }}/update-kubeconfig.yml + parameters: + role: client + region: ${{ parameters.regions[0] }} +- template: /steps/engine/clusterloader2/large-cluster/validate.yml + parameters: + desired_nodes: 14