diff --git a/modules/python/clusterloader2/slo/config/ccnp_template.yaml b/modules/python/clusterloader2/slo/config/ccnp_template.yaml index 0c394d0c26..695d8dc0e1 100644 --- a/modules/python/clusterloader2/slo/config/ccnp_template.yaml +++ b/modules/python/clusterloader2/slo/config/ccnp_template.yaml @@ -6,14 +6,26 @@ spec: endpointSelector: matchLabels: group: cnp-ccnp + ingress: + - icmps: + - fields: + - type: 8 + family: IPv4 + - type: 128 + family: IPv6 ingressDeny: - - fromEndpoints: - - matchLabels: - io.kubernetes.pod.namespace: default + - fromEntities: + - world egress: + - icmps: + - fields: + - type: 8 + family: IPv4 + - type: 128 + family: IPv6 - toPorts: - ports: - - port: "53" - protocol: UDP + - port: "443" + protocol: ANY toEntities: - cluster \ No newline at end of file diff --git a/modules/python/clusterloader2/slo/config/cnp_template.yaml b/modules/python/clusterloader2/slo/config/cnp_template.yaml index ec1613ccff..39d49fccd9 100644 --- a/modules/python/clusterloader2/slo/config/cnp_template.yaml +++ b/modules/python/clusterloader2/slo/config/cnp_template.yaml @@ -1,20 +1,41 @@ +{{- $randNum := RandIntRange 1 10 -}} +{{- $randomNamespace := printf "slo-%d" $randNum -}} + apiVersion: cilium.io/v2 kind: CiliumNetworkPolicy metadata: - name: {{.basename}} - namespace: slo-1 # slo-1 was used because that is the ns pods are deployed in & tried passing in namespace from load-config but had object mismatch error, revise in future to possibly pass in ns + name: {{.Name}} + namespace: {{.Namespace}} spec: endpointSelector: matchLabels: group: cnp-ccnp + name: {{.Name}} + ingress: + - icmps: + - fields: + - type: 8 + family: IPv4 + - type: 128 + family: IPv6 ingressDeny: - fromEndpoints: - matchLabels: - io.kubernetes.pod.namespace: default + k8s:io.kubernetes.pod.namespace: {{$randomNamespace}} egress: + - icmps: + - fields: + - type: 8 + family: IPv4 + - type: 128 + family: IPv6 + toEntities: + - cluster - toPorts: - ports: - port: "443" protocol: TCP - toCIDR: - - 0.0.0.0/0 \ No newline at end of file + - port: "53" + protocol: UDP + toEntities: + - cluster \ No newline at end of file diff --git a/modules/python/clusterloader2/slo/config/deployment_template.yaml b/modules/python/clusterloader2/slo/config/deployment_template.yaml index 91929229c9..2ebf4fb5c8 100644 --- a/modules/python/clusterloader2/slo/config/deployment_template.yaml +++ b/modules/python/clusterloader2/slo/config/deployment_template.yaml @@ -11,6 +11,7 @@ metadata: name: {{.Name}} labels: group: {{.Group}} + name: {{.Name}} {{if .SvcName}} svc: {{.SvcName}}-{{.Index}} {{end}} diff --git a/modules/python/clusterloader2/slo/config/load-config.yaml b/modules/python/clusterloader2/slo/config/load-config.yaml index 6d97ec8761..f08d438fad 100644 --- a/modules/python/clusterloader2/slo/config/load-config.yaml +++ b/modules/python/clusterloader2/slo/config/load-config.yaml @@ -6,7 +6,7 @@ name: load-config {{$CCNP_TEST := DefaultParam .CL2_CCNP_TEST false}} # Config options for test parameters -{{$nodesPerNamespace := DefaultParam .CL2_NODES_PER_NAMESPACE 100}} +{{$nodesPerNamespace := DefaultParam .CL2_NODES_PER_NAMESPACE 1000}} {{$podsPerNode := DefaultParam .CL2_PODS_PER_NODE 50}} {{$loadTestThroughput := DefaultParam .CL2_LOAD_TEST_THROUGHPUT 100}} {{$deploymentSize := DefaultParam .CL2_DEPLOYMENT_SIZE 100}} @@ -14,12 +14,13 @@ name: load-config {{$groupName := DefaultParam .CL2_GROUP_NAME "service-discovery"}} # TODO(jshr-w): This should eventually use >1 namespace. -{{$namespaces := 1}} +{{$namespaces := DefaultParam .CL2_NO_OF_NAMESPACES 1}} {{$nodes := DefaultParam .CL2_NODES 1000}} +#set nodesPerNamespace to 100 {{$deploymentQPS := DivideFloat $loadTestThroughput $deploymentSize}} {{$operationTimeout := DefaultParam .CL2_OPERATION_TIMEOUT "15m"}} -{{$totalPods := MultiplyInt $namespaces $nodes $podsPerNode}} +{{$totalPods := MultiplyInt $namespaces $nodesPerNamespace $podsPerNode}} {{$podsPerNamespace := DivideInt $totalPods $namespaces}} {{$deploymentsPerNamespace := DivideInt $podsPerNamespace $deploymentSize}} @@ -46,6 +47,7 @@ name: load-config {{$CNPS_PER_NAMESPACE := DefaultParam .CL2_CNPS_PER_NAMESPACE 0}} {{$CCNPS := DefaultParam .CL2_CCNPS 0}} {{$DUALSTACK := DefaultParam .CL2_DUALSTACK false}} +{{$smallDeploymentsPerNamespaceCNP := DivideInt $podsPerNamespace $SMALL_GROUP_SIZE}} namespace: number: {{$namespaces}} @@ -127,18 +129,22 @@ steps: {{if or $CCNP_TEST $CNP_TEST}} bigDeploymentSize: 0 bigDeploymentsPerNamespace: 0 + smallDeploymentSize: {{$SMALL_GROUP_SIZE}} + smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespaceCNP}} cnp_test: {{$CNP_TEST}} ccnp_test: {{$CCNP_TEST}} {{else}} bigDeploymentSize: {{$BIG_GROUP_SIZE}} bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}} - {{end}} smallDeploymentSize: {{$SMALL_GROUP_SIZE}} smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}} + {{end}} CpuRequest: {{$latencyPodCpu}}m MemoryRequest: {{$latencyPodMemory}}M Group: {{$groupName}} - deploymentLabel: start + deploymentLabel: start + + - module: path: /modules/reconcile-objects.yaml @@ -150,14 +156,16 @@ steps: {{if or $CCNP_TEST $CNP_TEST}} bigDeploymentSize: 0 bigDeploymentsPerNamespace: 0 + smallDeploymentSize: {{$SMALL_GROUP_SIZE}} + smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespaceCNP}} cnp_test: {{$CNP_TEST}} ccnp_test: {{$CCNP_TEST}} {{else}} bigDeploymentSize: {{$BIG_GROUP_SIZE}} bigDeploymentsPerNamespace: {{$bigDeploymentsPerNamespace}} - {{end}} smallDeploymentSize: {{$SMALL_GROUP_SIZE}} smallDeploymentsPerNamespace: {{$smallDeploymentsPerNamespace}} + {{end}} CpuRequest: {{$latencyPodCpu}}m MemoryRequest: {{$latencyPodMemory}}M Group: {{$groupName}} @@ -198,6 +206,7 @@ steps: params: actionName: "Deleting" namespaces: {{$namespaces}} + Group: {{$groupName}} cnpsPerNamespace: 0 {{end}} {{if $CCNP_TEST}} @@ -206,6 +215,7 @@ steps: params: actionName: "Deleting" namespaces: {{$namespaces}} + Group: {{$groupName}} ccnps: 0 {{end}} {{end}} diff --git a/modules/python/clusterloader2/slo/config/modules/cilium-measurements.yaml b/modules/python/clusterloader2/slo/config/modules/cilium-measurements.yaml index 8c9278c614..662095fa8d 100644 --- a/modules/python/clusterloader2/slo/config/modules/cilium-measurements.yaml +++ b/modules/python/clusterloader2/slo/config/modules/cilium-measurements.yaml @@ -180,3 +180,126 @@ steps: - name: Perc50 query: quantile(0.5, avg_over_time(cilium_operator_process_resident_memory_bytes[%v:]) / 1024 / 1024) + - Identifier: AvgCiliumBPFMapPressure + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Avg Cilium BPF Map Pressure + metricVersion: v1 + unit: ratio + dimensions: + - map_name + enableViolations: true + queries: + - name: avg bpf map pressure over time + query: avg(avg_over_time(cilium_bpf_map_pressure[%v:])) by (map_name) + - Identifier: MaxCiliumBPFMapPressure + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Max Cilium BPF Map Pressure + metricVersion: v1 + unit: ratio + dimensions: + - map_name + enableViolations: true + queries: + - name: max bpf map pressure over time + query: max(max_over_time(cilium_bpf_map_pressure[%v:])) by (map_name) + - Identifier: MaxCiliumBPFMapOpsTotal + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Max Cilium BPF Map Ops Total + metricVersion: v1 + unit: ratio + dimensions: + - map_name + - operation + - outcome + enableViolations: true + queries: + - name: max bpf map pressure over time + query: max(max_over_time(cilium_bpf_map_ops_total[%v:])) by (map_name, operation, outcome) + - Identifier: MaxCiliumPoliciesLoadedCount + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Max number of Cilium Policies Loaded + metricVersion: v1 + unit: policies + enableViolations: true + queries: + - name: max number of cilium policies loaded over time + query: max(max_over_time(cilium_policy[%v:])) + - Identifier: AvgCiliumPoliciesLoadedCount + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Avg number of Cilium Policies Loaded + metricVersion: v1 + unit: policies + enableViolations: true + queries: + - name: avg number of cilium policies loaded over time + query: avg(avg_over_time(cilium_policy[%v:])) + - Identifier: CiliumBPFMapsAvgMemoryUsage + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Cilium BPF Maps Avg Memory Usage + metricVersion: v1 + unit: MB + enableViolations: true + queries: + - name: Perc99 + query: quantile(0.99, avg_over_time(cilium_bpf_maps_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - name: Perc90 + query: quantile(0.90, avg_over_time(cilium_bpf_maps_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - name: Perc50 + query: quantile(0.50, avg_over_time(cilium_bpf_maps_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - Identifier: CiliumBPFMapsMaxMemoryUsage + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Cilium BPF Maps Max Memory Usage + metricVersion: v1 + unit: MB + enableViolations: true + queries: + - name: Perc99 + query: quantile(0.99, max_over_time(cilium_bpf_maps_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - name: Perc90 + query: quantile(0.90, max_over_time(cilium_bpf_maps_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - name: Perc50 + query: quantile(0.50, max_over_time(cilium_bpf_maps_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - Identifier: CiliumBPFProgramsAvgMemoryUsage + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Cilium BPF Programs Avg Memory Usage + metricVersion: v1 + unit: MB + enableViolations: true + queries: + - name: Perc99 + query: quantile(0.99, avg_over_time(cilium_bpf_progs_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - name: Perc90 + query: quantile(0.90, avg_over_time(cilium_bpf_progs_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - name: Perc50 + query: quantile(0.50, avg_over_time(cilium_bpf_progs_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - Identifier: CiliumBPFProgramsMaxMemoryUsage + Method: GenericPrometheusQuery + Params: + action: {{$action}} + metricName: Cilium BPF Programs Max Memory Usage + metricVersion: v1 + unit: MB + enableViolations: true + queries: + - name: Perc99 + query: quantile(0.99, max_over_time(cilium_bpf_progs_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - name: Perc90 + query: quantile(0.90, max_over_time(cilium_bpf_progs_virtual_memory_max_bytes[%v:]) / 1024 / 1024) + - name: Perc50 + query: quantile(0.50, max_over_time(cilium_bpf_progs_virtual_memory_max_bytes[%v:]) / 1024 / 1024) diff --git a/modules/python/clusterloader2/slo/config/modules/ciliumnetworkpolicy.yaml b/modules/python/clusterloader2/slo/config/modules/ciliumnetworkpolicy.yaml index d52f8e9c97..70123fa87d 100644 --- a/modules/python/clusterloader2/slo/config/modules/ciliumnetworkpolicy.yaml +++ b/modules/python/clusterloader2/slo/config/modules/ciliumnetworkpolicy.yaml @@ -7,7 +7,7 @@ {{$Group := .Group}} steps: -- name: "{{$actionName}} {{$cnpsPerNamespace}} k8s CNPs" +- name: "{{$actionName}} k8s CNPs" phases: - namespaceRange: min: 1 @@ -16,4 +16,4 @@ steps: tuningSet: Sequence objectBundle: - basename: cnp - objectTemplatePath: cnp_template.yaml + objectTemplatePath: cnp_template.yaml \ No newline at end of file diff --git a/modules/python/clusterloader2/slo/config/modules/reconcile-objects.yaml b/modules/python/clusterloader2/slo/config/modules/reconcile-objects.yaml index d3e08b0f8e..802b06d73d 100644 --- a/modules/python/clusterloader2/slo/config/modules/reconcile-objects.yaml +++ b/modules/python/clusterloader2/slo/config/modules/reconcile-objects.yaml @@ -54,7 +54,7 @@ steps: replicasPerNamespace: {{$smallDeploymentsPerNamespace}} tuningSet: {{$tuningSet}} objectBundle: - - basename: small-deployment + - basename: cnp objectTemplatePath: deployment_template.yaml templateFillMap: Replicas: {{$smallDeploymentSize}} diff --git a/modules/python/clusterloader2/slo/slo.py b/modules/python/clusterloader2/slo/slo.py index 76392d3340..362dde4f79 100644 --- a/modules/python/clusterloader2/slo/slo.py +++ b/modules/python/clusterloader2/slo/slo.py @@ -58,6 +58,7 @@ def configure_clusterloader2( num_cnps, num_ccnps, dualstack, + no_of_namespaces, override_file): steps = node_count // node_per_step @@ -95,11 +96,14 @@ def configure_clusterloader2( file.write("CL2_CNP_TEST: true\n") file.write(f"CL2_CNPS_PER_NAMESPACE: {num_cnps}\n") file.write(f"CL2_DUALSTACK: {dualstack}\n") + file.write("CL2_NODES_PER_NAMESPACE: 100\n") + file.write(f"CL2_NO_OF_NAMESPACES: {no_of_namespaces}\n") file.write("CL2_GROUP_NAME: cnp-ccnp\n") if ccnp_test: file.write("CL2_CCNP_TEST: true\n") file.write(f"CL2_CCNPS: {num_ccnps}\n") + file.write("CL2_NODES_PER_NAMESPACE: 100\n") file.write(f"CL2_DUALSTACK: {dualstack}\n") file.write("CL2_GROUP_NAME: cnp-ccnp\n") @@ -220,7 +224,7 @@ def main(): parser_configure.add_argument("node_count", type=int, help="Number of nodes") parser_configure.add_argument("node_per_step", type=int, help="Number of nodes per scaling step") parser_configure.add_argument("max_pods", type=int, nargs='?', default=0, help="Maximum number of pods per node") - parser_configure.add_argument("repeats", type=int, help="Number of times to repeat the deployment churn") + parser_configure.add_argument("repeats", type=int, nargs='?', default=1, help="Number of times to repeat the deployment churn") parser_configure.add_argument("operation_timeout", type=str, help="Timeout before failing the scale up test") parser_configure.add_argument("provider", type=str, help="Cloud provider name") parser_configure.add_argument("cilium_enabled", type=eval, choices=[True, False], default=False, @@ -235,6 +239,7 @@ def main(): parser_configure.add_argument("num_ccnps", type=int, nargs='?', default=0, help="Number of ccnps") parser_configure.add_argument("dualstack", type=eval, choices=[True, False], nargs='?', default=False, help="Whether cluster is dualstack. Must be either True or False") + parser_configure.add_argument("no_of_namespaces", type=int, nargs='?', default=1, help="Number of namespaces to create") parser_configure.add_argument("cl2_override_file", type=str, help="Path to the overrides of CL2 config file") # Sub-command for validate_clusterloader2 @@ -256,7 +261,7 @@ def main(): parser_collect.add_argument("cpu_per_node", type=int, help="CPU per node") parser_collect.add_argument("node_count", type=int, help="Number of nodes") parser_collect.add_argument("max_pods", type=int, nargs='?', default=0, help="Maximum number of pods per node") - parser_collect.add_argument("repeats", type=int, help="Number of times to repeat the deployment churn") + parser_collect.add_argument("repeats", type=int, nargs='?', default=1, help="Number of times to repeat the deployment churn") parser_collect.add_argument("cl2_report_dir", type=str, help="Path to the CL2 report directory") parser_collect.add_argument("cloud_info", type=str, help="Cloud information") parser_collect.add_argument("run_id", type=str, help="Run ID") @@ -281,7 +286,7 @@ def main(): if args.command == "configure": configure_clusterloader2(args.cpu_per_node, args.node_count, args.node_per_step, args.max_pods, args.repeats, args.operation_timeout, args.provider, args.cilium_enabled, - args.service_test, args.cnp_test, args.ccnp_test, args.num_cnps, args.num_ccnps, args.dualstack, args.cl2_override_file) + args.service_test, args.cnp_test, args.ccnp_test, args.num_cnps, args.num_ccnps, args.dualstack, args.no_of_namespaces, args.cl2_override_file) elif args.command == "validate": validate_clusterloader2(args.node_count, args.operation_timeout) elif args.command == "execute": diff --git a/pipelines/perf-eval/CNI Benchmark/cnp-ccnp-feature.yml b/pipelines/perf-eval/CNI Benchmark/cnp-ccnp-feature.yml index 5e495b7433..81f55ea418 100644 --- a/pipelines/perf-eval/CNI Benchmark/cnp-ccnp-feature.yml +++ b/pipelines/perf-eval/CNI Benchmark/cnp-ccnp-feature.yml @@ -3,10 +3,11 @@ trigger: none variables: SCENARIO_TYPE: perf-eval SCENARIO_NAME: cnp-ccnp-feature + SCENARIO_VERSION: additionstoccnp-cnpPipeline OWNER: aks stages: - - stage: azure_eastus2 + - stage: azure_cnp_ccnp dependsOn: [] jobs: - template: /jobs/competitive-test.yml @@ -24,7 +25,7 @@ stages: node_count: $(NODES) node_per_step: $(STEP_NODES) max_pods: $(MAX_PODS_IN_NODE) - repeats: 1 + repeats: $(NUM_REPEATS) scale_timeout: "15m" cilium_enabled: True network_policy: cilium @@ -35,6 +36,7 @@ stages: num_cnps: $(CNPS_NUM) num_ccnps: $(CCNPS_NUM) dualstack: $(DUAL) + no_of_namespaces: $(NUMBER_NAMESPACES) cl2_config_file: load-config.yaml max_parallel: 2 timeout_in_minutes: 720 diff --git a/steps/engine/clusterloader2/slo/collect.yml b/steps/engine/clusterloader2/slo/collect.yml index 569f4467ca..6dc6d0a65c 100644 --- a/steps/engine/clusterloader2/slo/collect.yml +++ b/steps/engine/clusterloader2/slo/collect.yml @@ -17,7 +17,7 @@ steps: PYTHONPATH=$PYTHONPATH:$(pwd) python3 $PYTHON_SCRIPT_FILE collect \ $CPU_PER_NODE $NODE_COUNT ${MAX_PODS:-0} \ - $REPEATS $CL2_REPORT_DIR "$CLOUD_INFO" $RUN_ID $RUN_URL $SERVICE_TEST ${CNP_TEST:-False} \ + ${REPEATS:-1} $CL2_REPORT_DIR "$CLOUD_INFO" $RUN_ID $RUN_URL $SERVICE_TEST ${CNP_TEST:-False} \ ${CCNP_TEST:-False} ${NUM_CNPS:-0} ${NUM_CCNPS:-0} ${DUALSTACK:-False} $TEST_RESULTS_FILE \ $TEST_TYPE $SLO_START_TIME workingDirectory: modules/python/clusterloader2 diff --git a/steps/engine/clusterloader2/slo/execute.yml b/steps/engine/clusterloader2/slo/execute.yml index f45bb458af..095256e54b 100644 --- a/steps/engine/clusterloader2/slo/execute.yml +++ b/steps/engine/clusterloader2/slo/execute.yml @@ -21,8 +21,8 @@ steps: PYTHONPATH=$PYTHONPATH:$(pwd) python3 $PYTHON_SCRIPT_FILE configure \ $CPU_PER_NODE $NODE_COUNT $NODE_PER_STEP ${MAX_PODS:-0} \ - $REPEATS $SCALE_TIMEOUT $CLOUD $CILIUM_ENABLED \ - $SERVICE_TEST ${CNP_TEST:-False} ${CCNP_TEST:-False} ${NUM_CNPS:-0} ${NUM_CCNPS:-0} ${DUALSTACK:-False} ${CL2_CONFIG_DIR}/overrides.yaml + ${REPEATS:-1} $SCALE_TIMEOUT $CLOUD $CILIUM_ENABLED \ + $SERVICE_TEST ${CNP_TEST:-False} ${CCNP_TEST:-False} ${NUM_CNPS:-0} ${NUM_CCNPS:-0} ${DUALSTACK:-False} ${NO_OF_NAMESPACES:-1} ${CL2_CONFIG_DIR}/overrides.yaml PYTHONPATH=$PYTHONPATH:$(pwd) python3 $PYTHON_SCRIPT_FILE execute \ ${CL2_IMAGE} ${CL2_CONFIG_DIR} $CL2_REPORT_DIR $CL2_CONFIG_FILE \ ${HOME}/.kube/config $CLOUD diff --git a/steps/topology/cilium-usercluster/collect-clusterloader2.yml b/steps/topology/cilium-usercluster/collect-clusterloader2.yml index c5e1d59d67..39de4a3608 100644 --- a/steps/topology/cilium-usercluster/collect-clusterloader2.yml +++ b/steps/topology/cilium-usercluster/collect-clusterloader2.yml @@ -16,12 +16,12 @@ steps: engine_input: ${{ parameters.engine_input }} region: ${{ parameters.regions[0] }} -- template: /steps/engine/clusterloader2/cilium/scale-cluster.yml - parameters: - role: ces - region: ${{ parameters.regions[0] }} - nodes_per_nodepool: 0 - enable_autoscale: "false" +# - template: /steps/engine/clusterloader2/cilium/scale-cluster.yml +# parameters: +# role: ces +# region: ${{ parameters.regions[0] }} +# nodes_per_nodepool: 0 +# enable_autoscale: "false" - script: | run_id=$(Build.BuildId)-$(System.JobId)