diff --git a/deploy/25_role.yaml b/deploy/25_role.yaml new file mode 100644 index 000000000..eb9e77c48 --- /dev/null +++ b/deploy/25_role.yaml @@ -0,0 +1,14 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: benchmark-operator +rules: +- apiGroups: + - "" + resources: + - nodes + verbs: + - get + - list + - patch + diff --git a/deploy/35_role_binding.yaml b/deploy/35_role_binding.yaml new file mode 100644 index 000000000..cd0b952a9 --- /dev/null +++ b/deploy/35_role_binding.yaml @@ -0,0 +1,13 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: benchmark-operator +subjects: +- kind: ServiceAccount + name: benchmark-operator + namespace: my-ripsaw +roleRef: + kind: ClusterRole + name: benchmark-operator + apiGroup: rbac.authorization.k8s.io + diff --git a/docs/uperf.md b/docs/uperf.md index 3d05dbc55..66955208b 100644 --- a/docs/uperf.md +++ b/docs/uperf.md @@ -41,10 +41,10 @@ spec: kind: pod pin_server: "node-0" pin_client: "node-1" + pair: 1 multus: enabled: false samples: 1 - pair: 1 test_types: - stream protos: @@ -54,6 +54,10 @@ spec: nthrs: - 1 runtime: 30 + colocate: false + density_range: [low, high] + node_range: [low, high] + step_size: addN, log2 ``` `client_resources` and `server_resources` will create uperf client's and server's containers with the given k8s compute resources respectively [k8s resources](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) @@ -74,6 +78,11 @@ spec: `pin_client` what node to pin the client pod to. +`pair` how many instances of uperf client-server pairs. `pair` is applicable for `pin: true` only. +If `pair` is not specified, the operator will use the value in `density_range` to determine the number of pairs. +See **Scale** section for more info. `density_range` can do more than `pair` can, but `pair` support is retained +for backward compatibility. + `multus[1]` Configure our pods to use multus. 
`samples` how many times to run the tests. For example @@ -82,7 +91,7 @@ spec: ```yaml samples: 3 - pair: 1 + density_range: [1,1] test_types: - stream protos: @@ -108,7 +117,7 @@ size. For example: ```yaml samples: 3 - pair: 1 + density_range: [1,1] test_types: - rr protos: @@ -189,6 +198,55 @@ To enable Multus in Ripsaw, here is the relevant config. ... ``` +### Scale +Scale in this context refers to the ability to enumerate UPERF +client-server pairs during test in a controlled fashion using the following knobs. + +`colocate: true` will place each client and server pod pair on the same node. + +`density_range` to specify the range of client-server pairs that the test will iterate. + +`node_range` to specify the range of nodes that the test will iterate. + +`step_size` to specify the incrementing method. + +Here is one scale example: + +``` + ... + pin: false + colocate: false + density_range: [1,10] + node_range: [1,128] + step_size: log2 + ... +``` +Note, the `scale` mode is mutually exclusive to `pin` mode with the `pin` mode having higher precedence. +In other words, if `pin:true` the test will deploy pods on `pin_server` and `pin_client` nodes +and ignore `colocate`, `node_range`, and the number of pairs to deploy is specified by the + `density_range.high` value. + +In the above sample, the `scale` mode will be activated since `pin: false`. In the first phase, the +pod instantiation phase, the system gathers node inventory and may reduce the `node_range.high` value +to match the number of worker nodes available in the cluster. + +According to `node_range: [1,128]`, and `density_range:[1,10]`, the system will instantiate 10 pairs on +each of 128 nodes. Each pair has a node_idx and a pod_idx that are used later to control +which one and when they should run the UPERF workload. After all pairs are up and ready, +next comes the test execution phase. 
+ +The scale mode iterates the test as a double nested loop as follows: +``` + for node with node_idx less-or-equal node_range(low, high, step_size): + for pod with pod_idx less-or-equal density_range(low, high, step_size): + run uperf +``` +Hence, with the above params, the first iteration runs the pair with node_idx/pod_idx of {1,1}. After the first +run has completed, the second iteration runs 2 pairs of {1,1} and {1,2} and so on. + +The valid `step_size` methods are: addN and log2. `N` can be any integer and `log2` will double the value at each iteration i.e. 1,2,4,8,16 ... +By choosing the appropriate values for `density_range` and `node_range`, the user can generate most if not all +combinations of UPERF data points to exercise datapath performance from many angles. Once done creating/editing the resource file, you can run it by: diff --git a/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml b/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml index 417f46c0b..2aa13a7a3 100644 --- a/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml +++ b/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml @@ -123,6 +123,19 @@ spec: type: string cerberus: type: string + pod_hi_idx: + type: string + pod_low_idx: + type: string + node_hi_idx: + type: string + node_low_idx: + type: string + pod_idx: + type: string + node_idx: + type: string + additionalPrinterColumns: - name: Type type: string diff --git a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml index bf2606875..8d44d1a88 100644 --- a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml +++ b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml @@ -15,14 +15,22 @@ spec: serviceip: false hostnetwork: false networkpolicy: false - pin: false multus: enabled: false + pin: false + # + # pin: true/false - default=false + # - true will run 'Pin' mode using 1 server (pin_server:) and 1 client (pin_client:) nodes. + # - false will run 'Scale' mode. See colocate, density_range, node_range and step_size. 
pin_server: "node-0" pin_client: "node-1" samples: 1 kind: pod pair: 1 + # + # 'pair' specifies fixed number of client-server pairs for "Pin" mode. + # If 'pair' is NOT present, it will use 'density_range' which allows + # enumeration in addition to fixed number of pairs. test_types: - stream protos: @@ -32,3 +40,25 @@ spec: nthrs: - 1 runtime: 30 + + # The following variables are for 'Scale' mode. + # The 'Scale' mode is activated when 'pin=false' or undefined. + # The Scale mode params are: colocate, density_range, node_range and step_size. + # + # colocate: true/false - default=false + # density_range: [n, m] - default=[1,1] + # node_range: [x, y] - default=[1,1] + # step_size: log2 - default=add1 + # Valid step_size values are: addN or log2 + # N can be any integer + # Enumeration examples: + # add1: 1,2,3,4 ... + # add2: 1,3,5,7 ... + # add10: 1,11,21,31 ... + # log2: 1,2,4,8,16,32 ... + # + # 'exclude_labels' specifies the list of ineligible worker nodes. + # exclude_labels: (OR conditional, every node that matches any of these labels is excluded) + # - "bad=true" + # - "fc640=true" + diff --git a/resources/namespace.yaml b/resources/namespace.yaml index 0908738fc..1505d5723 100644 --- a/resources/namespace.yaml +++ b/resources/namespace.yaml @@ -2,3 +2,5 @@ apiVersion: v1 kind: Namespace metadata: name: my-ripsaw + labels: + project: my-ripsaw diff --git a/resources/operator.yaml b/resources/operator.yaml index f49ac96f0..dd4e0291a 100644 --- a/resources/operator.yaml +++ b/resources/operator.yaml @@ -70,7 +70,7 @@ spec: - containerPort: 6379 resources: limits: - cpu: "0.1" + cpu: "2.0" volumeMounts: - mountPath: /redis-master-data name: data diff --git a/roles/common/templates/networkpolicy.yml.j2 b/roles/common/templates/networkpolicy.yml.j2 index 6267d2196..361468fd6 100644 --- a/roles/common/templates/networkpolicy.yml.j2 +++ b/roles/common/templates/networkpolicy.yml.j2 @@ -4,11 +4,51 @@ metadata: name: "{{ meta.name }}-networkpolicy-{{ 
trunc_uuid }}" namespace: '{{ operator_namespace }}' spec: - podSelector: + podSelector: matchLabels: type: "{{ meta.name }}-bench-server-{{ trunc_uuid }}" ingress: - from: - - podSelector: + - podSelector: matchLabels: type: "{{ meta.name }}-bench-client-{{ trunc_uuid }}" + - namespaceSelector: + matchLabels: + project: "{{ operator_namespace }}" +{% if workload.args.ip_block.enable | default(false) %} + - ipBlock: + cidr: "{{ workload.args.ip_block.allow_subnet }}" + except: +{% for subnet in workload.args.ip_block.except_subnet %} + - "{{ subnet }}" +{% endfor %} +{% if workload.args.port_block.enable | default(false) %} + ports: + - protocol: TCP + port: 6379 +{% for prange in workload.args.port_block.range %} +{% for num in range(prange[0]|int,prange[1]|int) %} + - protocol: TCP + port: {{ num }} + - protocol: UDP + port: {{ num }} +{% endfor %} +{% endfor %} +{% endif %} + egress: + - to: + - ipBlock: + cidr: "{{ workload.args.ip_block.allow_subnet }}" +{% if workload.args.port_block.enable | default(false) %} + ports: +{% for prange in workload.args.port_block.range %} +{% for num in range(prange[0]|int,prange[1]|int) %} + - protocol: TCP + port: {{ num }} + - protocol: UDP + port: {{ num }} +{% endfor %} +{% endfor %} +{% endif %} +{% endif %} + diff --git a/roles/uperf/tasks/cleanup.yml b/roles/uperf/tasks/cleanup.yml new file mode 100644 index 000000000..a1e7b8348 --- /dev/null +++ b/roles/uperf/tasks/cleanup.yml @@ -0,0 +1,82 @@ +--- + +- block: + ### kind + # Cleanup servers, but leave clients around mostly for further examining of results. 
+ - name: Get Server Jobs + k8s_facts: + kind: Job + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_jobs + + - name: Get Server Pods + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods + + - name: Server Job and Pod names - to clean + set_fact: + clean_jobs: | + [ + {% for item in server_jobs.resources %} + "{{ item['metadata']['name'] }}", + {% endfor %} + ] + clean_pods: | + [ + {% for item in server_pods.resources %} + "{{ item['metadata']['name'] }}", + {% endfor %} + ] + + - name: Cleanup server Job + k8s: + kind: Job + api_version: v1 + namespace: '{{ operator_namespace }}' + state: absent + name: "{{ item }}" + with_items: "{{ clean_jobs }}" + + - name: Cleanup server Pod + k8s: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + state: absent + name: "{{ item }}" + with_items: "{{ clean_pods }}" + + when: resource_kind == "pod" and cleanup == True + +- block: + - name: Cleanup redis + command: "{{ item }}" + with_items: + - redis-cli del num_completion-{{trunc_uuid}} + - redis-cli del start-{{trunc_uuid}} + when: resource_kind == "pod" + + + + +# +# no kind block - We leave VM running +# + +- operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Complete + complete: true + diff --git a/roles/uperf/tasks/init.yml b/roles/uperf/tasks/init.yml new file mode 100644 index 000000000..7c9b7220b --- /dev/null +++ b/roles/uperf/tasks/init.yml @@ -0,0 +1,22 @@ +--- + +- name: Clear start flag + command: "redis-cli set start-{{trunc_uuid}} 0" + +- name: Clear num_completion + command: "redis-cli set num_completion-{{trunc_uuid}} 0" + +- name: Init node and pod indices in benchmark context + 
operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + pod_hi_idx: "{{pod_hi_idx}}" + pod_low_idx: "{{pod_low_idx}}" + node_hi_idx: "{{node_hi_idx}}" + node_low_idx: "{{node_low_idx}}" + node_idx: "{{node_low_idx}}" + pod_idx: "{{pod_low_idx}}" + diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index 5e5fafefb..435a9b6b9 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -1,377 +1,70 @@ --- -- name: Get current state - k8s_facts: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: '{{ meta.name }}' - namespace: '{{ operator_namespace }}' - register: resource_state +- include_tasks: setup.yml -- operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Building - complete: false - when: resource_state.resources[0].status.state is not defined - -- name: Get current state - If it has changed - k8s_facts: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: '{{ meta.name }}' - namespace: '{{ operator_namespace }}' - register: resource_state - -- name: Capture operator information - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - name = benchmark-operator - register: bo - -- name: Capture ServiceIP - k8s_facts: - kind: Service - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: serviceip - when: workload_args.serviceip is defined and workload_args.serviceip - -- block: - - - name: Create service for server pods - k8s: - definition: "{{ lookup('template', 'service.yml.j2') | from_yaml }}" - with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} - when: workload_args.serviceip is defined 
and workload_args.serviceip - - - name: Start Server(s) - k8s: - definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" - register: servers - with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} - - - name: Wait for pods to be running.... - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods - - - name: Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Starting Servers" - - when: resource_state.resources[0].status.state == "Building" and resource_kind == "pod" +- include_tasks: start_server.yml + when: resource_state.resources[0].status.state == "Building" - block: - - name: Start Server(s) - k8s: - definition: "{{ lookup('template', 'server_vm.yml.j2') | from_yaml }}" - register: servers - with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} - - - name: Wait for vms to be running.... 
- k8s_facts: - kind: VirtualMachineInstance - api_version: kubevirt.io/v1alpha3 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_vms + - include_tasks: wait_server_ready.yml + when: resource_state.resources[0].status.state == "Starting Servers" - - name: Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Starting Servers" + - include_tasks: start_client.yml + when: resource_state.resources[0].status.state == "Starting Clients" - when: resource_state.resources[0].status.state == "Building" and resource_kind == "vm" + - include_tasks: wait_client_ready.yml + when: resource_state.resources[0].status.state == "Waiting for Clients" -- block: + # LOOP BEGIN + # This loop iterates density_range[] and node_range[] for "scale" mode - - name: Get server pods - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods + - include_tasks: run_a_set.yml + when: resource_state.resources[0].status.state == "Clients Running" - - name: Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Starting Clients" - when: "workload_args.pair|default('1')|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" + - include_tasks: wait_set_done.yml + when: resource_state.resources[0].status.state == "Set Running" - when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "pod" + - include_tasks: next_set.yml + when: resource_state.resources[0].status.state == "Run Next Set" + # will loop back to "Client Running" state, or 
FALLTHRU to "Running" + # state below and finish -- block: + # LOOP END - - name: Wait for vms to be running.... - k8s_facts: - kind: VirtualMachineInstance - api_version: kubevirt.io/v1alpha3 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_vms + - include_tasks: wait_client_done.yml + when: resource_state.resources[0].status.state == "Running" - - name: Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Starting Clients" - when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + - include_tasks: cleanup.yml + when: resource_state.resources[0].status.state == "Cleanup" - - name: blocking client from running uperf - command: "redis-cli set start false" - with_items: "{{ server_vms.resources }}" - when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + when: resource_kind == "pod" - when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "vm" +# +# kind does not support "scale" mode yet +# - block: - - name: Get pod info - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods - - - name: Generate uperf xml files - k8s: - definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml }}" - - - block: - - name: Start Client(s) - k8s: - definition: "{{ lookup('template', 
'workload.yml.j2') | from_yaml }}" - with_items: "{{ server_pods.resources }}" - when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 - - - name: Start Client(s) - ServiceIP - k8s: - definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - with_items: "{{ serviceip.resources }}" - when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 - - when: resource_kind == "pod" - - - block: + - include_tasks: wait_server_ready.yml + when: resource_state.resources[0].status.state == "Starting Servers" - - name: Wait for vms to be running.... - k8s_facts: - kind: VirtualMachineInstance - api_version: kubevirt.io/v1alpha3 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_vms + - include_tasks: start_client.yml + when: resource_state.resources[0].status.state == "Starting Clients" - - name: Generate uperf test files - k8s: - definition: "{{ lookup('template', 'configmap_script.yml.j2') | from_yaml }}" - with_items: "{{ server_vms.resources }}" - - - name: Start Client(s) - k8s: - definition: "{{ lookup('template', 'workload_vm.yml.j2') | from_yaml }}" - with_items: "{{ server_vms.resources }}" - when: server_vms.resources|length > 0 - - when: resource_kind == "vm" - - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Waiting for Clients - - when: resource_state.resources[0].status.state == "Starting Clients" - -- block: - - - block: - - name: Get client pod status - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - app = uperf-bench-client-{{ trunc_uuid }} - register: client_pods - - - name: Update resource state - operator_sdk.util.k8s_status: - api_version: 
ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Clients Running - when: "workload_args.pair|default('1')|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (client_pods | json_query('resources[].status.podIP')|length)" - - when: resource_kind == "pod" - - - block: - - - name: set complete to false - command: "redis-cli set complete false" - - - name: Get count of clients ready - command: "redis-cli get clients-{{ trunc_uuid }}" - register: clients_ready_count - - - name: Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Clients Running - when: "workload_args.pair|default('1')|int == clients_ready_count.stdout|int" - - when: resource_kind == "vm" - - when: resource_state.resources[0].status.state == "Waiting for Clients" - -- block: - - - name: Signal workload - command: "redis-cli set start true" - - - name: Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Running" - - when: resource_state.resources[0].status.state == "Clients Running" - -- block: - - block: - - name: Waiting for pods to complete.... 
- k8s_facts: - kind: pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - app = uperf-bench-client-{{ trunc_uuid }} - register: client_pods - - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Cleanup - complete: false - when: "workload_args.pair|default('1')|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" - when: resource_kind == "pod" - - - block: - - - name: get complete - command: "redis-cli get complete" - register: complete_status - - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Cleanup - complete: false - when: complete_status.stdout == "true" - when: resource_kind == "vm" - - when: resource_state.resources[0].status.state == "Running" - -- block: + - include_tasks: wait_client_ready.yml + when: resource_state.resources[0].status.state == "Waiting for Clients" - - block: - - name: Get Server Pods - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods + - include_tasks: send_client_run_signal.yml + when: resource_state.resources[0].status.state == "Clients Running" - - name: Pod names - to clean - set_fact: - clean_pods: | - [ - {% for item in server_pods.resources %} - "{{ item['metadata']['name'] }}", - {% endfor %} - ] + - include_tasks: wait_client_done.yml + when: resource_state.resources[0].status.state == "Running" - - name: Cleanup run - k8s: - kind: pod - api_version: v1 - namespace: '{{ operator_namespace }}' - state: absent - name: "{{ item }}" - with_items: "{{ clean_pods }}" - when: cleanup - when: resource_kind == "pod" + - include_tasks: cleanup.yml + when: 
resource_state.resources[0].status.state == "Cleanup" - - name: delete redis keys - command: "redis-cli del {{ item }}" - loop: - - "{{ trunc_uuid }}" - - "clients-{{ trunc_uuid }}" + when: resource_kind == "vm" - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Complete - complete: true - when: resource_state.resources[0].status.state == "Cleanup" diff --git a/roles/uperf/tasks/next_set.yml b/roles/uperf/tasks/next_set.yml new file mode 100644 index 000000000..3f2a5f4cf --- /dev/null +++ b/roles/uperf/tasks/next_set.yml @@ -0,0 +1,104 @@ +--- +# +# This module logically implements an RE-ENTRANT nested "for" loops; +# +# with_items: +# range (node_low_idx, node_hi_idx) +# range (pod_low_idx, pod_hi_idx) +# +# Each iteration executes one item, and each re-entrance +# continues where it left off. +# +- block: + - name: Read previous node_idx and pod_idx + set_fact: + all_run_done: False + inc: "{{workload_args.step_size|default('add1')}}" + amount: 0 + pod_idx: "{{resource_state.resources[0].status.pod_idx|int}}" + node_idx: "{{resource_state.resources[0].status.node_idx|int}}" + + - name: Extract add amount + set_fact: + amount: "{{ inc | regex_replace('[^0-9]', '') }}" + inc: add + when: "'add' in inc" + + - name: Increment pod_idx + set_fact: + pod_idx: "{%-if inc=='add' -%}{{pod_idx|int+amount|int}} + {%-elif inc=='log2' -%}{{(pod_idx|int*2)+1}} + {%-else -%}{{pod_idx|int+1}} + {% endif %}" + - block: + # + # This block starts a new node loop + # + - name: Increment node_idx + set_fact: + node_idx: "{%- if inc=='add' -%}{{node_idx|int+amount|int}} + {%- elif inc=='log2' -%}{{(node_idx|int *2)+1}} + {%- else -%}{{node_idx|int+1}} + {% endif %}" + + - name: Check node loop for ending condition + set_fact: + all_run_done: True + when: "node_idx|int > resource_state.resources[0].status.node_hi_idx|int" + + # + # Reset pod_idx AFTER 
node_idx tasks above, else cond change + # causes it to skip node_idx tasks + # + - name: Reset pod_idx to pod_low_idx + set_fact: + pod_idx: "{{resource_state.resources[0].status.pod_low_idx}}" + + when: "pod_idx|int > resource_state.resources[0].status.pod_hi_idx|int" + + - block: + # + # All done + # + - name: Unpause pods to complete + command: "redis-cli set start-{{trunc_uuid}} done" + + - name: Change state to proceed to exit + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Running + + when: all_run_done == True + + - block: + # + # More round(s) to run. + # + - name: Send redis restart signal + command: "redis-cli set start-{{trunc_uuid}} restart" + + - name: Reset redis num_completion + command: "redis-cli set num_completion-{{trunc_uuid}} 0" + + - name: Change state to run next round + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Clients Running + pod_idx: "{{pod_idx}}" + node_idx: "{{node_idx}}" + + when: all_run_done == False + + when: resource_kind == "pod" + +# +# No block - Scale mode support is N/A +# diff --git a/roles/uperf/tasks/run_a_set.yml b/roles/uperf/tasks/run_a_set.yml new file mode 100644 index 000000000..46c08eb9d --- /dev/null +++ b/roles/uperf/tasks/run_a_set.yml @@ -0,0 +1,26 @@ +--- + +- block: + # + # Entry Condition: + # 1. A previous task has set 'node_idx' and 'pod_idx' in benchmark ctx + # 2. 
All clients are polling redis for 'start-node_idx-pod_idx' to start + # Output: Clients with node_idx <= redis node_idx && pod_idx <= redis pod_idx will run + # + + - name: Signal group to run + command: "redis-cli set start-{{trunc_uuid}} true-{{resource_state.resources[0].status.node_idx|int}}-{{resource_state.resources[0].status.pod_idx|int}}" + + - name: Update state to "Set Running" + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Set Running + when: resource_kind == "pod" + +# +# No kind - It has not been adapted to Scale mode +# diff --git a/roles/uperf/tasks/send_client_run_signal.yml b/roles/uperf/tasks/send_client_run_signal.yml new file mode 100644 index 000000000..52fd92a97 --- /dev/null +++ b/roles/uperf/tasks/send_client_run_signal.yml @@ -0,0 +1,18 @@ +--- +# This module is invoked by VM-kind only +- block: + + - name: Signal workload + command: "redis-cli set start true" + + - name: Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: "Running" + + when: resource_state.resources[0].status.state == "Clients Running" + diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml new file mode 100644 index 000000000..86ca12e9e --- /dev/null +++ b/roles/uperf/tasks/setup.yml @@ -0,0 +1,167 @@ +--- + +- name: Get current state + k8s_facts: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: '{{ meta.name }}' + namespace: '{{ operator_namespace }}' + register: resource_state + +- operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Building + complete: false + when: resource_state.resources[0].status.state is 
not defined + +- name: Get current state - If it has changed + k8s_facts: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: '{{ meta.name }}' + namespace: '{{ operator_namespace }}' + register: resource_state + +- name: Capture operator information + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - name = benchmark-operator + register: bo + +- block: + # + # This block is for scale mode where client and server pods are spread + # across all eligible nodes + # + - name: List Nodes Labeled as Workers + k8s_info: + api_version: v1 + kind: Node + label_selectors: + - "node-role.kubernetes.io/worker=" + register: node_list + no_log: True + + - name: Isolate Worker Role Hostnames + set_fact: + worker_node_list: "{{ node_list | json_query('resources[].metadata.labels.\"kubernetes.io/hostname\"') | list }}" + + - name: List Nodes Labeled with {{ workload_args.exclude_labels }} + k8s_info: + api_version: v1 + kind: Node + label_selectors: + - '{{ item }}' + with_items: "{{ workload_args.exclude_labels }}" + register: exclude_node_list + when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 + + - name: Isolate Worker Role Hostnames for label {{ workload_args.exclude_labels }} + set_fact: + worker_node_exclude_list: "{{ exclude_node_list | json_query('results[].resources[].metadata.name') }}" + + - name: Exclude labeled nodes + set_fact: + worker_node_list: "{{ worker_node_list | difference(worker_node_exclude_list) }}" + when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 + # + # Compute node and pod limits using CR params while taking into account + # the actual number of nodes available in the system + # + - name: init pod and node low/hi idx + set_fact: + pod_low_idx: "{{ workload_args.density_range[0] | default('1')|int - 1 }}" + pod_hi_idx: "{{ workload_args.density_range[1] | default('1')|int - 1 }}" + 
node_low_idx: "{{ workload_args.node_range[0] | default('1')|int - 1 }}" + node_hi_idx: "{{ workload_args.node_range[1] | default('1')|int - 1 }}" + # + # Next sanity check and massage the indices if necessary. + # We shall complete gracefully and not iterate wildly. + # + - name: Adjust node_hi_idx if cluster has fewer nodes + set_fact: + node_hi_idx: "{{ worker_node_list|length| default('0')|int -1 }}" + when: "node_hi_idx|int >= worker_node_list|length| default('0')|int " + + - name: Adjust node_low_idx if necessary + set_fact: + node_low_idx: "{{node_hi_idx|int}}" + when: "node_low_idx|int > node_hi_idx|int" + + - name: Adjust pod_low_idx if necessary + set_fact: + pod_low_idx: "{{pod_hi_idx|int}}" + when: "pod_low_idx|int > pod_hi_idx|int" + + - name: Record num server pods using new worker_node_list + # in Scale mode, num server pods = num_node * number_pod + set_fact: + num_server_pods: "{{ (node_hi_idx|int+1) * (pod_hi_idx|int+1) }}" + + # + # End scale mode + # + when: workload_args.pin | default(False) == False + +- block: + # + # This block is for the "pin" mode where the server and the client node + # are specified by pin_server and pin_client variables. + + - name: Add "Pin" server and client node to worker list. + # The add order is significant as we will enumerate the server pods on + # the first node in the list, and client pods on the second node. + set_fact: + worker_node_list: "{{worker_node_list + [item]}}" + with_items: + - '{{workload_args.pin_server}}' + - '{{workload_args.pin_client}}' + # + # In 'Pin' mode, 'pair' specifies number of pairs (classic behavior). If 'pair' + # is undefined use 'density_range' (new behavior with "Scale" enhancement) + # + - name: Init "Pin" mode indices using 'pair' + set_fact: + pod_low_idx: "{{ workload_args.pair | default('1')|int - 1 }}" + pod_hi_idx: "{{ workload_args.pair | default('1')|int - 1 }}" + # node indices are used as client pod 'start' parameter. 
+ node_low_idx: "0" + node_hi_idx: "0" + when: workload_args.pair is defined + + - name: Init "Pin" mode indices using 'density_range' + set_fact: + pod_low_idx: "{{ workload_args.density_range[0] | default('1')|int - 1 }}" + pod_hi_idx: "{{ workload_args.density_range[1] | default('1')|int - 1 }}" + # node indices are used as client pod 'start' parameter. + node_low_idx: "0" + node_hi_idx: "0" + when: workload_args.pair is not defined + + - name: Record num Pin server pods using new worker_node_list + set_fact: + # in Pin mode, num server pods = number of pods + num_server_pods: "{{ pod_hi_idx|int +1 }}" + + # + # End pin mode where pin_client and pin_server are specified + # + when: workload_args.pin | default(False) == True + +- name: Capture ServiceIP + k8s_facts: + kind: Service + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: serviceip + when: workload_args.serviceip is defined and workload_args.serviceip + diff --git a/roles/uperf/tasks/start_client.yml b/roles/uperf/tasks/start_client.yml new file mode 100644 index 000000000..9d6ec9ab9 --- /dev/null +++ b/roles/uperf/tasks/start_client.yml @@ -0,0 +1,73 @@ +--- + +- name: Get pod info + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods + +- name: Generate uperf xml files + k8s: + definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml }}" + +- block: + ### kind + - name: Start Client(s) w/o serviceIP + k8s: + definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" + vars: + resource_item: "{{ server_pods.resources }}" + when: workload_args.serviceip|default(False) == False and server_pods.resources|length > 0 + + # + # Each server annotates a "node_idx". Each peer client will + # derive its affinity according the 'colocate' variable. 
+ # + + - name: Start Client(s) with serviceIP + k8s: + definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" + vars: + resource_item: "{{ serviceip.resources }}" + when: workload_args.serviceip|default(False) == True and serviceip.resources|length > 0 + + when: resource_kind == "pod" + +- block: + ### kind + + - name: Wait for vms to be running.... + k8s_facts: + kind: VirtualMachineInstance + api_version: kubevirt.io/v1alpha3 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_vms + + + - name: Generate uperf test files + k8s: + definition: "{{ lookup('template', 'configmap_script.yml.j2') | from_yaml }}" + with_items: "{{ server_vms.resources }}" + + - name: Start Client(s) + k8s: + definition: "{{ lookup('template', 'workload_vm.yml.j2') | from_yaml }}" + with_items: "{{ server_vms.resources }}" + when: server_vms.resources|length > 0 + + when: resource_kind == "vm" + +- operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Waiting for Clients + + diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml new file mode 100644 index 000000000..743faa176 --- /dev/null +++ b/roles/uperf/tasks/start_server.yml @@ -0,0 +1,74 @@ +--- + +- block: + ### kind + - include_tasks: init.yml + + - name: Create service for server pods + k8s: + definition: "{{ lookup('template', 'service.yml.j2') | from_yaml }}" + vars: + pod_sequence: "{{ pod_hi_idx|int +1 }}" + node_sequence: "{{ node_hi_idx|int +1 }}" + + when: workload_args.serviceip is defined and workload_args.serviceip + + - name: Start Server(s) - total = eligible nodes * density + k8s: + definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" + vars: + pod_sequence: "{{ pod_hi_idx|int +1 }}" + node_sequence: "{{ node_hi_idx|int +1 }}" + + # + # Each server annotates a 
"node_idx" which will allow its peer client + # to derive its affinity according the 'colocate' variable + # + - name: Wait for pods to be running.... + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods + + - name: Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: "Starting Servers" + + when: resource_kind == "pod" + +- block: + ### kind + - name: Waiting for pods to complete.... + k8s_facts: + kind: pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - app = uperf-bench-client-{{ trunc_uuid }} + register: client_pods + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Cleanup + complete: false + when: "num_server_pods|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" + when: resource_kind == "pod" + +- block: + ### kind + - name: get complete + command: "redis-cli get complete" + register: complete_status + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Cleanup + complete: false + when: complete_status.stdout == "true" + when: resource_kind == "vm" + diff --git a/roles/uperf/tasks/wait_client_ready.yml b/roles/uperf/tasks/wait_client_ready.yml new file mode 100644 index 000000000..c6455fea7 --- /dev/null +++ b/roles/uperf/tasks/wait_client_ready.yml @@ -0,0 +1,48 @@ +--- + +- block: + ### kind + + - name: Get client pod status + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - app 
= uperf-bench-client-{{ trunc_uuid }} + register: client_pods + + - name: Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Clients Running + when: "num_server_pods|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (client_pods | json_query('resources[].status.podIP')|length)" + + when: resource_kind == "pod" + +- block: + ### kind + + - name: set complete to false + command: "redis-cli set complete false" + + - name: Get count of clients ready + command: "redis-cli get clients-{{ trunc_uuid }}" + register: clients_ready_count + + - name: Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Clients Running + when: "workload_args.pair|default('1')|int == clients_ready_count.stdout|int" + + when: resource_kind == "vm" + diff --git a/roles/uperf/tasks/wait_server_ready.yml b/roles/uperf/tasks/wait_server_ready.yml new file mode 100644 index 000000000..195125e97 --- /dev/null +++ b/roles/uperf/tasks/wait_server_ready.yml @@ -0,0 +1,55 @@ +--- +- block: + ### kind + + - name: Get server pods + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods + + - name: Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: "Starting Clients" + when: "num_server_pods|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" + + when: resource_kind == "pod" + +- block: + ### kind + + - 
name: Wait for vms to be running.... + k8s_facts: + kind: VirtualMachineInstance + api_version: kubevirt.io/v1alpha3 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_vms + + - name: Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: "Starting Clients" + when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + + - name: blocking client from running uperf + command: "redis-cli set start false" + with_items: "{{ server_vms.resources }}" + when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + + when: resource_kind == "vm" + + diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml new file mode 100644 index 000000000..3f596d681 --- /dev/null +++ b/roles/uperf/tasks/wait_set_done.yml @@ -0,0 +1,28 @@ +--- + +- block: + - block: + ### kind + - name: read pod completion count + command: "redis-cli get num_completion-{{trunc_uuid}}" + register: num_completion + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Run Next Set + + when: "num_completion.stdout|int == ((resource_state.resources[0].status.node_idx|int +1) * (resource_state.resources[0].status.pod_idx|int +1))" + + when: resource_kind == "pod" + + ### no kind block - Run a "set" is not yet supported + + 
when: resource_kind == "pod" + +# +# No kind block - It has not been adapted to scale mode yet. +# diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index d46a66799..669ad572f 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -1,47 +1,65 @@ --- -kind: Job -apiVersion: batch/v1 -metadata: - name: 'uperf-server-{{ item }}-{{ trunc_uuid }}' - namespace: "{{ operator_namespace }}" -spec: - ttlSecondsAfterFinished: 600 - backoffLimit: 0 - template: - metadata: - labels: - app: uperf-bench-server-{{item}}-{{ trunc_uuid }} - type: uperf-bench-server-{{ trunc_uuid }} +apiVersion: v1 +kind: List +metadata: {} +items: +{% macro job_template(item, node_idx_item='') %} + - kind: Job + apiVersion: batch/v1 + metadata: + name: 'uperf-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }}' + namespace: "{{ operator_namespace }}" + spec: + ttlSecondsAfterFinished: 600 + backoffLimit: 0 + template: + metadata: + labels: + app: uperf-bench-server-{{ worker_node_list[node_idx_item] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} + type: uperf-bench-server-{{ trunc_uuid }} + annotations: {% if workload_args.multus.enabled is sameas true %} - annotations: - k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.server}} + k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.server}} {% endif %} - spec: + node_idx: '{{ node_idx_item }}' + pod_idx: '{{ item }}' + spec: {% if workload_args.runtime_class is defined %} - runtimeClassName: "{{ workload_args.runtime_class }}" + runtimeClassName: "{{ workload_args.runtime_class }}" {% endif %} {% if workload_args.hostnetwork is sameas true %} - hostNetwork: true - serviceAccountName: benchmark-operator + hostNetwork: true + serviceAccountName: benchmark-operator {% endif %} - containers: - - name: benchmark - image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} + containers: + - name: 
benchmark + image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} {% if workload_args.server_resources is defined %} - resources: {{ workload_args.server_resources | to_json }} + resources: {{ workload_args.server_resources | to_json }} {% endif %} - imagePullPolicy: Always - command: ["/bin/sh","-c"] - args: ["uperf -s -v -P 20000"] - restartPolicy: OnFailure + imagePullPolicy: Always + command: ["/bin/sh","-c"] + args: ["uperf -s -v -P 20000"] + restartPolicy: OnFailure {% if workload_args.pin is sameas true %} - nodeSelector: - kubernetes.io/hostname: '{{ workload_args.pin_server }}' + nodeSelector: + kubernetes.io/hostname: '{{ workload_args.pin_server }}' {% endif %} + nodeSelector: + kubernetes.io/hostname: '{{ worker_node_list[node_idx_item] }}' + {% if workload_args.serviceip is sameas true %} - securityContext: - sysctls: - - name: net.ipv4.ip_local_port_range - value: 20000 20011 + securityContext: + sysctls: + - name: net.ipv4.ip_local_port_range + value: 20000 20011 {% endif %} -{% include "metadata.yml.j2" %} +{% macro metadata() %}{% include "metadata.yml.j2" %}{% endmacro %} + {{ metadata()|indent }} +{% endmacro %} +{% for node_idx_item in range(node_sequence|int) %} +{% for item in range(pod_sequence|int) %} +{{ job_template(item,node_idx_item) }} +{% endfor %} +{% endfor %} + diff --git a/roles/uperf/templates/service.yml.j2 b/roles/uperf/templates/service.yml.j2 index 89177dad0..5b787858a 100644 --- a/roles/uperf/templates/service.yml.j2 +++ b/roles/uperf/templates/service.yml.j2 @@ -1,27 +1,41 @@ --- -kind: Service apiVersion: v1 -metadata: - name: uperf-service-{{ item }}-{{ trunc_uuid }} - namespace: '{{ operator_namespace }}' - labels: - app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} - type: uperf-bench-server-{{ trunc_uuid }} -spec: - selector: - app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} - ports: - - name: uperf - port: 20000 - targetPort: 20000 - protocol: TCP +kind: List +metadata: {} +items: +{% 
macro job_template(item, node_idx_item='') %} + - kind: Service + apiVersion: v1 + metadata: + name: uperf-service-{{ worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} + namespace: '{{ operator_namespace }}' + labels: + app: uperf-bench-server-{{ worker_node_list[ node_idx_item ] | truncate(32,true,'')}}-{{ item }}-{{ trunc_uuid }} + type: uperf-bench-server-{{ trunc_uuid }} + annotations: + node_idx: '{{ node_idx_item }}' + pod_idx: '{{ item }}' + spec: + selector: + app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'')}}-{{ item }}-{{ trunc_uuid }} + ports: + - name: uperf + port: 20000 + targetPort: 20000 + protocol: TCP {% for num in range(20001,20012,1) %} - - name: uperf-control-tcp-{{num}} - port: {{num}} - targetPort: {{num}} - protocol: TCP - - name: uperf-control-udp-{{num}} - port: {{num}} - targetPort: {{num}} - protocol: UDP + - name: uperf-control-tcp-{{num}} + port: {{num}} + targetPort: {{num}} + protocol: TCP + - name: uperf-control-udp-{{num}} + port: {{num}} + targetPort: {{num}} + protocol: UDP +{% endfor %} +{% endmacro %} +{% for node_idx_item in range(node_sequence|int) %} +{% for item in range(pod_sequence|int) %} +{{ job_template(item,node_idx_item) }} +{% endfor %} {% endfor %} diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 493b4b7a9..814bdac54 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -1,105 +1,142 @@ --- -kind: Job -apiVersion: batch/v1 -metadata: +apiVersion: v1 +kind: List +metadata: {} +items: +{% for item in resource_item %} + - kind: Job + apiVersion: batch/v1 + metadata: {% if workload_args.serviceip is sameas true %} - name: 'uperf-client-{{item.spec.clusterIP}}-{{ trunc_uuid }}' + name: 'uperf-client-{{item.spec.clusterIP}}-{{ trunc_uuid }}' {% else %} - name: 'uperf-client-{{item.status.podIP}}-{{ trunc_uuid }}' + name: 'uperf-client-{{item.status.podIP}}-{{ 
trunc_uuid }}' {% endif %} - namespace: '{{ operator_namespace }}' -spec: - template: - metadata: - labels: - app: uperf-bench-client-{{ trunc_uuid }} - clientfor: {{ item.metadata.labels.app }} - type: uperf-bench-client-{{ trunc_uuid }} + namespace: '{{ operator_namespace }}' + spec: + template: + metadata: + labels: + app: uperf-bench-client-{{ trunc_uuid }} + clientfor: {{ item.metadata.labels.app }} + type: uperf-bench-client-{{ trunc_uuid }} {% if workload_args.multus.enabled is sameas true %} - annotations: - k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.client }} + annotations: + k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.client }} {% endif %} - spec: + spec: {% if workload_args.runtime_class is defined %} - runtimeClassName: "{{ workload_args.runtime_class }}" + runtimeClassName: "{{ workload_args.runtime_class }}" {% endif %} {% if workload_args.hostnetwork is sameas true %} - hostNetwork: true - serviceAccountName: benchmark-operator -{% endif %} - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ item.metadata.labels.app }} - topologyKey: kubernetes.io/hostname - containers: - - name: benchmark - image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} - env: - - name: uuid - value: "{{ uuid }}" - - name: test_user - value: "{{ test_user | default("ripsaw") }}" - - name: clustername - value: "{{ clustername }}" + hostNetwork: true + serviceAccountName: benchmark-operator +{% endif %} + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ item.metadata.labels.app }} + topologyKey: kubernetes.io/hostname + containers: + - name: benchmark + image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} + env: 
+ - name: uuid + value: "{{ uuid }}" + - name: test_user + value: "{{ test_user | default("ripsaw") }}" + - name: clustername + value: "{{ clustername }}" {% if elasticsearch is defined %} - - name: es - value: "{{ elasticsearch.url }}" - - name: es_index - value: "{{ elasticsearch.index_name | default("ripsaw-uperf") }}" - - name: parallel - value: "{{ elasticsearch.parallel | default(false) }}" - - name: es_verify_cert - value: "{{ elasticsearch.verify_cert | default(true) }}" + - name: es + value: "{{ elasticsearch.url }}" + - name: es_index + value: "{{ elasticsearch.index_name | default("ripsaw-uperf") }}" + - name: parallel + value: "{{ elasticsearch.parallel | default(false) }}" + - name: es_verify_cert + value: "{{ elasticsearch.verify_cert | default(true) }}" {% endif %} {% if prometheus is defined %} - - name: prom_es - value: "{{ prometheus.es_url }}" - - name: prom_parallel - value: "{{ prometheus.es_parallel | default(false) }}" - - name: prom_token - value: "{{ prometheus.prom_token | default() }}" - - name: prom_url - value: "{{ prometheus.prom_url | default() }}" -{% endif %} - - name: client_node - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: server_node - value: "{{ uperf.pin_server|default("unknown") }}" + - name: prom_es + value: "{{ prometheus.es_url }}" + - name: prom_parallel + value: "{{ prometheus.es_parallel | default(false) }}" + - name: prom_token + value: "{{ prometheus.prom_token | default() }}" + - name: prom_url + value: "{{ prometheus.prom_url | default() }}" +{% endif %} + - name: client_node + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: server_node + value: "{{ uperf.pin_server|default("unknown") }}" {% if workload_args.client_resources is defined %} - resources: {{ workload_args.client_resources | to_json }} + resources: {{ workload_args.client_resources | to_json }} {% endif %} - imagePullPolicy: Always - command: ["/bin/sh", "-c"] - args: + imagePullPolicy: Always + command: ["/bin/sh", "-c"] + 
args: {% if workload_args.serviceip is sameas true %} - - "export serviceip=true; - export h={{item.spec.clusterIP}}; + - "export serviceip=true; + export h={{item.spec.clusterIP}}; {% else %} {% if workload_args.multus.client is defined %} - - "export multus_client={{workload_args.multus.client}}; - export h={{ (item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; + - "export multus_client={{workload_args.multus.client}}; + export h={{ (item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; {% else %} - - "export h={{item.status.podIP}}; + - "export h={{item.status.podIP}}; +{% endif %} {% endif %} +{% if (workload_args.colocate is defined) %} + export colocate={{ workload_args.colocate}}; +{% endif %} +{% if workload_args.step_size is defined %} + export stepsize={{ workload_args.step_size }}; +{% endif %} +{% if workload_args.node_range is defined %} + export node_range='{{ workload_args.node_range[0] }}_{{ workload_args.node_range[1] }}'; +{% endif %} +{% if workload_args.density_range is defined %} + export density_range='{{ workload_args.density_range[0] }}_{{ workload_args.density_range[1] }}'; {% endif %} {% if workload_args.networkpolicy is defined %} - export networkpolicy={{workload_args.networkpolicy}}; + export networkpolicy={{workload_args.networkpolicy}}; {% endif %} - export hostnet={{workload_args.hostnetwork}}; - export ips=$(hostname -I); - export num_pairs={{workload_args.pair}}; - while true; do - if [[ $(redis-cli -h {{bo.resources[0].status.podIP}} get start) =~ 'true' ]]; then + export hostnet={{workload_args.hostnetwork}}; + export my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; + export my_pod_idx={{ (item['metadata']['annotations']['pod_idx'] | from_json) }}; + export ips=$(hostname -I); + export num_pairs=1 + export node_count=0; + export pod_count=0; + node_limit=0; + pod_limit=0; + STR=''; + while true; do + 
STR=$(redis-cli -h {{bo.resources[0].status.podIP}} get start-{{trunc_uuid}}); + state=$(echo $STR | cut -f1 -d-); + if [[ $state =~ 'true' ]]; then + node_limit=$(echo $STR | cut -f2 -d-); + pod_limit=$(echo $STR | cut -f3 -d-); + if [[ $my_node_idx -gt $node_limit || $my_pod_idx -gt $pod_limit ]]; then + sleep 0.5; continue; + fi; + + echo 'UPERF-run-context num_node=' $((node_limit+1)) 'density=' $((pod_limit+1)) 'my_node_idx=' $my_node_idx 'my_pod_idx=' $my_pod_idx; + node_count=$((node_limit+1)); + pod_count=$((pod_limit+1)); + num_pairs=$((pod_limit+1)); + {% for test in workload_args.test_types %} {% for proto in workload_args.protos %} {% for size in workload_args.sizes %} @@ -111,32 +148,66 @@ spec: {% set rsize = size %} {% endif %} {% for nthr in workload_args.nthrs %} - cat /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}}; + cat /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}}; {% if workload_args.run_id is defined %} - run_snafu --tool uperf --run-id {{workload_args.run_id}} -w /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}} -s {{workload_args.samples}} --resourcetype {{resource_kind}} -u {{ uuid }} --user {{test_user | default("ripsaw")}}; + run_snafu --tool uperf --run-id {{workload_args.run_id}} -w /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}} -s {{workload_args.samples}} --resourcetype {{resource_kind}} -u {{ uuid }} --user {{test_user | default("ripsaw")}}; {% else %} - run_snafu --tool uperf -w /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}} -s {{workload_args.samples}} --resourcetype {{resource_kind}} -u {{ uuid }} --user {{test_user | default("ripsaw")}}; + run_snafu --tool uperf -w /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}} -s {{workload_args.samples}} --resourcetype {{resource_kind}} -u {{ uuid }} --user {{test_user | default("ripsaw")}}; {% endif %} {% endfor %} {% endfor %} {% endfor %} {% endfor %} - else 
- continue; - fi; - break; - done; - redis-cli -h {{bo.resources[0].status.podIP}} set start false" - volumeMounts: - - name: config-volume - mountPath: "/tmp/uperf-test" - volumes: - - name: config-volume - configMap: - name: uperf-test-{{ trunc_uuid }} - restartPolicy: OnFailure + redis-cli -h {{bo.resources[0].status.podIP}} incr num_completion-{{trunc_uuid}}; + while true; do + state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start-{{trunc_uuid}}); + if [[ $state =~ 'restart' ]]; then + break; + elif [[ $state =~ 'done' ]]; then + break; + else + sleep 0.5; continue; + fi; + done; + if [[ $state =~ 'restart' ]]; then + sleep 0.5; continue; + fi; + + elif [[ $state =~ 'done' ]]; then + break; + else + sleep 0.5; continue; + fi; + break; + done; + " + volumeMounts: + - name: config-volume + mountPath: "/tmp/uperf-test" + volumes: + - name: config-volume + configMap: + name: uperf-test-{{ trunc_uuid }} + restartPolicy: OnFailure +{% if workload_args.pin is sameas false %} +{% if workload_args.colocate is sameas true %} + nodeSelector: + # client node same as server node + kubernetes.io/hostname: "{{ worker_node_list[item['metadata']['annotations']['node_idx'] | from_json] }}" +{% else %} + nodeSelector: + # skew client node one position to the right in the worker_node_list + kubernetes.io/hostname: "{{ worker_node_list[ (1+(item['metadata']['annotations']['node_idx'] | from_json)) % (worker_node_list|length)] }}" +{% endif %} + +{% else %} {% if workload_args.pin is sameas true %} - nodeSelector: - kubernetes.io/hostname: '{{ workload_args.pin_client }}' + nodeSelector: + kubernetes.io/hostname: '{{ workload_args.pin_client }}' +{% endif %} + {% endif %} -{% include "metadata.yml.j2" %} + +{% macro metadata() %}{% include "metadata.yml.j2" %}{% endmacro %} + {{ metadata()|indent }} +{% endfor %} diff --git a/roles/uperf/vars/main.yml b/roles/uperf/vars/main.yml index 82fc9c23f..aaee090cd 100644 --- a/roles/uperf/vars/main.yml +++ 
b/roles/uperf/vars/main.yml @@ -1,3 +1,11 @@ --- # vars file for bench cleanup: true +worker_node_list: [] +pod_low_idx: "0" +pod_hi_idx: "0" +node_low_idx: "0" +node_hi_idx: "0" +node_idx: "0" +pod_idx: "0" +all_run_done: false diff --git a/tests/test_uperf.sh b/tests/test_uperf.sh index 65abcc0f5..4f1b81272 100755 --- a/tests/test_uperf.sh +++ b/tests/test_uperf.sh @@ -27,8 +27,8 @@ function functional_test_uperf { uuid=${long_uuid:0:8} pod_count "type=uperf-bench-server-$uuid" 1 900 - uperf_server_pod=$(get_pod "app=uperf-bench-server-0-$uuid" 300) - wait_for "kubectl -n my-ripsaw wait --for=condition=Initialized -l app=uperf-bench-server-0-$uuid pods --timeout=300s" "300s" $uperf_server_pod + uperf_server_pod=$(get_pod "type=uperf-bench-server-${uuid}" 300) + wait_for "kubectl -n my-ripsaw wait --for=condition=Initialized -l type=uperf-bench-server-${uuid} pods --timeout=300s" "300s" $uperf_server_pod uperf_client_pod=$(get_pod "app=uperf-bench-client-$uuid" 900) wait_for "kubectl wait -n my-ripsaw --for=condition=Initialized pods/$uperf_client_pod --timeout=500s" "500s" $uperf_client_pod wait_for "kubectl wait -n my-ripsaw --for=condition=complete -l app=uperf-bench-client-$uuid jobs --timeout=500s" "500s" $uperf_client_pod