From c16c1c69ca3ae72761edd31d0a1d7b38b51656bb Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 17 Dec 2020 11:09:43 -0500 Subject: [PATCH 01/41] basic pod enumeration over node list over denstiy argument --- roles/uperf/tasks/main.yml | 38 +++++++++++++++++++++++++++-- roles/uperf/templates/server.yml.j2 | 12 +++++++++ 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index 5e5fafefb..7d54966fb 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -45,6 +45,23 @@ register: serviceip when: workload_args.serviceip is defined and workload_args.serviceip +### V2 vs V1 +- name: V2 Record num server (V1) pods + set_fact: + num_server_pods: "{{ workload_args.pair | default('1'|int) }}" + when: workload_args.max_node is not defined + +- name: V2 Setup eligible node (static) list - Tobe replaced by real node list builder + set_fact: + server_node_list: [ worker0 ] + +- name: V2 Record num server pods + set_fact: + num_server_pods: "{{ server_node_list|length * workload_args.density | default('1'|int) }}" + when: workload_args.max_node is defined + +### + - block: - name: Create service for server pods @@ -58,6 +75,19 @@ definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" register: servers with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} + when: workload_args.max_node is not defined + +############ + + - name: V2 Start Server(s) - total = eligible nodes * density + k8s: + definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" + with_nested: + - "{{ server_node_list }}" + - "{{ range(0, workload_args.density | default('1'|int)) | list }}" + when: workload_args.max_node is defined + +############ - name: Wait for pods to be running.... 
k8s_facts: @@ -78,7 +108,8 @@ state: "Starting Servers" when: resource_state.resources[0].status.state == "Building" and resource_kind == "pod" - + +########### VM block - block: - name: Start Server(s) @@ -107,6 +138,8 @@ when: resource_state.resources[0].status.state == "Building" and resource_kind == "vm" +########### + - block: - name: Get server pods @@ -126,7 +159,8 @@ namespace: "{{ operator_namespace }}" status: state: "Starting Clients" - when: "workload_args.pair|default('1')|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" + when: "num_server_pods|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" + #when: "workload_args.pair|default('1')|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "pod" diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index d46a66799..2d91d2421 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -2,7 +2,13 @@ kind: Job apiVersion: batch/v1 metadata: + +{% if workload_args.max_node is not defined %} name: 'uperf-server-{{ item }}-{{ trunc_uuid }}' +{% else %} + name: 'uperf-server-{{ item[0] }}-{{ item[1] }}-{{ trunc_uuid }}' +{% endif %} + namespace: "{{ operator_namespace }}" spec: ttlSecondsAfterFinished: 600 @@ -10,7 +16,13 @@ spec: template: metadata: labels: + +{% if workload_args.max_node is not defined %} app: uperf-bench-server-{{item}}-{{ trunc_uuid }} +{% else %} + app: uperf-bench-server-{{ item[0] }}-{{ item[1] }}-{{ trunc_uuid }} +{% endif %} + type: uperf-bench-server-{{ trunc_uuid }} {% if workload_args.multus.enabled is sameas true %} annotations: From 4cf61d185e36a3634048a6c3501029a403e6bcad Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Fri, 18 Dec 2020 14:19:01 +0000 Subject: 
[PATCH 02/41] density fully working -with debug --- roles/uperf/tasks/main.yml | 124 +++++++++++++++++++++---------------- 1 file changed, 70 insertions(+), 54 deletions(-) diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index 7d54966fb..eb9e73310 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -1,6 +1,6 @@ --- -- name: Get current state +- name: A1 Get current state k8s_facts: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -18,7 +18,7 @@ complete: false when: resource_state.resources[0].status.state is not defined -- name: Get current state - If it has changed +- name: A3 Get current state - If it has changed k8s_facts: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -26,7 +26,7 @@ namespace: '{{ operator_namespace }}' register: resource_state -- name: Capture operator information +- name: A4 Capture operator information k8s_facts: kind: Pod api_version: v1 @@ -35,7 +35,7 @@ - name = benchmark-operator register: bo -- name: Capture ServiceIP +- name: A5 Capture ServiceIP k8s_facts: kind: Service api_version: v1 @@ -46,31 +46,35 @@ when: workload_args.serviceip is defined and workload_args.serviceip ### V2 vs V1 -- name: V2 Record num server (V1) pods +- name: A6 Record num server (V1) pods set_fact: - num_server_pods: "{{ workload_args.pair | default('1'|int) }}" + num_server_pods: "{{ workload_args.pair | default('1')|int }}" when: workload_args.max_node is not defined -- name: V2 Setup eligible node (static) list - Tobe replaced by real node list builder +- name: A7 V2 Setup eligible node (static) list - Tobe replaced by real node list builder set_fact: - server_node_list: [ worker0 ] + server_node_list: [ worker0, worker2 ] -- name: V2 Record num server pods +- name: A8 V2 Record num server pods set_fact: - num_server_pods: "{{ server_node_list|length * workload_args.density | default('1'|int) }}" + num_server_pods: "{{ server_node_list|length * workload_args.density | 
default('1')|int }}" when: workload_args.max_node is defined +- name: A8_1 show num_server_pods + debug: + var: num_server_pods + ### - block: - - name: Create service for server pods + - name: P8 Create service for server pods k8s: definition: "{{ lookup('template', 'service.yml.j2') | from_yaml }}" with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} when: workload_args.serviceip is defined and workload_args.serviceip - - name: Start Server(s) + - name: P9 Start Server(s) k8s: definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" register: servers @@ -79,7 +83,7 @@ ############ - - name: V2 Start Server(s) - total = eligible nodes * density + - name: P10 V2 Start Server(s) - total = eligible nodes * density k8s: definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" with_nested: @@ -89,7 +93,7 @@ ############ - - name: Wait for pods to be running.... + - name: P11 Wait for pods to be running.... k8s_facts: kind: Pod api_version: v1 @@ -98,7 +102,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods - - name: Update resource state + - name: P12 Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -109,16 +113,16 @@ when: resource_state.resources[0].status.state == "Building" and resource_kind == "pod" -########### VM block +########### - block: - - name: Start Server(s) + - name: V11 Start Server(s) k8s: definition: "{{ lookup('template', 'server_vm.yml.j2') | from_yaml }}" register: servers with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} - - name: Wait for vms to be running.... + - name: V12 Wait for vms to be running.... 
k8s_facts: kind: VirtualMachineInstance api_version: kubevirt.io/v1alpha3 @@ -127,7 +131,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_vms - - name: Update resource state + - name: V13 Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -140,9 +144,10 @@ ########### +########### - block: - - name: Get server pods + - name: P13 Get server pods k8s_facts: kind: Pod api_version: v1 @@ -151,7 +156,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods - - name: Update resource state + - name: P14 Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -165,9 +170,11 @@ when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "pod" +######## +######## - block: - - name: Wait for vms to be running.... + - name: V14 Wait for vms to be running.... k8s_facts: kind: VirtualMachineInstance api_version: kubevirt.io/v1alpha3 @@ -176,7 +183,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_vms - - name: Update resource state + - name: V15 Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -184,18 +191,22 @@ namespace: "{{ operator_namespace }}" status: state: "Starting Clients" - when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + #when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + when: "num_server_pods|int == server_vms | 
json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - - name: blocking client from running uperf + - name: V16 blocking client from running uperf command: "redis-cli set start false" with_items: "{{ server_vms.resources }}" - when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + #when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + when: "num_server_pods|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "vm" -- block: +#### + +- block: #HN while state is "start client" - - name: Get pod info + - name: A17 Get pod info k8s_facts: kind: Pod api_version: v1 @@ -204,28 +215,28 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods - - name: Generate uperf xml files + - name: A18 Generate uperf xml files k8s: definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml }}" - block: - - name: Start Client(s) + - name: P19 Start Client(s) k8s: definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" with_items: "{{ server_pods.resources }}" when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 - - name: Start Client(s) - ServiceIP + - name: P20 Start Client(s) 
- ServiceIP k8s: definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" with_items: "{{ serviceip.resources }}" when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 when: resource_kind == "pod" - + - block: - - name: Wait for vms to be running.... + - name: V19 Wait for vms to be running.... k8s_facts: kind: VirtualMachineInstance api_version: kubevirt.io/v1alpha3 @@ -234,12 +245,12 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_vms - - name: Generate uperf test files + - name: V20 Generate uperf test files k8s: definition: "{{ lookup('template', 'configmap_script.yml.j2') | from_yaml }}" with_items: "{{ server_vms.resources }}" - - name: Start Client(s) + - name: V21 Start Client(s) k8s: definition: "{{ lookup('template', 'workload_vm.yml.j2') | from_yaml }}" with_items: "{{ server_vms.resources }}" @@ -259,8 +270,8 @@ - block: - - block: - - name: Get client pod status + - block: #P block + - name: P22 Get client pod status k8s_facts: kind: Pod api_version: v1 @@ -269,7 +280,7 @@ - app = uperf-bench-client-{{ trunc_uuid }} register: client_pods - - name: Update resource state + - name: P23 Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -277,20 +288,19 @@ namespace: "{{ operator_namespace }}" status: state: Clients Running - when: "workload_args.pair|default('1')|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (client_pods | json_query('resources[].status.podIP')|length)" + when: "num_server_pods|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (client_pods | json_query('resources[].status.podIP')|length)" + #when: "workload_args.pair|default('1')|int == client_pods | 
json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (client_pods | json_query('resources[].status.podIP')|length)" - when: resource_kind == "pod" - - - block: + - block: #V block - - name: set complete to false + - name: V22 set complete to false command: "redis-cli set complete false" - - name: Get count of clients ready + - name: V23 Get count of clients ready command: "redis-cli get clients-{{ trunc_uuid }}" register: clients_ready_count - - name: Update resource state + - name: V24 Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -300,16 +310,21 @@ state: Clients Running when: "workload_args.pair|default('1')|int == clients_ready_count.stdout|int" + - name: V14 debug state + debug: + msg: "HN ater V24 {{ resource_state.resources[0].status.state }}" + when: resource_kind == "vm" + when: resource_state.resources[0].status.state == "Waiting for Clients" -- block: +- block: #ALL state = Clients running - - name: Signal workload + - name: A25 Signal workload command: "redis-cli set start true" - - name: Update resource state + - name: A26 Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -322,7 +337,7 @@ - block: - block: - - name: Waiting for pods to complete.... + - name: P27 Waiting for pods to complete.... 
k8s_facts: kind: pod api_version: v1 @@ -339,12 +354,13 @@ status: state: Cleanup complete: false - when: "workload_args.pair|default('1')|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" + #when: "workload_args.pair|default('1')|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" + when: "num_server_pods|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" when: resource_kind == "pod" - block: - - name: get complete + - name: V28 get complete command: "redis-cli get complete" register: complete_status @@ -364,7 +380,7 @@ - block: - block: - - name: Get Server Pods + - name: P29 Get Server Pods k8s_facts: kind: Pod api_version: v1 @@ -373,7 +389,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods - - name: Pod names - to clean + - name: P30 Pod names - to clean set_fact: clean_pods: | [ @@ -382,7 +398,7 @@ {% endfor %} ] - - name: Cleanup run + - name: P31 Cleanup run k8s: kind: pod api_version: v1 From 7e84f1117e2e912637e40cc2bab9f7ad250e3f18 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Tue, 22 Dec 2020 01:31:06 +0000 Subject: [PATCH 03/41] client affinity and client/server pairinng are working correctly --- roles/uperf/tasks/main.yml | 87 ++++++++++++++++++++++----- roles/uperf/templates/server.yml.j2 | 9 +++ roles/uperf/templates/workload.yml.j2 | 21 ++++--- roles/uperf/vars/main.yml | 1 + 4 files changed, 95 insertions(+), 23 deletions(-) diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index eb9e73310..daeebb41e 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -45,27 +45,30 @@ register: serviceip when: workload_args.serviceip is defined and workload_args.serviceip -### V2 vs V1 -- name: A6 Record num server (V1) pods +# +# "pin" mode exists prior to "scale" mode. 
If "pin: true", we will +# do the old way using pin_server and pin_client +# +- name: A6 Record num server (V1) pods using workload_args.pair - TBD set_fact: num_server_pods: "{{ workload_args.pair | default('1')|int }}" when: workload_args.max_node is not defined +#### + - name: A7 V2 Setup eligible node (static) list - Tobe replaced by real node list builder set_fact: - server_node_list: [ worker0, worker2 ] + worker_node_list: "{{workload_args.node_list}}" -- name: A8 V2 Record num server pods +- name: A8 V2 Record num server pods using new worker_node_list set_fact: - num_server_pods: "{{ server_node_list|length * workload_args.density | default('1')|int }}" + num_server_pods: "{{ worker_node_list|length * workload_args.density | default('1')|int }}" when: workload_args.max_node is defined - name: A8_1 show num_server_pods debug: var: num_server_pods -### - - block: - name: P8 Create service for server pods @@ -87,7 +90,7 @@ k8s: definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" with_nested: - - "{{ server_node_list }}" + - "{{ worker_node_list }}" - "{{ range(0, workload_args.density | default('1'|int)) | list }}" when: workload_args.max_node is defined @@ -114,6 +117,8 @@ when: resource_state.resources[0].status.state == "Building" and resource_kind == "pod" ########### +# VM remains scale agnostic for now +########### - block: - name: V11 Start Server(s) @@ -144,7 +149,7 @@ ########### -########### +########### mode - block: - name: P13 Get server pods @@ -171,6 +176,7 @@ when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "pod" ######## +# ######## - block: @@ -204,7 +210,7 @@ #### -- block: #HN while state is "start client" +- block: #HN while state is "start client" for pod - name: A17 Get pod info k8s_facts: @@ -219,19 +225,68 @@ k8s: definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml }}" - - block: - - name: P19 Start Client(s) + - block: # Starting Clients" + - set_fact: + 
cpod_affi_list: [] + + - block: + - name: HN colocate TBD + debug: + msg: "HN colocate TBD" + when: workload_args.colocate is defined and workload_args.colocate|bool == True + + #### generate affinity list + - block: + + - name: Pass 1 - Build client list for node[1:] + set_fact: + cpod_affi_list: "{{ cpod_affi_list + [ item[0]] }}" + with_nested: + - "{{ worker_node_list[1:] }}" + - "{{ range(0, workload_args.density | default('1'|int)) | list }}" + + - name: HNC_0 debug cpod_list + debug: + var: cpod_affi_list + + - name: Pass 2 - Append client list with node[0] to the end + set_fact: + cpod_affi_list: "{{cpod_affi_list + [ item[0] ] }}" + with_nested: + - "{{ worker_node_list[0] }}" + - "{{ range(0, workload_args.density | default('1'|int)) | list }}" + + + - name: HNC_1 debug cpod_list + debug: + var: cpod_affi_list + + + when: workload_args.colocate is not defined or workload_args.colocate|bool == False + #### End generate cpod affinity list + + - name: P19 Start Client(s) w/o serviceIP k8s: definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - with_items: "{{ server_pods.resources }}" + with_together: + - "{{ server_pods.resources }}" + - "{{ cpod_affi_list }}" when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 - - name: P20 Start Client(s) - ServiceIP + - name: P20 Start Client(s) with serviceIP k8s: definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - with_items: "{{ serviceip.resources }}" + with_together: + - "{{ serviceip.resources }}" + - "{{ cpod_affi_list }}" + when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 + - name: compare worker000 versus pin_server + debug: + msg: "HN equal" + when: worker_node_list[0] == workload_args.pin_server + when: resource_kind == "pod" - block: @@ -270,7 +325,7 @@ - block: - - block: #P block + - block: # Pod block - name: P22 Get client pod status 
k8s_facts: kind: Pod diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 2d91d2421..4fafff7a1 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -50,6 +50,15 @@ spec: nodeSelector: kubernetes.io/hostname: '{{ workload_args.pin_server }}' {% endif %} + +# +# V2 pin server pod to node +# +{% if workload_args.max_node is defined %} + nodeSelector: + kubernetes.io/hostname: '{{ item[0] }}' +{% endif %} + {% if workload_args.serviceip is sameas true %} securityContext: sysctls: diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 493b4b7a9..55655212a 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -3,9 +3,9 @@ kind: Job apiVersion: batch/v1 metadata: {% if workload_args.serviceip is sameas true %} - name: 'uperf-client-{{item.spec.clusterIP}}-{{ trunc_uuid }}' + name: 'uperf-client-{{item[0].spec.clusterIP}}-{{ trunc_uuid }}' {% else %} - name: 'uperf-client-{{item.status.podIP}}-{{ trunc_uuid }}' + name: 'uperf-client-{{item[0].status.podIP}}-{{ trunc_uuid }}' {% endif %} namespace: '{{ operator_namespace }}' spec: @@ -13,7 +13,7 @@ spec: metadata: labels: app: uperf-bench-client-{{ trunc_uuid }} - clientfor: {{ item.metadata.labels.app }} + clientfor: {{ item[0].metadata.labels.app }} type: uperf-bench-client-{{ trunc_uuid }} {% if workload_args.multus.enabled is sameas true %} annotations: @@ -37,7 +37,7 @@ spec: - key: app operator: In values: - - {{ item.metadata.labels.app }} + - {{ item[0].metadata.labels.app }} topologyKey: kubernetes.io/hostname containers: - name: benchmark @@ -83,13 +83,13 @@ spec: args: {% if workload_args.serviceip is sameas true %} - "export serviceip=true; - export h={{item.spec.clusterIP}}; + export h={{item[0].spec.clusterIP}}; {% else %} {% if workload_args.multus.client is defined %} - "export multus_client={{workload_args.multus.client}}; - export h={{ 
(item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; + export h={{ (item[0]['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; {% else %} - - "export h={{item.status.podIP}}; + - "export h={{item[0].status.podIP}}; {% endif %} {% endif %} {% if workload_args.networkpolicy is defined %} @@ -139,4 +139,11 @@ spec: nodeSelector: kubernetes.io/hostname: '{{ workload_args.pin_client }}' {% endif %} + +{% if workload_args.max_node is defined %} + nodeSelector: + kubernetes.io/hostname: '{{ item[1] }}' +{% endif %} + + {% include "metadata.yml.j2" %} diff --git a/roles/uperf/vars/main.yml b/roles/uperf/vars/main.yml index 82fc9c23f..6c879b5c3 100644 --- a/roles/uperf/vars/main.yml +++ b/roles/uperf/vars/main.yml @@ -1,3 +1,4 @@ --- # vars file for bench cleanup: true +worker_node_list: [] From 98f3244938959b179126a8ee600a66a5b9bef467 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Wed, 23 Dec 2020 19:39:04 +0000 Subject: [PATCH 04/41] Annotate server with node-idx and use it to derive client affinity --- roles/uperf/tasks/main.yml | 82 +++++++-------------------- roles/uperf/templates/server.yml.j2 | 11 ++-- roles/uperf/templates/workload.yml.j2 | 31 ++++++---- 3 files changed, 46 insertions(+), 78 deletions(-) diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index daeebb41e..51f1d9894 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -56,19 +56,21 @@ #### -- name: A7 V2 Setup eligible node (static) list - Tobe replaced by real node list builder +- name: A7 V2 scale run - Setup eligible node (static) list - Tobe replaced by real node list builder + set_fact: + worker_node_list: "{{workload_args.node_list[0]}}" + when: workload_args.max_node is not defined + +- name: A7 V1 non-scale Setup eligible node (static) list - Tobe replaced by real node list builder set_fact: worker_node_list: "{{workload_args.node_list}}" + when: 
workload_args.max_node is defined - name: A8 V2 Record num server pods using new worker_node_list set_fact: num_server_pods: "{{ worker_node_list|length * workload_args.density | default('1')|int }}" when: workload_args.max_node is defined -- name: A8_1 show num_server_pods - debug: - var: num_server_pods - - block: - name: P8 Create service for server pods @@ -90,9 +92,12 @@ k8s: definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" with_nested: - - "{{ worker_node_list }}" + - "{{ range(0, worker_node_list|length| default('0'|int)) | list }}" - "{{ range(0, workload_args.density | default('1'|int)) | list }}" - when: workload_args.max_node is defined + # + # Each server annotates a "node_idx" which will allow its peer client + # to derive its affinity according the 'colocate' variable + # ############ @@ -226,67 +231,25 @@ definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml }}" - block: # Starting Clients" - - set_fact: - cpod_affi_list: [] - - - block: - - name: HN colocate TBD - debug: - msg: "HN colocate TBD" - when: workload_args.colocate is defined and workload_args.colocate|bool == True - - #### generate affinity list - - block: - - - name: Pass 1 - Build client list for node[1:] - set_fact: - cpod_affi_list: "{{ cpod_affi_list + [ item[0]] }}" - with_nested: - - "{{ worker_node_list[1:] }}" - - "{{ range(0, workload_args.density | default('1'|int)) | list }}" - - - name: HNC_0 debug cpod_list - debug: - var: cpod_affi_list - - - name: Pass 2 - Append client list with node[0] to the end - set_fact: - cpod_affi_list: "{{cpod_affi_list + [ item[0] ] }}" - with_nested: - - "{{ worker_node_list[0] }}" - - "{{ range(0, workload_args.density | default('1'|int)) | list }}" - - - - name: HNC_1 debug cpod_list - debug: - var: cpod_affi_list - - - when: workload_args.colocate is not defined or workload_args.colocate|bool == False - #### End generate cpod affinity list - - name: P19 Start Client(s) w/o serviceIP k8s: definition: "{{ 
lookup('template', 'workload.yml.j2') | from_yaml }}" - with_together: + with_items: - "{{ server_pods.resources }}" - - "{{ cpod_affi_list }}" when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 + # + # Each server annotates a "node_idx". Each peer client will + # derive its affinity according the 'colocate' variable. + # + - name: P20 Start Client(s) with serviceIP k8s: definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - with_together: + with_items: - "{{ serviceip.resources }}" - - "{{ cpod_affi_list }}" - when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 - - name: compare worker000 versus pin_server - debug: - msg: "HN equal" - when: worker_node_list[0] == workload_args.pin_server - when: resource_kind == "pod" - block: @@ -365,13 +328,6 @@ state: Clients Running when: "workload_args.pair|default('1')|int == clients_ready_count.stdout|int" - - name: V14 debug state - debug: - msg: "HN ater V24 {{ resource_state.resources[0].status.state }}" - - when: resource_kind == "vm" - - when: resource_state.resources[0].status.state == "Waiting for Clients" - block: #ALL state = Clients running diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 4fafff7a1..8a67fbf14 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -6,7 +6,7 @@ metadata: {% if workload_args.max_node is not defined %} name: 'uperf-server-{{ item }}-{{ trunc_uuid }}' {% else %} - name: 'uperf-server-{{ item[0] }}-{{ item[1] }}-{{ trunc_uuid }}' + name: 'uperf-server-{{worker_node_list[ item[0]] }}-{{ item[1] }}-{{ trunc_uuid }}' {% endif %} namespace: "{{ operator_namespace }}" @@ -20,14 +20,16 @@ spec: {% if workload_args.max_node is not defined %} app: uperf-bench-server-{{item}}-{{ trunc_uuid }} {% else %} - app: uperf-bench-server-{{ item[0] }}-{{ item[1] }}-{{ trunc_uuid 
}} + #app: uperf-bench-server-{{ item[0] }}-{{ item[1] }}-{{ trunc_uuid }} + app: uperf-bench-server-{{ worker_node_list[item[0]] }}-{{ item[1] }}-{{ trunc_uuid }} {% endif %} type: uperf-bench-server-{{ trunc_uuid }} -{% if workload_args.multus.enabled is sameas true %} annotations: +{% if workload_args.multus.enabled is sameas true %} k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.server}} {% endif %} + node_idx: '{{ item[0] }}' spec: {% if workload_args.runtime_class is defined %} runtimeClassName: "{{ workload_args.runtime_class }}" @@ -56,7 +58,8 @@ spec: # {% if workload_args.max_node is defined %} nodeSelector: - kubernetes.io/hostname: '{{ item[0] }}' + #kubernetes.io/hostname: '{{ item[0] }}' + kubernetes.io/hostname: '{{ worker_node_list[item[0]] }}' {% endif %} {% if workload_args.serviceip is sameas true %} diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 55655212a..155b52a7a 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -3,9 +3,9 @@ kind: Job apiVersion: batch/v1 metadata: {% if workload_args.serviceip is sameas true %} - name: 'uperf-client-{{item[0].spec.clusterIP}}-{{ trunc_uuid }}' + name: 'uperf-client-{{item.spec.clusterIP}}-{{ trunc_uuid }}' {% else %} - name: 'uperf-client-{{item[0].status.podIP}}-{{ trunc_uuid }}' + name: 'uperf-client-{{item.status.podIP}}-{{ trunc_uuid }}' {% endif %} namespace: '{{ operator_namespace }}' spec: @@ -13,7 +13,7 @@ spec: metadata: labels: app: uperf-bench-client-{{ trunc_uuid }} - clientfor: {{ item[0].metadata.labels.app }} + clientfor: {{ item.metadata.labels.app }} type: uperf-bench-client-{{ trunc_uuid }} {% if workload_args.multus.enabled is sameas true %} annotations: @@ -37,7 +37,7 @@ spec: - key: app operator: In values: - - {{ item[0].metadata.labels.app }} + - {{ item.metadata.labels.app }} topologyKey: kubernetes.io/hostname containers: - name: benchmark @@ -83,13 +83,13 @@ spec: args: {% if 
workload_args.serviceip is sameas true %} - "export serviceip=true; - export h={{item[0].spec.clusterIP}}; + export h={{item.spec.clusterIP}}; {% else %} {% if workload_args.multus.client is defined %} - "export multus_client={{workload_args.multus.client}}; - export h={{ (item[0]['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; + export h={{ (item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; {% else %} - - "export h={{item[0].status.podIP}}; + - "export h={{item.status.podIP}}; {% endif %} {% endif %} {% if workload_args.networkpolicy is defined %} @@ -135,14 +135,23 @@ spec: configMap: name: uperf-test-{{ trunc_uuid }} restartPolicy: OnFailure -{% if workload_args.pin is sameas true %} +{% if workload_args.max_node is defined %} +{% if workload_args.colocate is sameas true %} nodeSelector: - kubernetes.io/hostname: '{{ workload_args.pin_client }}' + # client node same as server node + kubernetes.io/hostname: "{{ worker_node_list[item['metadata']['annotations']['node_idx'] | from_json] }}" +{% else %} + nodeSelector: + # skew client node one position left in the woker_node_list + kubernetes.io/hostname: "{{ worker_node_list[ (1+(item['metadata']['annotations']['node_idx'] | from_json)) % (worker_node_list|length)] }}" {% endif %} -{% if workload_args.max_node is defined %} +{% else %} +{% if workload_args.pin is sameas true %} nodeSelector: - kubernetes.io/hostname: '{{ item[1] }}' + kubernetes.io/hostname: '{{ workload_args.pin_client }}' +{% endif %} + {% endif %} From 101e5b584ba834aa13ec08f55085c4267e630d91 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Wed, 6 Jan 2021 02:50:18 +0000 Subject: [PATCH 05/41] restructure main, add stm to main and client to support iterations --- roles/uperf/tasks/cleanup.yml | 45 ++ roles/uperf/tasks/main.yml | 445 ++----------------- roles/uperf/tasks/next_set.yml | 59 +++ roles/uperf/tasks/run_a_set.yml | 27 ++ 
roles/uperf/tasks/send_client_run_signal.yml | 18 + roles/uperf/tasks/setup.yml | 71 +++ roles/uperf/tasks/start_client.yml | 76 ++++ roles/uperf/tasks/start_server.yml | 104 +++++ roles/uperf/tasks/wait_client_done.yml | 45 ++ roles/uperf/tasks/wait_client_ready.yml | 55 +++ roles/uperf/tasks/wait_server_ready.yml | 55 +++ roles/uperf/tasks/wait_set_done.yml | 31 ++ roles/uperf/templates/workload.yml.j2 | 20 +- roles/uperf/vars/main.yml | 3 + 14 files changed, 645 insertions(+), 409 deletions(-) create mode 100644 roles/uperf/tasks/cleanup.yml create mode 100644 roles/uperf/tasks/next_set.yml create mode 100644 roles/uperf/tasks/run_a_set.yml create mode 100644 roles/uperf/tasks/send_client_run_signal.yml create mode 100644 roles/uperf/tasks/setup.yml create mode 100644 roles/uperf/tasks/start_client.yml create mode 100644 roles/uperf/tasks/start_server.yml create mode 100644 roles/uperf/tasks/wait_client_done.yml create mode 100644 roles/uperf/tasks/wait_client_ready.yml create mode 100644 roles/uperf/tasks/wait_server_ready.yml create mode 100644 roles/uperf/tasks/wait_set_done.yml diff --git a/roles/uperf/tasks/cleanup.yml b/roles/uperf/tasks/cleanup.yml new file mode 100644 index 000000000..ae67c2a6a --- /dev/null +++ b/roles/uperf/tasks/cleanup.yml @@ -0,0 +1,45 @@ +--- + +- block: + + - block: + - name: P29 Get Server Pods + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods + + - name: P30 Pod names - to clean + set_fact: + clean_pods: | + [ + {% for item in server_pods.resources %} + "{{ item['metadata']['name'] }}", + {% endfor %} + ] + + - name: P31 Cleanup run + k8s: + kind: pod + api_version: v1 + namespace: '{{ operator_namespace }}' + state: absent + name: "{{ item }}" + with_items: "{{ clean_pods }}" + when: cleanup + when: resource_kind == "pod" + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + 
kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Complete + complete: true + + when: resource_state.resources[0].status.state == "Cleanup" + diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index 51f1d9894..f9fa3d045 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -1,438 +1,67 @@ --- -- name: A1 Get current state - k8s_facts: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: '{{ meta.name }}' - namespace: '{{ operator_namespace }}' - register: resource_state +- include_tasks: setup.yml -- operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Building - complete: false - when: resource_state.resources[0].status.state is not defined - -- name: A3 Get current state - If it has changed - k8s_facts: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: '{{ meta.name }}' - namespace: '{{ operator_namespace }}' - register: resource_state - -- name: A4 Capture operator information - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - name = benchmark-operator - register: bo - -- name: A5 Capture ServiceIP - k8s_facts: - kind: Service - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: serviceip - when: workload_args.serviceip is defined and workload_args.serviceip - -# -# "pin" mode exists prior to "scale" mode. 
If "pin: true", we will -# do the old way using pin_server and pin_client -# -- name: A6 Record num server (V1) pods using workload_args.pair - TBD - set_fact: - num_server_pods: "{{ workload_args.pair | default('1')|int }}" - when: workload_args.max_node is not defined - -#### - -- name: A7 V2 scale run - Setup eligible node (static) list - Tobe replaced by real node list builder - set_fact: - worker_node_list: "{{workload_args.node_list[0]}}" - when: workload_args.max_node is not defined - -- name: A7 V1 non-scale Setup eligible node (static) list - Tobe replaced by real node list builder - set_fact: - worker_node_list: "{{workload_args.node_list}}" - when: workload_args.max_node is defined - -- name: A8 V2 Record num server pods using new worker_node_list - set_fact: - num_server_pods: "{{ worker_node_list|length * workload_args.density | default('1')|int }}" - when: workload_args.max_node is defined +- include_tasks: start_server.yml + when: resource_state.resources[0].status.state == "Building" - block: - - name: P8 Create service for server pods - k8s: - definition: "{{ lookup('template', 'service.yml.j2') | from_yaml }}" - with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} - when: workload_args.serviceip is defined and workload_args.serviceip - - - name: P9 Start Server(s) - k8s: - definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" - register: servers - with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} - when: workload_args.max_node is not defined - -############ + - include_tasks: wait_server_ready.yml + when: resource_state.resources[0].status.state == "Starting Servers" - - name: P10 V2 Start Server(s) - total = eligible nodes * density - k8s: - definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" - with_nested: - - "{{ range(0, worker_node_list|length| default('0'|int)) | list }}" - - "{{ range(0, workload_args.density | default('1'|int)) | list }}" - # - # Each server annotates 
a "node_idx" which will allow its peer client - # to derive its affinity according the 'colocate' variable - # + - include_tasks: start_client.yml + when: resource_state.resources[0].status.state == "Starting Clients" -############ + - include_tasks: wait_client_ready.yml + when: resource_state.resources[0].status.state == "Waiting for Clients" - - name: P11 Wait for pods to be running.... - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods - - - name: P12 Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Starting Servers" - - when: resource_state.resources[0].status.state == "Building" and resource_kind == "pod" - -########### -# VM remains scale agnostic for now -########### -- block: + - include_tasks: run_a_set.yml + when: resource_state.resources[0].status.state == "Clients Running" - - name: V11 Start Server(s) - k8s: - definition: "{{ lookup('template', 'server_vm.yml.j2') | from_yaml }}" - register: servers - with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} + - include_tasks: wait_set_done.yml + when: resource_state.resources[0].status.state == "Set Running" - - name: V12 Wait for vms to be running.... 
- k8s_facts: - kind: VirtualMachineInstance - api_version: kubevirt.io/v1alpha3 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_vms - - name: V13 Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Starting Servers" + - include_tasks: next_set.yml + when: resource_state.resources[0].status.state == "Run Next Set" + # will loop back to "Clients Running" state, or FALLTHRU to "Running" state below and finish - when: resource_state.resources[0].status.state == "Building" and resource_kind == "vm" +# - include_tasks: send_client_run_signal.yml +# when: resource_state.resources[0].status.state == "Clients Running" -########### -########### mode -- block: + - include_tasks: wait_client_done.yml + when: resource_state.resources[0].status.state == "Running" - - name: P13 Get server pods - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods + - include_tasks: cleanup.yml + when: resource_state.resources[0].status.state == "Cleanup" - - name: P14 Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Starting Clients" - when: "num_server_pods|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" - #when: "workload_args.pair|default('1')|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" + when: resource_kind == "pod" - when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "pod" -######## -# -######## - block: - - name: V14 Wait for vms to
be running.... - k8s_facts: - kind: VirtualMachineInstance - api_version: kubevirt.io/v1alpha3 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_vms - - - name: V15 Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Starting Clients" - #when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - when: "num_server_pods|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - - - name: V16 blocking client from running uperf - command: "redis-cli set start false" - with_items: "{{ server_vms.resources }}" - #when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - when: "num_server_pods|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - - when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "vm" - -#### - -- block: #HN while state is "start client" for pod - - - name: A17 Get pod info - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods - - - name: A18 Generate 
uperf xml files - k8s: - definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml }}" - - - block: # Starting Clients" - - name: P19 Start Client(s) w/o serviceIP - k8s: - definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - with_items: - - "{{ server_pods.resources }}" - when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 - - # - # Each server annotates a "node_idx". Each peer client will - # derive its affinity according the 'colocate' variable. - # - - - name: P20 Start Client(s) with serviceIP - k8s: - definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - with_items: - - "{{ serviceip.resources }}" - when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 - - when: resource_kind == "pod" - - - block: - - - name: V19 Wait for vms to be running.... - k8s_facts: - kind: VirtualMachineInstance - api_version: kubevirt.io/v1alpha3 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_vms + - include_tasks: wait_server_ready.yml + when: resource_state.resources[0].status.state == "Starting Servers" - - name: V20 Generate uperf test files - k8s: - definition: "{{ lookup('template', 'configmap_script.yml.j2') | from_yaml }}" - with_items: "{{ server_vms.resources }}" + - include_tasks: start_client.yml + when: resource_state.resources[0].status.state == "Starting Clients" - - name: V21 Start Client(s) - k8s: - definition: "{{ lookup('template', 'workload_vm.yml.j2') | from_yaml }}" - with_items: "{{ server_vms.resources }}" - when: server_vms.resources|length > 0 - - when: resource_kind == "vm" - - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Waiting for Clients - - when: 
resource_state.resources[0].status.state == "Starting Clients" - -- block: - - - block: # Pod block - - name: P22 Get client pod status - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - app = uperf-bench-client-{{ trunc_uuid }} - register: client_pods - - - name: P23 Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Clients Running - when: "num_server_pods|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (client_pods | json_query('resources[].status.podIP')|length)" - #when: "workload_args.pair|default('1')|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (client_pods | json_query('resources[].status.podIP')|length)" - - - block: #V block - - - name: V22 set complete to false - command: "redis-cli set complete false" - - - name: V23 Get count of clients ready - command: "redis-cli get clients-{{ trunc_uuid }}" - register: clients_ready_count - - - name: V24 Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Clients Running - when: "workload_args.pair|default('1')|int == clients_ready_count.stdout|int" - - when: resource_state.resources[0].status.state == "Waiting for Clients" - -- block: #ALL state = Clients running - - - name: A25 Signal workload - command: "redis-cli set start true" - - - name: A26 Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: "Running" - - when: 
resource_state.resources[0].status.state == "Clients Running" - -- block: - - block: - - name: P27 Waiting for pods to complete.... - k8s_facts: - kind: pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - app = uperf-bench-client-{{ trunc_uuid }} - register: client_pods - - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Cleanup - complete: false - #when: "workload_args.pair|default('1')|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" - when: "num_server_pods|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" - when: resource_kind == "pod" - - - block: - - - name: V28 get complete - command: "redis-cli get complete" - register: complete_status - - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Cleanup - complete: false - when: complete_status.stdout == "true" - when: resource_kind == "vm" - - when: resource_state.resources[0].status.state == "Running" - -- block: + - include_tasks: wait_client_ready.yml + when: resource_state.resources[0].status.state == "Waiting for Clients" - - block: - - name: P29 Get Server Pods - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods + - include_tasks: send_client_run_signal.yml + #when: resource_state.resources[0].status.state == "Clients Running" + when: resource_state.resources[0].status.state == "Clients" - - name: P30 Pod names - to clean - set_fact: - clean_pods: | - [ - {% for item in server_pods.resources %} - "{{ item['metadata']['name'] }}", - {% endfor %} - ] + - include_tasks: 
wait_client_done.yml + when: resource_state.resources[0].status.state == "Running" - - name: P31 Cleanup run - k8s: - kind: pod - api_version: v1 - namespace: '{{ operator_namespace }}' - state: absent - name: "{{ item }}" - with_items: "{{ clean_pods }}" - when: cleanup - when: resource_kind == "pod" + - include_tasks: cleanup.yml + when: resource_state.resources[0].status.state == "Cleanup" - - name: delete redis keys - command: "redis-cli del {{ item }}" - loop: - - "{{ trunc_uuid }}" - - "clients-{{ trunc_uuid }}" + when: resource_kind == "vm" - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Complete - complete: true - when: resource_state.resources[0].status.state == "Cleanup" diff --git a/roles/uperf/tasks/next_set.yml b/roles/uperf/tasks/next_set.yml new file mode 100644 index 000000000..7d7e4e544 --- /dev/null +++ b/roles/uperf/tasks/next_set.yml @@ -0,0 +1,59 @@ +--- + +- block: + - name: debug + command: "redis-cli set task next_set" + + - name: read last group_node_count + command: "redis-cli get group_node_count" + register: redis_out + + - name: Compute next run group size + set_fact: + group_node_count: "{{redis_out.stdout|int + 1}}" + + - block: + # + # We have passed max_node - All done + # + - name: Unpause pods to complete + command: "redis-cli set start done" + + - name: Change state to proceed to exit + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Set Running + + when: "group_node_count|int > workload_args.max_node|int" + + - block: + # + # More round(s) to run. 
+ # + + - name: Send redis restart signal + command: "redis-cli set start restart" + + - name: Reset redis num_completion + command: "redis-cli set num_completion 0" + + - name: Set next run group_node_count + command: "redis-cli set group_node_count {{group_node_count}}" + + - name: Change state to run next round + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Clients Running + + when: "group_node_count|int <= workload_args.max_node|int" + + when: resource_state.resources[0].status.state == "Run Next Set" + diff --git a/roles/uperf/tasks/run_a_set.yml b/roles/uperf/tasks/run_a_set.yml new file mode 100644 index 000000000..1b3114598 --- /dev/null +++ b/roles/uperf/tasks/run_a_set.yml @@ -0,0 +1,27 @@ +--- + +- block: + # + # Entry Condition: + # 1. A previous task has set 'group_node_count' in redis + # 2. All clients are polling for 'start' to run their workload + # Output: Clients with idx <= group_node_count will run + # + + - name: Signal group to run + command: "redis-cli set start true " + + - name: Update state to "Set Running" + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Set Running + + - name: debug 2 + command: "redis-cli set state Set_Running" + + when: resource_state.resources[0].status.state == "Clients Running" + diff --git a/roles/uperf/tasks/send_client_run_signal.yml b/roles/uperf/tasks/send_client_run_signal.yml new file mode 100644 index 000000000..043d4c5cd --- /dev/null +++ b/roles/uperf/tasks/send_client_run_signal.yml @@ -0,0 +1,18 @@ +--- + +- block: + + - name: A25 Signal workload + command: "redis-cli set start true" + + - name: A26 Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name
}}" + namespace: "{{ operator_namespace }}" + status: + state: "Running" + + when: resource_state.resources[0].status.state == "Clients Running" + diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml new file mode 100644 index 000000000..64b326b75 --- /dev/null +++ b/roles/uperf/tasks/setup.yml @@ -0,0 +1,71 @@ +--- + +- name: A1 Get current state + k8s_facts: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: '{{ meta.name }}' + namespace: '{{ operator_namespace }}' + register: resource_state + +- operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Building + complete: false + when: resource_state.resources[0].status.state is not defined + +- name: A3 Get current state - If it has changed + k8s_facts: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: '{{ meta.name }}' + namespace: '{{ operator_namespace }}' + register: resource_state + +- name: A4 Capture operator information + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - name = benchmark-operator + register: bo + +- name: A5 Capture ServiceIP + k8s_facts: + kind: Service + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: serviceip + when: workload_args.serviceip is defined and workload_args.serviceip + +# +# "pin" mode exists prior to "scale" mode. 
If "pin: true", we will +# do the old way using pin_server and pin_client +# +- name: A6 Record num server (V1) pods using workload_args.pair - TBD + set_fact: + num_server_pods: "{{ workload_args.pair | default('1')|int }}" + when: workload_args.max_node is not defined + +- name: A7 V2 scale run - Setup eligible node (static) list - Tobe replaced by real node list builder + set_fact: + worker_node_list: "{{workload_args.node_list[0]}}" + when: workload_args.max_node is not defined + +- name: A7 V1 non-scale Setup eligible node (static) list - Tobe replaced by real node list builder + set_fact: + worker_node_list: "{{workload_args.node_list}}" + when: workload_args.max_node is defined + +- name: A8 V2 Record num server pods using new worker_node_list + set_fact: + num_server_pods: "{{ worker_node_list|length * workload_args.density | default('1')|int }}" + when: workload_args.max_node is defined + diff --git a/roles/uperf/tasks/start_client.yml b/roles/uperf/tasks/start_client.yml new file mode 100644 index 000000000..220ecde4f --- /dev/null +++ b/roles/uperf/tasks/start_client.yml @@ -0,0 +1,76 @@ +--- + +- block: + +### kind + - name: A17 Get pod info + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods + + - name: A18 Generate uperf xml files + k8s: + definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml }}" + + - block: # Starting Clients" + - name: P19 Start Client(s) w/o serviceIP + k8s: + definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" + with_items: + - "{{ server_pods.resources }}" + when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 + + # + # Each server annotates a "node_idx". Each peer client will + # derive its affinity according the 'colocate' variable. 
+ # + + - name: P20 Start Client(s) with serviceIP + k8s: + definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" + with_items: + - "{{ serviceip.resources }}" + when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 + + when: resource_kind == "pod" + +### kind + - block: + + - name: V19 Wait for vms to be running.... + k8s_facts: + kind: VirtualMachineInstance + api_version: kubevirt.io/v1alpha3 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_vms + + + - name: V20 Generate uperf test files + k8s: + definition: "{{ lookup('template', 'configmap_script.yml.j2') | from_yaml }}" + with_items: "{{ server_vms.resources }}" + + - name: V21 Start Client(s) + k8s: + definition: "{{ lookup('template', 'workload_vm.yml.j2') | from_yaml }}" + with_items: "{{ server_vms.resources }}" + when: server_vms.resources|length > 0 + + when: resource_kind == "vm" + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Waiting for Clients + + when: resource_state.resources[0].status.state == "Starting Clients" + diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml new file mode 100644 index 000000000..e54b5b02b --- /dev/null +++ b/roles/uperf/tasks/start_server.yml @@ -0,0 +1,104 @@ +--- + + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ + # Start servers + #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +- block: + - name: init group node count + set_fact: + group_node_count: "{{ workload_args.min_node | default('2')|int }}" + # + # TBD HN range check, colocate and other checks + # + + - name: init redis + command: "redis-cli set group_mark 0" + - name: init redis + command: "redis-cli set start 0" + - name: init redis + command: "redis-cli set num_completion 0" + - name: init redis + command: "redis-cli 
set group_node_count {{ group_node_count }}" + + - name: P8 Create service for server pods + k8s: + definition: "{{ lookup('template', 'service.yml.j2') | from_yaml }}" + with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} + when: workload_args.serviceip is defined and workload_args.serviceip + + - name: P9 Start Server(s) + k8s: + definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" + register: servers + with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} + when: workload_args.max_node is not defined + +############ + + - name: P10 V2 Start Server(s) - total = eligible nodes * density + k8s: + definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" + with_nested: + - "{{ range(0, worker_node_list|length| default('0'|int)) | list }}" + - "{{ range(0, workload_args.density | default('1'|int)) | list }}" + # + # Each server annotates a "node_idx" which will allow its peer client + # to derive its affinity according the 'colocate' variable + # + +############ + + - name: P11 Wait for pods to be running.... + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods + + - name: P12 Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: "Starting Servers" + + when: resource_state.resources[0].status.state == "Building" and resource_kind == "pod" + +########### +# VM remains scale agnostic for now +########### +- block: + + - name: V11 Start Server(s) + k8s: + definition: "{{ lookup('template', 'server_vm.yml.j2') | from_yaml }}" + register: servers + with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} + + - name: V12 Wait for vms to be running.... 
+ k8s_facts: + kind: VirtualMachineInstance + api_version: kubevirt.io/v1alpha3 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_vms + + - name: V13 Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: "Starting Servers" + + when: resource_state.resources[0].status.state == "Building" and resource_kind == "vm" + +########### + + diff --git a/roles/uperf/tasks/wait_client_done.yml b/roles/uperf/tasks/wait_client_done.yml new file mode 100644 index 000000000..1ff18c530 --- /dev/null +++ b/roles/uperf/tasks/wait_client_done.yml @@ -0,0 +1,45 @@ +--- + +- block: +#### kind + - block: + - name: P27 Waiting for pods to complete.... + k8s_facts: + kind: pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - app = uperf-bench-client-{{ trunc_uuid }} + register: client_pods + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Cleanup + complete: false + when: "num_server_pods|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" + when: resource_kind == "pod" + +#### kind + - block: + + - name: V28 get complete + command: "redis-cli get complete" + register: complete_status + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Cleanup + complete: false + when: complete_status.stdout == "true" + when: resource_kind == "vm" + + when: resource_state.resources[0].status.state == "Running" + diff --git a/roles/uperf/tasks/wait_client_ready.yml b/roles/uperf/tasks/wait_client_ready.yml new file mode 100644 
index 000000000..e691270da --- /dev/null +++ b/roles/uperf/tasks/wait_client_ready.yml @@ -0,0 +1,55 @@ +--- +- block: + +##### kind + - block: # Pod block + - name: P22 Get client pod status + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - app = uperf-bench-client-{{ trunc_uuid }} + register: client_pods + + - name: P23 Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Clients Running + when: "num_server_pods|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (client_pods | json_query('resources[].status.podIP')|length)" + + when: resource_kind == "pod" + +##### kind + - block: + + - name: V22 set complete to false + command: "redis-cli set complete false" + + - name: V23 Get client vm status + k8s_facts: + kind: VirtualMachineInstance + api_version: kubevirt.io/v1alpha3 + namespace: '{{ operator_namespace }}' + label_selectors: + - app = uperf-bench-client-{{ trunc_uuid }} + register: client_vms + + - name: V24 Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Clients Running + when: "workload_args.pair|default('1')|int == client_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (client_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + + when: resource_kind == "vm" + + when: resource_state.resources[0].status.state == "Waiting for Clients" + diff --git a/roles/uperf/tasks/wait_server_ready.yml b/roles/uperf/tasks/wait_server_ready.yml new file mode 100644 index 000000000..5ec01046b --- /dev/null +++ 
b/roles/uperf/tasks/wait_server_ready.yml @@ -0,0 +1,55 @@ +--- +########### kind +- block: + + - name: P13 Get server pods + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods + + - name: P14 Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: "Starting Clients" + when: "num_server_pods|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" + + when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "pod" + +######## kind +- block: + + - name: V14 Wait for vms to be running.... + k8s_facts: + kind: VirtualMachineInstance + api_version: kubevirt.io/v1alpha3 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_vms + + - name: V15 Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: "Starting Clients" + when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + + - name: V16 blocking client from running uperf + command: "redis-cli set {{ trunc_uuid }} false" + with_items: "{{ server_vms.resources }}" + when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + + when: 
resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "vm" and workload_args.pair|default('1')|int|int == 1 + + diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml new file mode 100644 index 000000000..e608c4070 --- /dev/null +++ b/roles/uperf/tasks/wait_set_done.yml @@ -0,0 +1,31 @@ +--- + +- block: + - block: + + - name: debug + command: "redis-cli set task wait_set_done" + + - name: read pod completion count + command: "redis-cli get num_completion" + register: num_completion + + - name: read group_node_count + command: "redis-cli get group_node_count" + register: group_node_count + + - name: debug + command: "redis-cli set read_num_completion {{ num_completion.stdout }}" + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Run Next Set + when: "num_completion.stdout|int == group_node_count.stdout|int * workload_args.density|default('1')|int" + when: resource_kind == "pod" + + when: resource_state.resources[0].status.state == "Set Running" + diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 155b52a7a..1f1987c94 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -99,7 +99,8 @@ spec: export ips=$(hostname -I); export num_pairs={{workload_args.pair}}; while true; do - if [[ $(redis-cli -h {{bo.resources[0].status.podIP}} get start) =~ 'true' ]]; then + state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); + if [[ $state =~ 'true' ]]; then {% for test in workload_args.test_types %} {% for proto in workload_args.protos %} {% for size in workload_args.sizes %} @@ -121,6 +122,23 @@ spec: {% endfor %} {% endfor %} {% endfor %} + redis-cli -h {{bo.resources[0].status.podIP}} incr num_completion; + while true; do + state=$(redis-cli -h {{bo.resources[0].status.podIP}} get 
start); + if [[ $state =~ 'restart' ]]; then + break; + elif [[ $state =~ 'done' ]]; then + break; + else + continue; + fi; + done; + if [[ $state =~ 'restart' ]]; then + continue; + fi; + + elif [[ $state =~ 'done' ]]; then + break; else continue; fi; diff --git a/roles/uperf/vars/main.yml b/roles/uperf/vars/main.yml index 6c879b5c3..4a156a383 100644 --- a/roles/uperf/vars/main.yml +++ b/roles/uperf/vars/main.yml @@ -2,3 +2,6 @@ # vars file for bench cleanup: true worker_node_list: [] +group_node_count: 0 +max_node_count: 0 + From 5ff2e2bff152c2c5f8cdb9d9c724cc9d10f152b0 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Wed, 6 Jan 2021 17:08:01 +0000 Subject: [PATCH 06/41] Make iteration from min_node to max_node working --- roles/uperf/tasks/start_server.yml | 2 +- roles/uperf/templates/workload.yml.j2 | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml index e54b5b02b..04f10d0df 100644 --- a/roles/uperf/tasks/start_server.yml +++ b/roles/uperf/tasks/start_server.yml @@ -6,7 +6,7 @@ - block: - name: init group node count set_fact: - group_node_count: "{{ workload_args.min_node | default('2')|int }}" + group_node_count: "{{ workload_args.min_node | default('1')|int }}" # # TBD HN range check, colocate and other checks # diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 1f1987c94..f0f19096e 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -96,11 +96,19 @@ spec: export networkpolicy={{workload_args.networkpolicy}}; {% endif %} export hostnet={{workload_args.hostnetwork}}; + mynode={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; export ips=$(hostname -I); export num_pairs={{workload_args.pair}}; + node_limit=0; while true; do state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); if [[ $state =~ 'true' ]]; then + node_limit=$(redis-cli -h 
{{bo.resources[0].status.podIP}} get group_node_count); + if [[ $node_limit -le $mynode ]]; then + continue; + fi; + + {% for test in workload_args.test_types %} {% for proto in workload_args.protos %} {% for size in workload_args.sizes %} From ebba29c52b58f24d70d204b246f8586887bf949e Mon Sep 17 00:00:00 2001 From: Logan Blyth Date: Thu, 7 Jan 2021 12:51:26 -0500 Subject: [PATCH 07/41] retrieve list of nodes with role worker --- deploy/25_role.yaml | 13 +++++++++++++ deploy/35_role_binding.yaml | 13 +++++++++++++ roles/uperf/tasks/setup.yml | 31 ++++++++++++++++++++++--------- 3 files changed, 48 insertions(+), 9 deletions(-) create mode 100644 deploy/25_role.yaml create mode 100644 deploy/35_role_binding.yaml diff --git a/deploy/25_role.yaml b/deploy/25_role.yaml new file mode 100644 index 000000000..62b2971ab --- /dev/null +++ b/deploy/25_role.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + creationTimestamp: null + name: benchmark-operator +rules: +- apiGroups: + - "*" + resources: + - "*" + verbs: + - '*' + diff --git a/deploy/35_role_binding.yaml b/deploy/35_role_binding.yaml new file mode 100644 index 000000000..cd0b952a9 --- /dev/null +++ b/deploy/35_role_binding.yaml @@ -0,0 +1,13 @@ +kind: ClusterRoleBinding +apiVersion: rbac.authorization.k8s.io/v1 +metadata: + name: benchmark-operator +subjects: +- kind: ServiceAccount + name: benchmark-operator + namespace: my-ripsaw +roleRef: + kind: ClusterRole + name: benchmark-operator + apiGroup: rbac.authorization.k8s.io + diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 64b326b75..8b2c47b14 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -35,6 +35,19 @@ - name = benchmark-operator register: bo +- name: List Nodes Labeled as Workers + k8s_info: + api_version: v1 + kind: Node + label_selectors: + - "node-role.kubernetes.io/worker=" + register: node_list + no_log: True + +- name: Isolate Worker Role Hostnames + 
register: + worker_node_list: "{{ node_list | json_query('resources[].metadata.name') | list }}" + - name: A5 Capture ServiceIP k8s_facts: kind: Service @@ -54,15 +67,15 @@ num_server_pods: "{{ workload_args.pair | default('1')|int }}" when: workload_args.max_node is not defined -- name: A7 V2 scale run - Setup eligible node (static) list - Tobe replaced by real node list builder - set_fact: - worker_node_list: "{{workload_args.node_list[0]}}" - when: workload_args.max_node is not defined - -- name: A7 V1 non-scale Setup eligible node (static) list - Tobe replaced by real node list builder - set_fact: - worker_node_list: "{{workload_args.node_list}}" - when: workload_args.max_node is defined + #- name: A7 V2 scale run - Setup eligible node (static) list - Tobe replaced by real node list builder + # set_fact: + # worker_node_list: "{{workload_args.node_list[0]}}" + # when: workload_args.max_node is not defined + # + #- name: A7 V1 non-scale Setup eligible node (static) list - Tobe replaced by real node list builder + # set_fact: + # worker_node_list: "{{workload_args.node_list}}" + # when: workload_args.max_node is defined - name: A8 V2 Record num server pods using new worker_node_list set_fact: From 445733e9d883c1920fda83f34e4d087c00ede8ed Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 7 Jan 2021 19:34:11 +0000 Subject: [PATCH 08/41] integrate with worker_node_list builder --- roles/uperf/tasks/setup.yml | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 8b2c47b14..ee3400721 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -45,9 +45,13 @@ no_log: True - name: Isolate Worker Role Hostnames - register: + set_fact: worker_node_list: "{{ node_list | json_query('resources[].metadata.name') | list }}" +- name: Exlude unhealthy nodes i.e. 
low diskspace + set_fact: + worker_node_list: "{{ worker_node_list | reject('search', workload_args.exluded_node) | list }}" + - name: A5 Capture ServiceIP k8s_facts: kind: Service From ee3d23130d18d040161da279a71f9206eb31d807 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 7 Jan 2021 20:56:25 +0000 Subject: [PATCH 09/41] fix when no exclude_node is defined --- roles/uperf/tasks/setup.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index ee3400721..50087eeb2 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -51,6 +51,7 @@ - name: Exlude unhealthy nodes i.e. low diskspace set_fact: worker_node_list: "{{ worker_node_list | reject('search', workload_args.exluded_node) | list }}" + when: workload_args.exluded_node is defined - name: A5 Capture ServiceIP k8s_facts: From 74018d51eae86503615478ea59859598adf63220 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 7 Jan 2021 22:25:42 +0000 Subject: [PATCH 10/41] make the excluded_node being a list i.e excluded_node: [node1 node2 ...] --- roles/uperf/tasks/setup.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 50087eeb2..2c849e8a3 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -50,8 +50,8 @@ - name: Exlude unhealthy nodes i.e. low diskspace set_fact: - worker_node_list: "{{ worker_node_list | reject('search', workload_args.exluded_node) | list }}" - when: workload_args.exluded_node is defined + worker_node_list: "{{ worker_node_list | difference(workload_args.excluded_node[0]) }}" + when: workload_args.excluded_node is defined - name: A5 Capture ServiceIP k8s_facts: From cad504d08053715f69891882058fd242ab27ae5b Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 14 Jan 2021 14:12:23 +0000 Subject: [PATCH 11/41] Super Model-3, "model S" seems working. 
Model-S supports "node_range: [n, m]" and "density_range: [x, y]" to specify enumeration by both dimension. --- roles/uperf/tasks/init.yml | 31 ++++++++++ roles/uperf/tasks/main.yml | 8 +-- roles/uperf/tasks/next_set.yml | 80 +++++++++++++++++++++---- roles/uperf/tasks/run_a_set.yml | 4 +- roles/uperf/tasks/setup.yml | 36 ++++++++++- roles/uperf/tasks/start_client.yml | 2 + roles/uperf/tasks/start_server.yml | 23 +++---- roles/uperf/tasks/wait_client_done.yml | 3 + roles/uperf/tasks/wait_client_ready.yml | 3 + roles/uperf/tasks/wait_server_ready.yml | 7 +++ roles/uperf/tasks/wait_set_done.yml | 13 ++-- roles/uperf/templates/server.yml.j2 | 1 + roles/uperf/templates/workload.yml.j2 | 10 +++- roles/uperf/vars/main.yml | 9 +++ 14 files changed, 187 insertions(+), 43 deletions(-) create mode 100644 roles/uperf/tasks/init.yml diff --git a/roles/uperf/tasks/init.yml b/roles/uperf/tasks/init.yml new file mode 100644 index 000000000..bbb839b47 --- /dev/null +++ b/roles/uperf/tasks/init.yml @@ -0,0 +1,31 @@ +--- + + +- name: Clear start flag + command: "redis-cli set start 0" + +- name: Clear num_completion + command: "redis-cli set num_completion 0" + +# Node +- name: Init node_hi_idx + command: "redis-cli set node_hi_idx {{ node_hi_idx }}" + +- name: Init node_low_idx + command: "redis-cli set node_low_idx {{ node_low_idx }}" + + # Pod +- name: Init pod_hi_idx + command: "redis-cli set pod_hi_idx {{ pod_hi_idx }}" + +- name: Init pod_low_idx + command: "redis-cli set pod_low_idx {{ pod_low_idx }}" + + # Starting Node and Pod +- name: Init node_idx + command: "redis-cli set node_idx {{ node_low_idx }}" + +- name: Init pod_idx + command: "redis-cli set pod_idx {{ pod_low_idx }}" + + diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index f9fa3d045..0c03f6921 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -16,6 +16,8 @@ - include_tasks: wait_client_ready.yml when: resource_state.resources[0].status.state == "Waiting for 
Clients" + # LOOP BEGIN + # - include_tasks: run_a_set.yml when: resource_state.resources[0].status.state == "Clients Running" @@ -26,10 +28,8 @@ - include_tasks: next_set.yml when: resource_state.resources[0].status.state == "Run Next Set" # will loop back to "Client Running" state, or FALLTHRU to "Running" state below and finish - -# - include_tasks: send_client_run_signal.yml -# when: resource_state.resources[0].status.state == "Clients Running" - + # + # LOOP END - include_tasks: wait_client_done.yml when: resource_state.resources[0].status.state == "Running" diff --git a/roles/uperf/tasks/next_set.yml b/roles/uperf/tasks/next_set.yml index 7d7e4e544..c20162ade 100644 --- a/roles/uperf/tasks/next_set.yml +++ b/roles/uperf/tasks/next_set.yml @@ -1,20 +1,73 @@ --- - +# +# This module logically implements RE-ENTRANT double for loops +# with_items: +# range (node_low_idx, node_hi_idx) +# range (pod_low_idx, pod_hi_idx) +# Each iteration executes one item, and each re-entrance +# continues where it left off. 
+# - block: - name: debug command: "redis-cli set task next_set" + - name: + set_fact: + all_run_done: False - - name: read last group_node_count - command: "redis-cli get group_node_count" - register: redis_out + - name: read previous pod_idx + command: "redis-cli get pod_idx" + register: redis_pod_idx - - name: Compute next run group size + - name: Increment pod_idx set_fact: - group_node_count: "{{redis_out.stdout|int + 1}}" + pod_idx: "{{redis_pod_idx.stdout|int + 1}}" + + - name: read pod_hi_idx + command: "redis-cli get pod_hi_idx" + register: redis_pod_hi_idx + + - name: Read prev node_idx + command: "redis-cli get node_idx" + register: redis_node_idx + + - name: node_idx + set_fact: + node_idx: "{{redis_node_idx.stdout|int}}" + + - block: + # + # This block starts a new node loop + # + - name: Increment node_idx + set_fact: + node_idx: "{{node_idx|int + 1}}" + + - name: Read node_hi_idx + command: "redis-cli get node_hi_idx" + register: redis_node_hi_idx + + - name: Check node loop for ending condition + set_fact: + all_run_done: True + when: "node_idx|int > redis_node_hi_idx.stdout|int" + + # + # Reset pod_idx AFTER node_idx tasks above, else cond change + # causes it to skip node_idx tasks + # + - name: read pod_low_idx + command: "redis-cli get pod_low_idx" + register: redis_pod_low_idx + + - name: Reset pod_idx to pod_low_idx + set_fact: + pod_idx: "{{redis_pod_low_idx.stdout|int}}" + + when: "pod_idx|int > redis_pod_hi_idx.stdout|int" - block: # - # We have passed max_node - All done + # All done # - name: Unpause pods to complete command: "redis-cli set start done" @@ -28,21 +81,24 @@ status: state: Set Running - when: "group_node_count|int > workload_args.max_node|int" + when: all_run_done == True - block: # # More round(s) to run. 
# - - name: Send redis restart signal command: "redis-cli set start restart" - name: Reset redis num_completion command: "redis-cli set num_completion 0" - - name: Set next run group_node_count - command: "redis-cli set group_node_count {{group_node_count}}" + # New node_idx value on new node loop ONLY. But we are lazy, just write every time. + - name: Set next run node_idx + command: "redis-cli set node_idx {{node_idx}}" + + - name: Set next run pod_idx + command: "redis-cli set pod_idx {{pod_idx}}" - name: Change state to run next round operator_sdk.util.k8s_status: @@ -53,7 +109,7 @@ status: state: Clients Running - when: "group_node_count|int <= workload_args.max_node|int" + when: all_run_done == False when: resource_state.resources[0].status.state == "Run Next Set" diff --git a/roles/uperf/tasks/run_a_set.yml b/roles/uperf/tasks/run_a_set.yml index 1b3114598..c14dc6c1e 100644 --- a/roles/uperf/tasks/run_a_set.yml +++ b/roles/uperf/tasks/run_a_set.yml @@ -3,9 +3,9 @@ - block: # # Entry Condition: - # 1. A previous task has set 'group_node_count' in redis + # 1. A previous task has set 'node_idx' in redis # 2. All cliest are polling for 'start' to run its workoad - # Output: Clients with idx <= group_node_count will run + # Output: Clients with idx <= node_idx will run # - name: Signal group to run diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 2c849e8a3..0db28dece 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -48,7 +48,7 @@ set_fact: worker_node_list: "{{ node_list | json_query('resources[].metadata.name') | list }}" -- name: Exlude unhealthy nodes i.e. low diskspace +- name: Exclude unhealthy nodes i.e. 
low diskspace set_fact: worker_node_list: "{{ worker_node_list | difference(workload_args.excluded_node[0]) }}" when: workload_args.excluded_node is defined @@ -63,6 +63,38 @@ register: serviceip when: workload_args.serviceip is defined and workload_args.serviceip +# +# Compute node and pod limits using CR params while taking into account +# of the actual number of nodes available in the system +# +- name: init node idx + set_fact: + node_idx: "{{ workload_args.min_node | default('0')|int }}" + # + # TBD HN range check, colocate and other checks + # + +- name: init pod low idx + set_fact: + pod_low_idx: "{{ workload_args.density_range[0]|int | default('1')|int - 1 }}" + +- name: init pod hi idx + set_fact: + pod_hi_idx: "{{ workload_args.density_range[1]|int | default('1')|int - 1 }}" + +- name: init node low idx + set_fact: + node_low_idx: "{{ workload_args.node_range[0]|int | default('1')|int - 1 }}" + +- name: init node hi idx + set_fact: + node_hi_idx: "{{ workload_args.node_range[1]|int | default('1')|int - 1 }}" + +- name: Adjust node_hi_idx if cluster has less nodes + set_fact: + node_hi_idx: "{{ worker_node_list|length| default('0')|int -1 }}" + when: "node_hi_idx|int >= worker_node_list|length| default('0')|int " + # # "pin" mode exists prior to "scale" mode. 
If "pin: true", we will # do the old way using pin_server and pin_client @@ -84,6 +116,6 @@ - name: A8 V2 Record num server pods using new worker_node_list set_fact: - num_server_pods: "{{ worker_node_list|length * workload_args.density | default('1')|int }}" + num_server_pods: "{{ (node_hi_idx|int+1) * (pod_hi_idx|int+1) }}" when: workload_args.max_node is defined diff --git a/roles/uperf/tasks/start_client.yml b/roles/uperf/tasks/start_client.yml index 220ecde4f..e40bc068e 100644 --- a/roles/uperf/tasks/start_client.yml +++ b/roles/uperf/tasks/start_client.yml @@ -40,6 +40,8 @@ ### kind - block: + - name: debug + command: "redis-cli set state Starting_Clients" - name: V19 Wait for vms to be running.... k8s_facts: diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml index 04f10d0df..1397ae27f 100644 --- a/roles/uperf/tasks/start_server.yml +++ b/roles/uperf/tasks/start_server.yml @@ -4,21 +4,9 @@ # Start servers #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - block: - - name: init group node count - set_fact: - group_node_count: "{{ workload_args.min_node | default('1')|int }}" - # - # TBD HN range check, colocate and other checks - # - - name: init redis - command: "redis-cli set group_mark 0" - - name: init redis - command: "redis-cli set start 0" - - name: init redis - command: "redis-cli set num_completion 0" - - name: init redis - command: "redis-cli set group_node_count {{ group_node_count }}" + - include_tasks: init.yml + - name: P8 Create service for server pods k8s: @@ -39,8 +27,11 @@ k8s: definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" with_nested: - - "{{ range(0, worker_node_list|length| default('0'|int)) | list }}" - - "{{ range(0, workload_args.density | default('1'|int)) | list }}" + - "{{ range(0, node_hi_idx|int +1) | list }}" + - "{{ range(0, pod_hi_idx|int +1) | list }}" + + #- "{{ range(0, worker_node_list|length| default('0'|int)) | list }}" + #- "{{ range(0, workload_args.density | default('1'|int)) 
| list }}" # # Each server annotates a "node_idx" which will allow its peer client # to derive its affinity according the 'colocate' variable diff --git a/roles/uperf/tasks/wait_client_done.yml b/roles/uperf/tasks/wait_client_done.yml index 1ff18c530..47f2b3b82 100644 --- a/roles/uperf/tasks/wait_client_done.yml +++ b/roles/uperf/tasks/wait_client_done.yml @@ -1,6 +1,9 @@ --- - block: + - name: debug + command: "redis-cli set task Running" + #### kind - block: - name: P27 Waiting for pods to complete.... diff --git a/roles/uperf/tasks/wait_client_ready.yml b/roles/uperf/tasks/wait_client_ready.yml index e691270da..8668a7e95 100644 --- a/roles/uperf/tasks/wait_client_ready.yml +++ b/roles/uperf/tasks/wait_client_ready.yml @@ -3,6 +3,9 @@ ##### kind - block: # Pod block + - name: debug + command: "redis-cli set state Waiting_for_Clients" + - name: P22 Get client pod status k8s_facts: kind: Pod diff --git a/roles/uperf/tasks/wait_server_ready.yml b/roles/uperf/tasks/wait_server_ready.yml index 5ec01046b..f86601f8c 100644 --- a/roles/uperf/tasks/wait_server_ready.yml +++ b/roles/uperf/tasks/wait_server_ready.yml @@ -2,6 +2,9 @@ ########### kind - block: + - name: debug + command: "redis-cli set state Starting_Servers" + - name: P13 Get server pods k8s_facts: kind: Pod @@ -11,6 +14,10 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods + - name: debug + debug: + msg: "{{ num_server_pods }}" + - name: P14 Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml index e608c4070..26dae01f9 100644 --- a/roles/uperf/tasks/wait_set_done.yml +++ b/roles/uperf/tasks/wait_set_done.yml @@ -10,9 +10,13 @@ command: "redis-cli get num_completion" register: num_completion - - name: read group_node_count - command: "redis-cli get group_node_count" - register: group_node_count + - name: read node_idx + command: "redis-cli get node_idx" + 
register: node_idx + + - name: read pod_idx + command: "redis-cli get pod_idx" + register: pod_idx - name: debug command: "redis-cli set read_num_completion {{ num_completion.stdout }}" @@ -24,7 +28,8 @@ namespace: "{{ operator_namespace }}" status: state: Run Next Set - when: "num_completion.stdout|int == group_node_count.stdout|int * workload_args.density|default('1')|int" + when: "num_completion.stdout|int == ((node_idx.stdout|int+1) * (pod_idx.stdout|int +1))" + #when: "num_completion.stdout|int == node_idx.stdout|int * workload_args.density|default('1')|int" when: resource_kind == "pod" when: resource_state.resources[0].status.state == "Set Running" diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 8a67fbf14..9580eadfd 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -30,6 +30,7 @@ spec: k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.server}} {% endif %} node_idx: '{{ item[0] }}' + pod_idx: '{{ item[1] }}' spec: {% if workload_args.runtime_class is defined %} runtimeClassName: "{{ workload_args.runtime_class }}" diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index f0f19096e..4cb93a107 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -96,18 +96,22 @@ spec: export networkpolicy={{workload_args.networkpolicy}}; {% endif %} export hostnet={{workload_args.hostnetwork}}; - mynode={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; + my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; + my_pod_idx={{ (item['metadata']['annotations']['pod_idx'] | from_json) }}; export ips=$(hostname -I); export num_pairs={{workload_args.pair}}; node_limit=0; + pod_limit=0; while true; do state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); if [[ $state =~ 'true' ]]; then - node_limit=$(redis-cli -h {{bo.resources[0].status.podIP}} get 
group_node_count); - if [[ $node_limit -le $mynode ]]; then + node_limit=$(redis-cli -h {{bo.resources[0].status.podIP}} get node_idx); + pod_limit=$(redis-cli -h {{bo.resources[0].status.podIP}} get pod_idx); + if [[ $my_node_idx -gt $node_limit || $my_pod_idx -gt $pod_limit ]]; then continue; fi; + /bin/echo 'UPERF scale config: num_node=' $node_limit+1' density= {{ pod_idx }}'; {% for test in workload_args.test_types %} {% for proto in workload_args.protos %} diff --git a/roles/uperf/vars/main.yml b/roles/uperf/vars/main.yml index 4a156a383..4490915c1 100644 --- a/roles/uperf/vars/main.yml +++ b/roles/uperf/vars/main.yml @@ -5,3 +5,12 @@ worker_node_list: [] group_node_count: 0 max_node_count: 0 +pod_low_idx: 1 +pod_hi_idx: 1 +node_low_idx: 1 +node_hi_idx: 1 + +node_idx: 0 +pod_idx: 0 + +all_run_done: false From c38a70d145af10e95d40377044c22b402ef345e1 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Fri, 15 Jan 2021 12:54:20 +0000 Subject: [PATCH 12/41] Move pod_hi/low_idx and node_hi/low_idx out of redis and into benchmark context. 
Fix last commit which introduced node_range and density_range but inadvertedly still used min_node and max_node for condition check i.e "when: max_node is defined" --- .../crds/ripsaw_v1alpha1_ripsaw_crd.yaml | 13 +++++++ roles/uperf/tasks/init.yml | 35 +++++++++-------- roles/uperf/tasks/next_set.yml | 38 +++++-------------- roles/uperf/tasks/run_a_set.yml | 4 +- roles/uperf/tasks/setup.yml | 15 ++------ roles/uperf/tasks/start_server.yml | 4 +- roles/uperf/tasks/wait_client_done.yml | 2 - roles/uperf/tasks/wait_set_done.yml | 16 +------- roles/uperf/templates/server.yml.j2 | 6 +-- roles/uperf/templates/workload.yml.j2 | 2 +- 10 files changed, 52 insertions(+), 83 deletions(-) diff --git a/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml b/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml index 417f46c0b..2aa13a7a3 100644 --- a/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml +++ b/resources/crds/ripsaw_v1alpha1_ripsaw_crd.yaml @@ -123,6 +123,19 @@ spec: type: string cerberus: type: string + pod_hi_idx: + type: string + pod_low_idx: + type: string + node_hi_idx: + type: string + node_low_idx: + type: string + pod_idx: + type: string + node_idx: + type: string + additionalPrinterColumns: - name: Type type: string diff --git a/roles/uperf/tasks/init.yml b/roles/uperf/tasks/init.yml index bbb839b47..186efade3 100644 --- a/roles/uperf/tasks/init.yml +++ b/roles/uperf/tasks/init.yml @@ -7,25 +7,24 @@ - name: Clear num_completion command: "redis-cli set num_completion 0" -# Node -- name: Init node_hi_idx - command: "redis-cli set node_hi_idx {{ node_hi_idx }}" - -- name: Init node_low_idx - command: "redis-cli set node_low_idx {{ node_low_idx }}" - - # Pod -- name: Init pod_hi_idx - command: "redis-cli set pod_hi_idx {{ pod_hi_idx }}" - -- name: Init pod_low_idx - command: "redis-cli set pod_low_idx {{ pod_low_idx }}" - - # Starting Node and Pod -- name: Init node_idx +- name: Init node and pod indices in benchmark context + operator_sdk.util.k8s_status: + api_version: 
ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + pod_hi_idx: "{{pod_hi_idx}}" + pod_low_idx: "{{pod_low_idx}}" + node_hi_idx: "{{node_hi_idx}}" + node_low_idx: "{{node_low_idx}}" + node_idx: "{{node_low_idx}}" + pod_idx: "{{pod_low_idx}}" + +# Set redis starting Node and Pod +- name: Set starting node_idx command: "redis-cli set node_idx {{ node_low_idx }}" -- name: Init pod_idx +- name: Set redis starting pod_idx command: "redis-cli set pod_idx {{ pod_low_idx }}" - diff --git a/roles/uperf/tasks/next_set.yml b/roles/uperf/tasks/next_set.yml index c20162ade..e7977235c 100644 --- a/roles/uperf/tasks/next_set.yml +++ b/roles/uperf/tasks/next_set.yml @@ -8,31 +8,17 @@ # continues where it left off. # - block: - - name: debug - command: "redis-cli set task next_set" - name: set_fact: all_run_done: False - - name: read previous pod_idx - command: "redis-cli get pod_idx" - register: redis_pod_idx - - name: Increment pod_idx set_fact: - pod_idx: "{{redis_pod_idx.stdout|int + 1}}" - - - name: read pod_hi_idx - command: "redis-cli get pod_hi_idx" - register: redis_pod_hi_idx + pod_idx: "{{resource_state.resources[0].status.pod_idx|int +1 }}" - - name: Read prev node_idx - command: "redis-cli get node_idx" - register: redis_node_idx - - - name: node_idx + - name: Read previous node_idx set_fact: - node_idx: "{{redis_node_idx.stdout|int}}" + node_idx: "{{resource_state.resources[0].status.node_idx|int}}" - block: # @@ -42,28 +28,20 @@ set_fact: node_idx: "{{node_idx|int + 1}}" - - name: Read node_hi_idx - command: "redis-cli get node_hi_idx" - register: redis_node_hi_idx - - name: Check node loop for ending condition set_fact: all_run_done: True - when: "node_idx|int > redis_node_hi_idx.stdout|int" + when: "node_idx|int > resource_state.resources[0].status.node_hi_idx|int" # # Reset pod_idx AFTER node_idx tasks above, else cond change # causes it to skip node_idx tasks # - - name: read 
pod_low_idx - command: "redis-cli get pod_low_idx" - register: redis_pod_low_idx - - name: Reset pod_idx to pod_low_idx set_fact: - pod_idx: "{{redis_pod_low_idx.stdout|int}}" + pod_idx: "{{resource_state.resources[0].status.pod_low_idx}}" - when: "pod_idx|int > redis_pod_hi_idx.stdout|int" + when: "pod_idx|int > resource_state.resources[0].status.pod_hi_idx|int" - block: # @@ -93,7 +71,7 @@ - name: Reset redis num_completion command: "redis-cli set num_completion 0" - # New node_idx value on new node loop ONLY. But we are lazy, just write every time. + # New node_idx value on new node loop ONLY. But we simply write every time. - name: Set next run node_idx command: "redis-cli set node_idx {{node_idx}}" @@ -108,6 +86,8 @@ namespace: "{{ operator_namespace }}" status: state: Clients Running + pod_idx: "{{pod_idx}}" + node_idx: "{{node_idx}}" when: all_run_done == False diff --git a/roles/uperf/tasks/run_a_set.yml b/roles/uperf/tasks/run_a_set.yml index c14dc6c1e..47f415c4a 100644 --- a/roles/uperf/tasks/run_a_set.yml +++ b/roles/uperf/tasks/run_a_set.yml @@ -3,9 +3,9 @@ - block: # # Entry Condition: - # 1. A previous task has set 'node_idx' in redis + # 1. A previous task has set 'node_idx' and 'pod_idx' in redis # 2. 
All cliest are polling for 'start' to run its workoad - # Output: Clients with idx <= node_idx will run + # Output: Clients with node_idx <= redis node_idx && pod_idx <= redis pod_ix # - name: Signal group to run diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 0db28dece..3ae1c11ae 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -67,13 +67,6 @@ # Compute node and pod limits using CR params while taking into account # of the actual number of nodes available in the system # -- name: init node idx - set_fact: - node_idx: "{{ workload_args.min_node | default('0')|int }}" - # - # TBD HN range check, colocate and other checks - # - - name: init pod low idx set_fact: pod_low_idx: "{{ workload_args.density_range[0]|int | default('1')|int - 1 }}" @@ -102,20 +95,20 @@ - name: A6 Record num server (V1) pods using workload_args.pair - TBD set_fact: num_server_pods: "{{ workload_args.pair | default('1')|int }}" - when: workload_args.max_node is not defined + when: workload_args.node_range is not defined #- name: A7 V2 scale run - Setup eligible node (static) list - Tobe replaced by real node list builder # set_fact: # worker_node_list: "{{workload_args.node_list[0]}}" - # when: workload_args.max_node is not defined + # when: workload_args.node_range is not defined # #- name: A7 V1 non-scale Setup eligible node (static) list - Tobe replaced by real node list builder # set_fact: # worker_node_list: "{{workload_args.node_list}}" - # when: workload_args.max_node is defined + # when: workload_args.node_range is defined - name: A8 V2 Record num server pods using new worker_node_list set_fact: num_server_pods: "{{ (node_hi_idx|int+1) * (pod_hi_idx|int+1) }}" - when: workload_args.max_node is defined + when: workload_args.node_range is defined diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml index 1397ae27f..f73e2a657 100644 --- a/roles/uperf/tasks/start_server.yml +++ 
b/roles/uperf/tasks/start_server.yml @@ -19,7 +19,7 @@ definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" register: servers with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} - when: workload_args.max_node is not defined + when: workload_args.node_range is not defined ############ @@ -30,8 +30,6 @@ - "{{ range(0, node_hi_idx|int +1) | list }}" - "{{ range(0, pod_hi_idx|int +1) | list }}" - #- "{{ range(0, worker_node_list|length| default('0'|int)) | list }}" - #- "{{ range(0, workload_args.density | default('1'|int)) | list }}" # # Each server annotates a "node_idx" which will allow its peer client # to derive its affinity according the 'colocate' variable diff --git a/roles/uperf/tasks/wait_client_done.yml b/roles/uperf/tasks/wait_client_done.yml index 47f2b3b82..08c66c0f7 100644 --- a/roles/uperf/tasks/wait_client_done.yml +++ b/roles/uperf/tasks/wait_client_done.yml @@ -1,8 +1,6 @@ --- - block: - - name: debug - command: "redis-cli set task Running" #### kind - block: diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml index 26dae01f9..5320a495d 100644 --- a/roles/uperf/tasks/wait_set_done.yml +++ b/roles/uperf/tasks/wait_set_done.yml @@ -4,23 +4,12 @@ - block: - name: debug - command: "redis-cli set task wait_set_done" + command: "redis-cli set state Set_Running" - name: read pod completion count command: "redis-cli get num_completion" register: num_completion - - name: read node_idx - command: "redis-cli get node_idx" - register: node_idx - - - name: read pod_idx - command: "redis-cli get pod_idx" - register: pod_idx - - - name: debug - command: "redis-cli set read_num_completion {{ num_completion.stdout }}" - - operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -28,8 +17,7 @@ namespace: "{{ operator_namespace }}" status: state: Run Next Set - when: "num_completion.stdout|int == ((node_idx.stdout|int+1) * (pod_idx.stdout|int +1))" - #when: 
"num_completion.stdout|int == node_idx.stdout|int * workload_args.density|default('1')|int" + when: "num_completion.stdout|int == ((resource_state.resources[0].status.node_idx|int +1) * (resource_state.resources[0].status.pod_idx|int +1))" when: resource_kind == "pod" when: resource_state.resources[0].status.state == "Set Running" diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 9580eadfd..cc4f7ccf8 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -3,7 +3,7 @@ kind: Job apiVersion: batch/v1 metadata: -{% if workload_args.max_node is not defined %} +{% if workload_args.node_range is not defined %} name: 'uperf-server-{{ item }}-{{ trunc_uuid }}' {% else %} name: 'uperf-server-{{worker_node_list[ item[0]] }}-{{ item[1] }}-{{ trunc_uuid }}' @@ -17,7 +17,7 @@ spec: metadata: labels: -{% if workload_args.max_node is not defined %} +{% if workload_args.node_range is not defined %} app: uperf-bench-server-{{item}}-{{ trunc_uuid }} {% else %} #app: uperf-bench-server-{{ item[0] }}-{{ item[1] }}-{{ trunc_uuid }} @@ -57,7 +57,7 @@ spec: # # V2 pin server pod to node # -{% if workload_args.max_node is defined %} +{% if workload_args.node_range is defined %} nodeSelector: #kubernetes.io/hostname: '{{ item[0] }}' kubernetes.io/hostname: '{{ worker_node_list[item[0]] }}' diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 4cb93a107..8020b7617 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -165,7 +165,7 @@ spec: configMap: name: uperf-test-{{ trunc_uuid }} restartPolicy: OnFailure -{% if workload_args.max_node is defined %} +{% if workload_args.node_range is defined %} {% if workload_args.colocate is sameas true %} nodeSelector: # client node same as server node From 2224ab5d932995d012f6e5493cf3123683facfac Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Fri, 15 Jan 2021 16:19:32 +0000 Subject: 
[PATCH 13/41] Export node_count and pod_count variabled to benchmarkwrapper for data correlation. While at it beefup a few place with "when: xxx is defined" for robustness --- resources/crds/ripsaw_v1alpha1_uperf_cr.yaml | 9 +++++++++ roles/uperf/tasks/setup.yml | 4 ++++ roles/uperf/tasks/start_server.yml | 2 +- roles/uperf/templates/workload.yml.j2 | 10 +++++++--- roles/uperf/vars/main.yml | 8 ++++---- 5 files changed, 25 insertions(+), 8 deletions(-) diff --git a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml index bf2606875..c2b80540d 100644 --- a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml +++ b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml @@ -32,3 +32,12 @@ spec: nthrs: - 1 runtime: 30 + + # The following variables are for scale uperf. + # The scale mode will be activated with 'node_range' defined. + + #colocate: True + #density_range: [1, 2] + #node_range: [1, 2] + #excluded_node: [ worker001 ] + diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 3ae1c11ae..0f20bfbb4 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -70,18 +70,22 @@ - name: init pod low idx set_fact: pod_low_idx: "{{ workload_args.density_range[0]|int | default('1')|int - 1 }}" + when: workload_args.density_range is defined - name: init pod hi idx set_fact: pod_hi_idx: "{{ workload_args.density_range[1]|int | default('1')|int - 1 }}" + when: workload_args.density_range is defined - name: init node low idx set_fact: node_low_idx: "{{ workload_args.node_range[0]|int | default('1')|int - 1 }}" + when: workload_args.node_range is defined - name: init node hi idx set_fact: node_hi_idx: "{{ workload_args.node_range[1]|int | default('1')|int - 1 }}" + when: workload_args.node_range is defined - name: Adjust node_hi_idx if cluster has less nodes set_fact: diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml index f73e2a657..a67651af2 100644 --- 
a/roles/uperf/tasks/start_server.yml +++ b/roles/uperf/tasks/start_server.yml @@ -29,7 +29,7 @@ with_nested: - "{{ range(0, node_hi_idx|int +1) | list }}" - "{{ range(0, pod_hi_idx|int +1) | list }}" - + when: workload_args.node_range is defined # # Each server annotates a "node_idx" which will allow its peer client # to derive its affinity according the 'colocate' variable diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 8020b7617..dccc48a6d 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -96,10 +96,12 @@ spec: export networkpolicy={{workload_args.networkpolicy}}; {% endif %} export hostnet={{workload_args.hostnetwork}}; - my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; - my_pod_idx={{ (item['metadata']['annotations']['pod_idx'] | from_json) }}; + export my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; + export my_pod_idx={{ (item['metadata']['annotations']['pod_idx'] | from_json) }}; export ips=$(hostname -I); export num_pairs={{workload_args.pair}}; + export node_count=0 + export pod_count=0 node_limit=0; pod_limit=0; while true; do @@ -111,7 +113,9 @@ spec: continue; fi; - /bin/echo 'UPERF scale config: num_node=' $node_limit+1' density= {{ pod_idx }}'; + /bin/echo 'UPERF-run-context: num_node=' $((node_limit+1)) 'density=' $((pod_limit+1)) 'my_node_idx=' $my_node_idx 'my_pod_idx=' $my_pod_idx; + node_count=$((node_limit+1)); + pod_count=$((pod_limit+1)); {% for test in workload_args.test_types %} {% for proto in workload_args.protos %} diff --git a/roles/uperf/vars/main.yml b/roles/uperf/vars/main.yml index 4490915c1..09cd865f9 100644 --- a/roles/uperf/vars/main.yml +++ b/roles/uperf/vars/main.yml @@ -5,10 +5,10 @@ worker_node_list: [] group_node_count: 0 max_node_count: 0 -pod_low_idx: 1 -pod_hi_idx: 1 -node_low_idx: 1 -node_hi_idx: 1 +pod_low_idx: 0 +pod_hi_idx: 0 +node_low_idx: 0 +node_hi_idx: 0 node_idx: 
0 pod_idx: 0 From b990511ef7ac3d17c1eab8ba969ee07ec53c3f3b Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Tue, 19 Jan 2021 04:16:36 +0000 Subject: [PATCH 14/41] Add 'step_size' CR parameter. Valid step_size values are: addN or log2. N can be any decimal number. --- resources/crds/ripsaw_v1alpha1_uperf_cr.yaml | 16 +++++++++--- roles/uperf/tasks/next_set.yml | 27 ++++++++++++++------ roles/uperf/tasks/setup.yml | 2 +- 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml index c2b80540d..b4ab9cc3f 100644 --- a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml +++ b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml @@ -36,8 +36,16 @@ spec: # The following variables are for scale uperf. # The scale mode will be activated with 'node_range' defined. - #colocate: True - #density_range: [1, 2] - #node_range: [1, 2] - #excluded_node: [ worker001 ] + # colocate: True + # density_range: [1, 32] + # node_range: [1, 10] + # step_size: log2 + # Valid step_size values are: addN or log2 + # N can be any decimal number + # Enumeration examples: + # add1: 1,2,3,4 ,,, + # add2: 1,3,5,7 ... + # add10: 1,11,21,31 ... + # log2: 1,2,4,8,16,32 ,,, + # excluded_node: [ worker001 ] diff --git a/roles/uperf/tasks/next_set.yml b/roles/uperf/tasks/next_set.yml index e7977235c..c80868346 100644 --- a/roles/uperf/tasks/next_set.yml +++ b/roles/uperf/tasks/next_set.yml @@ -1,6 +1,6 @@ --- # -# This module logically implements RE-ENTRANT double for loops +# This module logically implements RE-ENTRANT nested for loops # with_items: # range (node_low_idx, node_hi_idx) # range (pod_low_idx, pod_hi_idx) @@ -8,25 +8,36 @@ # continues where it left off. 
# - block: - - name: + - name: Read previous node_idx and pod_idx set_fact: all_run_done: False + inc: "{{workload_args.step_size|default('add1')}}" + amount: 0 + pod_idx: "{{resource_state.resources[0].status.pod_idx|int}}" + node_idx: "{{resource_state.resources[0].status.node_idx|int}}" - - name: Increment pod_idx + - name: Extract add amount set_fact: - pod_idx: "{{resource_state.resources[0].status.pod_idx|int +1 }}" + amount: "{{ inc | regex_replace('[^0-9]', '') }}" + inc: add + when: "'add' in inc" - - name: Read previous node_idx + - name: Increment pod_idx set_fact: - node_idx: "{{resource_state.resources[0].status.node_idx|int}}" - + pod_idx: "{%-if inc=='add' -%}{{pod_idx|int+amount|int}} + {%-elif inc=='log2' -%}{{(pod_idx|int*2)+1}} + {%-else -%}{{pod_idx|int+1}} + {% endif %}" - block: # # This block starts a new node loop # - name: Increment node_idx set_fact: - node_idx: "{{node_idx|int + 1}}" + node_idx: "{%- if inc=='add' -%}{{node_idx|int+amount|int}} + {%- elif inc=='log2' -%}{{(node_idx|int *2)+1}} + {%- else -%}{{node_idx|int+1}} + {% endif %}" - name: Check node loop for ending condition set_fact: diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 0f20bfbb4..8ab10fb1d 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -51,7 +51,7 @@ - name: Exclude unhealthy nodes i.e. low diskspace set_fact: worker_node_list: "{{ worker_node_list | difference(workload_args.excluded_node[0]) }}" - when: workload_args.excluded_node is defined + when: workload_args.excluded_node is defined and workload_args.excluded_node|length > 0 - name: A5 Capture ServiceIP k8s_facts: From ad250ef6e96e2591d559c8da894faea567e50b03 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Tue, 19 Jan 2021 18:45:16 +0000 Subject: [PATCH 15/41] Fix a timing window when redis is really busy. The idle client pods enter main loop (due to start-True) then acquire pod_idx and node_idx. 
They miss out the restart state, and run prematurely --- roles/uperf/tasks/wait_set_done.yml | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml index 5320a495d..ba0a2d6f0 100644 --- a/roles/uperf/tasks/wait_set_done.yml +++ b/roles/uperf/tasks/wait_set_done.yml @@ -10,14 +10,20 @@ command: "redis-cli get num_completion" register: num_completion - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Run Next Set + - block: + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Run Next Set + + - name: Change redis state to force idle client pods to outside their main loop + command: "redis-cli set start 0" + when: "num_completion.stdout|int == ((resource_state.resources[0].status.node_idx|int +1) * (resource_state.resources[0].status.pod_idx|int +1))" + when: resource_kind == "pod" when: resource_state.resources[0].status.state == "Set Running" From 7c561b80d3b6ae919c5b831d4d051f0662e1b220 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 21 Jan 2021 01:08:18 +0000 Subject: [PATCH 16/41] Consolidate 3 redis vars 'start', 'node_idx', 'pod_idx' into one to reduce redis load. 
--- roles/uperf/tasks/init.yml | 6 ------ roles/uperf/tasks/next_set.yml | 9 +-------- roles/uperf/tasks/run_a_set.yml | 2 +- roles/uperf/tasks/wait_set_done.yml | 6 +----- roles/uperf/templates/workload.yml.j2 | 13 ++++++++++--- 5 files changed, 13 insertions(+), 23 deletions(-) diff --git a/roles/uperf/tasks/init.yml b/roles/uperf/tasks/init.yml index 186efade3..164c67a43 100644 --- a/roles/uperf/tasks/init.yml +++ b/roles/uperf/tasks/init.yml @@ -21,10 +21,4 @@ node_idx: "{{node_low_idx}}" pod_idx: "{{pod_low_idx}}" -# Set redis starting Node and Pod -- name: Set starting node_idx - command: "redis-cli set node_idx {{ node_low_idx }}" - -- name: Set redis starting pod_idx - command: "redis-cli set pod_idx {{ pod_low_idx }}" diff --git a/roles/uperf/tasks/next_set.yml b/roles/uperf/tasks/next_set.yml index c80868346..3ce613df2 100644 --- a/roles/uperf/tasks/next_set.yml +++ b/roles/uperf/tasks/next_set.yml @@ -68,7 +68,7 @@ name: "{{ meta.name }}" namespace: "{{ operator_namespace }}" status: - state: Set Running + state: Running when: all_run_done == True @@ -82,13 +82,6 @@ - name: Reset redis num_completion command: "redis-cli set num_completion 0" - # New node_idx value on new node loop ONLY. But we simply write every time. 
- - name: Set next run node_idx - command: "redis-cli set node_idx {{node_idx}}" - - - name: Set next run pod_idx - command: "redis-cli set pod_idx {{pod_idx}}" - - name: Change state to run next round operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 diff --git a/roles/uperf/tasks/run_a_set.yml b/roles/uperf/tasks/run_a_set.yml index 47f415c4a..dcf8f3a59 100644 --- a/roles/uperf/tasks/run_a_set.yml +++ b/roles/uperf/tasks/run_a_set.yml @@ -9,7 +9,7 @@ # - name: Signal group to run - command: "redis-cli set start true " + command: "redis-cli set start true-{{resource_state.resources[0].status.node_idx|int}}-{{resource_state.resources[0].status.pod_idx|int}}" - name: Update state to "Set Running" operator_sdk.util.k8s_status: diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml index ba0a2d6f0..5461cbe44 100644 --- a/roles/uperf/tasks/wait_set_done.yml +++ b/roles/uperf/tasks/wait_set_done.yml @@ -10,17 +10,13 @@ command: "redis-cli get num_completion" register: num_completion - - block: - - operator_sdk.util.k8s_status: + - operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark name: "{{ meta.name }}" namespace: "{{ operator_namespace }}" status: state: Run Next Set - - - name: Change redis state to force idle client pods to outside their main loop - command: "redis-cli set start 0" when: "num_completion.stdout|int == ((resource_state.resources[0].status.node_idx|int +1) * (resource_state.resources[0].status.pod_idx|int +1))" diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index dccc48a6d..98d9efa4c 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -104,11 +104,14 @@ spec: export pod_count=0 node_limit=0; pod_limit=0; + STR=''; while true; do - state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); + STR=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); + 
state=$(echo $STR | cut -f1 -d-); if [[ $state =~ 'true' ]]; then - node_limit=$(redis-cli -h {{bo.resources[0].status.podIP}} get node_idx); - pod_limit=$(redis-cli -h {{bo.resources[0].status.podIP}} get pod_idx); + node_limit=$(echo $STR | cut -f2 -d-); + pod_limit=$(echo $STR | cut -f3 -d-); + echo 'state=' $state 'node=' $node_limit 'pod=' $pod_limit; if [[ $my_node_idx -gt $node_limit || $my_pod_idx -gt $pod_limit ]]; then continue; fi; @@ -160,7 +163,11 @@ spec: fi; break; done; +{% if workload_args.node_range is not defined %} redis-cli -h {{bo.resources[0].status.podIP}} set start false" +{% else %} + " +{% endif %} volumeMounts: - name: config-volume mountPath: "/tmp/uperf-test" From 03a6c00c9a138e7dd34ee9db3e83210af1618383 Mon Sep 17 00:00:00 2001 From: Murali Krishnasamy <70236227+mukrishn@users.noreply.github.com> Date: Fri, 22 Jan 2021 11:19:49 -0500 Subject: [PATCH 17/41] single request to N pods (#1) --- roles/uperf/tasks/start_client.yml | 8 +- roles/uperf/tasks/start_server.yml | 9 +- roles/uperf/templates/server.yml.j2 | 109 ++++++---- roles/uperf/templates/workload.yml.j2 | 290 +++++++++++++------------- 4 files changed, 220 insertions(+), 196 deletions(-) diff --git a/roles/uperf/tasks/start_client.yml b/roles/uperf/tasks/start_client.yml index e40bc068e..1018e1f84 100644 --- a/roles/uperf/tasks/start_client.yml +++ b/roles/uperf/tasks/start_client.yml @@ -20,8 +20,8 @@ - name: P19 Start Client(s) w/o serviceIP k8s: definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - with_items: - - "{{ server_pods.resources }}" + vars: + resource_item: "{{ server_pods.resources }}" when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 # @@ -32,8 +32,8 @@ - name: P20 Start Client(s) with serviceIP k8s: definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - with_items: - - "{{ serviceip.resources }}" + vars: + resource_item: "{{ serviceip.resources }}" 
when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 when: resource_kind == "pod" diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml index a67651af2..8cf6d6f55 100644 --- a/roles/uperf/tasks/start_server.yml +++ b/roles/uperf/tasks/start_server.yml @@ -18,7 +18,8 @@ k8s: definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" register: servers - with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} + vars: + pod_sequence: "{{ workload_args.pair | default('1')|int }}" when: workload_args.node_range is not defined ############ @@ -26,9 +27,9 @@ - name: P10 V2 Start Server(s) - total = eligible nodes * density k8s: definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" - with_nested: - - "{{ range(0, node_hi_idx|int +1) | list }}" - - "{{ range(0, pod_hi_idx|int +1) | list }}" + vars: + pod_sequence: "{{ pod_hi_idx|int +1 }}" + node_sequence: "{{ node_hi_idx|int +1 }}" when: workload_args.node_range is defined # # Each server annotates a "node_idx" which will allow its peer client diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index cc4f7ccf8..9b803a184 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -1,72 +1,89 @@ --- -kind: Job -apiVersion: batch/v1 -metadata: - +apiVersion: v1 +kind: List +metadata: {} +items: +{% macro job_template(item, node_idx_item='') %} + - kind: Job + apiVersion: batch/v1 + metadata: {% if workload_args.node_range is not defined %} - name: 'uperf-server-{{ item }}-{{ trunc_uuid }}' + name: 'uperf-server-{{ item }}-{{ trunc_uuid }}' {% else %} - name: 'uperf-server-{{worker_node_list[ item[0]] }}-{{ item[1] }}-{{ trunc_uuid }}' + name: 'uperf-server-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }}' {% endif %} - - namespace: "{{ operator_namespace }}" -spec: - ttlSecondsAfterFinished: 600 - backoffLimit: 0 - 
template: - metadata: - labels: - + namespace: "{{ operator_namespace }}" + spec: + ttlSecondsAfterFinished: 600 + backoffLimit: 0 + template: + metadata: + labels: {% if workload_args.node_range is not defined %} - app: uperf-bench-server-{{item}}-{{ trunc_uuid }} + app: uperf-bench-server-{{item}}-{{ trunc_uuid }} {% else %} - #app: uperf-bench-server-{{ item[0] }}-{{ item[1] }}-{{ trunc_uuid }} - app: uperf-bench-server-{{ worker_node_list[item[0]] }}-{{ item[1] }}-{{ trunc_uuid }} + #app: uperf-bench-server-{{ node_idx_item }}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{ worker_node_list[node_idx_item] }}-{{ item }}-{{ trunc_uuid }} {% endif %} - - type: uperf-bench-server-{{ trunc_uuid }} - annotations: + type: uperf-bench-server-{{ trunc_uuid }} + annotations: {% if workload_args.multus.enabled is sameas true %} - k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.server}} + k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.server}} {% endif %} - node_idx: '{{ item[0] }}' - pod_idx: '{{ item[1] }}' - spec: +{% if workload_args.node_range is defined %} + node_idx: '{{ node_idx_item }}' + pod_idx: '{{ item }}' +{% endif %} + spec: {% if workload_args.runtime_class is defined %} - runtimeClassName: "{{ workload_args.runtime_class }}" + runtimeClassName: "{{ workload_args.runtime_class }}" {% endif %} {% if workload_args.hostnetwork is sameas true %} - hostNetwork: true - serviceAccountName: benchmark-operator + hostNetwork: true + serviceAccountName: benchmark-operator {% endif %} - containers: - - name: benchmark - image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} + containers: + - name: benchmark + image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} {% if workload_args.server_resources is defined %} - resources: {{ workload_args.server_resources | to_json }} + resources: {{ workload_args.server_resources | to_json }} {% endif %} - imagePullPolicy: Always - command: 
["/bin/sh","-c"] - args: ["uperf -s -v -P 20000"] - restartPolicy: OnFailure + imagePullPolicy: Always + command: ["/bin/sh","-c"] + args: ["uperf -s -v -P 20000"] + restartPolicy: OnFailure {% if workload_args.pin is sameas true %} - nodeSelector: - kubernetes.io/hostname: '{{ workload_args.pin_server }}' + nodeSelector: + kubernetes.io/hostname: '{{ workload_args.pin_server }}' {% endif %} # # V2 pin server pod to node # {% if workload_args.node_range is defined %} - nodeSelector: - #kubernetes.io/hostname: '{{ item[0] }}' - kubernetes.io/hostname: '{{ worker_node_list[item[0]] }}' + nodeSelector: + #kubernetes.io/hostname: '{{ node_idx_item }}' + kubernetes.io/hostname: '{{ worker_node_list[node_idx_item] }}' {% endif %} {% if workload_args.serviceip is sameas true %} - securityContext: - sysctls: - - name: net.ipv4.ip_local_port_range - value: 20000 20011 + securityContext: + sysctls: + - name: net.ipv4.ip_local_port_range + value: 20000 20011 +{% endif %} +{% macro metadata() %}{% include "metadata.yml.j2" %}{% endmacro %} + {{ metadata()|indent }} +{% endmacro %} +{% if workload_args.node_range is not defined %} +{% for item in range(pod_sequence|int) %} +{{ job_template(item) }} +{% endfor %} +{% else %} +{% for node_idx_item in range(node_sequence|int) %} +{% for item in range(pod_sequence|int) %} +{{ job_template(item,node_idx_item) }} +{% endfor %} +{% endfor %} {% endif %} -{% include "metadata.yml.j2" %} + diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 98d9efa4c..414690f84 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -1,124 +1,129 @@ --- -kind: Job -apiVersion: batch/v1 -metadata: +apiVersion: v1 +kind: List +metadata: {} +items: +{% for item in resource_item %} + - kind: Job + apiVersion: batch/v1 + metadata: {% if workload_args.serviceip is sameas true %} - name: 'uperf-client-{{item.spec.clusterIP}}-{{ trunc_uuid }}' + name: 
'uperf-client-{{item.spec.clusterIP}}-{{ trunc_uuid }}' {% else %} - name: 'uperf-client-{{item.status.podIP}}-{{ trunc_uuid }}' + name: 'uperf-client-{{item.status.podIP}}-{{ trunc_uuid }}' {% endif %} - namespace: '{{ operator_namespace }}' -spec: - template: - metadata: - labels: - app: uperf-bench-client-{{ trunc_uuid }} - clientfor: {{ item.metadata.labels.app }} - type: uperf-bench-client-{{ trunc_uuid }} + namespace: '{{ operator_namespace }}' + spec: + template: + metadata: + labels: + app: uperf-bench-client-{{ trunc_uuid }} + clientfor: {{ item.metadata.labels.app }} + type: uperf-bench-client-{{ trunc_uuid }} {% if workload_args.multus.enabled is sameas true %} - annotations: - k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.client }} + annotations: + k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.client }} {% endif %} - spec: + spec: {% if workload_args.runtime_class is defined %} - runtimeClassName: "{{ workload_args.runtime_class }}" + runtimeClassName: "{{ workload_args.runtime_class }}" {% endif %} {% if workload_args.hostnetwork is sameas true %} - hostNetwork: true - serviceAccountName: benchmark-operator -{% endif %} - affinity: - podAntiAffinity: - preferredDuringSchedulingIgnoredDuringExecution: - - weight: 100 - podAffinityTerm: - labelSelector: - matchExpressions: - - key: app - operator: In - values: - - {{ item.metadata.labels.app }} - topologyKey: kubernetes.io/hostname - containers: - - name: benchmark - image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} - env: - - name: uuid - value: "{{ uuid }}" - - name: test_user - value: "{{ test_user | default("ripsaw") }}" - - name: clustername - value: "{{ clustername }}" + hostNetwork: true + serviceAccountName: benchmark-operator +{% endif %} + affinity: + podAntiAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 100 + podAffinityTerm: + labelSelector: + matchExpressions: + - key: app + operator: In + values: + - {{ 
item.metadata.labels.app }} + topologyKey: kubernetes.io/hostname + containers: + - name: benchmark + image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} + env: + - name: uuid + value: "{{ uuid }}" + - name: test_user + value: "{{ test_user | default("ripsaw") }}" + - name: clustername + value: "{{ clustername }}" {% if elasticsearch is defined %} - - name: es - value: "{{ elasticsearch.url }}" - - name: es_index - value: "{{ elasticsearch.index_name | default("ripsaw-uperf") }}" - - name: parallel - value: "{{ elasticsearch.parallel | default(false) }}" - - name: es_verify_cert - value: "{{ elasticsearch.verify_cert | default(true) }}" + - name: es + value: "{{ elasticsearch.url }}" + - name: es_index + value: "{{ elasticsearch.index_name | default("ripsaw-uperf") }}" + - name: parallel + value: "{{ elasticsearch.parallel | default(false) }}" + - name: es_verify_cert + value: "{{ elasticsearch.verify_cert | default(true) }}" {% endif %} {% if prometheus is defined %} - - name: prom_es - value: "{{ prometheus.es_url }}" - - name: prom_parallel - value: "{{ prometheus.es_parallel | default(false) }}" - - name: prom_token - value: "{{ prometheus.prom_token | default() }}" - - name: prom_url - value: "{{ prometheus.prom_url | default() }}" -{% endif %} - - name: client_node - valueFrom: - fieldRef: - fieldPath: spec.nodeName - - name: server_node - value: "{{ uperf.pin_server|default("unknown") }}" + - name: prom_es + value: "{{ prometheus.es_url }}" + - name: prom_parallel + value: "{{ prometheus.es_parallel | default(false) }}" + - name: prom_token + value: "{{ prometheus.prom_token | default() }}" + - name: prom_url + value: "{{ prometheus.prom_url | default() }}" +{% endif %} + - name: client_node + valueFrom: + fieldRef: + fieldPath: spec.nodeName + - name: server_node + value: "{{ uperf.pin_server|default("unknown") }}" {% if workload_args.client_resources is defined %} - resources: {{ workload_args.client_resources | to_json }} + 
resources: {{ workload_args.client_resources | to_json }} {% endif %} - imagePullPolicy: Always - command: ["/bin/sh", "-c"] - args: + imagePullPolicy: Always + command: ["/bin/sh", "-c"] + args: {% if workload_args.serviceip is sameas true %} - - "export serviceip=true; - export h={{item.spec.clusterIP}}; + - "export serviceip=true; + export h={{item.spec.clusterIP}}; {% else %} {% if workload_args.multus.client is defined %} - - "export multus_client={{workload_args.multus.client}}; - export h={{ (item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; + - "export multus_client={{workload_args.multus.client}}; + export h={{ (item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; {% else %} - - "export h={{item.status.podIP}}; + - "export h={{item.status.podIP}}; {% endif %} {% endif %} {% if workload_args.networkpolicy is defined %} - export networkpolicy={{workload_args.networkpolicy}}; -{% endif %} - export hostnet={{workload_args.hostnetwork}}; - export my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; - export my_pod_idx={{ (item['metadata']['annotations']['pod_idx'] | from_json) }}; - export ips=$(hostname -I); - export num_pairs={{workload_args.pair}}; - export node_count=0 - export pod_count=0 - node_limit=0; - pod_limit=0; - STR=''; - while true; do - STR=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); - state=$(echo $STR | cut -f1 -d-); - if [[ $state =~ 'true' ]]; then - node_limit=$(echo $STR | cut -f2 -d-); - pod_limit=$(echo $STR | cut -f3 -d-); - echo 'state=' $state 'node=' $node_limit 'pod=' $pod_limit; - if [[ $my_node_idx -gt $node_limit || $my_pod_idx -gt $pod_limit ]]; then - continue; - fi; - - /bin/echo 'UPERF-run-context: num_node=' $((node_limit+1)) 'density=' $((pod_limit+1)) 'my_node_idx=' $my_node_idx 'my_pod_idx=' $my_pod_idx; - node_count=$((node_limit+1)); - pod_count=$((pod_limit+1)); + export 
networkpolicy={{workload_args.networkpolicy}}; +{% endif %} + export hostnet={{workload_args.hostnetwork}}; + export my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; + export my_pod_idx={{ (item['metadata']['annotations']['pod_idx'] | from_json) }}; + export ips=$(hostname -I); + export num_pairs={{workload_args.pair}}; + export node_count=0 + export pod_count=0 + node_limit=0; + pod_limit=0; + STR=''; + while true; do + STR=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); + state=$(echo $STR | cut -f1 -d-); + if [[ $state =~ 'true' ]]; then + node_limit=$(echo $STR | cut -f2 -d-); + pod_limit=$(echo $STR | cut -f3 -d-); + echo 'state=' $state 'node=' $node_limit 'pod=' $pod_limit; + if [[ $my_node_idx -gt $node_limit || $my_pod_idx -gt $pod_limit ]]; then + continue; + fi; + + /bin/echo 'UPERF-run-context: num_node=' $((node_limit+1)) 'density=' $((pod_limit+1)) 'my_node_idx=' $my_node_idx 'my_pod_idx=' $my_pod_idx; + node_count=$((node_limit+1)); + pod_count=$((pod_limit+1)); {% for test in workload_args.test_types %} {% for proto in workload_args.protos %} @@ -131,69 +136,70 @@ spec: {% set rsize = size %} {% endif %} {% for nthr in workload_args.nthrs %} - cat /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}}; + cat /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}}; {% if workload_args.run_id is defined %} - run_snafu --tool uperf --run-id {{workload_args.run_id}} -w /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}} -s {{workload_args.samples}} --resourcetype {{resource_kind}} -u {{ uuid }} --user {{test_user | default("ripsaw")}}; + run_snafu --tool uperf --run-id {{workload_args.run_id}} -w /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}} -s {{workload_args.samples}} --resourcetype {{resource_kind}} -u {{ uuid }} --user {{test_user | default("ripsaw")}}; {% else %} - run_snafu --tool uperf -w 
/tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}} -s {{workload_args.samples}} --resourcetype {{resource_kind}} -u {{ uuid }} --user {{test_user | default("ripsaw")}}; + run_snafu --tool uperf -w /tmp/uperf-test/uperf-{{test}}-{{proto}}-{{wsize}}-{{rsize}}-{{nthr}} -s {{workload_args.samples}} --resourcetype {{resource_kind}} -u {{ uuid }} --user {{test_user | default("ripsaw")}}; {% endif %} {% endfor %} {% endfor %} {% endfor %} {% endfor %} - redis-cli -h {{bo.resources[0].status.podIP}} incr num_completion; - while true; do - state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); + redis-cli -h {{bo.resources[0].status.podIP}} incr num_completion; + while true; do + state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); + if [[ $state =~ 'restart' ]]; then + break; + elif [[ $state =~ 'done' ]]; then + break; + else + continue; + fi; + done; if [[ $state =~ 'restart' ]]; then - break; - elif [[ $state =~ 'done' ]]; then - break; - else - continue; + continue; fi; - done; - if [[ $state =~ 'restart' ]]; then - continue; + + elif [[ $state =~ 'done' ]]; then + break; + else + continue; fi; - - elif [[ $state =~ 'done' ]]; then break; - else - continue; - fi; - break; - done; + done; {% if workload_args.node_range is not defined %} - redis-cli -h {{bo.resources[0].status.podIP}} set start false" + redis-cli -h {{bo.resources[0].status.podIP}} set start false" {% else %} - " -{% endif %} - volumeMounts: - - name: config-volume - mountPath: "/tmp/uperf-test" - volumes: - - name: config-volume - configMap: - name: uperf-test-{{ trunc_uuid }} - restartPolicy: OnFailure + " +{% endif %} + volumeMounts: + - name: config-volume + mountPath: "/tmp/uperf-test" + volumes: + - name: config-volume + configMap: + name: uperf-test-{{ trunc_uuid }} + restartPolicy: OnFailure {% if workload_args.node_range is defined %} {% if workload_args.colocate is sameas true %} - nodeSelector: - # client node same as server node - 
kubernetes.io/hostname: "{{ worker_node_list[item['metadata']['annotations']['node_idx'] | from_json] }}" + nodeSelector: + # client node same as server node + kubernetes.io/hostname: "{{ worker_node_list[item['metadata']['annotations']['node_idx'] | from_json] }}" {% else %} - nodeSelector: - # skew client node one position left in the woker_node_list - kubernetes.io/hostname: "{{ worker_node_list[ (1+(item['metadata']['annotations']['node_idx'] | from_json)) % (worker_node_list|length)] }}" + nodeSelector: + # skew client node one position left in the woker_node_list + kubernetes.io/hostname: "{{ worker_node_list[ (1+(item['metadata']['annotations']['node_idx'] | from_json)) % (worker_node_list|length)] }}" {% endif %} {% else %} {% if workload_args.pin is sameas true %} - nodeSelector: - kubernetes.io/hostname: '{{ workload_args.pin_client }}' + nodeSelector: + kubernetes.io/hostname: '{{ workload_args.pin_client }}' {% endif %} {% endif %} - -{% include "metadata.yml.j2" %} +{% macro metadata() %}{% include "metadata.yml.j2" %}{% endmacro %} + {{ metadata()|indent }} +{% endfor %} From b62ea0421567649e43a84531d57cff61215db022 Mon Sep 17 00:00:00 2001 From: Sai Sindhur Malleni Date: Fri, 22 Jan 2021 10:20:34 -0600 Subject: [PATCH 18/41] Exclude nodes based on labels (#2) This fits in with the idea that we are viewing worker nodes as just a pool of resources and not by individual hostnames. This gives the flexibility for users to isolate the tests to only one hardware model of workers as well (since each model can be labelled with its model name). Also there was a bug in previous code where only the first node in the list was technically being excluded in `workload_args.excluded_node[0]`. 
Signed-off-by: Sai Sindhur Malleni --- resources/crds/ripsaw_v1alpha1_uperf_cr.yaml | 4 +++- roles/uperf/tasks/setup.yml | 19 ++++++++++++++++--- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml index b4ab9cc3f..0d2d88282 100644 --- a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml +++ b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml @@ -47,5 +47,7 @@ spec: # add2: 1,3,5,7 ... # add10: 1,11,21,31 ... # log2: 1,2,4,8,16,32 ,,, - # excluded_node: [ worker001 ] + # exclude_labels: (OR conditional, every node that matches any of these labels is excluded) + # - "bad=true" + # - "fc640=true" diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 8ab10fb1d..9ea46107a 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -48,10 +48,23 @@ set_fact: worker_node_list: "{{ node_list | json_query('resources[].metadata.name') | list }}" -- name: Exclude unhealthy nodes i.e. 
low diskspace +- name: List Nodes Labeled with {{ workload_args.exclude_label }} + k8s_info: + api_version: v1 + kind: Node + label_selectors: + - '{{ item }}' + with_items: "{{ workload_args.exclude_labels }}" + register: exclude_node_list + +- name: Isolate Worker Role Hostnames for label {{ workload_args.exclude_label }} + set_fact: + worker_node_exclude_list: "{{ exclude_node_list | json_query('results[].resources[].metadata.name') }}" + +- name: Exclude labelled nodes set_fact: - worker_node_list: "{{ worker_node_list | difference(workload_args.excluded_node[0]) }}" - when: workload_args.excluded_node is defined and workload_args.excluded_node|length > 0 + worker_node_list: "{{ worker_node_list | difference(worker_node_exclude_list) }}" + when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 - name: A5 Capture ServiceIP k8s_facts: From 907e49ec1793b33f12d3adf0200005d74fcb07ba Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Fri, 22 Jan 2021 12:41:04 -0500 Subject: [PATCH 19/41] Temp alleviating redis-server overload: Increase redis-server to 2 CPU. Add sleep's to workload/uperf clients Point operator resource spec to compatible image. 
--- resources/operator.yaml | 6 +++--- roles/uperf/tasks/setup.yml | 1 + roles/uperf/templates/workload.yml.j2 | 8 ++++---- 3 files changed, 8 insertions(+), 7 deletions(-) diff --git a/resources/operator.yaml b/resources/operator.yaml index f49ac96f0..5df666dd5 100644 --- a/resources/operator.yaml +++ b/resources/operator.yaml @@ -34,14 +34,14 @@ spec: - /usr/local/bin/ao-logs - /tmp/ansible-operator/runner - stdout - image: quay.io/benchmark-operator/benchmark-operator:master + image: quay.io/hnhan/benchmark-operator:uperf-scale imagePullPolicy: "Always" volumeMounts: - mountPath: /tmp/ansible-operator/runner name: runner readOnly: true - name: benchmark-operator - image: quay.io/benchmark-operator/benchmark-operator:master + image: quay.io/hnhan/benchmark-operator:uperf-scale imagePullPolicy: Always env: - name: WATCH_NAMESPACE @@ -70,7 +70,7 @@ spec: - containerPort: 6379 resources: limits: - cpu: "0.1" + cpu: "2.0" volumeMounts: - mountPath: /redis-master-data name: data diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 9ea46107a..74863615c 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -56,6 +56,7 @@ - '{{ item }}' with_items: "{{ workload_args.exclude_labels }}" register: exclude_node_list + when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 - name: Isolate Worker Role Hostnames for label {{ workload_args.exclude_label }} set_fact: diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 414690f84..76fa809b5 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -118,7 +118,7 @@ items: pod_limit=$(echo $STR | cut -f3 -d-); echo 'state=' $state 'node=' $node_limit 'pod=' $pod_limit; if [[ $my_node_idx -gt $node_limit || $my_pod_idx -gt $pod_limit ]]; then - continue; + sleep 0.5; continue; fi; /bin/echo 'UPERF-run-context: num_node=' $((node_limit+1)) 'density=' $((pod_limit+1)) 
'my_node_idx=' $my_node_idx 'my_pod_idx=' $my_pod_idx; @@ -154,17 +154,17 @@ items: elif [[ $state =~ 'done' ]]; then break; else - continue; + sleep 0.5; continue; fi; done; if [[ $state =~ 'restart' ]]; then - continue; + sleep 0.5; continue; fi; elif [[ $state =~ 'done' ]]; then break; else - continue; + sleep 0.5; continue; fi; break; done; From 7cedd321a9143e8c11c8ea58fd9c9dcea592ec8f Mon Sep 17 00:00:00 2001 From: Logan Blyth Date: Fri, 22 Jan 2021 14:56:22 -0500 Subject: [PATCH 20/41] add stepsize and colocate to environment variables --- roles/uperf/templates/workload.yml.j2 | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 76fa809b5..f017a8414 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -97,6 +97,12 @@ items: - "export h={{item.status.podIP}}; {% endif %} {% endif %} +{% if (workload_args.colocate is defined) %} + - export colocate={{ workload_args.colocate}}; +{% endif %} +{% if workload_args.step_size is defined %} + - export stepsize={{ workload_args.step_size }}; +{% endif %} {% if workload_args.networkpolicy is defined %} export networkpolicy={{workload_args.networkpolicy}}; {% endif %} From 7c51631197b6d2339918f75725939a6b29a3eaee Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Mon, 25 Jan 2021 16:51:59 -0500 Subject: [PATCH 21/41] Massage after rebase --- roles/uperf/tasks/main.yml | 3 +-- roles/uperf/tasks/wait_client_ready.yml | 13 ++++--------- roles/uperf/tasks/wait_server_ready.yml | 4 ++-- 3 files changed, 7 insertions(+), 13 deletions(-) diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index 0c03f6921..a12f46cc1 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -53,8 +53,7 @@ when: resource_state.resources[0].status.state == "Waiting for Clients" - include_tasks: send_client_run_signal.yml - #when: resource_state.resources[0].status.state == "Clients 
Running" - when: resource_state.resources[0].status.state == "Clients" + when: resource_state.resources[0].status.state == "Clients Running" - include_tasks: wait_client_done.yml when: resource_state.resources[0].status.state == "Running" diff --git a/roles/uperf/tasks/wait_client_ready.yml b/roles/uperf/tasks/wait_client_ready.yml index 8668a7e95..29aae341e 100644 --- a/roles/uperf/tasks/wait_client_ready.yml +++ b/roles/uperf/tasks/wait_client_ready.yml @@ -33,14 +33,9 @@ - name: V22 set complete to false command: "redis-cli set complete false" - - name: V23 Get client vm status - k8s_facts: - kind: VirtualMachineInstance - api_version: kubevirt.io/v1alpha3 - namespace: '{{ operator_namespace }}' - label_selectors: - - app = uperf-bench-client-{{ trunc_uuid }} - register: client_vms + - name: Get count of clients ready + command: "redis-cli get clients-{{ trunc_uuid }}" + register: clients_ready_count - name: V24 Update resource state operator_sdk.util.k8s_status: @@ -50,7 +45,7 @@ namespace: "{{ operator_namespace }}" status: state: Clients Running - when: "workload_args.pair|default('1')|int == client_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (client_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" + when: "workload_args.pair|default('1')|int == clients_ready_count.stdout|int" when: resource_kind == "vm" diff --git a/roles/uperf/tasks/wait_server_ready.yml b/roles/uperf/tasks/wait_server_ready.yml index f86601f8c..80aaf55d1 100644 --- a/roles/uperf/tasks/wait_server_ready.yml +++ b/roles/uperf/tasks/wait_server_ready.yml @@ -53,10 +53,10 @@ when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - name: V16 blocking client from running uperf 
- command: "redis-cli set {{ trunc_uuid }} false" + command: "redis-cli set start false" with_items: "{{ server_vms.resources }}" when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "vm" and workload_args.pair|default('1')|int|int == 1 + when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "vm" From 585466c4af8415a40a76c85e1cee24eac6e3e071 Mon Sep 17 00:00:00 2001 From: Sai Sindhur Malleni Date: Wed, 27 Jan 2021 11:53:05 -0600 Subject: [PATCH 22/41] Fix list of args (#3) Signed-off-by: Sai Sindhur Malleni --- roles/uperf/templates/workload.yml.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index f017a8414..67eed3151 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -98,10 +98,10 @@ items: {% endif %} {% endif %} {% if (workload_args.colocate is defined) %} - - export colocate={{ workload_args.colocate}}; + export colocate={{ workload_args.colocate}}; {% endif %} {% if workload_args.step_size is defined %} - - export stepsize={{ workload_args.step_size }}; + export stepsize={{ workload_args.step_size }}; {% endif %} {% if workload_args.networkpolicy is defined %} export networkpolicy={{workload_args.networkpolicy}}; From 5974340bb016da97dbfb9b7cbd89e98f584bf30f Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 28 Jan 2021 13:31:52 +0000 Subject: [PATCH 23/41] Integrate "pin" mode into the Scale infra. ***Important*** pin=true/false "pin=true" will run Pin mode "pin=false" will run Scale mode In Pin mode, "pair=n" is now obsolete. 
Use denstiy_range[] instead Scale mode The default values are: node_range=[1,1], density_range=1,1], step_size=add1 and colcate=false --- resources/crds/ripsaw_v1alpha1_uperf_cr.yaml | 27 ++- roles/uperf/tasks/cleanup.yml | 6 +- roles/uperf/tasks/init.yml | 1 - roles/uperf/tasks/run_a_set.yml | 7 +- roles/uperf/tasks/setup.yml | 189 ++++++++++--------- roles/uperf/tasks/start_client.yml | 2 - roles/uperf/tasks/start_server.yml | 27 +-- roles/uperf/tasks/wait_client_done.yml | 6 +- roles/uperf/tasks/wait_client_ready.yml | 14 +- roles/uperf/tasks/wait_server_ready.yml | 13 +- roles/uperf/tasks/wait_set_done.yml | 4 - roles/uperf/templates/server.yml.j2 | 18 -- roles/uperf/templates/workload.yml.j2 | 6 +- roles/uperf/vars/main.yml | 12 +- 14 files changed, 146 insertions(+), 186 deletions(-) diff --git a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml index 0d2d88282..8e3182195 100644 --- a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml +++ b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml @@ -15,14 +15,22 @@ spec: serviceip: false hostnetwork: false networkpolicy: false - pin: false multus: enabled: false + pin: false + # + # pin: true/false - default=false + # - true will run 'Pin' mode using 1 pin_server and 1 pin_client nodes. + # - false will run 'Scale' mode. See colocate, density_range, node_range and step_size. pin_server: "node-0" pin_client: "node-1" samples: 1 kind: pod pair: 1 + # + # 'pair' is obsolete. Instead, use 'density_range' which allows fixed or enumerated + # number of pairs. Enumeration is an enhanced capability of the 'Pin' mode. + # test_types: - stream protos: @@ -33,13 +41,14 @@ spec: - 1 runtime: 30 - # The following variables are for scale uperf. - # The scale mode will be activated with 'node_range' defined. - - # colocate: True - # density_range: [1, 32] - # node_range: [1, 10] - # step_size: log2 + # The following variables are for 'Scale' mode. 
+ # The 'Scale' mode is activated when 'pin=false' or undefined. + # The Scale mode op params are: colocate, denstisy_range, node_range and step_size. + # + # colocate: true/false - default=false + # density_range: [n, m] - default=[1,1] + # node_range: [x, y] - default=[1,1] + # step_size: log2 - default=add1 # Valid step_size values are: addN or log2 # N can be any decimal number # Enumeration examples: @@ -47,6 +56,8 @@ spec: # add2: 1,3,5,7 ... # add10: 1,11,21,31 ... # log2: 1,2,4,8,16,32 ,,, + # + # The 'exlude_labels' is the list of ineligible worker nodes. # exclude_labels: (OR conditional, every node that matches any of these labels is excluded) # - "bad=true" # - "fc640=true" diff --git a/roles/uperf/tasks/cleanup.yml b/roles/uperf/tasks/cleanup.yml index ae67c2a6a..5a5dde1bc 100644 --- a/roles/uperf/tasks/cleanup.yml +++ b/roles/uperf/tasks/cleanup.yml @@ -3,7 +3,7 @@ - block: - block: - - name: P29 Get Server Pods + - name: Get Server Pods k8s_facts: kind: Pod api_version: v1 @@ -12,7 +12,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods - - name: P30 Pod names - to clean + - name: Pod names - to clean set_fact: clean_pods: | [ @@ -21,7 +21,7 @@ {% endfor %} ] - - name: P31 Cleanup run + - name: Cleanup run k8s: kind: pod api_version: v1 diff --git a/roles/uperf/tasks/init.yml b/roles/uperf/tasks/init.yml index 164c67a43..82051961d 100644 --- a/roles/uperf/tasks/init.yml +++ b/roles/uperf/tasks/init.yml @@ -21,4 +21,3 @@ node_idx: "{{node_low_idx}}" pod_idx: "{{pod_low_idx}}" - diff --git a/roles/uperf/tasks/run_a_set.yml b/roles/uperf/tasks/run_a_set.yml index dcf8f3a59..6620505f1 100644 --- a/roles/uperf/tasks/run_a_set.yml +++ b/roles/uperf/tasks/run_a_set.yml @@ -3,8 +3,8 @@ - block: # # Entry Condition: - # 1. A previous task has set 'node_idx' and 'pod_idx' in redis - # 2. All cliest are polling for 'start' to run its workoad + # 1. A previous task has set 'node_idx' and 'pod_idx' in benchmark ctx + # 2. 
All cliest are polling redis for 'start-node_idx-pod_idx' to start # Output: Clients with node_idx <= redis node_idx && pod_idx <= redis pod_ix # @@ -20,8 +20,5 @@ status: state: Set Running - - name: debug 2 - command: "redis-cli set state Set_Running" - when: resource_state.resources[0].status.state == "Clients Running" diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 74863615c..5f1d3afa7 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -1,6 +1,6 @@ --- -- name: A1 Get current state +- name: Get current state k8s_facts: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -18,7 +18,7 @@ complete: false when: resource_state.resources[0].status.state is not defined -- name: A3 Get current state - If it has changed +- name: Get current state - If it has changed k8s_facts: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -26,7 +26,7 @@ namespace: '{{ operator_namespace }}' register: resource_state -- name: A4 Capture operator information +- name: Capture operator information k8s_facts: kind: Pod api_version: v1 @@ -34,40 +34,102 @@ label_selectors: - name = benchmark-operator register: bo - -- name: List Nodes Labeled as Workers - k8s_info: - api_version: v1 - kind: Node - label_selectors: - - "node-role.kubernetes.io/worker=" - register: node_list - no_log: True - -- name: Isolate Worker Role Hostnames - set_fact: - worker_node_list: "{{ node_list | json_query('resources[].metadata.name') | list }}" - -- name: List Nodes Labeled with {{ workload_args.exclude_label }} - k8s_info: - api_version: v1 - kind: Node - label_selectors: - - '{{ item }}' - with_items: "{{ workload_args.exclude_labels }}" - register: exclude_node_list - when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 - -- name: Isolate Worker Role Hostnames for label {{ workload_args.exclude_label }} - set_fact: - worker_node_exclude_list: "{{ exclude_node_list | 
json_query('results[].resources[].metadata.name') }}" - -- name: Exclude labelled nodes - set_fact: - worker_node_list: "{{ worker_node_list | difference(worker_node_exclude_list) }}" - when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 - -- name: A5 Capture ServiceIP + +- block: + # + # This block is for scale mode where client and server pods are spreaded + # across all eligible nodes + # + - name: List Nodes Labeled as Workers + k8s_info: + api_version: v1 + kind: Node + label_selectors: + - "node-role.kubernetes.io/worker=" + register: node_list + no_log: True + + - name: Isolate Worker Role Hostnames + set_fact: + worker_node_list: "{{ node_list | json_query('resources[].metadata.name') | list }}" + + - name: List Nodes Labeled with {{ workload_args.exclude_label }} + k8s_info: + api_version: v1 + kind: Node + label_selectors: + - '{{ item }}' + with_items: "{{ workload_args.exclude_labels }}" + register: exclude_node_list + when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 + + - name: Isolate Worker Role Hostnames for label {{ workload_args.exclude_label }} + set_fact: + worker_node_exclude_list: "{{ exclude_node_list | json_query('results[].resources[].metadata.name') }}" + + - name: Exclude labelled nodes + set_fact: + worker_node_list: "{{ worker_node_list | difference(worker_node_exclude_list) }}" + when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 + # + # Compute node and pod limits using CR params while taking into account + # of the actual number of nodes available in the system + # + - name: init pod and node low/hi idx + set_fact: + pod_low_idx: "{{ workload_args.density_range[0] | default('1')|int - 1 }}" + pod_hi_idx: "{{ workload_args.density_range[1] | default('1')|int - 1 }}" + node_low_idx: "{{ workload_args.node_range[0] | default('1')|int - 1 }}" + node_hi_idx: "{{ workload_args.node_range[1] | default('1')|int - 1 }}" 
+ + - name: Adjust node_hi_idx if cluster has less nodes + set_fact: + node_hi_idx: "{{ worker_node_list|length| default('0')|int -1 }}" + when: "node_hi_idx|int >= worker_node_list|length| default('0')|int " + + - name: Record num server pods using new worker_node_list + # in Scale mode, num server pods = num_node * number_pod + set_fact: + num_server_pods: "{{ (node_hi_idx|int+1) * (pod_hi_idx|int+1) }}" + + # + # End scle mode + # + when: workload_args.pin | default(False) == False + +- block: + # + # This block is for the "pin" mode where the server and the client node + # are specified by pin_server and pin_client variables. + + - name: Add "Pin" server and client node to worker list. + # The add order is significant as we will enumerate the server pods on + # the first node in the list, and client pods on the second node. + set_fact: + worker_node_list: "{{worker_node_list + [item]}}" + with_items: + - '{{workload_args.pin_server}}' + - '{{workload_args.pin_client}}' + + - name: Init "Pin" mode node and pod indices + set_fact: + pod_low_idx: "{{ workload_args.density_range[0] | default('1')|int - 1 }}" + pod_hi_idx: "{{ workload_args.density_range[1] | default('1')|int - 1 }}" + # node indices are used as client pod 'start' parameter. 
+ node_low_idx: "0" + node_hi_idx: "0" + + - name: A8 V2 Record num Pin server pods using new worker_node_list + set_fact: + # in Pin mode, num server pods = number of pods + num_server_pods: "{{ pod_hi_idx|int +1 }}" + + # + # End pin mode where pin_client and pin_server are specified + # + when: workload_args.pin | default(False) == True + +- name: Capture ServiceIP k8s_facts: kind: Service api_version: v1 @@ -77,56 +139,3 @@ register: serviceip when: workload_args.serviceip is defined and workload_args.serviceip -# -# Compute node and pod limits using CR params while taking into account -# of the actual number of nodes available in the system -# -- name: init pod low idx - set_fact: - pod_low_idx: "{{ workload_args.density_range[0]|int | default('1')|int - 1 }}" - when: workload_args.density_range is defined - -- name: init pod hi idx - set_fact: - pod_hi_idx: "{{ workload_args.density_range[1]|int | default('1')|int - 1 }}" - when: workload_args.density_range is defined - -- name: init node low idx - set_fact: - node_low_idx: "{{ workload_args.node_range[0]|int | default('1')|int - 1 }}" - when: workload_args.node_range is defined - -- name: init node hi idx - set_fact: - node_hi_idx: "{{ workload_args.node_range[1]|int | default('1')|int - 1 }}" - when: workload_args.node_range is defined - -- name: Adjust node_hi_idx if cluster has less nodes - set_fact: - node_hi_idx: "{{ worker_node_list|length| default('0')|int -1 }}" - when: "node_hi_idx|int >= worker_node_list|length| default('0')|int " - -# -# "pin" mode exists prior to "scale" mode. 
If "pin: true", we will -# do the old way using pin_server and pin_client -# -- name: A6 Record num server (V1) pods using workload_args.pair - TBD - set_fact: - num_server_pods: "{{ workload_args.pair | default('1')|int }}" - when: workload_args.node_range is not defined - - #- name: A7 V2 scale run - Setup eligible node (static) list - Tobe replaced by real node list builder - # set_fact: - # worker_node_list: "{{workload_args.node_list[0]}}" - # when: workload_args.node_range is not defined - # - #- name: A7 V1 non-scale Setup eligible node (static) list - Tobe replaced by real node list builder - # set_fact: - # worker_node_list: "{{workload_args.node_list}}" - # when: workload_args.node_range is defined - -- name: A8 V2 Record num server pods using new worker_node_list - set_fact: - num_server_pods: "{{ (node_hi_idx|int+1) * (pod_hi_idx|int+1) }}" - when: workload_args.node_range is defined - diff --git a/roles/uperf/tasks/start_client.yml b/roles/uperf/tasks/start_client.yml index 1018e1f84..41bd9068a 100644 --- a/roles/uperf/tasks/start_client.yml +++ b/roles/uperf/tasks/start_client.yml @@ -40,8 +40,6 @@ ### kind - block: - - name: debug - command: "redis-cli set state Starting_Clients" - name: V19 Wait for vms to be running.... 
k8s_facts: diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml index 8cf6d6f55..f693d7a3e 100644 --- a/roles/uperf/tasks/start_server.yml +++ b/roles/uperf/tasks/start_server.yml @@ -1,44 +1,27 @@ --- - #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - # Start servers - #@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - block: - include_tasks: init.yml - - - name: P8 Create service for server pods + - name: Create service for server pods k8s: definition: "{{ lookup('template', 'service.yml.j2') | from_yaml }}" with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} when: workload_args.serviceip is defined and workload_args.serviceip - - name: P9 Start Server(s) - k8s: - definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" - register: servers - vars: - pod_sequence: "{{ workload_args.pair | default('1')|int }}" - when: workload_args.node_range is not defined - -############ - - - name: P10 V2 Start Server(s) - total = eligible nodes * density + - name: Start Server(s) - total = eligible nodes * density k8s: definition: "{{ lookup('template', 'server.yml.j2') | from_yaml }}" vars: pod_sequence: "{{ pod_hi_idx|int +1 }}" node_sequence: "{{ node_hi_idx|int +1 }}" - when: workload_args.node_range is defined + # # Each server annotates a "node_idx" which will allow its peer client # to derive its affinity according the 'colocate' variable # - -############ - - - name: P11 Wait for pods to be running.... + - name: Wait for pods to be running.... 
k8s_facts: kind: Pod api_version: v1 @@ -47,7 +30,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods - - name: P12 Update resource state + - name: Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark diff --git a/roles/uperf/tasks/wait_client_done.yml b/roles/uperf/tasks/wait_client_done.yml index 08c66c0f7..4f77c73e8 100644 --- a/roles/uperf/tasks/wait_client_done.yml +++ b/roles/uperf/tasks/wait_client_done.yml @@ -2,9 +2,9 @@ - block: -#### kind + #### kind - block: - - name: P27 Waiting for pods to complete.... + - name: Waiting for pods to complete.... k8s_facts: kind: pod api_version: v1 @@ -24,7 +24,7 @@ when: "num_server_pods|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" when: resource_kind == "pod" -#### kind + #### kind - block: - name: V28 get complete diff --git a/roles/uperf/tasks/wait_client_ready.yml b/roles/uperf/tasks/wait_client_ready.yml index 29aae341e..319abef7f 100644 --- a/roles/uperf/tasks/wait_client_ready.yml +++ b/roles/uperf/tasks/wait_client_ready.yml @@ -1,12 +1,8 @@ --- - block: - -##### kind - - block: # Pod block - - name: debug - command: "redis-cli set state Waiting_for_Clients" - - - name: P22 Get client pod status + ##### kind + - block: + - name: Get client pod status k8s_facts: kind: Pod api_version: v1 @@ -15,7 +11,7 @@ - app = uperf-bench-client-{{ trunc_uuid }} register: client_pods - - name: P23 Update resource state + - name: Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -27,8 +23,8 @@ when: resource_kind == "pod" -##### kind - block: + ##### kind - name: V22 set complete to false command: "redis-cli set complete false" diff --git a/roles/uperf/tasks/wait_server_ready.yml b/roles/uperf/tasks/wait_server_ready.yml index 80aaf55d1..f5b07d095 100644 --- a/roles/uperf/tasks/wait_server_ready.yml +++ 
b/roles/uperf/tasks/wait_server_ready.yml @@ -1,9 +1,6 @@ --- -########### kind - block: - - - name: debug - command: "redis-cli set state Starting_Servers" + ########### kind - name: P13 Get server pods k8s_facts: @@ -14,11 +11,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods - - name: debug - debug: - msg: "{{ num_server_pods }}" - - - name: P14 Update resource state + - name: Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -30,8 +23,8 @@ when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "pod" -######## kind - block: + ######## kind - name: V14 Wait for vms to be running.... k8s_facts: diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml index 5461cbe44..72097690c 100644 --- a/roles/uperf/tasks/wait_set_done.yml +++ b/roles/uperf/tasks/wait_set_done.yml @@ -2,10 +2,6 @@ - block: - block: - - - name: debug - command: "redis-cli set state Set_Running" - - name: read pod completion count command: "redis-cli get num_completion" register: num_completion diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 9b803a184..754e47dc4 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -7,11 +7,7 @@ items: - kind: Job apiVersion: batch/v1 metadata: -{% if workload_args.node_range is not defined %} - name: 'uperf-server-{{ item }}-{{ trunc_uuid }}' -{% else %} name: 'uperf-server-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }}' -{% endif %} namespace: "{{ operator_namespace }}" spec: ttlSecondsAfterFinished: 600 @@ -19,21 +15,15 @@ items: template: metadata: labels: -{% if workload_args.node_range is not defined %} - app: uperf-bench-server-{{item}}-{{ trunc_uuid }} -{% else %} #app: uperf-bench-server-{{ node_idx_item }}-{{ item }}-{{ trunc_uuid }} app: uperf-bench-server-{{ worker_node_list[node_idx_item] }}-{{ 
item }}-{{ trunc_uuid }} -{% endif %} type: uperf-bench-server-{{ trunc_uuid }} annotations: {% if workload_args.multus.enabled is sameas true %} k8s.v1.cni.cncf.io/networks: {{ workload_args.multus.server}} {% endif %} -{% if workload_args.node_range is defined %} node_idx: '{{ node_idx_item }}' pod_idx: '{{ item }}' -{% endif %} spec: {% if workload_args.runtime_class is defined %} runtimeClassName: "{{ workload_args.runtime_class }}" @@ -60,11 +50,9 @@ items: # # V2 pin server pod to node # -{% if workload_args.node_range is defined %} nodeSelector: #kubernetes.io/hostname: '{{ node_idx_item }}' kubernetes.io/hostname: '{{ worker_node_list[node_idx_item] }}' -{% endif %} {% if workload_args.serviceip is sameas true %} securityContext: @@ -75,15 +63,9 @@ items: {% macro metadata() %}{% include "metadata.yml.j2" %}{% endmacro %} {{ metadata()|indent }} {% endmacro %} -{% if workload_args.node_range is not defined %} -{% for item in range(pod_sequence|int) %} -{{ job_template(item) }} -{% endfor %} -{% else %} {% for node_idx_item in range(node_sequence|int) %} {% for item in range(pod_sequence|int) %} {{ job_template(item,node_idx_item) }} {% endfor %} {% endfor %} -{% endif %} diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 67eed3151..1fde8aa9f 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -174,11 +174,7 @@ items: fi; break; done; -{% if workload_args.node_range is not defined %} - redis-cli -h {{bo.resources[0].status.podIP}} set start false" -{% else %} " -{% endif %} volumeMounts: - name: config-volume mountPath: "/tmp/uperf-test" @@ -187,7 +183,7 @@ items: configMap: name: uperf-test-{{ trunc_uuid }} restartPolicy: OnFailure -{% if workload_args.node_range is defined %} +{% if workload_args.pin is sameas false %} {% if workload_args.colocate is sameas true %} nodeSelector: # client node same as server node diff --git a/roles/uperf/vars/main.yml 
b/roles/uperf/vars/main.yml index 09cd865f9..fe27a3bca 100644 --- a/roles/uperf/vars/main.yml +++ b/roles/uperf/vars/main.yml @@ -5,12 +5,12 @@ worker_node_list: [] group_node_count: 0 max_node_count: 0 -pod_low_idx: 0 -pod_hi_idx: 0 -node_low_idx: 0 -node_hi_idx: 0 +pod_low_idx: "0" +pod_hi_idx: "0" +node_low_idx: "0" +node_hi_idx: "0" -node_idx: 0 -pod_idx: 0 +node_idx: "0" +pod_idx: "0" all_run_done: false From d0404f12914063829217a3ec2811fc1971fde5fb Mon Sep 17 00:00:00 2001 From: Logan Blyth Date: Fri, 29 Jan 2021 10:01:26 -0500 Subject: [PATCH 24/41] merged code --- roles/uperf/templates/workload.yml.j2 | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 1fde8aa9f..6696a9dba 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -46,7 +46,7 @@ items: topologyKey: kubernetes.io/hostname containers: - name: benchmark - image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} + image: {{ workload_args.image | default('quay.io/lblyth/lblyth-uperf:latest') }} env: - name: uuid value: "{{ uuid }}" @@ -88,11 +88,11 @@ items: args: {% if workload_args.serviceip is sameas true %} - "export serviceip=true; - export h={{item.spec.clusterIP}}; + export h={{item.spec.clusterIP}}; {% else %} {% if workload_args.multus.client is defined %} - "export multus_client={{workload_args.multus.client}}; - export h={{ (item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; + export h={{ (item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; {% else %} - "export h={{item.status.podIP}}; {% endif %} @@ -103,8 +103,17 @@ items: {% if workload_args.step_size is defined %} export stepsize={{ workload_args.step_size }}; {% endif %} +{% if workload_args.step_size is defined %} + export stepsize={{ 
workload_args.step_size }}; +{% endif %} +{% if workload_args.node_range is defined %} + export node_range={{ workload_args.node_range[0] }}_{{ workload_args.node_range[1] }}; +{% endif %} +{% if workload_args.density_range is defined %} + export density_range={{ workload_args.density_range[0] }}_{{ workload_args.density_range[1] }}; +{% endif %} {% if workload_args.networkpolicy is defined %} - export networkpolicy={{workload_args.networkpolicy}}; + export networkpolicy={{workload_args.networkpolicy}}; {% endif %} export hostnet={{workload_args.hostnetwork}}; export my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; @@ -126,8 +135,8 @@ items: if [[ $my_node_idx -gt $node_limit || $my_pod_idx -gt $pod_limit ]]; then sleep 0.5; continue; fi; - - /bin/echo 'UPERF-run-context: num_node=' $((node_limit+1)) 'density=' $((pod_limit+1)) 'my_node_idx=' $my_node_idx 'my_pod_idx=' $my_pod_idx; + + echo "UPERF-run-context num_node=" $((node_limit+1)) "density=" $((pod_limit+1)) "my_node_idx=" $my_node_idx "my_pod_idx=" $my_pod_idx; node_count=$((node_limit+1)); pod_count=$((pod_limit+1)); @@ -166,7 +175,7 @@ items: if [[ $state =~ 'restart' ]]; then sleep 0.5; continue; fi; - + elif [[ $state =~ 'done' ]]; then break; else From e8fb08b86403f1bc6917108c9af832f7328d521f Mon Sep 17 00:00:00 2001 From: Logan Blyth Date: Fri, 29 Jan 2021 15:07:09 -0500 Subject: [PATCH 25/41] more variable changes --- roles/uperf/templates/workload.yml.j2 | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 6696a9dba..b1f78f475 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -107,10 +107,10 @@ items: export stepsize={{ workload_args.step_size }}; {% endif %} {% if workload_args.node_range is defined %} - export node_range={{ workload_args.node_range[0] }}_{{ workload_args.node_range[1] }}; + export node_range='{{ 
workload_args.node_range[0] }}_{{ workload_args.node_range[1] }}'; {% endif %} {% if workload_args.density_range is defined %} - export density_range={{ workload_args.density_range[0] }}_{{ workload_args.density_range[1] }}; + export density_range='{{ workload_args.density_range[0] }}_{{ workload_args.density_range[1] }}'; {% endif %} {% if workload_args.networkpolicy is defined %} export networkpolicy={{workload_args.networkpolicy}}; @@ -136,7 +136,7 @@ items: sleep 0.5; continue; fi; - echo "UPERF-run-context num_node=" $((node_limit+1)) "density=" $((pod_limit+1)) "my_node_idx=" $my_node_idx "my_pod_idx=" $my_pod_idx; + echo 'UPERF-run-context num_node=' $((node_limit+1)) 'density=' $((pod_limit+1)) 'my_node_idx=' $my_node_idx 'my_pod_idx=' $my_pod_idx; node_count=$((node_limit+1)); pod_count=$((pod_limit+1)); From 90114290cc79f8c579e9541af84aa3e857e89dab Mon Sep 17 00:00:00 2001 From: Logan Blyth Date: Fri, 29 Jan 2021 16:49:47 -0500 Subject: [PATCH 26/41] remove duplicate export --- roles/uperf/templates/workload.yml.j2 | 3 --- 1 file changed, 3 deletions(-) diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index b1f78f475..70a6588b8 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -100,9 +100,6 @@ items: {% if (workload_args.colocate is defined) %} export colocate={{ workload_args.colocate}}; {% endif %} -{% if workload_args.step_size is defined %} - export stepsize={{ workload_args.step_size }}; -{% endif %} {% if workload_args.step_size is defined %} export stepsize={{ workload_args.step_size }}; {% endif %} From 38ce7acfe2b00dc1170d3ac707e1c39ae7b6b60f Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Fri, 5 Feb 2021 13:46:08 +0000 Subject: [PATCH 27/41] Verified that VM-type works fine. Integrated "serviceip" into scale framework. Was not working until this commit. Cosmetic cleanup i.e debug TASK tags, and doublle "when" guards. 
--- roles/uperf/tasks/cleanup.yml | 75 ++++++------ roles/uperf/tasks/init.yml | 1 - roles/uperf/tasks/main.yml | 16 ++- roles/uperf/tasks/next_set.yml | 9 +- roles/uperf/tasks/run_a_set.yml | 8 +- roles/uperf/tasks/send_client_run_signal.yml | 4 +- roles/uperf/tasks/setup.yml | 25 +++- roles/uperf/tasks/start_client.yml | 115 +++++++++---------- roles/uperf/tasks/start_server.yml | 25 ++-- roles/uperf/tasks/wait_client_done.yml | 78 ++++++------- roles/uperf/tasks/wait_client_ready.yml | 91 ++++++++------- roles/uperf/tasks/wait_server_ready.yml | 18 +-- roles/uperf/tasks/wait_set_done.yml | 8 +- roles/uperf/templates/server.yml.j2 | 6 - roles/uperf/templates/service.yml.j2 | 65 +++++++---- roles/uperf/templates/workload.yml.j2 | 5 +- roles/uperf/vars/main.yml | 5 - 17 files changed, 289 insertions(+), 265 deletions(-) diff --git a/roles/uperf/tasks/cleanup.yml b/roles/uperf/tasks/cleanup.yml index 5a5dde1bc..0da5b68ec 100644 --- a/roles/uperf/tasks/cleanup.yml +++ b/roles/uperf/tasks/cleanup.yml @@ -1,45 +1,46 @@ --- - block: + ### kind + - name: Get Server Pods + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods - - block: - - name: Get Server Pods - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods + - name: Pod names - to clean + set_fact: + clean_pods: | + [ + {% for item in server_pods.resources %} + "{{ item['metadata']['name'] }}", + {% endfor %} + ] - - name: Pod names - to clean - set_fact: - clean_pods: | - [ - {% for item in server_pods.resources %} - "{{ item['metadata']['name'] }}", - {% endfor %} - ] + - name: Cleanup run + k8s: + kind: pod + api_version: v1 + namespace: '{{ operator_namespace }}' + state: absent + name: "{{ item }}" + with_items: "{{ clean_pods }}" + when: cleanup + when: resource_kind == "pod" 
- - name: Cleanup run - k8s: - kind: pod - api_version: v1 - namespace: '{{ operator_namespace }}' - state: absent - name: "{{ item }}" - with_items: "{{ clean_pods }}" - when: cleanup - when: resource_kind == "pod" +# +# no kind block - We leave VM running +# - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Complete - complete: true - - when: resource_state.resources[0].status.state == "Cleanup" +- operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Complete + complete: true diff --git a/roles/uperf/tasks/init.yml b/roles/uperf/tasks/init.yml index 82051961d..fbae05ce8 100644 --- a/roles/uperf/tasks/init.yml +++ b/roles/uperf/tasks/init.yml @@ -1,6 +1,5 @@ --- - - name: Clear start flag command: "redis-cli set start 0" diff --git a/roles/uperf/tasks/main.yml b/roles/uperf/tasks/main.yml index a12f46cc1..435a9b6b9 100644 --- a/roles/uperf/tasks/main.yml +++ b/roles/uperf/tasks/main.yml @@ -16,8 +16,9 @@ - include_tasks: wait_client_ready.yml when: resource_state.resources[0].status.state == "Waiting for Clients" - # LOOP BEGIN - # + # LOOP BEGIN + # This loop iterates density_range[] and node_range[] for "scale" mode + - include_tasks: run_a_set.yml when: resource_state.resources[0].status.state == "Clients Running" @@ -27,9 +28,10 @@ - include_tasks: next_set.yml when: resource_state.resources[0].status.state == "Run Next Set" - # will loop back to "Client Running" state, or FALLTHRU to "Running" state below and finish - # - # LOOP END + # will loop back to "Client Running" state, or FALLTHRU to "Running" + # state below and finish + + # LOOP END - include_tasks: wait_client_done.yml when: resource_state.resources[0].status.state == "Running" @@ -39,7 +41,9 @@ when: resource_kind == "pod" - +# +# kind 
does not support "scale" mode yet +# - block: diff --git a/roles/uperf/tasks/next_set.yml b/roles/uperf/tasks/next_set.yml index 3ce613df2..df5acbc9c 100644 --- a/roles/uperf/tasks/next_set.yml +++ b/roles/uperf/tasks/next_set.yml @@ -1,9 +1,11 @@ --- # -# This module logically implements RE-ENTRANT nested for loops +# This module logically implements an RE-ENTRANT nested "for" loops; +# # with_items: # range (node_low_idx, node_hi_idx) # range (pod_low_idx, pod_hi_idx) +# # Each iteration executes one item, and each re-entrance # continues where it left off. # @@ -95,5 +97,8 @@ when: all_run_done == False - when: resource_state.resources[0].status.state == "Run Next Set" + when: resource_kind == "pod" +# +# No block - Scale mode support is N/A +# diff --git a/roles/uperf/tasks/run_a_set.yml b/roles/uperf/tasks/run_a_set.yml index 6620505f1..1ec4d9260 100644 --- a/roles/uperf/tasks/run_a_set.yml +++ b/roles/uperf/tasks/run_a_set.yml @@ -5,7 +5,7 @@ # Entry Condition: # 1. A previous task has set 'node_idx' and 'pod_idx' in benchmark ctx # 2. 
All cliest are polling redis for 'start-node_idx-pod_idx' to start - # Output: Clients with node_idx <= redis node_idx && pod_idx <= redis pod_ix + # Output: Clients with node_idx <= redis node_idx && pod_idx <= redis pod_ix will run # - name: Signal group to run @@ -19,6 +19,8 @@ namespace: "{{ operator_namespace }}" status: state: Set Running + when: resource_kind == "pod" - when: resource_state.resources[0].status.state == "Clients Running" - +# +# No kind - It has not been adapted to Scale mode +# diff --git a/roles/uperf/tasks/send_client_run_signal.yml b/roles/uperf/tasks/send_client_run_signal.yml index 043d4c5cd..52fd92a97 100644 --- a/roles/uperf/tasks/send_client_run_signal.yml +++ b/roles/uperf/tasks/send_client_run_signal.yml @@ -1,8 +1,8 @@ --- - +# This module is invoked by VM-kind only - block: - - name: A25 Signal workload + - name: Signal workload command: "redis-cli set start true" - name: A26 Update resource state diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 5f1d3afa7..be74b0af1 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -67,7 +67,7 @@ set_fact: worker_node_exclude_list: "{{ exclude_node_list | json_query('results[].resources[].metadata.name') }}" - - name: Exclude labelled nodes + - name: Exclude labeled nodes set_fact: worker_node_list: "{{ worker_node_list | difference(worker_node_exclude_list) }}" when: workload_args.exclude_labels is defined and workload_args.exclude_labels | length > 0 @@ -81,20 +81,33 @@ pod_hi_idx: "{{ workload_args.density_range[1] | default('1')|int - 1 }}" node_low_idx: "{{ workload_args.node_range[0] | default('1')|int - 1 }}" node_hi_idx: "{{ workload_args.node_range[1] | default('1')|int - 1 }}" - + # + # Next sanity check and massage the indices if necessary. + # We shall complete gracefully and not iterate wildly. 
+ # - name: Adjust node_hi_idx if cluster has less nodes set_fact: node_hi_idx: "{{ worker_node_list|length| default('0')|int -1 }}" when: "node_hi_idx|int >= worker_node_list|length| default('0')|int " + - name: Adjust node_low_idx if necessary + set_fact: + node_low_idx: "{{node_hi_idx|int}}" + when: "node_low_idx|int > node_hi_idx|int" + + - name: Adjust pod_low_idx if necessary + set_fact: + pod_low_idx: "{{pod_hi_idx|int}}" + when: "pod_low_idx|int > pod_hi_idx|int" + - name: Record num server pods using new worker_node_list # in Scale mode, num server pods = num_node * number_pod set_fact: num_server_pods: "{{ (node_hi_idx|int+1) * (pod_hi_idx|int+1) }}" - # - # End scle mode - # + # + # End scale mode + # when: workload_args.pin | default(False) == False - block: @@ -119,7 +132,7 @@ node_low_idx: "0" node_hi_idx: "0" - - name: A8 V2 Record num Pin server pods using new worker_node_list + - name: Record num Pin server pods using new worker_node_list set_fact: # in Pin mode, num server pods = number of pods num_server_pods: "{{ pod_hi_idx|int +1 }}" diff --git a/roles/uperf/tasks/start_client.yml b/roles/uperf/tasks/start_client.yml index 41bd9068a..222621277 100644 --- a/roles/uperf/tasks/start_client.yml +++ b/roles/uperf/tasks/start_client.yml @@ -1,76 +1,73 @@ --- -- block: +- name: Get pod info + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_pods -### kind - - name: A17 Get pod info - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_pods +- name: Generate uperf xml files + k8s: + definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml }}" - - name: A18 Generate uperf xml files +- block: + ### kind + - name: Start Client(s) w/o serviceIP k8s: - definition: "{{ lookup('template', 'configmap.yml.j2') | from_yaml 
}}" + definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" + vars: + resource_item: "{{ server_pods.resources }}" + when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 - - block: # Starting Clients" - - name: P19 Start Client(s) w/o serviceIP - k8s: - definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - vars: - resource_item: "{{ server_pods.resources }}" - when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 + # + # Each server annotates a "node_idx". Each peer client will + # derive its affinity according the 'colocate' variable. + # - # - # Each server annotates a "node_idx". Each peer client will - # derive its affinity according the 'colocate' variable. - # - - - name: P20 Start Client(s) with serviceIP - k8s: - definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" - vars: - resource_item: "{{ serviceip.resources }}" - when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 + - name: Start Client(s) with serviceIP + k8s: + definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" + vars: + resource_item: "{{ serviceip.resources }}" + when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 - when: resource_kind == "pod" + when: resource_kind == "pod" -### kind - - block: +- block: + ### kind - - name: V19 Wait for vms to be running.... - k8s_facts: - kind: VirtualMachineInstance - api_version: kubevirt.io/v1alpha3 - namespace: '{{ operator_namespace }}' - label_selectors: - - type = uperf-bench-server-{{ trunc_uuid }} - register: server_vms + - name: Wait for vms to be running.... 
+ k8s_facts: + kind: VirtualMachineInstance + api_version: kubevirt.io/v1alpha3 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_vms - - name: V20 Generate uperf test files - k8s: - definition: "{{ lookup('template', 'configmap_script.yml.j2') | from_yaml }}" - with_items: "{{ server_vms.resources }}" + - name: Generate uperf test files + k8s: + definition: "{{ lookup('template', 'configmap_script.yml.j2') | from_yaml }}" + with_items: "{{ server_vms.resources }}" - - name: V21 Start Client(s) - k8s: - definition: "{{ lookup('template', 'workload_vm.yml.j2') | from_yaml }}" - with_items: "{{ server_vms.resources }}" - when: server_vms.resources|length > 0 + - name: Start Client(s) + k8s: + definition: "{{ lookup('template', 'workload_vm.yml.j2') | from_yaml }}" + with_items: "{{ server_vms.resources }}" + when: server_vms.resources|length > 0 - when: resource_kind == "vm" + when: resource_kind == "vm" - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Waiting for Clients +- operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Waiting for Clients - when: resource_state.resources[0].status.state == "Starting Clients" diff --git a/roles/uperf/tasks/start_server.yml b/roles/uperf/tasks/start_server.yml index f693d7a3e..743faa176 100644 --- a/roles/uperf/tasks/start_server.yml +++ b/roles/uperf/tasks/start_server.yml @@ -1,13 +1,16 @@ --- - block: - + ### kind - include_tasks: init.yml - name: Create service for server pods k8s: definition: "{{ lookup('template', 'service.yml.j2') | from_yaml }}" - with_sequence: start=0 count={{ workload_args.pair | default('1')|int }} + vars: + pod_sequence: "{{ pod_hi_idx|int +1 }}" + node_sequence: 
"{{ node_hi_idx|int +1 }}" + when: workload_args.serviceip is defined and workload_args.serviceip - name: Start Server(s) - total = eligible nodes * density @@ -39,20 +42,17 @@ status: state: "Starting Servers" - when: resource_state.resources[0].status.state == "Building" and resource_kind == "pod" + when: resource_kind == "pod" -########### -# VM remains scale agnostic for now -########### - block: - - - name: V11 Start Server(s) + ### - + when: resource_kind == "vm" diff --git a/roles/uperf/tasks/wait_client_done.yml b/roles/uperf/tasks/wait_client_done.yml index 4f77c73e8..85036a28b 100644 --- a/roles/uperf/tasks/wait_client_done.yml +++ b/roles/uperf/tasks/wait_client_done.yml @@ -1,46 +1,40 @@ --- - - block: + ### kind + - name: Waiting for pods to complete.... + k8s_facts: + kind: pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - app = uperf-bench-client-{{ trunc_uuid }} + register: client_pods + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Cleanup + complete: false + when: "num_server_pods|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" + when: resource_kind == "pod" - #### kind - - block: - - name: Waiting for pods to complete.... 
- k8s_facts: - kind: pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - app = uperf-bench-client-{{ trunc_uuid }} - register: client_pods - - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Cleanup - complete: false - when: "num_server_pods|int == (client_pods|json_query('resources[].status[]')|selectattr('phase','match','Succeeded')|list|length)" - when: resource_kind == "pod" - - #### kind - - block: - - - name: V28 get complete - command: "redis-cli get complete" - register: complete_status - - - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Cleanup - complete: false - when: complete_status.stdout == "true" - when: resource_kind == "vm" - - when: resource_state.resources[0].status.state == "Running" +- block: + ### kind + - name: get complete + command: "redis-cli get complete" + register: complete_status + + - operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Cleanup + complete: false + when: complete_status.stdout == "true" + when: resource_kind == "vm" diff --git a/roles/uperf/tasks/wait_client_ready.yml b/roles/uperf/tasks/wait_client_ready.yml index 319abef7f..c6455fea7 100644 --- a/roles/uperf/tasks/wait_client_ready.yml +++ b/roles/uperf/tasks/wait_client_ready.yml @@ -1,49 +1,48 @@ --- + +- block: + ### kind + + - name: Get client pod status + k8s_facts: + kind: Pod + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - app = uperf-bench-client-{{ trunc_uuid }} + register: client_pods + + - name: Update resource state + operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + 
kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Clients Running + when: "num_server_pods|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (client_pods | json_query('resources[].status.podIP')|length)" + + when: resource_kind == "pod" + - block: - ##### kind - - block: - - name: Get client pod status - k8s_facts: - kind: Pod - api_version: v1 - namespace: '{{ operator_namespace }}' - label_selectors: - - app = uperf-bench-client-{{ trunc_uuid }} - register: client_pods - - - name: Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Clients Running - when: "num_server_pods|int == client_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and num_server_pods|int == (client_pods | json_query('resources[].status.podIP')|length)" - - when: resource_kind == "pod" - - - block: - ##### kind - - - name: V22 set complete to false - command: "redis-cli set complete false" - - - name: Get count of clients ready - command: "redis-cli get clients-{{ trunc_uuid }}" - register: clients_ready_count - - - name: V24 Update resource state - operator_sdk.util.k8s_status: - api_version: ripsaw.cloudbulldozer.io/v1alpha1 - kind: Benchmark - name: "{{ meta.name }}" - namespace: "{{ operator_namespace }}" - status: - state: Clients Running - when: "workload_args.pair|default('1')|int == clients_ready_count.stdout|int" - - when: resource_kind == "vm" - - when: resource_state.resources[0].status.state == "Waiting for Clients" + ### kind + + - name: set complete to false + command: "redis-cli set complete false" + + - name: Get count of clients ready + command: "redis-cli get clients-{{ trunc_uuid }}" + register: clients_ready_count + + - name: Update resource state + 
operator_sdk.util.k8s_status: + api_version: ripsaw.cloudbulldozer.io/v1alpha1 + kind: Benchmark + name: "{{ meta.name }}" + namespace: "{{ operator_namespace }}" + status: + state: Clients Running + when: "workload_args.pair|default('1')|int == clients_ready_count.stdout|int" + + when: resource_kind == "vm" diff --git a/roles/uperf/tasks/wait_server_ready.yml b/roles/uperf/tasks/wait_server_ready.yml index f5b07d095..195125e97 100644 --- a/roles/uperf/tasks/wait_server_ready.yml +++ b/roles/uperf/tasks/wait_server_ready.yml @@ -1,8 +1,8 @@ --- - block: - ########### kind + ### kind - - name: P13 Get server pods + - name: Get server pods k8s_facts: kind: Pod api_version: v1 @@ -19,14 +19,14 @@ namespace: "{{ operator_namespace }}" status: state: "Starting Clients" - when: "num_server_pods|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" + when: "num_server_pods|int == server_pods | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length" - when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "pod" + when: resource_kind == "pod" - block: - ######## kind + ### kind - - name: V14 Wait for vms to be running.... + - name: Wait for vms to be running.... 
k8s_facts: kind: VirtualMachineInstance api_version: kubevirt.io/v1alpha3 @@ -35,7 +35,7 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_vms - - name: V15 Update resource state + - name: Update resource state operator_sdk.util.k8s_status: api_version: ripsaw.cloudbulldozer.io/v1alpha1 kind: Benchmark @@ -45,11 +45,11 @@ state: "Starting Clients" when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - - name: V16 blocking client from running uperf + - name: blocking client from running uperf command: "redis-cli set start false" with_items: "{{ server_vms.resources }}" when: "workload_args.pair|default('1')|int == server_vms | json_query('resources[].status[]')|selectattr('phase','match','Running')|list|length and workload_args.pair|default('1')|int == (server_vms | json_query('resources[].status.interfaces[0].ipAddress')|length)" - when: resource_state.resources[0].status.state == "Starting Servers" and resource_kind == "vm" + when: resource_kind == "vm" diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml index 72097690c..5c620b017 100644 --- a/roles/uperf/tasks/wait_set_done.yml +++ b/roles/uperf/tasks/wait_set_done.yml @@ -2,6 +2,7 @@ - block: - block: + ### kind - name: read pod completion count command: "redis-cli get num_completion" register: num_completion @@ -18,5 +19,10 @@ when: resource_kind == "pod" - when: resource_state.resources[0].status.state == "Set Running" + ### no kind block - Run a "set" is not yet supported + when: resource_kind == "pod" + +# +# No kind block - It has not been adapted to scale mode yet. 
+# diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 754e47dc4..f11ff45a1 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -15,7 +15,6 @@ items: template: metadata: labels: - #app: uperf-bench-server-{{ node_idx_item }}-{{ item }}-{{ trunc_uuid }} app: uperf-bench-server-{{ worker_node_list[node_idx_item] }}-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: @@ -46,12 +45,7 @@ items: nodeSelector: kubernetes.io/hostname: '{{ workload_args.pin_server }}' {% endif %} - -# -# V2 pin server pod to node -# nodeSelector: - #kubernetes.io/hostname: '{{ node_idx_item }}' kubernetes.io/hostname: '{{ worker_node_list[node_idx_item] }}' {% if workload_args.serviceip is sameas true %} diff --git a/roles/uperf/templates/service.yml.j2 b/roles/uperf/templates/service.yml.j2 index 89177dad0..59cde767e 100644 --- a/roles/uperf/templates/service.yml.j2 +++ b/roles/uperf/templates/service.yml.j2 @@ -1,27 +1,46 @@ --- -kind: Service apiVersion: v1 -metadata: - name: uperf-service-{{ item }}-{{ trunc_uuid }} - namespace: '{{ operator_namespace }}' - labels: - app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} - type: uperf-bench-server-{{ trunc_uuid }} -spec: - selector: - app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} - ports: - - name: uperf - port: 20000 - targetPort: 20000 - protocol: TCP +kind: List +metadata: {} +items: +{% macro job_template(item, node_idx_item='') %} + - kind: Service + apiVersion: v1 + metadata: + #name: uperf-service-{{ item }}-{{ trunc_uuid }} + name: uperf-service-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }} + + namespace: '{{ operator_namespace }}' + labels: + #app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }} + type: uperf-bench-server-{{ trunc_uuid }} + annotations: + node_idx: '{{ node_idx_item }}' + pod_idx: '{{ 
item }}' + spec: + selector: + #app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }} + ports: + - name: uperf + port: 20000 + targetPort: 20000 + protocol: TCP {% for num in range(20001,20012,1) %} - - name: uperf-control-tcp-{{num}} - port: {{num}} - targetPort: {{num}} - protocol: TCP - - name: uperf-control-udp-{{num}} - port: {{num}} - targetPort: {{num}} - protocol: UDP + - name: uperf-control-tcp-{{num}} + port: {{num}} + targetPort: {{num}} + protocol: TCP + - name: uperf-control-udp-{{num}} + port: {{num}} + targetPort: {{num}} + protocol: UDP {% endfor %} +{% endmacro %} +{% for node_idx_item in range(node_sequence|int) %} +{% for item in range(pod_sequence|int) %} +{{ job_template(item,node_idx_item) }} +{% endfor %} +{% endfor %} + diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 70a6588b8..36889ffaf 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -117,8 +117,8 @@ items: export my_pod_idx={{ (item['metadata']['annotations']['pod_idx'] | from_json) }}; export ips=$(hostname -I); export num_pairs={{workload_args.pair}}; - export node_count=0 - export pod_count=0 + export node_count=0; + export pod_count=0; node_limit=0; pod_limit=0; STR=''; @@ -128,7 +128,6 @@ items: if [[ $state =~ 'true' ]]; then node_limit=$(echo $STR | cut -f2 -d-); pod_limit=$(echo $STR | cut -f3 -d-); - echo 'state=' $state 'node=' $node_limit 'pod=' $pod_limit; if [[ $my_node_idx -gt $node_limit || $my_pod_idx -gt $pod_limit ]]; then sleep 0.5; continue; fi; diff --git a/roles/uperf/vars/main.yml b/roles/uperf/vars/main.yml index fe27a3bca..aaee090cd 100644 --- a/roles/uperf/vars/main.yml +++ b/roles/uperf/vars/main.yml @@ -2,15 +2,10 @@ # vars file for bench cleanup: true worker_node_list: [] -group_node_count: 0 -max_node_count: 0 - pod_low_idx: "0" pod_hi_idx: "0" node_low_idx: "0" 
node_hi_idx: "0" - node_idx: "0" pod_idx: "0" - all_run_done: false From b6d345d12549f5501e34e2e1fc37220d5afd835d Mon Sep 17 00:00:00 2001 From: hnhan Date: Mon, 8 Feb 2021 15:16:35 -0500 Subject: [PATCH 28/41] Update uperf.md --- docs/uperf.md | 58 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/docs/uperf.md b/docs/uperf.md index 3d05dbc55..4bef880fc 100644 --- a/docs/uperf.md +++ b/docs/uperf.md @@ -44,7 +44,6 @@ spec: multus: enabled: false samples: 1 - pair: 1 test_types: - stream protos: @@ -54,6 +53,10 @@ spec: nthrs: - 1 runtime: 30 + colocate: false + density_range: [low, high] + node_range: [low, high] + step_size: addN, log2 ``` `client_resources` and `server_resources` will create uperf client's and server's containers with the given k8s compute resources respectively [k8s resources](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/) @@ -82,7 +85,7 @@ spec: ```yaml samples: 3 - pair: 1 + density: [1,1] test_types: - stream protos: @@ -108,7 +111,7 @@ size. For example: ```yaml samples: 3 - pair: 1 + density: [1,1] test_types: - rr protos: @@ -189,6 +192,55 @@ To enable Multus in Ripsaw, here is the relevant config. ... ``` +### Scale +Scale in this context refers to the ability to enumerate UPERF +client-server pairs during test in a control fashion using the following knobs. + +`colocate: true` will place each client and server pod pair on the same node. + +`density_range` to specify the range of client-server pairs that the test will iterate. + +`node_range` to specify the range of nodes that the test will iterate. + +`step_size` to specify the incrementing method. + +Here is one scale example: + +``` + ... + pin: false + colocate: false + density_range: [1,10] + node_range: [1,128] + step_size: log2 + ... +``` +Note, the `scale` mode is mutual exlusive to `pin` mode with the `pin` mode has higher precedence. 
+In other words, if `pin:true` the test will deploy pods on `pin_server` and `pin_client` nodes +and ignore `colocate`, `node_range`, and the number of pairs to deploy is specified by the + `density_range.high` value. + +In the above sample, the `scale` mode will be activated since `pin: false`. In the first phase, the +pod instantion phase, the system gathers node inventory and may reduce the `node_range.high` value +to match the number of worker node available in the cluster. + +According to `node_range: [1,128]`, and `density_range:[1,10]`, the system will instantiate 10 pairs on +each of 128 nodes. Each pair has a node_idx and a pod_idx that are used later to control +which one and when they should run the UPERF workload, After all pairs are up and ready, +next comes the test execution phase. + +The scale mode iterates the test as a double nested loop as follows: +``` + for node with node_idx less-or-equal node_range(low, high. step_size): + for pod with pod_idx less-or-equal density_range(low, high, step_size): + run uperf +``` +Hence, with the above params, the first iteration runs the pair with node_idx/pod_idx of {1,1}. After the first +run has completed, the second interation runs 2 pairs of {1,1} and {1,2} and so on. + +The valid `step_size` methods are: addN and log2. `N` can be any integer and `log2` will double the value at each iteration i.e. 1,2,4,8,16 ... +By choosing the appropriate values for `density_range` and `node_range`, the user can generate most if not all +combinations of UPERF data points to exercise datapath performance from many angles. Once done creating/editing the resource file, you can run it by: From 6b27d47dcdb15fc3349d104b8eb08598c79434c6 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Tue, 9 Feb 2021 12:47:23 -0500 Subject: [PATCH 29/41] 1 Address review comments. a. Keep 'pair' for pin mode b. Remove custom image URLs 2. 
Rebase to upstream --- resources/crds/ripsaw_v1alpha1_uperf_cr.yaml | 12 ++++++------ resources/operator.yaml | 4 ++-- roles/uperf/tasks/setup.yml | 15 ++++++++++++++- roles/uperf/templates/workload.yml.j2 | 5 +++-- 4 files changed, 25 insertions(+), 11 deletions(-) diff --git a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml index 8e3182195..8d44d1a88 100644 --- a/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml +++ b/resources/crds/ripsaw_v1alpha1_uperf_cr.yaml @@ -20,7 +20,7 @@ spec: pin: false # # pin: true/false - default=false - # - true will run 'Pin' mode using 1 pin_server and 1 pin_client nodes. + # - true will run 'Pin' mode using 1 server (pin_server:) and 1 client (pin_clien:) nodes. # - false will run 'Scale' mode. See colocate, density_range, node_range and step_size. pin_server: "node-0" pin_client: "node-1" @@ -28,9 +28,9 @@ spec: kind: pod pair: 1 # - # 'pair' is obsolete. Instead, use 'density_range' which allows fixed or enumerated - # number of pairs. Enumeration is an enhanced capability of the 'Pin' mode. - # + # 'pair' sepcifies fixed number of client-server pairs for "Pin" mode, + # If 'pair' is NOT present, it will use 'density_range' which allows + # enumeration in addition to fixed number of pair. test_types: - stream protos: @@ -43,7 +43,7 @@ spec: # The following variables are for 'Scale' mode. # The 'Scale' mode is activated when 'pin=false' or undefined. - # The Scale mode op params are: colocate, denstisy_range, node_range and step_size. + # The Scale mode params are: colocate, denstisy_range, node_range and step_size. # # colocate: true/false - default=false # density_range: [n, m] - default=[1,1] @@ -57,7 +57,7 @@ spec: # add10: 1,11,21,31 ... # log2: 1,2,4,8,16,32 ,,, # - # The 'exlude_labels' is the list of ineligible worker nodes. + # 'exclude_labels' specifies the list of ineligible worker nodes. 
# exclude_labels: (OR conditional, every node that matches any of these labels is excluded) # - "bad=true" # - "fc640=true" diff --git a/resources/operator.yaml b/resources/operator.yaml index 5df666dd5..dd4e0291a 100644 --- a/resources/operator.yaml +++ b/resources/operator.yaml @@ -34,14 +34,14 @@ spec: - /usr/local/bin/ao-logs - /tmp/ansible-operator/runner - stdout - image: quay.io/hnhan/benchmark-operator:uperf-scale + image: quay.io/benchmark-operator/benchmark-operator:master imagePullPolicy: "Always" volumeMounts: - mountPath: /tmp/ansible-operator/runner name: runner readOnly: true - name: benchmark-operator - image: quay.io/hnhan/benchmark-operator:uperf-scale + image: quay.io/benchmark-operator/benchmark-operator:master imagePullPolicy: Always env: - name: WATCH_NAMESPACE diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index be74b0af1..7fad55290 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -123,14 +123,27 @@ with_items: - '{{workload_args.pin_server}}' - '{{workload_args.pin_client}}' + # + # In 'Pin' mode, 'pair' specifies number of pairs (classic behavior), If 'pair' + # is undefined use 'density_range' (new bahavior with "Scale" enhancement) + # + - name: Init "Pin" mode indices using 'pair' + set_fact: + pod_low_idx: "{{ workload_args.pair | default('1')|int - 1 }}" + pod_hi_idx: "{{ workload_args.pair | default('1')|int - 1 }}" + # node indices are used as client pod 'start' parameter. + node_low_idx: "0" + node_hi_idx: "0" + when: workload_args.pair is defined - - name: Init "Pin" mode node and pod indices + - name: Init "Pin" mode indices using 'density_range' set_fact: pod_low_idx: "{{ workload_args.density_range[0] | default('1')|int - 1 }}" pod_hi_idx: "{{ workload_args.density_range[1] | default('1')|int - 1 }}" # node indices are used as client pod 'start' parameter. 
node_low_idx: "0" node_hi_idx: "0" + when: workload_args.pair is not defined - name: Record num Pin server pods using new worker_node_list set_fact: diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 36889ffaf..7437cb24f 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -46,7 +46,7 @@ items: topologyKey: kubernetes.io/hostname containers: - name: benchmark - image: {{ workload_args.image | default('quay.io/lblyth/lblyth-uperf:latest') }} + image: {{ workload_args.image | default('quay.io/cloud-bulldozer/uperf:latest') }} env: - name: uuid value: "{{ uuid }}" @@ -116,7 +116,7 @@ items: export my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; export my_pod_idx={{ (item['metadata']['annotations']['pod_idx'] | from_json) }}; export ips=$(hostname -I); - export num_pairs={{workload_args.pair}}; + export num_pairs=1 export node_count=0; export pod_count=0; node_limit=0; @@ -135,6 +135,7 @@ items: echo 'UPERF-run-context num_node=' $((node_limit+1)) 'density=' $((pod_limit+1)) 'my_node_idx=' $my_node_idx 'my_pod_idx=' $my_pod_idx; node_count=$((node_limit+1)); pod_count=$((pod_limit+1)); + num_pairs=$((pod_limit+1)); {% for test in workload_args.test_types %} {% for proto in workload_args.protos %} From e9c5e07e9c24c4345ccc25686d72f26eb01c17ef Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Wed, 10 Feb 2021 03:15:53 +0000 Subject: [PATCH 30/41] Truncate label length to pass CI --- roles/uperf/templates/server.yml.j2 | 2 +- roles/uperf/templates/service.yml.j2 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index f11ff45a1..77716dcde 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -15,7 +15,7 @@ items: template: metadata: labels: - app: uperf-bench-server-{{ worker_node_list[node_idx_item] }}-{{ item }}-{{ 
trunc_uuid }} + app: uperf-bench-server-{{ worker_node_list[node_idx_item] | truncate(16,true,'') }}-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: {% if workload_args.multus.enabled is sameas true %} diff --git a/roles/uperf/templates/service.yml.j2 b/roles/uperf/templates/service.yml.j2 index 59cde767e..85b77e7a9 100644 --- a/roles/uperf/templates/service.yml.j2 +++ b/roles/uperf/templates/service.yml.j2 @@ -13,7 +13,7 @@ items: namespace: '{{ operator_namespace }}' labels: #app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} - app: uperf-bench-server-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(16,true,'') }}-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: node_idx: '{{ node_idx_item }}' @@ -21,7 +21,7 @@ items: spec: selector: #app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} - app: uperf-bench-server-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(16,true,'')}}-{{ item }}-{{ trunc_uuid }} ports: - name: uperf port: 20000 From f2b5ba0b9ac668bdd5946725d2cd191c0eabc5b9 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Wed, 10 Feb 2021 23:48:06 +0000 Subject: [PATCH 31/41] More name and lalel truncationc to keep within k8s API 63-chars max --- roles/uperf/templates/server.yml.j2 | 4 ++-- roles/uperf/templates/service.yml.j2 | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 77716dcde..669ad572f 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -7,7 +7,7 @@ items: - kind: Job apiVersion: batch/v1 metadata: - name: 'uperf-server-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }}' + name: 'uperf-server-{{worker_node_list[ node_idx_item ] | 
truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }}' namespace: "{{ operator_namespace }}" spec: ttlSecondsAfterFinished: 600 @@ -15,7 +15,7 @@ items: template: metadata: labels: - app: uperf-bench-server-{{ worker_node_list[node_idx_item] | truncate(16,true,'') }}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{ worker_node_list[node_idx_item] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: {% if workload_args.multus.enabled is sameas true %} diff --git a/roles/uperf/templates/service.yml.j2 b/roles/uperf/templates/service.yml.j2 index 85b77e7a9..9baec672d 100644 --- a/roles/uperf/templates/service.yml.j2 +++ b/roles/uperf/templates/service.yml.j2 @@ -8,12 +8,12 @@ items: apiVersion: v1 metadata: #name: uperf-service-{{ item }}-{{ trunc_uuid }} - name: uperf-service-{{worker_node_list[ node_idx_item ] }}-{{ item }}-{{ trunc_uuid }} + name: uperf-service-{{worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} namespace: '{{ operator_namespace }}' labels: #app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} - app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(16,true,'') }}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: node_idx: '{{ node_idx_item }}' @@ -21,7 +21,7 @@ items: spec: selector: #app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} - app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(16,true,'')}}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'')}}-{{ item }}-{{ trunc_uuid }} ports: - name: uperf port: 20000 From 23eda0b74391226eb947c6b068ed2c115bc38f50 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Fri, 12 Feb 2021 20:27:38 +0000 Subject: [PATCH 32/41] Address PR review #3: reduce clusterrole's scope, 
and several cosmetics --- deploy/25_role.yaml | 9 +++++---- roles/uperf/tasks/start_client.yml | 4 ++-- roles/uperf/templates/service.yml.j2 | 3 --- roles/uperf/templates/workload.yml.j2 | 14 +++++++------- 4 files changed, 14 insertions(+), 16 deletions(-) diff --git a/deploy/25_role.yaml b/deploy/25_role.yaml index 62b2971ab..eb9e77c48 100644 --- a/deploy/25_role.yaml +++ b/deploy/25_role.yaml @@ -1,13 +1,14 @@ apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: - creationTimestamp: null name: benchmark-operator rules: - apiGroups: - - "*" + - "" resources: - - "*" + - nodes verbs: - - '*' + - get + - list + - patch diff --git a/roles/uperf/tasks/start_client.yml b/roles/uperf/tasks/start_client.yml index 222621277..9d6ec9ab9 100644 --- a/roles/uperf/tasks/start_client.yml +++ b/roles/uperf/tasks/start_client.yml @@ -20,7 +20,7 @@ definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" vars: resource_item: "{{ server_pods.resources }}" - when: workload_args.serviceip is defined and not workload_args.serviceip|default('false') and server_pods.resources|length > 0 + when: workload_args.serviceip|default(False) == False and server_pods.resources|length > 0 # # Each server annotates a "node_idx". 
Each peer client will @@ -32,7 +32,7 @@ definition: "{{ lookup('template', 'workload.yml.j2') | from_yaml }}" vars: resource_item: "{{ serviceip.resources }}" - when: workload_args.serviceip is defined and workload_args.serviceip and serviceip.resources|length > 0 + when: workload_args.serviceip|default(False) == True and serviceip.resources|length > 0 when: resource_kind == "pod" diff --git a/roles/uperf/templates/service.yml.j2 b/roles/uperf/templates/service.yml.j2 index 9baec672d..0a7edb7b7 100644 --- a/roles/uperf/templates/service.yml.j2 +++ b/roles/uperf/templates/service.yml.j2 @@ -7,12 +7,10 @@ items: - kind: Service apiVersion: v1 metadata: - #name: uperf-service-{{ item }}-{{ trunc_uuid }} name: uperf-service-{{worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} namespace: '{{ operator_namespace }}' labels: - #app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: @@ -20,7 +18,6 @@ items: pod_idx: '{{ item }}' spec: selector: - #app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'')}}-{{ item }}-{{ trunc_uuid }} ports: - name: uperf diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 7437cb24f..8791b6360 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -88,11 +88,11 @@ items: args: {% if workload_args.serviceip is sameas true %} - "export serviceip=true; - export h={{item.spec.clusterIP}}; + export h={{item.spec.clusterIP}}; {% else %} {% if workload_args.multus.client is defined %} - "export multus_client={{workload_args.multus.client}}; - export h={{ (item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; + export h={{ 
(item['metadata']['annotations']['k8s.v1.cni.cncf.io/networks-status'] | from_json)[1]['ips'][0] }}; {% else %} - "export h={{item.status.podIP}}; {% endif %} @@ -101,16 +101,16 @@ items: export colocate={{ workload_args.colocate}}; {% endif %} {% if workload_args.step_size is defined %} - export stepsize={{ workload_args.step_size }}; + export stepsize={{ workload_args.step_size }}; {% endif %} {% if workload_args.node_range is defined %} - export node_range='{{ workload_args.node_range[0] }}_{{ workload_args.node_range[1] }}'; + export node_range='{{ workload_args.node_range[0] }}_{{ workload_args.node_range[1] }}'; {% endif %} {% if workload_args.density_range is defined %} - export density_range='{{ workload_args.density_range[0] }}_{{ workload_args.density_range[1] }}'; + export density_range='{{ workload_args.density_range[0] }}_{{ workload_args.density_range[1] }}'; {% endif %} {% if workload_args.networkpolicy is defined %} - export networkpolicy={{workload_args.networkpolicy}}; + export networkpolicy={{workload_args.networkpolicy}}; {% endif %} export hostnet={{workload_args.hostnetwork}}; export my_node_idx={{ (item['metadata']['annotations']['node_idx'] | from_json) }}; @@ -196,7 +196,7 @@ items: kubernetes.io/hostname: "{{ worker_node_list[item['metadata']['annotations']['node_idx'] | from_json] }}" {% else %} nodeSelector: - # skew client node one position left in the woker_node_list + # skew client node one position to the right in the worker_node_list kubernetes.io/hostname: "{{ worker_node_list[ (1+(item['metadata']['annotations']['node_idx'] | from_json)) % (worker_node_list|length)] }}" {% endif %} From 6a07358142235960bb381de1b33b2b7877809376 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Tue, 16 Feb 2021 15:55:23 +0000 Subject: [PATCH 33/41] Tried fix a CI uncovered issue by using worker node's label instead of Name. 
--- roles/uperf/tasks/setup.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/roles/uperf/tasks/setup.yml b/roles/uperf/tasks/setup.yml index 7fad55290..86ca12e9e 100644 --- a/roles/uperf/tasks/setup.yml +++ b/roles/uperf/tasks/setup.yml @@ -51,8 +51,8 @@ - name: Isolate Worker Role Hostnames set_fact: - worker_node_list: "{{ node_list | json_query('resources[].metadata.name') | list }}" - + worker_node_list: "{{ node_list | json_query('resources[].metadata.labels.\"kubernetes.io/hostname\"') | list }}" + - name: List Nodes Labeled with {{ workload_args.exclude_label }} k8s_info: api_version: v1 From 842c50bd31108a13e542764d179586563b9f6cd3 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 25 Feb 2021 15:30:45 +0000 Subject: [PATCH 34/41] Reverse a server pod label, "app: xxx" to before scale enhancement work for CI. --- roles/uperf/templates/server.yml.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 669ad572f..2bf8d813c 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -15,7 +15,7 @@ items: template: metadata: labels: - app: uperf-bench-server-{{ worker_node_list[node_idx_item] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: {% if workload_args.multus.enabled is sameas true %} From a5392fa498f7b744861b618eedbba4611f80050e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ra=C3=BAl=20Sevilla?= Date: Sat, 27 Feb 2021 19:33:17 +0100 Subject: [PATCH 35/41] Update service selector to match the uperf pods server label --- roles/uperf/templates/server.yml.j2 | 2 +- roles/uperf/templates/service.yml.j2 | 12 +++++------- tests/test_uperf.sh | 4 ++-- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/roles/uperf/templates/server.yml.j2 b/roles/uperf/templates/server.yml.j2 index 
2bf8d813c..669ad572f 100644 --- a/roles/uperf/templates/server.yml.j2 +++ b/roles/uperf/templates/server.yml.j2 @@ -15,7 +15,7 @@ items: template: metadata: labels: - app: uperf-bench-server-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{ worker_node_list[node_idx_item] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: {% if workload_args.multus.enabled is sameas true %} diff --git a/roles/uperf/templates/service.yml.j2 b/roles/uperf/templates/service.yml.j2 index 0a7edb7b7..5b787858a 100644 --- a/roles/uperf/templates/service.yml.j2 +++ b/roles/uperf/templates/service.yml.j2 @@ -7,18 +7,17 @@ items: - kind: Service apiVersion: v1 metadata: - name: uperf-service-{{worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} - + name: uperf-service-{{ worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} namespace: '{{ operator_namespace }}' labels: - app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'') }}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{ worker_node_list[ node_idx_item ] | truncate(32,true,'')}}-{{ item }}-{{ trunc_uuid }} type: uperf-bench-server-{{ trunc_uuid }} annotations: - node_idx: '{{ node_idx_item }}' - pod_idx: '{{ item }}' + node_idx: '{{ node_idx_item }}' + pod_idx: '{{ item }}' spec: selector: - app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'')}}-{{ item }}-{{ trunc_uuid }} + app: uperf-bench-server-{{worker_node_list[ node_idx_item ] | truncate(32,true,'')}}-{{ item }}-{{ trunc_uuid }} ports: - name: uperf port: 20000 @@ -40,4 +39,3 @@ items: {{ job_template(item,node_idx_item) }} {% endfor %} {% endfor %} - diff --git a/tests/test_uperf.sh b/tests/test_uperf.sh index 65abcc0f5..4f1b81272 100755 --- a/tests/test_uperf.sh +++ b/tests/test_uperf.sh @@ -27,8 +27,8 @@ function functional_test_uperf { uuid=${long_uuid:0:8} pod_count 
"type=uperf-bench-server-$uuid" 1 900 - uperf_server_pod=$(get_pod "app=uperf-bench-server-0-$uuid" 300) - wait_for "kubectl -n my-ripsaw wait --for=condition=Initialized -l app=uperf-bench-server-0-$uuid pods --timeout=300s" "300s" $uperf_server_pod + uperf_server_pod=$(get_pod "type=uperf-bench-server-${uuid}" 300) + wait_for "kubectl -n my-ripsaw wait --for=condition=Initialized -l type=uperf-bench-server-${uuid} pods --timeout=300s" "300s" $uperf_server_pod uperf_client_pod=$(get_pod "app=uperf-bench-client-$uuid" 900) wait_for "kubectl wait -n my-ripsaw --for=condition=Initialized pods/$uperf_client_pod --timeout=500s" "500s" $uperf_client_pod wait_for "kubectl wait -n my-ripsaw --for=condition=complete -l app=uperf-bench-client-$uuid jobs --timeout=500s" "500s" $uperf_client_pod From 37eafc520ac5d71db73c977ad9b2d46e94335d5a Mon Sep 17 00:00:00 2001 From: hnhan Date: Fri, 5 Mar 2021 09:48:24 -0500 Subject: [PATCH 36/41] Update uperf.md --- docs/uperf.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/uperf.md b/docs/uperf.md index 4bef880fc..877104944 100644 --- a/docs/uperf.md +++ b/docs/uperf.md @@ -41,6 +41,7 @@ spec: kind: pod pin_server: "node-0" pin_client: "node-1" + pair: 1 multus: enabled: false samples: 1 @@ -77,6 +78,11 @@ spec: `pin_client` what node to pin the client pod to. +`pair` how many instances of uperf client-server pairs. `pair` is applicable for `pin: true` only. +If `pair` is not specified, the operator will use the value in `density_range` to detemine the number of pairs. +See **Scale** section for more info. `density_range` can do more than `pair` can, but `pair` support is retained +for backward compatibility. + `multus[1]` Configure our pods to use multus. `samples` how many times to run the tests. For example @@ -85,7 +91,7 @@ spec: ```yaml samples: 3 - density: [1,1] + density_range: [1,1] test_types: - stream protos: @@ -111,7 +117,7 @@ size. 
For example: ```yaml samples: 3 - density: [1,1] + density_range: [1,1] test_types: - rr protos: From 8a19e8252754924dd045ec5d4edc47d3998877da Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Tue, 9 Mar 2021 00:36:15 +0000 Subject: [PATCH 37/41] Support multi Pod uperf sessions running in paralell. --- roles/uperf/tasks/cleanup.yml | 7 +++++++ roles/uperf/tasks/init.yml | 4 ++-- roles/uperf/tasks/next_set.yml | 6 +++--- roles/uperf/tasks/run_a_set.yml | 2 +- roles/uperf/tasks/wait_set_done.yml | 2 +- roles/uperf/templates/workload.yml.j2 | 6 +++--- 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/roles/uperf/tasks/cleanup.yml b/roles/uperf/tasks/cleanup.yml index 0da5b68ec..a0dff46ae 100644 --- a/roles/uperf/tasks/cleanup.yml +++ b/roles/uperf/tasks/cleanup.yml @@ -29,8 +29,15 @@ name: "{{ item }}" with_items: "{{ clean_pods }}" when: cleanup + + - name: Cleanup redis + command: "redis-cli del num_completion-{{trunc_uuid}}" + command: "redis-cli del start-{{trunc_uuid}}" + when: resource_kind == "pod" + + # # no kind block - We leave VM running # diff --git a/roles/uperf/tasks/init.yml b/roles/uperf/tasks/init.yml index fbae05ce8..7c9b7220b 100644 --- a/roles/uperf/tasks/init.yml +++ b/roles/uperf/tasks/init.yml @@ -1,10 +1,10 @@ --- - name: Clear start flag - command: "redis-cli set start 0" + command: "redis-cli set start-{{trunc_uuid}} 0" - name: Clear num_completion - command: "redis-cli set num_completion 0" + command: "redis-cli set num_completion-{{trunc_uuid}} 0" - name: Init node and pod indices in benchmark context operator_sdk.util.k8s_status: diff --git a/roles/uperf/tasks/next_set.yml b/roles/uperf/tasks/next_set.yml index df5acbc9c..3f2a5f4cf 100644 --- a/roles/uperf/tasks/next_set.yml +++ b/roles/uperf/tasks/next_set.yml @@ -61,7 +61,7 @@ # All done # - name: Unpause pods to complete - command: "redis-cli set start done" + command: "redis-cli set start-{{trunc_uuid}} done" - name: Change state to proceed to exit 
operator_sdk.util.k8s_status: @@ -79,10 +79,10 @@ # More round(s) to run. # - name: Send redis restart signal - command: "redis-cli set start restart" + command: "redis-cli set start-{{trunc_uuid}} restart" - name: Reset redis num_completion - command: "redis-cli set num_completion 0" + command: "redis-cli set num_completion-{{trunc_uuid}} 0" - name: Change state to run next round operator_sdk.util.k8s_status: diff --git a/roles/uperf/tasks/run_a_set.yml b/roles/uperf/tasks/run_a_set.yml index 1ec4d9260..46c08eb9d 100644 --- a/roles/uperf/tasks/run_a_set.yml +++ b/roles/uperf/tasks/run_a_set.yml @@ -9,7 +9,7 @@ # - name: Signal group to run - command: "redis-cli set start true-{{resource_state.resources[0].status.node_idx|int}}-{{resource_state.resources[0].status.pod_idx|int}}" + command: "redis-cli set start-{{trunc_uuid}} true-{{resource_state.resources[0].status.node_idx|int}}-{{resource_state.resources[0].status.pod_idx|int}}" - name: Update state to "Set Running" operator_sdk.util.k8s_status: diff --git a/roles/uperf/tasks/wait_set_done.yml b/roles/uperf/tasks/wait_set_done.yml index 5c620b017..3f596d681 100644 --- a/roles/uperf/tasks/wait_set_done.yml +++ b/roles/uperf/tasks/wait_set_done.yml @@ -4,7 +4,7 @@ - block: ### kind - name: read pod completion count - command: "redis-cli get num_completion" + command: "redis-cli get num_completion-{{trunc_uuid}}" register: num_completion - operator_sdk.util.k8s_status: diff --git a/roles/uperf/templates/workload.yml.j2 b/roles/uperf/templates/workload.yml.j2 index 8791b6360..814bdac54 100644 --- a/roles/uperf/templates/workload.yml.j2 +++ b/roles/uperf/templates/workload.yml.j2 @@ -123,7 +123,7 @@ items: pod_limit=0; STR=''; while true; do - STR=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); + STR=$(redis-cli -h {{bo.resources[0].status.podIP}} get start-{{trunc_uuid}}); state=$(echo $STR | cut -f1 -d-); if [[ $state =~ 'true' ]]; then node_limit=$(echo $STR | cut -f2 -d-); @@ -158,9 +158,9 @@ items: 
{% endfor %} {% endfor %} {% endfor %} - redis-cli -h {{bo.resources[0].status.podIP}} incr num_completion; + redis-cli -h {{bo.resources[0].status.podIP}} incr num_completion-{{trunc_uuid}}; while true; do - state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start); + state=$(redis-cli -h {{bo.resources[0].status.podIP}} get start-{{trunc_uuid}}); if [[ $state =~ 'restart' ]]; then break; elif [[ $state =~ 'done' ]]; then From b98a17173b8ee0ceb49c1575fe10622a68c711ae Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 11 Mar 2021 14:08:07 +0000 Subject: [PATCH 38/41] Fix clean up task syntax error --- roles/uperf/tasks/cleanup.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/roles/uperf/tasks/cleanup.yml b/roles/uperf/tasks/cleanup.yml index a0dff46ae..43c35ea25 100644 --- a/roles/uperf/tasks/cleanup.yml +++ b/roles/uperf/tasks/cleanup.yml @@ -31,8 +31,10 @@ when: cleanup - name: Cleanup redis - command: "redis-cli del num_completion-{{trunc_uuid}}" - command: "redis-cli del start-{{trunc_uuid}}" + command: "{{ item }}" + with_items: + - redis-cli del num_completion-{{trunc_uuid}} + - redis-cli del start-{{trunc_uuid}} when: resource_kind == "pod" From 03a3a93e7786412064dac3c6342d7cdf85b0ce92 Mon Sep 17 00:00:00 2001 From: Hugh Nhan Date: Thu, 11 Mar 2021 23:34:16 +0000 Subject: [PATCH 39/41] Fix cleanup task to delete server Pods by default. --- roles/uperf/tasks/cleanup.yml | 37 ++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/roles/uperf/tasks/cleanup.yml b/roles/uperf/tasks/cleanup.yml index 43c35ea25..a1e7b8348 100644 --- a/roles/uperf/tasks/cleanup.yml +++ b/roles/uperf/tasks/cleanup.yml @@ -2,6 +2,16 @@ - block: ### kind + # Cleanup servers, but leave clients around mostly for further examining of results. 
+ - name: Get Server Jobs + k8s_facts: + kind: Job + api_version: v1 + namespace: '{{ operator_namespace }}' + label_selectors: + - type = uperf-bench-server-{{ trunc_uuid }} + register: server_jobs + - name: Get Server Pods k8s_facts: kind: Pod @@ -11,8 +21,14 @@ - type = uperf-bench-server-{{ trunc_uuid }} register: server_pods - - name: Pod names - to clean + - name: Server Job and Pod names - to clean set_fact: + clean_jobs: | + [ + {% for item in server_jobs.resources %} + "{{ item['metadata']['name'] }}", + {% endfor %} + ] clean_pods: | [ {% for item in server_pods.resources %} @@ -20,23 +36,34 @@ {% endfor %} ] - - name: Cleanup run + - name: Cleanup server Job k8s: - kind: pod + kind: Job + api_version: v1 + namespace: '{{ operator_namespace }}' + state: absent + name: "{{ item }}" + with_items: "{{ clean_jobs }}" + + - name: Cleanup server Pod + k8s: + kind: Pod api_version: v1 namespace: '{{ operator_namespace }}' state: absent name: "{{ item }}" with_items: "{{ clean_pods }}" - when: cleanup + when: resource_kind == "pod" and cleanup == True + +- block: - name: Cleanup redis command: "{{ item }}" with_items: - redis-cli del num_completion-{{trunc_uuid}} - redis-cli del start-{{trunc_uuid}} + when: resource_kind == "pod" - when: resource_kind == "pod" From 8c0393ee008379dc0877a1be1968c64add2506b7 Mon Sep 17 00:00:00 2001 From: hnhan Date: Wed, 17 Mar 2021 16:13:06 -0400 Subject: [PATCH 40/41] Fixed a uperf.md typos --- docs/uperf.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/uperf.md b/docs/uperf.md index 877104944..66955208b 100644 --- a/docs/uperf.md +++ b/docs/uperf.md @@ -221,7 +221,7 @@ Here is one scale example: step_size: log2 ... ``` -Note, the `scale` mode is mutual exlusive to `pin` mode with the `pin` mode has higher precedence. +Note, the `scale` mode is mutually exlusive to `pin` mode with the `pin` mode having higher precedence. 
In other words, if `pin:true` the test will deploy pods on `pin_server` and `pin_client` nodes and ignore `colocate`, `node_range`, and the number of pairs to deploy is specified by the `density_range.high` value. From a580d4f39448ed1e4cbfad3dcc31c778a8f2c4fd Mon Sep 17 00:00:00 2001 From: Murali Krishnasamy Date: Fri, 19 Mar 2021 14:42:09 -0400 Subject: [PATCH 41/41] custom net_policy --- resources/namespace.yaml | 2 + roles/common/templates/networkpolicy.yml.j2 | 44 ++++++++++++++++++++- 2 files changed, 44 insertions(+), 2 deletions(-) diff --git a/resources/namespace.yaml b/resources/namespace.yaml index 0908738fc..1505d5723 100644 --- a/resources/namespace.yaml +++ b/resources/namespace.yaml @@ -2,3 +2,5 @@ apiVersion: v1 kind: Namespace metadata: name: my-ripsaw + labels: + project: my-ripsaw diff --git a/roles/common/templates/networkpolicy.yml.j2 b/roles/common/templates/networkpolicy.yml.j2 index 6267d2196..361468fd6 100644 --- a/roles/common/templates/networkpolicy.yml.j2 +++ b/roles/common/templates/networkpolicy.yml.j2 @@ -4,11 +4,51 @@ metadata: name: "{{ meta.name }}-networkpolicy-{{ trunc_uuid }}" namespace: '{{ operator_namespace }}' spec: - podSelector: + podSelector: matchLabels: type: "{{ meta.name }}-bench-server-{{ trunc_uuid }}" ingress: - from: - - podSelector: + - podSelector: matchLabels: type: "{{ meta.name }}-bench-client-{{ trunc_uuid }}" + - namespaceSelector: + matchLabels: + project: "{{ operator_namespace }}" +{% if workload.args.ip_block.enable | default(false) %} + - ipBlock: + cidr: "{{ workload.args.ip_block.allow_subnet }}" + except: +{% for subnet in workload.args.ip_block.except_subnet %} + - "{{ subnet }}" +{% endfor %} +{% if workload.args.port_block.enable | default(false) %} + ports: + - protocol: TCP + port: 6379 +{% for prange in workload.args.port_block.range %} +{% for num in range(prange[0]|int,prange[1]|int) %} + - protocol: TCP + port: {{ num }} + - protocol: UDP + port: {{ num }} +{% endfor %} +{% endfor %} +{% 
endif %} + egress: + - to: + - ipBlock: + cidr: "{{ workload.args.ip_block.allow_subnet }}" +{% if workload.args.port_block.enable | default(false) %} + ports: +{% for prange in workload.args.port_block.range %} +{% for num in range(prange[0]|int,prange[1]|int) %} + - protocol: TCP + port: {{ num }} + - protocol: UDP + port: {{ num }} +{% endfor %} +{% endfor %} +{% endif %} +{% endif %} +