From 2aa0c0f86d366a3f6a65779f8f2754f07b30eaf5 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Wed, 28 Jan 2026 23:05:14 -0800 Subject: [PATCH 1/5] Add switches for vNUMA and SR-IOV devices Update the documentation. Signed-off-by: Andrew Collins --- ansible/roles/hv-install/defaults/main.yml | 17 +++++ ansible/roles/hv-install/tasks/main.yml | 67 +++++++++++++++++++ .../templates/hugetlb-reserve-pages.sh.j2 | 15 +++++ ansible/roles/hv-vm-create/defaults/main.yml | 19 +++++- ansible/roles/hv-vm-create/tasks/main.yml | 28 ++++++++ .../hv-vm-create/templates/kvm-def.xml.j2 | 24 +++++++ ansible/vars/hv.sample.yml | 35 ++++++++++ docs/deploy-mno-hybrid.md | 21 ++++++ docs/deploy-vmno.md | 34 ++++++++++ 9 files changed, 258 insertions(+), 2 deletions(-) create mode 100644 ansible/roles/hv-install/defaults/main.yml create mode 100644 ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 create mode 100644 docs/deploy-mno-hybrid.md diff --git a/ansible/roles/hv-install/defaults/main.yml b/ansible/roles/hv-install/defaults/main.yml new file mode 100644 index 00000000..bfd2265b --- /dev/null +++ b/ansible/roles/hv-install/defaults/main.yml @@ -0,0 +1,17 @@ +--- +# hv-install default vars + +# Hugepages configuration for hypervisors +enable_hugepages: false + +# Hugepage size: 2M or 1G +hugepage_size: "1G" + +# Number of hugepages to allocate (e.g., 32 for 32GB of 1G hugepages) +hugepage_count: 32 + +# Additional kernel parameters for performance tuning +additional_kernel_params: [] + +# Number of hugepages per node (e.g. 
total / 2) +hugepages_count_per_node: 190 diff --git a/ansible/roles/hv-install/tasks/main.yml b/ansible/roles/hv-install/tasks/main.yml index 4451e0c4..a73d5ee9 100644 --- a/ansible/roles/hv-install/tasks/main.yml +++ b/ansible/roles/hv-install/tasks/main.yml @@ -21,6 +21,55 @@ name: sushy-tools version: 1.2.0 +- name: Configure hugepages support + when: enable_hugepages + block: + + - name: Run grubby to add hugepages arguments + command: grubby --update-kernel=ALL --args="default_hugepagesz={{ hugepage_size }} hugepagesz={{ hugepage_size }}" + register: grub_updated + + - name: Set reboot required flag + set_fact: + hugepages_reboot_required: true + when: grub_updated.changed + + - name: Create hugetlb-gigantic-pages.service file + copy: + dest: /usr/lib/systemd/system/hugetlb-gigantic-pages.service + content: | + [Unit] + Description=HugeTLB Gigantic Pages Reservation + DefaultDependencies=no + Before=dev-hugepages.mount + ConditionPathExists=/sys/devices/system/node + ConditionKernelCommandLine=hugepagesz=1G + + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/usr/lib/systemd/hugetlb-reserve-pages.sh + + [Install] + WantedBy=sysinit.target + + - name: Create hugetlb-reserve-pages.sh + template: + src: hugetlb-reserve-pages.sh.j2 + dest: /usr/lib/systemd/hugetlb-reserve-pages.sh + mode: "0755" + register: hugetlb_script + + - name: Set reboot required flag + set_fact: + hugepages_reboot_required: true + when: hugetlb_script.changed + + - name: Enable hugetlb-gigantic-pages.service + systemd: + enabled: true + name: hugetlb-gigantic-pages.service + - name: Get coredns get_url: validate_certs: false @@ -65,3 +114,21 @@ state: started enabled: true name: ksmtuned + +- name: Reboot hypervisor for hugepages configuration + when: + - enable_hugepages + - hugepages_reboot_required | default(false) + block: + - name: Reboot hypervisor + reboot: + msg: "Rebooting to apply hugepages configuration" + reboot_timeout: 600 + + - name: Verify hugepages are 
configured + command: grep -E "HugePages_Total|HugePages_Free|Hugepagesize" /proc/meminfo + register: hugepages_status + + - name: Display hugepages status + debug: + msg: "{{ hugepages_status.stdout_lines }}" diff --git a/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 b/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 new file mode 100644 index 00000000..5bed529d --- /dev/null +++ b/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 @@ -0,0 +1,15 @@ +#!/bin/sh +# Reserve 1 GiB hugepages on NUMA nodes node0 and node1 through sysfs. +nodes_path=/sys/devices/system/node/ +if [ ! -d "$nodes_path" ]; then + echo "ERROR: $nodes_path does not exist" >&2 + exit 1 +fi +# reserve_pages COUNT NODE: write COUNT to NODE's 1 GiB nr_hugepages file. +reserve_pages() +{ + echo "$1" > "$nodes_path/$2/hugepages/hugepages-1048576kB/nr_hugepages" +} + +reserve_pages {{ hugepages_count_per_node }} node0 +reserve_pages {{ hugepages_count_per_node }} node1 diff --git a/ansible/roles/hv-vm-create/defaults/main.yml b/ansible/roles/hv-vm-create/defaults/main.yml index 7585df82..527ba186 100644 --- a/ansible/roles/hv-vm-create/defaults/main.yml +++ b/ansible/roles/hv-vm-create/defaults/main.yml @@ -10,7 +10,7 @@ vnuma_enabled: false vnuma_memory_placement: "static" vnuma_cpu_placement: "static" -# Manual vNUMA configuration +# Manual vNUMA configuration # vnuma_nodes: # - id: 0 # cpus: "0-3" @@ -21,4 +21,19 @@ vnuma_cpu_placement: "static" # vNUMA topology settings vnuma_memory_mode: "strict" # strict, preferred, interleave -vnuma_cpu_mode: "strict" # strict, preferred +vnuma_cpu_mode: "strict" # strict, preferred + +# Hugepages configuration for VMs +vm_hugepages: false + +# Hugepage size for VMs: 2M or 1G +vm_hugepage_size: "1G" + +# Number of hugepages to allocate per VM (auto-calculated based on VM memory if not specified) +vm_hugepage_count: + +# Hugepage mount path in VMs +vm_hugepage_mount: "/mnt/hugepages" + +# Enable IGB NICs for VMs +vm_igb_nics: false \ No newline at end of file diff --git a/ansible/roles/hv-vm-create/tasks/main.yml b/ansible/roles/hv-vm-create/tasks/main.yml index 
1b9beffa..aae57c51 100644 --- a/ansible/roles/hv-vm-create/tasks/main.yml +++ b/ansible/roles/hv-vm-create/tasks/main.yml @@ -21,6 +21,34 @@ set_fact: hv_vm_cpu_count: "{{ hostvars[inventory_hostname]['cpus'] }}" +- name: Configure VM hugepages + when: vm_hugepages + block: + - name: Calculate hugepages needed for VM if not specified + set_fact: + calculated_vm_hugepage_count: "{{ (hostvars[inventory_hostname]['memory'] | int) // (vm_hugepage_size[:-1] | int) }}" + when: vm_hugepage_count is not defined or vm_hugepage_count == "" + + - name: Set hugepage count for VM + set_fact: + vm_hugepages_needed: "{{ vm_hugepage_count if vm_hugepage_count is defined and vm_hugepage_count != '' else calculated_vm_hugepage_count }}" + + # TODO This grep expression failed on the second installation. Need either a way to force huge pages, or advanced logic to check which is configured. + - name: Check host hugepages availability + shell: | + grep -E "HugePages_Free.*{{ vm_hugepage_size }}" /proc/meminfo | awk '{print $2}' || echo "0" + register: host_hugepages_free + delegate_to: "{{ hostvars[inventory_hostname]['ansible_host'] }}" + + - name: Validate sufficient hugepages available + fail: + msg: "Not enough {{ vm_hugepage_size }} hugepages available on host {{ hostvars[inventory_hostname]['ansible_host'] }}. 
Need: {{ vm_hugepages_needed }}, Available: {{ host_hugepages_free.stdout }}" + when: (host_hugepages_free.stdout | int) < (vm_hugepages_needed | int) + + - name: Display hugepages configuration for VM + debug: + msg: "VM {{ inventory_hostname }} will use {{ vm_hugepages_needed }} {{ vm_hugepage_size }} hugepages ({{ (vm_hugepages_needed | int) * (vm_hugepage_size[:-1] | int) }}G total)" + - name: Set vNUMA configuration tasks when: vnuma_enabled block: diff --git a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 index df33d85a..197e2fca 100644 --- a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 +++ b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 @@ -3,6 +3,13 @@ {{ hostvars[inventory_hostname]['domain_uuid'] }} {{ hostvars[inventory_hostname]['memory'] }} {{ hostvars[inventory_hostname]['memory'] }} +{% if vm_hugepages %} + + + + + +{% endif %} {{ hv_vm_cpu_count | int }} hvm @@ -11,6 +18,9 @@ +{% if vm_igb_nics | default(false) %} + +{% endif %} {% if vnuma_enabled %} @@ -125,6 +135,20 @@ {% endif %}
+{% if vm_igb_nics | default(false) %} +{% for i in range(1, 6) %} + +{% set mac_prefix = "%s:%02x" | format('52:54:00',i) %} + + + +
+ +{% endfor %} + + + +{% endif %} diff --git a/ansible/vars/hv.sample.yml b/ansible/vars/hv.sample.yml index b50b2551..288e4959 100644 --- a/ansible/vars/hv.sample.yml +++ b/ansible/vars/hv.sample.yml @@ -48,3 +48,38 @@ hv_vm_manifest_acm_cr: true use_bastion_registry: false # Provide pull-secret for connected manifests pull_secret: "{{ lookup('file', '../pull-secret.txt') | b64encode }}" + +################################################################################ +# Hugepages Configuration +################################################################################ + +# Enable hugepages on hypervisors +enable_hugepages: false + +# Hugepage size for hypervisors: 2M or 1G +hugepage_size: "1G" + +# Number of hugepages to allocate on hypervisors (e.g., 64 for 64GB of 1G hugepages) +# Calculate based on total memory and VM requirements +hugepage_count: 64 + +# Additional kernel parameters for performance tuning +additional_kernel_params: + - "intel_iommu=on" + - "iommu=pt" + - "isolcpus=2-15,18-31" + +# Enable hugepages for VMs +vm_hugepages: false + +# Hugepage size for VMs (should match hypervisor hugepage_size) +vm_hugepage_size: "1G" + +# Number of hugepages per VM (auto-calculated based on VM memory if not specified) +# vm_hugepage_count: 18 + +# Enable vNUMA for performance (recommended with hugepages) +vnuma_enabled: false + +# Enable IGB NICs for VMs +vm_igb_nics: false \ No newline at end of file diff --git a/docs/deploy-mno-hybrid.md b/docs/deploy-mno-hybrid.md new file mode 100644 index 00000000..0a423f78 --- /dev/null +++ b/docs/deploy-mno-hybrid.md @@ -0,0 +1,21 @@ +# Deploy Multi-Node OpenShift Hybrid Deployment in ScaleLab + +1. Configure `all.yml` like a standard scalelab allocation. Add the following variables: +``` +cluster_type: mno # This is important to distinguish from the 'vmno' type deployment, which does not permit any nodes to be bare metal. 
+hv_inventory: true +hv_ssh_pass: 200metersq+ +hybrid_worker_count: 500 +``` +This is to verify the inventory looks good. However, we want to run the install in two steps. The reasoning behind this is to separate any issues that could happen from the initial install from the scale up to 500 workers. Each task has its own set of moving pieces and it is nicer to debug an install on a smaller cluster than also trying to troubleshoot massive node creations at the same time. + +Step 1: Install small cluster 3x CP + 3x worker +1. set: `hybrid_worker_count: 0` +2. Run create-inventory +3. Run deploy-mno +4. Run `setup-hypervisor` + +Step 2: Scale up to 500 workers. +1. set `hybrid_worker_count: 500` +2. run create-inventory +3. \ No newline at end of file diff --git a/docs/deploy-vmno.md b/docs/deploy-vmno.md index aab34f85..03da3368 100644 --- a/docs/deploy-vmno.md +++ b/docs/deploy-vmno.md @@ -124,9 +124,21 @@ hw_vm_counts: nvme0n1: 7 ``` +When mixing different machines, the `hw_vm_counts` may be adjusted for those machine models to create the same number of VMs per hypervisor. For example, when mixing Dell r640 and r650 in ScaleLab, the following counts were used: + +```yaml +hw_vm_counts: + scalelab: + r650: + default: 4 + nvme0n1: 16 +``` + > [!NOTE] > Depending upon your hardware, you may have to parition and format a 2nd disk to help store VM disk files. +In some VM scenarios, hugepages may be required. To use them, set `enable_hugepages: true` to reserve hugepages on the hypervisors and `vm_hugepages: true` to configure the VMs to use them. Related variables and their defaults are in `ansible/roles/hv-install/defaults/main.yml` and `ansible/roles/hv-vm-create/defaults/main.yml`. + ## Configure Ansible vars in `hv.yml` ```console @@ -485,3 +497,25 @@ vm00008 Ready worker 1d v1.31.7 (.ansible) [root@ jetlag]# cat /root/vmno/kubeadmin-password xxxxx-xxxxx-xxxxx-xxxxx ``` + +## Additional helper playbooks for VM management + +If VMs become unresponsive, sometimes destroying and restarting them is the only remedy. 
Since the garbage cleanup of pods of all VMs on a single hypervisor at a time can cause stalling, it also may be beneficial to start one VM per HV at a time. Playbooks have been added for all of these tasks. + +See the following playbooks to help in these cases: +``` +ansible/hv-vm-stop-all.yml +ansible/hv-vm-start-one.yml +``` + +## Disabling NetworkManager devices and connections for SR-IOV devices on VMs + +One option of creating SR-IOV capable interfaces in a VM is to create them using the Intel IGB driver. +This may be achieved by setting the variable `vm_igb_nics: true` in your variables. + +**Please note:** When VMs are created with SR-IOV devices using the IGB driver, the devices and connections will never fully initialize. NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. A workaround to this churn is to force the devices down and connections' autoconnect off for those created for the interfaces. + +See the following playbook: +``` +ansible/vm-sriov-disable.yml +``` \ No newline at end of file From 350312cb7d9673f87ca11ea9c4b217b7b589f702 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Mon, 9 Feb 2026 16:08:51 -0800 Subject: [PATCH 2/5] Removed vm_hugepage_size. Hard-coded to 1Gi pages. 
--- ansible/roles/hv-vm-create/defaults/main.yml | 3 --- ansible/roles/hv-vm-create/tasks/main.yml | 10 +++++----- ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 | 2 +- ansible/vars/hv.sample.yml | 3 --- 4 files changed, 6 insertions(+), 12 deletions(-) diff --git a/ansible/roles/hv-vm-create/defaults/main.yml b/ansible/roles/hv-vm-create/defaults/main.yml index 527ba186..38be300e 100644 --- a/ansible/roles/hv-vm-create/defaults/main.yml +++ b/ansible/roles/hv-vm-create/defaults/main.yml @@ -26,9 +26,6 @@ vnuma_cpu_mode: "strict" # strict, preferred # Hugepages configuration for VMs vm_hugepages: false -# Hugepage size for VMs: 2M or 1G -vm_hugepage_size: "1G" - # Number of hugepages to allocate per VM (auto-calculated based on VM memory if not specified) vm_hugepage_count: diff --git a/ansible/roles/hv-vm-create/tasks/main.yml b/ansible/roles/hv-vm-create/tasks/main.yml index aae57c51..2a1efd6b 100644 --- a/ansible/roles/hv-vm-create/tasks/main.yml +++ b/ansible/roles/hv-vm-create/tasks/main.yml @@ -26,28 +26,28 @@ block: - name: Calculate hugepages needed for VM if not specified set_fact: - calculated_vm_hugepage_count: "{{ (hostvars[inventory_hostname]['memory'] | int) // (vm_hugepage_size[:-1] | int) }}" + calculated_vm_hugepage_count: "{{ (hostvars[inventory_hostname]['memory'] | int) // (1 | int) }}" when: vm_hugepage_count is not defined or vm_hugepage_count == "" - name: Set hugepage count for VM set_fact: vm_hugepages_needed: "{{ vm_hugepage_count if vm_hugepage_count is defined and vm_hugepage_count != '' else calculated_vm_hugepage_count }}" - # TODO This grep expression failed on the second installation. Need either a way to force huge pages, or advanced logic to check which is configured. + # Hard-coding 1Gi hugepages for now. In the unlikely event we may need small hugepages, we can refactor code at that time. 
- name: Check host hugepages availability shell: | - grep -E "HugePages_Free.*{{ vm_hugepage_size }}" /proc/meminfo | awk '{print $2}' || echo "0" + cat /sys/kernel/mm/hugepages/hugepages-1048576kB/free_hugepages register: host_hugepages_free delegate_to: "{{ hostvars[inventory_hostname]['ansible_host'] }}" - name: Validate sufficient hugepages available fail: - msg: "Not enough {{ vm_hugepage_size }} hugepages available on host {{ hostvars[inventory_hostname]['ansible_host'] }}. Need: {{ vm_hugepages_needed }}, Available: {{ host_hugepages_free.stdout }}" + msg: "Not enough 1048576kB hugepages available on host {{ hostvars[inventory_hostname]['ansible_host'] }}. Need: {{ vm_hugepages_needed }}, Available: {{ host_hugepages_free.stdout }}" when: (host_hugepages_free.stdout | int) < (vm_hugepages_needed | int) - name: Display hugepages configuration for VM debug: - msg: "VM {{ inventory_hostname }} will use {{ vm_hugepages_needed }} {{ vm_hugepage_size }} hugepages ({{ (vm_hugepages_needed | int) * (vm_hugepage_size[:-1] | int) }}G total)" + msg: "VM {{ inventory_hostname }} will use {{ vm_hugepages_needed }} 1Gi hugepages ({{ (vm_hugepages_needed | int) }}G total)" - name: Set vNUMA configuration tasks when: vnuma_enabled diff --git a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 index 197e2fca..d0d311f3 100644 --- a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 +++ b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 @@ -6,7 +6,7 @@ {% if vm_hugepages %} - + {% endif %} diff --git a/ansible/vars/hv.sample.yml b/ansible/vars/hv.sample.yml index 288e4959..c83f55b5 100644 --- a/ansible/vars/hv.sample.yml +++ b/ansible/vars/hv.sample.yml @@ -72,9 +72,6 @@ additional_kernel_params: # Enable hugepages for VMs vm_hugepages: false -# Hugepage size for VMs (should match hypervisor hugepage_size) -vm_hugepage_size: "1G" - # Number of hugepages per VM (auto-calculated based on VM memory if not specified) # 
vm_hugepage_count: 18 From c7f75f10b703af71962cdc8ec1f977df30ec3494 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Thu, 26 Feb 2026 08:18:57 -0800 Subject: [PATCH 3/5] Remove blurb for vm playbooks in other PR. Docs update Signed-off-by: Andrew Collins --- docs/deploy-vmno.md | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) diff --git a/docs/deploy-vmno.md b/docs/deploy-vmno.md index 03da3368..b139ef97 100644 --- a/docs/deploy-vmno.md +++ b/docs/deploy-vmno.md @@ -498,24 +498,9 @@ vm00008 Ready worker 1d v1.31.7 xxxxx-xxxxx-xxxxx-xxxxx ``` -## Additional helper playbooks for VM management - -If VMs become unresponsive, sometimes destroying and restarting them is the only remedy. Since the garbage cleanup of pods of all VMs on a single hypervisor at a time can cause stalling, it also may be beneficial to start one VM per HV at a time. Playbooks have been added for all of these tasks. - -See the following playbooks to help in these cases: -``` -ansible/hv-vm-stop-all.yml -ansible/hv-vm-start-one.yml -``` - ## Disabling NetworkManager devices and connections for SR-IOV devices on VMs One option of creating SR-IOV capable interfaces in a VM is to create them using the Intel IGB driver. This may be achieved by setting the variable `vm_igb_nics: true` in your variables. -**Please note:** When VMs are created with SR-IOV devices using the IGB driver, the devices and connections will never fully initialize. NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. A workaround to this churn is to force the devices down and connections' autoconnect off for those created for the interfaces. - -See the following playbook: -``` -ansible/vm-sriov-disable.yml -``` \ No newline at end of file +**Please note:** When VMs are created with SR-IOV devices using the IGB driver, the devices and connections may never fully initialize. 
NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. A workaround for this churn is to force those devices down and turn autoconnect off on the connections created for those interfaces. \ No newline at end of file From 4e74118ea9de223983c3ce5e33175a9f152c2043 Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Thu, 26 Feb 2026 10:23:22 -0800 Subject: [PATCH 4/5] Update deployment guide for Multi-Node OpenShift Hybrid in ScaleLab This update enhances the usability and understanding of the deployment process. Co-Authored-By: Cursor agent, claude-4.6-opus model Signed-off-by: Andrew Collins --- docs/deploy-mno-hybrid.md | 81 +++++++++++++++++++++++++++++++-------- 1 file changed, 64 insertions(+), 17 deletions(-) diff --git a/docs/deploy-mno-hybrid.md b/docs/deploy-mno-hybrid.md index 0a423f78..ab9c9eea 100644 --- a/docs/deploy-mno-hybrid.md +++ b/docs/deploy-mno-hybrid.md @@ -1,21 +1,68 @@ -# Deploy Multi-Node OpenShift Hybrid Deployment in ScaleLab +# Deploy Multi-Node OpenShift Hybrid in ScaleLab + +This guide describes how to deploy a hybrid Multi-Node OpenShift (MNO) cluster in ScaleLab: some nodes are bare metal and some are virtual machines (VMs). + +## 1. Configure Ansible variables in `all.yml` + +Set up `all.yml` the same way as for a standard ScaleLab allocation. Then add these variables: -1. Configure `all.yml` like a standard scalelab allocation. Add the following variables: ``` -cluster_type: mno # This is important to distinguish from the 'vmno' type deployment, which does not permit any nodes to be bare metal. +cluster_type: mno # Use "mno" (not "vmno"). MNO allows bare metal nodes; VMNO does not. hv_inventory: true -hv_ssh_pass: 200metersq+ -hybrid_worker_count: 500 +hv_ssh_pass: <hypervisor-ssh-password> +hybrid_worker_count: 123 # Total number of VM workers you want. +``` + +**Why use two phases?** +First you install a small cluster and confirm it works. Then you add the VM workers. 
Doing it in two steps makes it easier to find and fix problems: issues from the first install are separate from issues when creating many VMs. + +## 2. Run the playbooks + +### Phase 1: Install a small cluster (3 control-plane + 3 workers) + +1. In `all.yml`, set `hybrid_worker_count: 0`. +2. Run the `create-inventory.yml` playbook. +3. Run the `mno-deploy.yml` playbook. +4. Run the `hv-setup.yml` playbook. + +### Phase 2: Add the VM workers + +1. In `all.yml`, set `hybrid_worker_count: 123`. +2. Run the `create-inventory.yml` playbook. +3. Open the inventory file at `ansible/inventory/cloudXX.local`. Check: + - **`[worker]`**: It should list the bare metal workers and the correct number of VMs to create. + - **`[hv_vm]`**: It should list the expected number of VMs with the right CPU, memory, and disk. Confirm how many VMs are assigned to each hypervisor (HV). This ratio is set by machine type in `hw_vm_counts` in `lab.yml`. +4. Run the `hv-vm-create.yml` playbook. For more about this playbook, see [Virtual MultiNode OpenShift](deploy-vmno.md). +5. Run the `ocp-scale-out.yml` playbook. For more about this playbook, see [Scale out a Multi-Node OpenShift deployment](scale-out-mno.md). + +## Command reference + +### create-inventory.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook ansible/create-inventory.yml +``` + +### hv-setup.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/hv-setup.yml +``` + +### hv-vm-create.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/hv-vm-create.yml +``` + +### mno-deploy.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/mno-deploy.yml ``` -This is to verify the inventory looks good. However, we want to run the install in two steps. 
The reasoning behind this is to separate any issues that could happen from the initial install from the scale up to 500 workers. Each task has its own set of moving pieces and it is nicer to debug an install on a smaller cluster than also trying to troubleshoot massive node creations at the same time. - -Step 1: Install small cluster 3x CP + 3x worker -1. set: `hybrid_worker_count: 0` -2. Run create-inventory -3. Run deploy-mno -4. Run `setup-hypervisor` - -Step 2: Scale up to 500 workers. -1. set `hybrid_worker_count: 500` -2. run create-inventory -3. \ No newline at end of file + +### ocp-scale-out.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/ocp-scale-out.yml +``` \ No newline at end of file From 1f309ad2f66b2189bc2f03de34f698c2cec5b68d Mon Sep 17 00:00:00 2001 From: Andrew Collins Date: Thu, 26 Feb 2026 16:57:41 -0800 Subject: [PATCH 5/5] Remove vm_hugepage_count as it is confusing. --- ansible/roles/hv-vm-create/defaults/main.yml | 3 --- ansible/roles/hv-vm-create/tasks/main.yml | 7 +------ ansible/vars/hv.sample.yml | 3 --- 3 files changed, 1 insertion(+), 12 deletions(-) diff --git a/ansible/roles/hv-vm-create/defaults/main.yml b/ansible/roles/hv-vm-create/defaults/main.yml index 38be300e..6812b2be 100644 --- a/ansible/roles/hv-vm-create/defaults/main.yml +++ b/ansible/roles/hv-vm-create/defaults/main.yml @@ -26,9 +26,6 @@ vnuma_cpu_mode: "strict" # strict, preferred # Hugepages configuration for VMs vm_hugepages: false -# Number of hugepages to allocate per VM (auto-calculated based on VM memory if not specified) -vm_hugepage_count: - # Hugepage mount path in VMs vm_hugepage_mount: "/mnt/hugepages" diff --git a/ansible/roles/hv-vm-create/tasks/main.yml b/ansible/roles/hv-vm-create/tasks/main.yml index 2a1efd6b..638cfafb 100644 --- a/ansible/roles/hv-vm-create/tasks/main.yml +++ b/ansible/roles/hv-vm-create/tasks/main.yml @@ -24,14 +24,9 @@ - name: Configure VM hugepages when: vm_hugepages block: - 
- name: Calculate hugepages needed for VM if not specified - set_fact: - calculated_vm_hugepage_count: "{{ (hostvars[inventory_hostname]['memory'] | int) // (1 | int) }}" - when: vm_hugepage_count is not defined or vm_hugepage_count == "" - - name: Set hugepage count for VM set_fact: - vm_hugepages_needed: "{{ vm_hugepage_count if vm_hugepage_count is defined and vm_hugepage_count != '' else calculated_vm_hugepage_count }}" + vm_hugepages_needed: "{{ hostvars[inventory_hostname]['memory'] | int }}" # Hard-coding 1Gi hugepages for now. In the unlikely event we may need small hugepages, we can refactor code at that time. - name: Check host hugepages availability diff --git a/ansible/vars/hv.sample.yml b/ansible/vars/hv.sample.yml index c83f55b5..76a62dea 100644 --- a/ansible/vars/hv.sample.yml +++ b/ansible/vars/hv.sample.yml @@ -72,9 +72,6 @@ additional_kernel_params: # Enable hugepages for VMs vm_hugepages: false -# Number of hugepages per VM (auto-calculated based on VM memory if not specified) -# vm_hugepage_count: 18 - # Enable vNUMA for performance (recommended with hugepages) vnuma_enabled: false