Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions ansible/roles/hv-install/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
# hv-install default vars

# Hugepages configuration for hypervisors
# Master switch: when false, all hugepages tasks in this role are skipped.
enable_hugepages: false

# Hugepage size: 2M or 1G
# NOTE(review): the hugetlb-reserve-pages.sh.j2 template and the systemd
# unit condition only handle 1G (hugepages-1048576kB); confirm intended
# behavior before setting this to 2M.
hugepage_size: "1G"

# Number of hugepages to allocate (e.g., 32 for 32GB of 1G hugepages)
# NOTE(review): this default does not agree with hugepages_count_per_node
# below (190 per node, ~380 total on a 2-node host); verify which variable
# the tasks actually consume.
hugepage_count: 32

# Additional kernel parameters for performance tuning
additional_kernel_params: []

# Number of hugepages per node (e.g. total / 2)
hugepages_count_per_node: 190
67 changes: 67 additions & 0 deletions ansible/roles/hv-install/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,55 @@
name: sushy-tools
version: 1.2.0

# Configure kernel command line, systemd reservation service, and reboot
# flag for 1G hugepages on the hypervisor. Skipped entirely unless
# enable_hugepages is true.
- name: Configure hugepages support
  when: enable_hugepages
  block:

    # Read-only probe so the grubby task below can be made idempotent.
    - name: Check kernel command line for hugepages arguments
      command: cat /proc/cmdline
      register: hugepages_cmdline
      changed_when: false

    # Only touch grub when the desired argument is not already present.
    # Without this guard, `command` reports changed on every run, which
    # sets hugepages_reboot_required and forces a reboot each time the
    # playbook executes.
    - name: Run grubby to add hugepages arguments
      command: grubby --update-kernel=ALL --args="default_hugepagesz={{ hugepage_size }} hugepagesz={{ hugepage_size }}"
      register: grub_updated
      when: "('hugepagesz=' + hugepage_size) not in hugepages_cmdline.stdout"

    - name: Set reboot required flag
      set_fact:
        hugepages_reboot_required: true
      when: grub_updated.changed | default(false)

    # Oneshot unit that reserves gigantic pages per NUMA node very early
    # in boot (before dev-hugepages.mount), while memory is unfragmented.
    - name: Create hugetlb-gigantic-pages.service file
      copy:
        dest: /usr/lib/systemd/system/hugetlb-gigantic-pages.service
        content: |
          [Unit]
          Description=HugeTLB Gigantic Pages Reservation
          DefaultDependencies=no
          Before=dev-hugepages.mount
          ConditionPathExists=/sys/devices/system/node
          ConditionKernelCommandLine=hugepagesz=1G

          [Service]
          Type=oneshot
          RemainAfterExit=yes
          ExecStart=/usr/lib/systemd/hugetlb-reserve-pages.sh

          [Install]
          WantedBy=sysinit.target
      register: hugetlb_unit

    - name: Create hugetlb-reserve-pages.sh
      template:
        src: hugetlb-reserve-pages.sh.j2
        dest: /usr/lib/systemd/hugetlb-reserve-pages.sh
        mode: "0755"
      register: hugetlb_script

    # A changed unit file or reservation script only takes effect at the
    # next boot, so either change flags a reboot.
    - name: Set reboot required flag
      set_fact:
        hugepages_reboot_required: true
      when: hugetlb_script.changed or hugetlb_unit.changed

    - name: Enable hugetlb-gigantic-pages.service
      systemd:
        enabled: true
        name: hugetlb-gigantic-pages.service

- name: Get coredns
get_url:
validate_certs: false
Expand Down Expand Up @@ -65,3 +114,21 @@
state: started
enabled: true
name: ksmtuned

# Reboot only when the hugepages tasks above actually changed something
# (hugepages_reboot_required defaults to false), then verify the result.
- name: Reboot hypervisor for hugepages configuration
  when:
    - enable_hugepages
    - hugepages_reboot_required | default(false)
  block:
    - name: Reboot hypervisor
      reboot:
        msg: "Rebooting to apply hugepages configuration"
        reboot_timeout: 600

    # Read-only check: grep can read /proc/meminfo directly, so no shell
    # pipeline ("cat | grep") is needed, and it must never report changed.
    - name: Verify hugepages are configured
      command: grep -E "HugePages_Total|HugePages_Free|Hugepagesize" /proc/meminfo
      register: hugepages_status
      changed_when: false

    - name: Display hugepages status
      debug:
        msg: "{{ hugepages_status.stdout_lines }}"
15 changes: 15 additions & 0 deletions ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/sh
# Reserve 1G (1048576kB) hugepages on each NUMA node at early boot.
# Rendered by Ansible: hugepages_count_per_node is substituted at deploy time.

nodes_path=/sys/devices/system/node/
if [ ! -d "$nodes_path" ]; then
    echo "ERROR: $nodes_path does not exist"
    exit 1
fi

# $1 = number of pages, $2 = node directory name (e.g. node0)
reserve_pages()
{
    echo "$1" > "$nodes_path/$2/hugepages/hugepages-1048576kB/nr_hugepages"
}

# Iterate over the NUMA nodes actually present rather than hard-coding
# node0/node1: a single-node host would otherwise fail on node1, and a
# 4-node host would silently get no reservation on nodes 2 and 3.
for node_dir in "$nodes_path"node[0-9]*; do
    [ -d "$node_dir" ] || continue
    reserve_pages {{ hugepages_count_per_node }} "$(basename "$node_dir")"
done
13 changes: 11 additions & 2 deletions ansible/roles/hv-vm-create/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ vnuma_enabled: false
vnuma_memory_placement: "static"
vnuma_cpu_placement: "static"

# Manual vNUMA configuration
# Manual vNUMA configuration
# vnuma_nodes:
# - id: 0
# cpus: "0-3"
Expand All @@ -21,4 +21,13 @@ vnuma_cpu_placement: "static"

# vNUMA topology settings
vnuma_memory_mode: "strict" # strict, preferred, interleave
vnuma_cpu_mode: "strict" # strict, preferred
vnuma_cpu_mode: "strict" # strict, preferred

# Hugepages configuration for VMs
vm_hugepages: false

# Hugepage mount path in VMs
vm_hugepage_mount: "/mnt/hugepages"

# Enable IGB NICs for VMs
vm_igb_nics: false
23 changes: 23 additions & 0 deletions ansible/roles/hv-vm-create/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,29 @@
set_fact:
hv_vm_cpu_count: "{{ hostvars[inventory_hostname]['cpus'] }}"

# Compute how many 1Gi hugepages this VM needs and fail early if the
# target hypervisor cannot satisfy the request.
- name: Configure VM hugepages
  when: vm_hugepages
  block:
    # VM memory is expressed in GiB and pages are 1GiB, so the page count
    # equals the memory size. (The previous `// (1 | int)` was a
    # divide-by-one no-op and has been removed.)
    - name: Set hugepage count for VM
      set_fact:
        vm_hugepages_needed: "{{ hostvars[inventory_hostname]['memory'] | int }}"

    # Hard-coding 1Gi hugepages for now. In the unlikely event we may need small hugepages, we can refactor code at that time.
    # Read-only probe on the hypervisor; no shell features needed and it
    # must never report changed.
    - name: Check host hugepages availability
      command: cat /sys/kernel/mm/hugepages/hugepages-1048576kB/free_hugepages
      register: host_hugepages_free
      changed_when: false
      delegate_to: "{{ hostvars[inventory_hostname]['ansible_host'] }}"

    - name: Validate sufficient hugepages available
      fail:
        msg: "Not enough 1048576kB hugepages available on host {{ hostvars[inventory_hostname]['ansible_host'] }}. Need: {{ vm_hugepages_needed }}, Available: {{ host_hugepages_free.stdout }}"
      when: (host_hugepages_free.stdout | int) < (vm_hugepages_needed | int)

    - name: Display hugepages configuration for VM
      debug:
        msg: "VM {{ inventory_hostname }} will use {{ vm_hugepages_needed }} 1Gi hugepages ({{ (vm_hugepages_needed | int) }}G total)"

- name: Set vNUMA configuration tasks
when: vnuma_enabled
block:
Expand Down
24 changes: 24 additions & 0 deletions ansible/roles/hv-vm-create/templates/kvm-def.xml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
<uuid>{{ hostvars[inventory_hostname]['domain_uuid'] }}</uuid>
<memory unit='GiB'>{{ hostvars[inventory_hostname]['memory'] }}</memory>
<currentMemory unit='GiB'>{{ hostvars[inventory_hostname]['memory'] }}</currentMemory>
{% if vm_hugepages %}
<memoryBacking>
<hugepages>
<page size='1048576' unit='KiB'/>
</hugepages>
</memoryBacking>
{% endif %}
<vcpu placement='static'>{{ hv_vm_cpu_count | int }}</vcpu>
<os>
<type arch='x86_64' machine='pc-q35-rhel7.6.0'>hvm</type>
Expand All @@ -11,6 +18,9 @@
<features>
<acpi/>
<apic/>
{% if vm_igb_nics | default(false) %}
<ioapic driver='qemu'/>
{% endif %}
</features>
{% if vnuma_enabled %}
<cpu mode='host-model' check='partial'>
Expand Down Expand Up @@ -125,6 +135,20 @@
{% endif %}
<address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
</interface>
{% if vm_igb_nics | default(false) %}
{% for i in range(1, 6) %}
<interface type='bridge'>
{% set mac_prefix = "%s:%02x" | format('52:54:00',i) %}
<mac address='{{ mac_prefix | community.general.random_mac(seed=inventory_hostname) }}'/>
<source bridge='br0'/>
<model type='igb'/>
<address type='pci' domain='0x0000' bus='{{ "0x%02x" | format(i + 4) }}' slot='0x00' function='0x0'/>
</interface>
{% endfor %}
<iommu model='intel'>
<driver intremap='on'/>
</iommu>
{% endif %}
<serial type='pty'>
<target type='isa-serial' port='0'>
<model name='isa-serial'/>
Expand Down
29 changes: 29 additions & 0 deletions ansible/vars/hv.sample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,32 @@ hv_vm_manifest_acm_cr: true
use_bastion_registry: false
# Provide pull-secret for connected manifests
pull_secret: "{{ lookup('file', '../pull-secret.txt') | b64encode }}"

################################################################################
# Hugepages Configuration
################################################################################

# Enable hugepages on hypervisors
enable_hugepages: false

# Hugepage size for hypervisors: 2M or 1G
# NOTE(review): the hv-install reservation script only handles 1G pages;
# confirm before setting 2M.
hugepage_size: "1G"

# Number of hugepages to allocate on hypervisors (e.g., 64 for 64GB of 1G hugepages)
# Calculate based on total memory and VM requirements
# NOTE(review): hv-install tasks appear to reserve pages via
# hugepages_count_per_node; verify this variable is actually consumed.
hugepage_count: 64

# Additional kernel parameters for performance tuning
additional_kernel_params:
  - "intel_iommu=on"
  - "iommu=pt"
  - "isolcpus=2-15,18-31"

# Enable hugepages for VMs
vm_hugepages: false

# Enable vNUMA for performance (recommended with hugepages)
vnuma_enabled: false

# Enable IGB NICs for VMs
vm_igb_nics: false
68 changes: 68 additions & 0 deletions docs/deploy-mno-hybrid.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Deploy Multi-Node OpenShift Hybrid in ScaleLab

This guide describes how to deploy a hybrid Multi-Node OpenShift (MNO) cluster in ScaleLab: some nodes are bare metal and some are virtual machines (VMs).

## 1. Configure Ansible variables in `all.yml`

Set up `all.yml` the same way as for a standard ScaleLab allocation. Then add these variables:

```
cluster_type: mno # Use "mno" (not "vmno"). MNO allows bare metal nodes; VMNO does not.
hv_inventory: true
hv_ssh_pass: <hv-password>
hybrid_worker_count: 123 # Total number of VM workers you want.
```

**Why use two phases?**
First you install a small cluster and confirm it works. Then you add the VM workers. Doing it in two steps makes it easier to find and fix problems: issues from the first install are separate from issues when creating many VMs.

## 2. Run the playbooks

### Phase 1: Install a small cluster (3 control-plane + 3 workers)

1. In `all.yml`, set `hybrid_worker_count: 0`.
2. Run the `create-inventory.yml` playbook.
3. Run the `mno-deploy.yml` playbook.
4. Run the `hv-setup.yml` playbook.

### Phase 2: Add the VM workers

1. In `all.yml`, set `hybrid_worker_count: 123`.
2. Run the `create-inventory.yml` playbook.
3. Open the inventory file at `ansible/inventory/cloudXX.local`. Check:
- **`[worker]`**: It should list the bare metal workers and the correct number of VMs to create.
- **`[hv_vm]`**: It should list the expected number of VMs with the right CPU, memory, and disk. Confirm how many VMs are assigned to each hypervisor (HV). This ratio is set by machine type in `hw_vm_counts` in `lab.yml`.
4. Run the `hv-vm-create.yml` playbook. For more about this playbook, see [Virtual MultiNode OpenShift](deploy-vmno.md).
5. Run the `ocp-scale-out.yml` playbook. For more about this playbook, see [Scale out a Multi-Node OpenShift deployment](scale-out-mno.md).

## Command reference

### create-inventory.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook ansible/create-inventory.yml
```

### hv-setup.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/hv-setup.yml
```

### hv-vm-create.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/hv-vm-create.yml
```

### mno-deploy.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/mno-deploy.yml
```

### ocp-scale-out.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/ocp-scale-out.yml
```
19 changes: 19 additions & 0 deletions docs/deploy-vmno.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,21 @@ hw_vm_counts:
nvme0n1: 7
```

When mixing different machine models, the `hw_vm_counts` values may be adjusted so that each hypervisor model creates the same number of VMs. For example, when mixing Dell r640 and r650 in ScaleLab, the following counts were used:

```yaml
hw_vm_counts:
scalelab:
r650:
default: 4
nvme0n1: 16
```

> [!NOTE]
> Depending upon your hardware, you may have to partition and format a 2nd disk to help store VM disk files.

In some VM scenarios, hugepages may be required. To configure hugepages, enable them on the hypervisors with `enable_hugepages` and on the VMs with `vm_hugepages`, then tune the specifics with the related variables found in: `ansible/roles/hv-install/defaults/main.yml`.

## Configure Ansible vars in `hv.yml`

```console
Expand Down Expand Up @@ -485,3 +497,10 @@ vm00008 Ready worker 1d v1.31.7
(.ansible) [root@<bastion> jetlag]# cat /root/vmno/kubeadmin-password
xxxxx-xxxxx-xxxxx-xxxxx
```

## Disabling NetworkManager devices and connections for SR-IOV devices on VMs

One option of creating SR-IOV capable interfaces in a VM is to create them using the Intel IGB driver.
This may be achieved by setting the variable `vm_igb_nics: true` in your variables.

**Please note:** When VMs are created with SR-IOV devices using the IGB driver, the devices and connections may never fully initialize. NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. A workaround to this churn is to force the devices down and connections' autoconnect off for those created for the interfaces.