diff --git a/ansible/roles/hv-install/defaults/main.yml b/ansible/roles/hv-install/defaults/main.yml new file mode 100644 index 00000000..bfd2265b --- /dev/null +++ b/ansible/roles/hv-install/defaults/main.yml @@ -0,0 +1,17 @@ +--- +# hv-install default vars + +# Hugepages configuration for hypervisors +enable_hugepages: false + +# Hugepage size: 2M or 1G +hugepage_size: "1G" + +# Number of hugepages to allocate (e.g., 32 for 32GB of 1G hugepages) +hugepage_count: 32 + +# Additional kernel parameters for performance tuning +additional_kernel_params: [] + +# Number of hugepages per node (e.g. total / 2) +hugepages_count_per_node: 190 diff --git a/ansible/roles/hv-install/tasks/main.yml b/ansible/roles/hv-install/tasks/main.yml index 4451e0c4..a73d5ee9 100644 --- a/ansible/roles/hv-install/tasks/main.yml +++ b/ansible/roles/hv-install/tasks/main.yml @@ -21,6 +21,55 @@ name: sushy-tools version: 1.2.0 +- name: Configure hugepages support + when: enable_hugepages + block: + + - name: Run grubby to add hugepages arguments + command: grubby --update-kernel=ALL --args="default_hugepagesz={{ hugepage_size }} hugepagesz={{ hugepage_size }}" + register: grub_updated + + - name: Set reboot required flag + set_fact: + hugepages_reboot_required: true + when: grub_updated.changed + + - name: Create hugetlb-gigantic-pages.service file + copy: + dest: /usr/lib/systemd/system/hugetlb-gigantic-pages.service + content: | + [Unit] + Description=HugeTLB Gigantic Pages Reservation + DefaultDependencies=no + Before=dev-hugepages.mount + ConditionPathExists=/sys/devices/system/node + ConditionKernelCommandLine=hugepagesz=1G + + [Service] + Type=oneshot + RemainAfterExit=yes + ExecStart=/usr/lib/systemd/hugetlb-reserve-pages.sh + + [Install] + WantedBy=sysinit.target + + - name: Create hugetlb-reserve-pages.sh + template: + src: hugetlb-reserve-pages.sh.j2 + dest: /usr/lib/systemd/hugetlb-reserve-pages.sh + mode: "0755" + register: hugetlb_script + + - name: Set reboot required flag + 
set_fact: + hugepages_reboot_required: true + when: hugetlb_script.changed + + - name: Enable hugetlb-gigantic-pages.service + systemd: + enabled: true + name: hugetlb-gigantic-pages.service + - name: Get coredns get_url: validate_certs: false @@ -65,3 +114,21 @@ state: started enabled: true name: ksmtuned + +- name: Reboot hypervisor for hugepages configuration + when: + - enable_hugepages + - hugepages_reboot_required | default(false) + block: + - name: Reboot hypervisor + reboot: + msg: "Rebooting to apply hugepages configuration" + reboot_timeout: 600 + + - name: Verify hugepages are configured + shell: cat /proc/meminfo | grep -E "HugePages_Total|HugePages_Free|Hugepagesize" + register: hugepages_status + + - name: Display hugepages status + debug: + msg: "{{ hugepages_status.stdout_lines }}" diff --git a/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 b/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 new file mode 100644 index 00000000..5bed529d --- /dev/null +++ b/ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2 @@ -0,0 +1,15 @@ +#!/bin/sh + +nodes_path=/sys/devices/system/node/ +if [ ! 
-d $nodes_path ]; then + echo "ERROR: $nodes_path does not exist" + exit 1 +fi + +reserve_pages() +{ + echo $1 > $nodes_path/$2/hugepages/hugepages-1048576kB/nr_hugepages +} + +reserve_pages {{ hugepages_count_per_node }} node0 +reserve_pages {{ hugepages_count_per_node }} node1 diff --git a/ansible/roles/hv-vm-create/defaults/main.yml b/ansible/roles/hv-vm-create/defaults/main.yml index 7585df82..6812b2be 100644 --- a/ansible/roles/hv-vm-create/defaults/main.yml +++ b/ansible/roles/hv-vm-create/defaults/main.yml @@ -10,7 +10,7 @@ vnuma_enabled: false vnuma_memory_placement: "static" vnuma_cpu_placement: "static" -# Manual vNUMA configuration +# Manual vNUMA configuration # vnuma_nodes: # - id: 0 # cpus: "0-3" @@ -21,4 +21,13 @@ vnuma_cpu_placement: "static" # vNUMA topology settings vnuma_memory_mode: "strict" # strict, preferred, interleave -vnuma_cpu_mode: "strict" # strict, preferred +vnuma_cpu_mode: "strict" # strict, preferred + +# Hugepages configuration for VMs +vm_hugepages: false + +# Hugepage mount path in VMs +vm_hugepage_mount: "/mnt/hugepages" + +# Enable IGB NICs for VMs +vm_igb_nics: false \ No newline at end of file diff --git a/ansible/roles/hv-vm-create/tasks/main.yml b/ansible/roles/hv-vm-create/tasks/main.yml index 1b9beffa..638cfafb 100644 --- a/ansible/roles/hv-vm-create/tasks/main.yml +++ b/ansible/roles/hv-vm-create/tasks/main.yml @@ -21,6 +21,29 @@ set_fact: hv_vm_cpu_count: "{{ hostvars[inventory_hostname]['cpus'] }}" +- name: Configure VM hugepages + when: vm_hugepages + block: + - name: Set hugepage count for VM + set_fact: + vm_hugepages_needed: "{{ (hostvars[inventory_hostname]['memory'] | int) // (1 | int) }}" + + # Hard-coding 1Gi hugepages for now. In the unlikely event we may need small hugepages, we can refactor code at that time. 
+ - name: Check host hugepages availability + shell: | + cat /sys/kernel/mm/hugepages/hugepages-1048576kB/free_hugepages + register: host_hugepages_free + delegate_to: "{{ hostvars[inventory_hostname]['ansible_host'] }}" + + - name: Validate sufficient hugepages available + fail: + msg: "Not enough 1048576kB hugepages available on host {{ hostvars[inventory_hostname]['ansible_host'] }}. Need: {{ vm_hugepages_needed }}, Available: {{ host_hugepages_free.stdout }}" + when: (host_hugepages_free.stdout | int) < (vm_hugepages_needed | int) + + - name: Display hugepages configuration for VM + debug: + msg: "VM {{ inventory_hostname }} will use {{ vm_hugepages_needed }} 1Gi hugepages ({{ (vm_hugepages_needed | int) }}G total)" + - name: Set vNUMA configuration tasks when: vnuma_enabled block: diff --git a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 index df33d85a..d0d311f3 100644 --- a/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 +++ b/ansible/roles/hv-vm-create/templates/kvm-def.xml.j2 @@ -3,6 +3,13 @@ {{ hostvars[inventory_hostname]['domain_uuid'] }} {{ hostvars[inventory_hostname]['memory'] }} {{ hostvars[inventory_hostname]['memory'] }} +{% if vm_hugepages %} + + + + + +{% endif %} {{ hv_vm_cpu_count | int }} hvm @@ -11,6 +18,9 @@ +{% if vm_igb_nics | default(false) %} + +{% endif %} {% if vnuma_enabled %} @@ -125,6 +135,20 @@ {% endif %}
+{% if vm_igb_nics | default(false) %} +{% for i in range(1, 6) %} + +{% set mac_prefix = "%s:%02x" | format('52:54:00',i) %} + + + +
+ +{% endfor %} + + + +{% endif %} diff --git a/ansible/vars/hv.sample.yml b/ansible/vars/hv.sample.yml index b50b2551..76a62dea 100644 --- a/ansible/vars/hv.sample.yml +++ b/ansible/vars/hv.sample.yml @@ -48,3 +48,32 @@ hv_vm_manifest_acm_cr: true use_bastion_registry: false # Provide pull-secret for connected manifests pull_secret: "{{ lookup('file', '../pull-secret.txt') | b64encode }}" + +################################################################################ +# Hugepages Configuration +################################################################################ + +# Enable hugepages on hypervisors +enable_hugepages: false + +# Hugepage size for hypervisors: 2M or 1G +hugepage_size: "1G" + +# Number of hugepages to allocate on hypervisors (e.g., 64 for 64GB of 1G hugepages) +# Calculate based on total memory and VM requirements +hugepage_count: 64 + +# Additional kernel parameters for performance tuning +additional_kernel_params: + - "intel_iommu=on" + - "iommu=pt" + - "isolcpus=2-15,18-31" + +# Enable hugepages for VMs +vm_hugepages: false + +# Enable vNUMA for performance (recommended with hugepages) +vnuma_enabled: false + +# Enable IGB NICs for VMs +vm_igb_nics: false \ No newline at end of file diff --git a/docs/deploy-mno-hybrid.md b/docs/deploy-mno-hybrid.md new file mode 100644 index 00000000..ab9c9eea --- /dev/null +++ b/docs/deploy-mno-hybrid.md @@ -0,0 +1,68 @@ +# Deploy Multi-Node OpenShift Hybrid in ScaleLab + +This guide describes how to deploy a hybrid Multi-Node OpenShift (MNO) cluster in ScaleLab: some nodes are bare metal and some are virtual machines (VMs). + +## 1. Configure Ansible variables in `all.yml` + +Set up `all.yml` the same way as for a standard ScaleLab allocation. Then add these variables: + +``` +cluster_type: mno # Use "mno" (not "vmno"). MNO allows bare metal nodes; VMNO does not. +hv_inventory: true +hv_ssh_pass: +hybrid_worker_count: 123 # Total number of VM workers you want. 
+``` + +**Why use two phases?** +First you install a small cluster and confirm it works. Then you add the VM workers. Doing it in two steps makes it easier to find and fix problems: issues from the first install are separate from issues when creating many VMs. + +## 2. Run the playbooks + +### Phase 1: Install a small cluster (3 control-plane + 3 workers) + +1. In `all.yml`, set `hybrid_worker_count: 0`. +2. Run the `create-inventory.yml` playbook. +3. Run the `mno-deploy.yml` playbook. +4. Run the `hv-setup.yml` playbook. + +### Phase 2: Add the VM workers + +1. In `all.yml`, set `hybrid_worker_count: 123`. +2. Run the `create-inventory.yml` playbook. +3. Open the inventory file at `ansible/inventory/cloudXX.local`. Check: + - **`[worker]`**: It should list the bare metal workers and the correct number of VMs to create. + - **`[hv_vm]`**: It should list the expected number of VMs with the right CPU, memory, and disk. Confirm how many VMs are assigned to each hypervisor (HV). This ratio is set by machine type in `hw_vm_counts` in `lab.yml`. +4. Run the `hv-vm-create.yml` playbook. For more about this playbook, see [Virtual MultiNode OpenShift](deploy-vmno.md). +5. Run the `ocp-scale-out.yml` playbook. For more about this playbook, see [Scale out a Multi-Node OpenShift deployment](scale-out-mno.md). 
+ +## Command reference + +### create-inventory.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook ansible/create-inventory.yml +``` + +### hv-setup.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/hv-setup.yml +``` + +### hv-vm-create.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/hv-vm-create.yml +``` + +### mno-deploy.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/mno-deploy.yml +``` + +### ocp-scale-out.yml + +```console +(.ansible) [root@ jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/ocp-scale-out.yml +``` \ No newline at end of file diff --git a/docs/deploy-vmno.md b/docs/deploy-vmno.md index aab34f85..b139ef97 100644 --- a/docs/deploy-vmno.md +++ b/docs/deploy-vmno.md @@ -124,9 +124,21 @@ hw_vm_counts: nvme0n1: 7 ``` +When mixing different machines, the `hw_vm_counts` values may be adjusted for those machine models to create the same number of VMs per hypervisor. For example, when mixing Dell r640 and r650 in ScaleLab, the following counts were used: + +```yaml +hw_vm_counts: + scalelab: + r650: + default: 4 + nvme0n1: 16 +``` + > [!NOTE] > Depending upon your hardware, you may have to parition and format a 2nd disk to help store VM disk files. +In some VM scenarios, hugepages may be required. To configure VMs with hugepages, set `vm_hugepages: true` (the hypervisors must also have hugepages enabled via `enable_hugepages`), and configure specifics with other similar variables found in `ansible/roles/hv-install/defaults/main.yml` and `ansible/roles/hv-vm-create/defaults/main.yml`. + ## Configure Ansible vars in `hv.yml` ```console @@ -485,3 +497,10 @@ vm00008 Ready worker 1d v1.31.7 (.ansible) [root@ jetlag]# cat /root/vmno/kubeadmin-password xxxxx-xxxxx-xxxxx-xxxxx ``` + +## Disabling NetworkManager devices and connections for SR-IOV devices on VMs + +One option for creating SR-IOV capable interfaces in a VM is to create them using the Intel IGB driver. 
+This may be achieved by setting the variable `vm_igb_nics: true` in your variables. + +**Please note:** When VMs are created with SR-IOV devices using the IGB driver, the devices and connections may never fully initialize. NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. A workaround for this churn is to force the affected devices down and to disable autoconnect on the NetworkManager connections created for these interfaces. \ No newline at end of file