Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions ansible/roles/hv-install/defaults/main.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
---
# hv-install default vars

# Hugepages configuration for hypervisors
# Master switch: when false, all hugepages tasks in this role are skipped.
enable_hugepages: false

# Hugepage size: 2M or 1G
# NOTE(review): the hugetlb-reserve-pages.sh.j2 template and the systemd
# unit condition only handle 1G (hugepages-1048576kB); confirm intended
# behavior before setting this to 2M.
hugepage_size: "1G"

# Number of hugepages to allocate (e.g., 32 for 32GB of 1G hugepages)
# NOTE(review): this default does not agree with hugepages_count_per_node
# below (190 per node, ~380 total on a 2-node host); verify which variable
# the tasks actually consume.
hugepage_count: 32

# Additional kernel parameters for performance tuning
additional_kernel_params: []

# Number of hugepages per node (e.g. total / 2)
hugepages_count_per_node: 190
67 changes: 67 additions & 0 deletions ansible/roles/hv-install/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,55 @@
name: sushy-tools
version: 1.2.0

# Configure kernel command line, systemd reservation service, and reboot
# flag for 1G hugepages on the hypervisor. Skipped entirely unless
# enable_hugepages is true.
- name: Configure hugepages support
  when: enable_hugepages
  block:

    # Read-only probe so the grubby task below can be made idempotent.
    - name: Check kernel command line for hugepages arguments
      command: cat /proc/cmdline
      register: hugepages_cmdline
      changed_when: false

    # Only touch grub when the desired argument is not already present.
    # Without this guard, `command` reports changed on every run, which
    # sets hugepages_reboot_required and forces a reboot each time the
    # playbook executes.
    - name: Run grubby to add hugepages arguments
      command: grubby --update-kernel=ALL --args="default_hugepagesz={{ hugepage_size }} hugepagesz={{ hugepage_size }}"
      register: grub_updated
      when: "('hugepagesz=' + hugepage_size) not in hugepages_cmdline.stdout"

    - name: Set reboot required flag
      set_fact:
        hugepages_reboot_required: true
      when: grub_updated.changed | default(false)

    # Oneshot unit that reserves gigantic pages per NUMA node very early
    # in boot (before dev-hugepages.mount), while memory is unfragmented.
    - name: Create hugetlb-gigantic-pages.service file
      copy:
        dest: /usr/lib/systemd/system/hugetlb-gigantic-pages.service
        content: |
          [Unit]
          Description=HugeTLB Gigantic Pages Reservation
          DefaultDependencies=no
          Before=dev-hugepages.mount
          ConditionPathExists=/sys/devices/system/node
          ConditionKernelCommandLine=hugepagesz=1G

          [Service]
          Type=oneshot
          RemainAfterExit=yes
          ExecStart=/usr/lib/systemd/hugetlb-reserve-pages.sh

          [Install]
          WantedBy=sysinit.target
      register: hugetlb_unit

    - name: Create hugetlb-reserve-pages.sh
      template:
        src: hugetlb-reserve-pages.sh.j2
        dest: /usr/lib/systemd/hugetlb-reserve-pages.sh
        mode: "0755"
      register: hugetlb_script

    # A changed unit file or reservation script only takes effect at the
    # next boot, so either change flags a reboot.
    - name: Set reboot required flag
      set_fact:
        hugepages_reboot_required: true
      when: hugetlb_script.changed or hugetlb_unit.changed

    - name: Enable hugetlb-gigantic-pages.service
      systemd:
        enabled: true
        name: hugetlb-gigantic-pages.service

- name: Get coredns
get_url:
validate_certs: false
Expand Down Expand Up @@ -65,3 +114,21 @@
state: started
enabled: true
name: ksmtuned

# Reboot only when the hugepages tasks above actually changed something
# (hugepages_reboot_required defaults to false), then verify the result.
- name: Reboot hypervisor for hugepages configuration
  when:
    - enable_hugepages
    - hugepages_reboot_required | default(false)
  block:
    - name: Reboot hypervisor
      reboot:
        msg: "Rebooting to apply hugepages configuration"
        reboot_timeout: 600

    # Read-only check: grep can read /proc/meminfo directly, so no shell
    # pipeline ("cat | grep") is needed, and it must never report changed.
    - name: Verify hugepages are configured
      command: grep -E "HugePages_Total|HugePages_Free|Hugepagesize" /proc/meminfo
      register: hugepages_status
      changed_when: false

    - name: Display hugepages status
      debug:
        msg: "{{ hugepages_status.stdout_lines }}"
15 changes: 15 additions & 0 deletions ansible/roles/hv-install/templates/hugetlb-reserve-pages.sh.j2
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/sh
# Reserve 1G (1048576kB) hugepages on each NUMA node at early boot.
# Rendered by Ansible: hugepages_count_per_node is substituted at deploy time.

nodes_path=/sys/devices/system/node/
if [ ! -d "$nodes_path" ]; then
    echo "ERROR: $nodes_path does not exist"
    exit 1
fi

# $1 = number of pages, $2 = node directory name (e.g. node0)
reserve_pages()
{
    echo "$1" > "$nodes_path/$2/hugepages/hugepages-1048576kB/nr_hugepages"
}

# Iterate over the NUMA nodes actually present rather than hard-coding
# node0/node1: a single-node host would otherwise fail on node1, and a
# 4-node host would silently get no reservation on nodes 2 and 3.
for node_dir in "$nodes_path"node[0-9]*; do
    [ -d "$node_dir" ] || continue
    reserve_pages {{ hugepages_count_per_node }} "$(basename "$node_dir")"
done
13 changes: 11 additions & 2 deletions ansible/roles/hv-vm-create/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ vnuma_enabled: false
vnuma_memory_placement: "static"
vnuma_cpu_placement: "static"

# Manual vNUMA configuration
# Manual vNUMA configuration
# vnuma_nodes:
# - id: 0
# cpus: "0-3"
Expand All @@ -21,4 +21,13 @@ vnuma_cpu_placement: "static"

# vNUMA topology settings
vnuma_memory_mode: "strict" # strict, preferred, interleave
vnuma_cpu_mode: "strict" # strict, preferred
vnuma_cpu_mode: "strict" # strict, preferred

# Hugepages configuration for VMs
vm_hugepages: false

# Hugepage mount path in VMs
vm_hugepage_mount: "/mnt/hugepages"

# Enable IGB NICs for VMs
vm_igb_nics: false
23 changes: 23 additions & 0 deletions ansible/roles/hv-vm-create/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,29 @@
set_fact:
hv_vm_cpu_count: "{{ hostvars[inventory_hostname]['cpus'] }}"

# Compute how many 1Gi hugepages this VM needs and fail early if the
# target hypervisor cannot satisfy the request.
- name: Configure VM hugepages
  when: vm_hugepages
  block:
    # VM memory is expressed in GiB and pages are 1GiB, so the page count
    # equals the memory size. (The previous `// (1 | int)` was a
    # divide-by-one no-op and has been removed.)
    - name: Set hugepage count for VM
      set_fact:
        vm_hugepages_needed: "{{ hostvars[inventory_hostname]['memory'] | int }}"

    # Hard-coding 1Gi hugepages for now. In the unlikely event we may need small hugepages, we can refactor code at that time.
    # Read-only probe on the hypervisor; no shell features needed and it
    # must never report changed.
    - name: Check host hugepages availability
      command: cat /sys/kernel/mm/hugepages/hugepages-1048576kB/free_hugepages
      register: host_hugepages_free
      changed_when: false
      delegate_to: "{{ hostvars[inventory_hostname]['ansible_host'] }}"

    - name: Validate sufficient hugepages available
      fail:
        msg: "Not enough 1048576kB hugepages available on host {{ hostvars[inventory_hostname]['ansible_host'] }}. Need: {{ vm_hugepages_needed }}, Available: {{ host_hugepages_free.stdout }}"
      when: (host_hugepages_free.stdout | int) < (vm_hugepages_needed | int)

    - name: Display hugepages configuration for VM
      debug:
        msg: "VM {{ inventory_hostname }} will use {{ vm_hugepages_needed }} 1Gi hugepages ({{ (vm_hugepages_needed | int) }}G total)"

- name: Set vNUMA configuration tasks
when: vnuma_enabled
block:
Expand Down
24 changes: 24 additions & 0 deletions ansible/roles/hv-vm-create/templates/kvm-def.xml.j2
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,13 @@
<uuid>{{ hostvars[inventory_hostname]['domain_uuid'] }}</uuid>
<memory unit='GiB'>{{ hostvars[inventory_hostname]['memory'] }}</memory>
<currentMemory unit='GiB'>{{ hostvars[inventory_hostname]['memory'] }}</currentMemory>
{% if vm_hugepages %}
<memoryBacking>
<hugepages>
<page size='1048576' unit='KiB'/>
</hugepages>
</memoryBacking>
{% endif %}
<vcpu placement='static'>{{ hv_vm_cpu_count | int }}</vcpu>
<os>
<type arch='x86_64' machine='pc-q35-rhel7.6.0'>hvm</type>
Expand All @@ -11,6 +18,9 @@
<features>
<acpi/>
<apic/>
{% if vm_igb_nics | default(false) %}
<ioapic driver='qemu'/>
{% endif %}
</features>
{% if vnuma_enabled %}
<cpu mode='host-model' check='partial'>
Expand Down Expand Up @@ -125,6 +135,20 @@
{% endif %}
<address type='pci' domain='0x0000' bus='0x01' slot='0x00' function='0x0'/>
</interface>
{% if vm_igb_nics | default(false) %}
{% for i in range(1, 6) %}
<interface type='bridge'>
{% set mac_prefix = "%s:%02x" | format('52:54:00',i) %}
<mac address='{{ mac_prefix | community.general.random_mac(seed=inventory_hostname) }}'/>
<source bridge='br0'/>
<model type='igb'/>
<address type='pci' domain='0x0000' bus='{{ "0x%02x" | format(i + 4) }}' slot='0x00' function='0x0'/>
</interface>
{% endfor %}
<iommu model='intel'>
<driver intremap='on'/>
</iommu>
{% endif %}
<serial type='pty'>
<target type='isa-serial' port='0'>
<model name='isa-serial'/>
Expand Down
29 changes: 29 additions & 0 deletions ansible/vars/hv.sample.yml
Original file line number Diff line number Diff line change
Expand Up @@ -48,3 +48,32 @@ hv_vm_manifest_acm_cr: true
use_bastion_registry: false
# Provide pull-secret for connected manifests
pull_secret: "{{ lookup('file', '../pull-secret.txt') | b64encode }}"

################################################################################
# Hugepages Configuration
################################################################################

# Enable hugepages on hypervisors
enable_hugepages: false

# Hugepage size for hypervisors: 2M or 1G
# NOTE(review): the hv-install reservation script only handles 1G pages;
# confirm before setting 2M.
hugepage_size: "1G"

# Number of hugepages to allocate on hypervisors (e.g., 64 for 64GB of 1G hugepages)
# Calculate based on total memory and VM requirements
# NOTE(review): hv-install tasks appear to reserve pages via
# hugepages_count_per_node; verify this variable is actually consumed.
hugepage_count: 64

# Additional kernel parameters for performance tuning
additional_kernel_params:
  - "intel_iommu=on"
  - "iommu=pt"
  - "isolcpus=2-15,18-31"

# Enable hugepages for VMs
vm_hugepages: false

# Enable vNUMA for performance (recommended with hugepages)
vnuma_enabled: false

# Enable IGB NICs for VMs
vm_igb_nics: false
68 changes: 68 additions & 0 deletions docs/deploy-mno-hybrid.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Deploy Multi-Node OpenShift Hybrid in ScaleLab

This guide describes how to deploy a hybrid Multi-Node OpenShift (MNO) cluster in ScaleLab: some nodes are bare metal and some are virtual machines (VMs).

## 1. Configure Ansible variables in `all.yml`

Set up `all.yml` the same way as for a standard ScaleLab allocation. Then add these variables:

```
cluster_type: mno # Use "mno" (not "vmno"). MNO allows bare metal nodes; VMNO does not.
hv_inventory: true
hv_ssh_pass: <hv-password>
hybrid_worker_count: 123 # Total number of VM workers you want.
```

**Why use two phases?**
First you install a small cluster and confirm it works. Then you add the VM workers. Doing it in two steps makes it easier to find and fix problems: issues from the first install are separate from issues when creating many VMs.

## 2. Run the playbooks

### Phase 1: Install a small cluster (3 control-plane + 3 workers)

1. In `all.yml`, set `hybrid_worker_count: 0`.
2. Run the `create-inventory.yml` playbook.
3. Run the `mno-deploy.yml` playbook.
4. Run the `hv-setup.yml` playbook.

### Phase 2: Add the VM workers

1. In `all.yml`, set `hybrid_worker_count: 123`.
2. Run the `create-inventory.yml` playbook.
3. Open the inventory file at `ansible/inventory/cloudXX.local`. Check:
- **`[worker]`**: It should list the bare metal workers and the correct number of VMs to create.
- **`[hv_vm]`**: It should list the expected number of VMs with the right CPU, memory, and disk. Confirm how many VMs are assigned to each hypervisor (HV). This ratio is set by machine type in `hw_vm_counts` in `lab.yml`.
4. Run the `hv-vm-create.yml` playbook. For more about this playbook, see [Virtual MultiNode OpenShift](deploy-vmno.md).
5. Run the `ocp-scale-out.yml` playbook. For more about this playbook, see [Scale out a Multi-Node OpenShift deployment](scale-out-mno.md).

## Command reference

### create-inventory.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook ansible/create-inventory.yml
```

### hv-setup.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/hv-setup.yml
```

### hv-vm-create.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/hv-vm-create.yml
```

### mno-deploy.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/mno-deploy.yml
```

### ocp-scale-out.yml

```console
(.ansible) [root@<bastion> jetlag]# ansible-playbook -i ansible/inventory/cloud99.local ansible/ocp-scale-out.yml
```
19 changes: 19 additions & 0 deletions docs/deploy-vmno.md
Original file line number Diff line number Diff line change
Expand Up @@ -124,9 +124,21 @@ hw_vm_counts:
nvme0n1: 7
```

When mixing different machine models, the `hw_vm_counts` values may be adjusted so that each hypervisor model creates the same number of VMs. For example, when mixing Dell r640 and r650 in ScaleLab, the following counts were used:

```yaml
hw_vm_counts:
scalelab:
r650:
default: 4
nvme0n1: 16
```

> [!NOTE]
> Depending upon your hardware, you may have to partition and format a 2nd disk to help store VM disk files.

In some VM scenarios, hugepages may be required. To configure hugepages, enable them on the hypervisors with `enable_hugepages` and on the VMs with `vm_hugepages`, then tune the specifics with the related variables found in: `ansible/roles/hv-install/defaults/main.yml`.

## Configure Ansible vars in `hv.yml`

```console
Expand Down Expand Up @@ -485,3 +497,10 @@ vm00008 Ready worker 1d v1.31.7
(.ansible) [root@<bastion> jetlag]# cat /root/vmno/kubeadmin-password
xxxxx-xxxxx-xxxxx-xxxxx
```

## Disabling NetworkManager devices and connections for SR-IOV devices on VMs

One option of creating SR-IOV capable interfaces in a VM is to create them using the Intel IGB driver.
This may be achieved by setting the variable `vm_igb_nics: true` in your variables.

**Please note:** When VMs are created with SR-IOV devices using the IGB driver, the devices and connections may never fully initialize. NetworkManager repeatedly attempts to start them, which results in a large amount of churn on the VMs. A workaround to this churn is to force the devices down and connections' autoconnect off for those created for the interfaces.