Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 32 additions & 0 deletions files/postgres/pg-member
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
set -uo pipefail
#==============================================================#
# File      :   pg-member
# Desc      :   print the patroni member name of the local node
# Path      :   /pg/bin/pg-member
# Depend    :   patroni, curl, jq, ss, hostname
# License   :   AGPLv3 @ https://pigsty.io/docs/about/license
# Author    :   waiting
#==============================================================#
# Output: the member name on stdout, or nothing at all when it
# cannot be determined.
#==============================================================#

# method 1: get patroni member name from patroni REST API
# Look up the address something listens on at port 8008. The port suffix is
# stripped (instead of cutting on the first ':') so bracketed IPv6 addresses
# such as [::1] stay intact, and wildcard binds are skipped because they are
# not usable as a connect target.
api_ip=$(ss -tlnH 2>/dev/null \
    | awk '$4 ~ /:8008$/ { sub(/:8008$/, "", $4); print $4 }' \
    | grep -vE '^(0\.0\.0\.0|\*|::|\[::\])$' \
    | head -n1)
api_ip=${api_ip:-127.0.0.1}

# '// empty' keeps jq from printing the literal string "null" when the
# response has no .patroni.name; --max-time keeps a hung API from blocking
# the caller forever.
name=$(curl -s --max-time 5 "http://${api_ip}:8008/patroni" 2>/dev/null \
    | jq -r '.patroni.name // empty' 2>/dev/null)

# method 2: get patroni member name from patronictl
# Match the local IP addresses against the Host column of `patronictl list`.
if [ -z "$name" ]; then
    ips=()
    read -ra ips <<< "$(hostname -I 2>/dev/null)"
    if [ ${#ips[@]} -eq 0 ]; then
        # echo "Cannot get local IP address" >&2
        exit 0
    fi

    # build a JSON array literal: ["ip1","ip2",...]
    ip_json=$(printf '"%s",' "${ips[@]}")
    ip_json="[${ip_json%,}]"

    name=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json 2>/dev/null \
        | jq -r --argjson ips "$ip_json" \
            '.[] | select(.Host as $h | $ips | index($h)) | .Member' 2>/dev/null)
fi

if [[ -n "$name" ]]; then
    echo "$name"
fi

14 changes: 14 additions & 0 deletions files/postgres/pg-primary-host
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
set -uo pipefail
#==============================================================#
# File      :   pg-primary-host
# Desc      :   print the host of the patroni leader, as listed by patronictl
# Path      :   /pg/bin/pg-primary-host
# Depend    :   patroni, jq
# License   :   AGPLv3 @ https://pigsty.io/docs/about/license
# Author    :   waiting
#==============================================================#

# Collect the Host of every member whose Role is "Leader" and join them with
# single spaces inside jq. The value can then be echoed quoted (no shell word
# splitting or glob expansion) while the output stays on one line.
# An empty line is printed when patronictl fails or no leader is listed.
host=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json 2>/dev/null \
    | jq -r '[.[] | select(.Role == "Leader") | .Host] | join(" ")')
echo "$host"

14 changes: 14 additions & 0 deletions files/postgres/pg-primary-member
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
set -uo pipefail
#==============================================================#
# File      :   pg-primary-member
# Desc      :   print the member name of the patroni leader, as listed by patronictl
# Path      :   /pg/bin/pg-primary-member
# Depend    :   patroni, jq
# License   :   AGPLv3 @ https://pigsty.io/docs/about/license
# Author    :   waiting
#==============================================================#

# Collect the Member name of every entry whose Role is "Leader" and join them
# with single spaces inside jq. The value can then be echoed quoted (no shell
# word splitting or glob expansion) while the output stays on one line.
# An empty line is printed when patronictl fails or no leader is listed.
member=$(/usr/bin/patronictl -c /pg/bin/patroni.yml list -f json 2>/dev/null \
    | jq -r '[.[] | select(.Role == "Leader") | .Member] | join(" ")')
echo "$member"

3 changes: 2 additions & 1 deletion roles/node/defaults/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ node_disable_swap: false # disable node swap, use with caution
node_static_network: true # preserve dns resolver settings after reboot
node_disk_prefetch: false # setup disk prefetch on HDD to increase performance
node_kernel_modules: [ softdog, br_netfilter, ip_vs, ip_vs_rr, ip_vs_wrr, ip_vs_sh ]
node_hugepage_count: 0            # number of 2MB hugepage, take precedence over ratio,
                                  # -1: use shared_memory_size_in_huge_pages calculated by pg (available since PG 15)
node_hugepage_ratio: 0 # node mem hugepage ratio, 0 disable it by default
node_overcommit_ratio: 0 # node mem overcommit ratio, 0 disable it by default
node_tune: oltp # node tuned profile: none,oltp,olap,crit,tiny
Expand Down
95 changes: 95 additions & 0 deletions roles/pgsql/tasks/grace_patroni_restart.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,95 @@
#!/usr/bin/ansible-playbook
---
#--------------------------------------------------------------#
# patroni restart pg_cls gracefully [pt_restart]
# avoiding switching pg primary/standby
# usage:
# - ./pgsql.yml -l <pg_cls> -t pt_restart -e "pt_restart=true"
# - ./pgsql.yml -l '<ip>,&<pg_cls>' -t pt_restart -e "pt_restart=true"
#--------------------------------------------------------------#
# inner steps:
# 1. patroni pause
# 2. patroni restart pg_cls
# 3. patroni resume
#--------------------------------------------------------------#
# tested scenarios:
# - ./pgsql.yml -l <pg_cls> -t pt_restart -e "pt_restart=true"
# - all nodes have been restarted
# - ./pgsql.yml -l '<ip>,&<pg_cls>' -t pt_restart -e "pt_restart=true"
# - nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored
# - ./pgsql.yml -l '<replica>,<primary>,&<pg_cls>' -t pt_restart -e "pt_restart=true"
# - nodes in ansible_play_hosts_all have been restarted, other nodes in the same pg_cls ignored
# - switchover triggered when patroni restart primary failed
#--------------------------------------------------------------#

# Resolve the runtime role of this node and the current primary of the
# cluster. Skipped when both facts are already defined.
- name: set variable
  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
  when: pg_role_runtime is undefined or pg_primary_host_runtime is undefined
  block:
    # read-only probe, so it must never be reported as a change
    - name: run pg-role
      command: /pg/bin/pg-role
      register: pg_role_cmd
      changed_when: false

    # default(..., true) also falls back to the configured pg_role when
    # pg-role printed nothing; a plain default() only covers "undefined"
    - name: set variable pg_role_runtime
      set_fact:
        pg_role_runtime: "{{ (pg_role_cmd.stdout | default('') | trim) | default(pg_role, true) }}"

    # NOTE(review): expected to set pg_primary_host_runtime and
    # pg_primary_member_runtime - confirm in util/patroni_primary_runtime.yml
    - name: set pg_primary_host_runtime
      tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
      import_tasks: util/patroni_primary_runtime.yml

    - name: print variables
      debug:
        msg: |
          pg_role_runtime: {{ pg_role_runtime|default('') }},
          pg_primary_host_runtime: {{ pg_primary_host_runtime|default('') }},
          pg_primary_member_runtime: {{ pg_primary_member_runtime|default('') }}


# Restart postgres through patroni: pause -> restart -> readiness check ->
# resume -> final readiness check. Runs as the database superuser and is
# skipped entirely when patroni_mode is 'remove'.
- name: 0. patroni restart pg_cls gracefully
  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
  when: patroni_mode != 'remove'
  become_user: "{{ dbsu }}"
  vars:
    pg_role: "{{ pg_role_runtime }}"
    dbsu: "{{ pg_dbsu|default('postgres') }}"
  block:
    # pause/resume act on the whole cluster, so one host is enough
    - name: 1. patroni pause gracefully {{ pg_cluster }}
      include_tasks: util/grace_patroni_pause.yml
      run_once: true

    - name: 2. patroni restart pg_cls {{ pg_cluster }}
      include_tasks: util/patroni_restart_cls.yml

    - name: 3. check pg ready {{ pg_cluster }}
      include_tasks: util/check_pg_ready.yml

    - name: 4. patroni resume pg_cls gracefully {{ pg_cluster }}
      include_tasks: util/grace_patroni_resume.yml
      run_once: true

    - name: 5. print message
      debug:
        msg: finally, check if all postgres is ready {{ pg_cluster }}

    # finally, check if all postgres is ready
    - name: final check pg ready {{ pg_cluster }}
      import_tasks: util/check_pg_ready.yml

    # set_fact has lower precedence than extra vars: when pt_restart was
    # passed with -e, later tasks still see the command-line value
    - name: 6. re-set variable pt_restart to false
      set_fact:
        pt_restart: false

  rescue:
    # ansible_failed_task / ansible_failed_result describe the task that
    # triggered this rescue, whichever step it was
    - name: check postgres ready failed for {{ pg_cluster }}
      debug:
        msg: |
          task: {{ (ansible_failed_task | default({})).name | default('') }}
          rc: {{ (ansible_failed_result | default({})).rc | default('') }}
          STDOUT: {{ (ansible_failed_result | default({})).stdout | default('') }}
          STDERR: {{ (ansible_failed_result | default({})).stderr | default('') }}

    # fail instead of `meta: end_play`, which would end the play with a
    # success status and hide the error; any_errors_fatal still stops the
    # play for every host
    - name: Exit Playbook due to error
      fail:
        msg: "graceful patroni restart of {{ pg_cluster }} failed, see the output above"
      any_errors_fatal: true

...
15 changes: 15 additions & 0 deletions roles/pgsql/tasks/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,21 @@
vars: { database: "{{ item }}" }
with_items: "{{ pg_databases }}"

#--------------------------------------------------------------#
# pg hugepages                                     [pg_hugepage]
#--------------------------------------------------------------#
# static import: the tags and condition below are inherited by every
# task of pg_hugepage.yml
- import_tasks: pg_hugepage.yml
  tags:
    - patroni
    - pg_launch
    - pg_hugepage
  when: patroni_enabled | bool

#--------------------------------------------------------------#
# patroni restart pg_cls gracefully                 [pt_restart]
#--------------------------------------------------------------#
# dynamic include: only loaded when patroni is enabled and pt_restart
# is defined and true (conditions are AND-ed, evaluated in order)
- name: patroni restart pg_cls gracefully
  include_tasks: grace_patroni_restart.yml
  tags:
    - patroni
    - pg_launch
    - pt_restart
    - pg_hugepage
  when:
    - patroni_enabled | bool
    - pt_restart is defined
    - pt_restart | bool

#--------------------------------------------------------------#
# Summary [pg_done]
#--------------------------------------------------------------#
Expand Down
71 changes: 71 additions & 0 deletions roles/pgsql/tasks/pg_hugepage.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
---
#--------------------------------------------------------------#
# Enable hugepage for pg and restart pg cluster [pg_hugepage]
# if `node_hugepage_count` or `node_hugepage_ratio` changed,
# it will take a full backup of the pg_cls and restart it gracefully
# support usage:
# - `pig install`
# - `bin/pgsql-add <pg_cls>`
# - `bin/pgsql-add <pg_cls> <ip>`
# service of nodes in ansible_play_hosts_all will be unavailable during the process,
# other nodes in the same pg_cls ignored
# - `pgsql.yml -l <pg_cls> -t pg_hugepage`
# entire service of this postgres cluster will be unavailable during the process
# - `pgsql.yml -l '<ip>,&<pg_cls>' -t pg_hugepage`
# service of nodes in ansible_play_hosts_all will be unavailable during the process,
# other nodes in the same pg_cls ignored
#--------------------------------------------------------------#
# calculation rules:
# 1. `shared_memory_size_in_huge_pages` (from PG15+): if `node_hugepage_count` is -1 and huge_pages (from PG) != off
# 2. if `node_hugepage_count` > 0
# - `shared_memory_size_in_huge_pages`: if `shared_memory_size_in_huge_pages` > `node_hugepage_count`
# - `node_hugepage_count`
# 3. disable hugepage if `node_hugepage_count` is 0
# 4. if `node_hugepage_ratio` > 0
# - `shared_memory_size_in_huge_pages`: if `shared_memory_size_in_huge_pages` > pages calculated from `node_hugepage_ratio`
# - page value calculated from `node_hugepage_ratio`
# All above rules and usages are tested successfully on Rocky Linux 9
#--------------------------------------------------------------#

# Start from zero for both counters.
# NOTE(review): util/pg_read_hugepage.yml presumably overwrites them with
# the current and the desired nr_hugepages - confirm in that file.
- name: Set default value
  set_fact:
    curr_nr_hugepages: 0
    new_nr_hugepages: 0


# Compare the current and the desired hugepage count; when they differ,
# request a patroni restart (pt_restart), take a backup and write the
# new value.
- name: enable hugepage for pg
  tags: pg_hugepage
  block:
    - import_tasks: util/pg_read_hugepage.yml

    - name: calculate hugepage need update
      set_fact:
        hugepages_need_update: "{{ new_nr_hugepages|int != curr_nr_hugepages|int }}"

    # pt_restart is the switch main.yml checks before including
    # grace_patroni_restart.yml
    - name: set pt_restart according to hugepages_need_update
      set_fact:
        pt_restart: "{{ hugepages_need_update|bool }}"

    - name: print nr_hugepages values
      when: new_nr_hugepages is defined and curr_nr_hugepages is defined
      debug:
        msg: |
          curr_nr_hugepages: {{ curr_nr_hugepages|trim }}, new_nr_hugepages: {{ new_nr_hugepages|trim }},
          hugepages_need_update: {{ hugepages_need_update|bool }},
          pt_restart: {{ pt_restart|bool }}

    # full backup if contains primary, otherwise backup (full or incremental) at primary
    - name: pg-backup before update hugepage
      include_tasks: util/pg_backup.yml
      when: hugepages_need_update|bool

    - name: update hugepage
      include_tasks: util/pg_write_hugepage.yml
      when: hugepages_need_update|bool

...
27 changes: 27 additions & 0 deletions roles/pgsql/tasks/util/check_pg_ready.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/usr/bin/ansible-playbook
---

# Wait for the postgres port, then poll pg_isready until it succeeds
# (up to 6 retries, 5 seconds apart). The last pg_isready result is
# published as pg_ready_result on success and on failure.
- name: check postgres ready
  tags: [ pg_hugepage, patroni, pg_launch, pt_restart ]
  vars:
    dbsu: "{{ pg_dbsu|default('postgres') }}"
  block:
    # best effort: a closed port is reported by the pg_isready task below
    - name: wait for postgres ready
      wait_for:
        host: "{{ inventory_hostname }}"
        port: "{{ pg_port }}"
        state: started
        timeout: 60
      ignore_errors: true

    # read-only probe: plain command (no shell needed), never "changed"
    - name: check postgres ready
      become_user: "{{ dbsu }}"
      command: "{{ pg_bin_dir }}/pg_isready -t 5 -p {{ pg_port }}"
      register: result
      until: result.rc == 0
      retries: 6
      delay: 5
      changed_when: false

  always:
    # runs even when the probe above failed, so callers (e.g. a rescue
    # section) can inspect rc/stdout/stderr; the failure still propagates
    - name: Set fact pg_ready_result
      set_fact:
        pg_ready_result: "{{ result }}"
      when: result is defined

...
36 changes: 36 additions & 0 deletions roles/pgsql/tasks/util/grace_patroni_pause.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/usr/bin/ansible-playbook
---
#--------------------------------------------------------------#
# patroni pause pg_cls gracefully
#--------------------------------------------------------------#

# Pause patroni for the cluster unless it is already paused; when the local
# attempt did not take effect, retry once on the primary node.
#
# The tag list matches the other pt_restart task files. This file is loaded
# with include_tasks, so it does not inherit the caller's tags: with only
# `grace_patroni_pause` here, runs limited to `-t pt_restart` or
# `-t pg_hugepage` would skip the pause. The old tag is kept for
# compatibility.
- name: patroni pause gracefully
  tags: [ pg_hugepage, patroni, pg_launch, pt_restart, grace_patroni_pause ]
  become_user: "{{ dbsu }}"
  vars:
    dbsu: "{{ pg_dbsu|default('postgres') }}"
  block:
    # NOTE(review): is_patroni_paused is expected to be '' when the cluster
    # is not paused - confirm in is_patroni_paused.yml
    - name: check is paused {{ pg_cluster }}
      import_tasks: is_patroni_paused.yml

    # errors are ignored on purpose: the fallback below handles them
    - name: patroni pause {{ pg_cluster }}
      when: is_patroni_paused == ''
      command: /usr/bin/patronictl -c /pg/bin/patroni.yml pause
      register: patroni_pause_result
      until: patroni_pause_result.rc == 0 and patroni_pause_result.stdout.find('Success') != -1
      retries: 2
      delay: 1
      run_once: true
      ignore_errors: true

    - name: check is paused {{ pg_cluster }}
      import_tasks: is_patroni_paused.yml

    - name: delegate task to the primary node of {{ pg_cluster }} if previous failed
      command: /usr/bin/patronictl -c /pg/bin/patroni.yml pause
      when:
        - is_patroni_paused == ''
        - pg_primary_host_runtime != ''
      delegate_to: "{{ pg_primary_host_runtime }}"

...
35 changes: 35 additions & 0 deletions roles/pgsql/tasks/util/grace_patroni_resume.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/ansible-playbook
---
#--------------------------------------------------------------#
# patroni resume pg_cls gracefully
#--------------------------------------------------------------#

# Resume patroni for the cluster when it is currently paused; when the local
# attempt did not take effect, retry once on the primary node.
- name: patroni resume pg_cls gracefully
  vars:
    dbsu: "{{ pg_dbsu | default('postgres') }}"
  become_user: "{{ dbsu }}"
  tags:
    - pg_hugepage
    - patroni
    - pg_launch
    - pt_restart
  block:
    # NOTE(review): is_patroni_paused is expected to be non-empty while the
    # cluster is paused - confirm in is_patroni_paused.yml
    - name: check is paused {{ pg_cluster }}
      import_tasks: is_patroni_paused.yml

    # errors are ignored on purpose: the fallback below handles them
    - name: patroni resume {{ pg_cluster }}
      command: /usr/bin/patronictl -c /pg/bin/patroni.yml resume
      register: patroni_resume_result
      when: is_patroni_paused | length > 0
      until: >-
        patroni_resume_result.rc == 0
        and 'Success' in patroni_resume_result.stdout
      retries: 2
      delay: 1
      ignore_errors: true

    - name: check is paused {{ pg_cluster }}
      import_tasks: is_patroni_paused.yml

    - name: delegate task to the primary node of {{ pg_cluster }} if previous failed
      command: /usr/bin/patronictl -c /pg/bin/patroni.yml resume
      delegate_to: "{{ pg_primary_host_runtime }}"
      when: (is_patroni_paused | length > 0) and (pg_primary_host_runtime != '')

...
Loading