diff --git a/roles/postgres/replica/final/defaults/main.yml b/roles/postgres/replica/final/defaults/main.yml
new file mode 100644
index 000000000..2c3b8edbb
--- /dev/null
+++ b/roles/postgres/replica/final/defaults/main.yml
@@ -0,0 +1,15 @@
+---
+
+# © Copyright EnterpriseDB UK Limited 2015-2025 - All rights reserved.
+
+## pg_basebackup timeout for cloning replicas
+#
+# For large databases, pg_basebackup can take multiple hours to complete.
+# This timeout value (in seconds) determines how long to wait for the
+# pg_basebackup operation to complete.
+#
+# Default: 14400 seconds (4 hours)
+# For very large databases, increase this value as needed
+# (e.g., 28800 for 8 hours).
+
+pg_basebackup_timeout: 14400
diff --git a/roles/postgres/replica/final/tasks/clone.yml b/roles/postgres/replica/final/tasks/clone.yml
index d990d02b2..db82e5e5b 100644
--- a/roles/postgres/replica/final/tasks/clone.yml
+++ b/roles/postgres/replica/final/tasks/clone.yml
@@ -39,12 +39,38 @@
     extra_option: "{{ waldir_option if pg_wal_dir_outside_pgdata else '' }}"
   become_user: "{{ postgres_user }}"
   become: true
-  register: this
-  failed_when:
-    this.rc != 0 or 'error' in this.stderr
+  # Start pg_basebackup as a background job bounded by pg_basebackup_timeout;
+  # the next task polls for its result.
+  async: "{{ pg_basebackup_timeout }}"
+  poll: 0
+  register: pg_basebackup_job
   when:
     task_selector|selects('postgres', 'replica')
 
+# Poll the background pg_basebackup job every 10 seconds, for up to
+# roughly pg_basebackup_timeout seconds in total.
+#
+# async_status returns no rc or stderr while the job is still running, so
+# the failure check only inspects them once the job has finished; a failure
+# reported by async_status itself is passed through.
+- name: Wait for pg_basebackup to complete
+  async_status:
+    jid: "{{ pg_basebackup_job.ansible_job_id }}"
+  become_user: "{{ postgres_user }}"
+  become: true
+  register: pg_basebackup_result
+  until: pg_basebackup_result.finished | default(false) | bool
+  retries: "{{ [(pg_basebackup_timeout / 10) | int, 1] | max }}"
+  delay: 10
+  failed_when: >-
+    (pg_basebackup_result.failed | default(false) | bool)
+    or ((pg_basebackup_result.finished | default(false) | bool)
+    and ((pg_basebackup_result.rc | default(0)) != 0
+    or 'error' in (pg_basebackup_result.stderr | default(''))))
+  when:
+    - task_selector|selects('postgres', 'replica')
+    - pg_basebackup_job.ansible_job_id is defined
+
 # If we are cloning an instance with postgres_conf_dir separated from
 # postgres_data_dir, we copy its configuration files to the replica's
 # postgres_conf_dir (which may or may not be the same as PGDATA, and