Skip to content

Commit 66f87fe

Browse files
committed
feat(install): add retry logic with exponential backoff for ansible-galaxy
Enhance retry_with_timeout to support retry delays and exponential backoff via the RETRY_DELAY and EXPONENTIAL_BACKOFF environment variables (when backoff is enabled, the delay is multiplied by the attempt number). Add retries to the ansible-galaxy role and collection installations to handle transient network errors such as HTTP 502. Check that requirements.yml exists before proceeding with the installation.

Signed-off-by: Roberto Alfieri <ralfieri@redhat.com>
1 parent 808f79d commit 66f87fe

File tree

2 files changed

+49
-6
lines changed

2 files changed

+49
-6
lines changed

01_install_requirements.sh

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,10 +163,29 @@ sudo python -m pip install netaddr lxml
163163
sudo python -m pip install ansible=="${ANSIBLE_VERSION}"
164164

165165
pushd ${METAL3_DEV_ENV_PATH}
166-
ansible-galaxy install -r vm-setup/requirements.yml
166+
167+
# Check if requirements.yml exists before attempting installation
168+
if [[ ! -f vm-setup/requirements.yml ]]; then
169+
echo "ERROR: requirements.yml file not found in vm-setup directory." >&2
170+
exit 1
171+
fi
172+
173+
# Install roles from requirements.yml with retry logic
174+
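# Retries default to 5 (override with ANSIBLE_GALAXY_MAX_RETRIES); the timeout argument of 0 means no time limit unless RETRY_TIMEOUT is set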
175+
RETRY_DELAY=${ANSIBLE_GALAXY_RETRY_DELAY:-15} \
176+
EXPONENTIAL_BACKOFF=true \
177+
retry_with_timeout ${ANSIBLE_GALAXY_MAX_RETRIES:-5} 0 \
178+
"ansible-galaxy install -r vm-setup/requirements.yml"
179+
167180
# Let's temporarily pin these collections to the latest compatible with ansible-2.15
168181
#ansible-galaxy collection install --upgrade ansible.netcommon ansible.posix ansible.utils community.general
169-
ansible-galaxy collection install 'ansible.netcommon<8.0.0' ansible.posix 'ansible.utils<6.0.0' community.general
182+
# Install collections with retry logic
183+
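# Retries default to 5 (override with ANSIBLE_GALAXY_MAX_RETRIES); the timeout argument of 0 means no time limit unless RETRY_TIMEOUT is set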
184+
RETRY_DELAY=${ANSIBLE_GALAXY_RETRY_DELAY:-15} \
185+
EXPONENTIAL_BACKOFF=true \
186+
retry_with_timeout ${ANSIBLE_GALAXY_MAX_RETRIES:-5} 0 \
187+
"ansible-galaxy collection install 'ansible.netcommon<8.0.0' ansible.posix 'ansible.utils<6.0.0' community.general"
188+
170189
ANSIBLE_FORCE_COLOR=true ansible-playbook \
171190
-e "working_dir=$WORKING_DIR" \
172191
-e "virthost=$HOSTNAME" \

utils.sh

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,16 +13,40 @@ function default_installer_cmd() {
1313
}
1414

1515
function retry_with_timeout() {
16-
retries=$1
17-
timeout_duration=$2
18-
command=${*:3}
16+
local retries=$1
17+
local timeout_duration=$2
18+
local command=${*:3}
19+
local retry_delay=${RETRY_DELAY:-0}
20+
local exponential_backoff=${EXPONENTIAL_BACKOFF:-false}
21+
# Use the RETRY_TIMEOUT env var if it is set and non-empty; otherwise fall back to the timeout_duration parameter (0 if that is empty)
22+
local timeout=${RETRY_TIMEOUT:-${timeout_duration:-0}}
23+
local attempt=1
1924

2025
for _ in $(seq "$retries"); do
2126
exit_code=0
22-
timeout "$timeout_duration" bash -c "$command" || exit_code=$?
27+
28+
# Use timeout only if timeout is greater than 0
29+
if (( timeout > 0 )); then
30+
timeout "$timeout" bash -c "$command" || exit_code=$?
31+
else
32+
eval "$command" || exit_code=$?
33+
fi
34+
2335
if (( exit_code == 0 )); then
2436
return 0
2537
fi
38+
39+
# Sleep before the next attempt when a retry delay is configured; no delay is added after the final attempt
40+
if (( attempt < retries )) && (( retry_delay > 0 )); then
41+
local sleep_time=$retry_delay
42+
if [[ "$exponential_backoff" == "true" ]]; then
43+
sleep_time=$(( retry_delay * attempt ))
44+
fi
45+
echo "Command failed (attempt $attempt/$retries). Retrying in ${sleep_time}s..."
46+
sleep $sleep_time
47+
fi
48+
49+
(( attempt++ ))
2650
done
2751

2852
return $(( exit_code ))

0 commit comments

Comments
 (0)