@@ -61,6 +61,11 @@ sub execute {
6161 # Wait for cloud-init completion
6262 $self -> _wait_for_cloud_init($ssh_connection );
6363
64+ # Force reconnection after cloud-init completes (VM reboots during cloud-init)
65+ say " 🔄 Refreshing SSH connection after cloud-init reboot..." ;
66+ STDOUT -> flush();
67+ $ssh_connection -> force_reconnect();
68+
6469 # Verify SSH key authentication after cloud-init completes
6570 $self -> _verify_ssh_key_auth($ssh_connection );
6671
@@ -108,6 +113,7 @@ sub _wait_for_cloud_init {
108113
109114 say " Waiting for cloud-init to complete..." ;
110115 say " This may take several minutes while packages are installed and configured." ;
116+ STDOUT -> flush();
111117
112118 my $completion_file = " /var/lib/cloud/torrust-setup-complete" ;
113119 my $max_attempts = 360; # 30 minutes with 5-second intervals
@@ -117,60 +123,124 @@ sub _wait_for_cloud_init {
117123
118124 # Step 1: Wait until SSH connection is available (for password auth to check cloud-init)
119125 say " ⏳ Waiting for SSH service to become available..." ;
126+ STDOUT -> flush();
120127
121128 while ($attempt < $max_attempts && !$ssh_connected ) {
122129 $attempt ++;
123130
124131 if ($ssh_connection -> test_password_connection()) {
125132 $ssh_connected = 1;
126133 say " ✅ SSH password connection established to " . $ssh_connection -> host;
134+ STDOUT -> flush();
127135 } else {
128136 if ($attempt % 6 == 0) { # Every 30 seconds
129137 say " [Waiting for SSH connection... ${attempt} 0s elapsed]" ;
138+ STDOUT -> flush();
130139 }
131140 sleep (5);
132141 }
133142 }
134143
135144 if (!$ssh_connected ) {
136145 say " ❌ Failed to establish SSH connection to " . $ssh_connection -> host . " after " . ($max_attempts * 5 / 60) . " minutes" ;
146+ STDOUT -> flush();
137147 $self -> _print_cloud_init_logs($ssh_connection );
138148 die " SSH connection failed" ;
139149 }
140150
141151 # Step 2: Wait until cloud-init completion marker is created
142152 say " ⏳ Waiting for cloud-init to complete..." ;
153+ STDOUT -> flush();
143154
144155 $attempt = 0;
156+ my $consecutive_ssh_failures = 0;
145157 while ($attempt < $max_attempts ) {
146158 $attempt ++;
147159
148160 my $result = $ssh_connection -> execute_command(" test -f $completion_file " );
149161
150- if ($result -> {success }) {
162+ # Debug: Always show result details when exit code is 0
163+ if ($result -> exit_code == 0) {
164+ say " [DEBUG] File exists! Exit code: " . $result -> exit_code .
165+ " , Success method: " . ($result -> success ? ' true' : ' false' ) .
166+ " , Output: '" . ($result -> output // ' EMPTY' ) . " '" ;
167+ STDOUT -> flush();
168+ }
169+
170+ if ($result -> success) {
151171 say " ✅ Cloud-init setup completed successfully!" ;
172+ STDOUT -> flush();
152173
153174 # Show completion message
154175 my $completion_result = $ssh_connection -> execute_command(" cat $completion_file " );
155- if ($completion_result -> {success } && $completion_result -> {output }) {
156- chomp $completion_result -> {output };
157- say " 📅 Completion marker: " . $completion_result -> {output };
176+ if ($completion_result -> success && $completion_result -> output) {
177+ chomp (my $output = $completion_result -> output);
178+ say " 📅 Completion marker: " . $output ;
179+ STDOUT -> flush();
158180 }
159181 $cloud_init_success = 1;
160182 last ;
183+ } else {
184+ # Track consecutive SSH failures (exit code 255)
185+ if ($result -> exit_code == 255) {
186+ $consecutive_ssh_failures ++;
187+ # If we have too many consecutive SSH failures, try to re-establish password connection
188+ if ($consecutive_ssh_failures >= 12) { # 1 minute of consecutive failures
189+ say " ⚠️ SSH connection lost, attempting to re-establish (VM may be rebooting)..." ;
190+ say " [Waiting 30s for VM to complete reboot...]" ;
191+ STDOUT -> flush();
192+ sleep (30); # Give VM time to fully reboot
193+
194+ # Try to re-establish password connection (VM might have rebooted)
195+ my $reconnect_attempts = 0;
196+ while ($reconnect_attempts < 12 && !$ssh_connection -> test_password_connection()) {
197+ $reconnect_attempts ++;
198+ say " [Reconnection attempt $reconnect_attempts /12...]" ;
199+ STDOUT -> flush();
200+ sleep (15); # Wait longer between attempts
201+ }
202+
203+ if ($ssh_connection -> test_password_connection()) {
204+ say " ✅ SSH connection re-established!" ;
205+ STDOUT -> flush();
206+ $consecutive_ssh_failures = 0; # Reset counter after successful reconnection
207+ } else {
208+ say " ❌ Failed to re-establish SSH connection after VM reboot." ;
209+ say " [DEBUG] Last error: " . $result -> output;
210+ STDOUT -> flush();
211+ last ;
212+ }
213+ }
214+ } else {
215+ # Reset counter for non-SSH failures (normal file-not-found errors)
216+ $consecutive_ssh_failures = 0;
217+ }
218+
219+ # Debug: Show why the command failed
220+ if ($attempt % 6 == 0) { # Every 30 seconds
221+ my $elapsed_seconds = $attempt * 5;
222+ say " [DEBUG ${elapsed_seconds} s] File check failed - Exit code: " . $result -> exit_code .
223+ " (this is normal until cloud-init completes)" ;
224+ if ($consecutive_ssh_failures > 0) {
225+ say " [SSH failures: $consecutive_ssh_failures consecutive]" ;
226+ }
227+ STDOUT -> flush();
228+ }
161229 }
162230
163231 # Show progress indicator every 2 minutes
164232 if ($attempt % 24 == 0) {
165233 my $elapsed_minutes = int ($attempt * 5 / 60);
166234 say " [Cloud-init still running... ${elapsed_minutes} minutes elapsed]" ;
235+ STDOUT -> flush();
167236 }
168237
169238 sleep (5);
170239 }
171240
172241 if (!$cloud_init_success ) {
173242 say " ❌ Timeout waiting for cloud-init to complete on " . $ssh_connection -> host . " after " . ($max_attempts * 5 / 60) . " minutes" ;
243+ STDOUT -> flush();
174244 $self -> _print_cloud_init_logs($ssh_connection );
175245 die " Cloud-init timeout" ;
176246 }
# Print a short post-provisioning summary for the VM behind $ssh_connection:
# the Docker version (and which invocation method produced it), the first
# line of the firewall status, and the address the VM is reachable at.
#
# Parameters:
#   $self           - the invocant (not used by this routine).
#   $ssh_connection - object providing execute_command($cmd), which returns a
#                     result object with success/output accessors, and host().
#
# Returns nothing useful; all results are written to STDOUT, which is flushed
# after each section so progress is visible immediately.
sub _show_final_summary {
    my ($self, $ssh_connection) = @_;

    say " 📦 Final system summary:";
    STDOUT->flush();

    # Docker probes, tried in order until one yields a version string.
    #   sg     - runs one command with the docker group active. newgrp cannot
    #            do this: it only accepts a group name and starts a shell.
    #   sudo   - fallback when the group membership is not effective yet.
    #   direct - plain invocation; may fail because of group membership.
    my @docker_probes = (
        [ 'sg',     'sg docker -c "docker --version" 2>&1' ],
        [ 'sudo',   'sudo docker --version 2>&1' ],
        [ 'direct', 'docker --version 2>&1' ],
    );

    my $docker_version = "Docker not available - all methods failed";
    for my $probe (@docker_probes) {
        my ($method, $command) = @{$probe};

        my $result = $ssh_connection->execute_command($command);
        next unless $result->success;

        # Strip the trailing newline BEFORE appending the method suffix;
        # chomping the combined string would leave the newline in the middle.
        my $output = $result->output // '';
        $output =~ s/\s+\z//;
        next unless length $output;    # a "successful" probe with no output tells us nothing

        $docker_version = "$output (via $method)";
        last;
    }

    say " Docker: $docker_version";
    STDOUT->flush();

    # Check firewall status.
    # The pipeline's exit status is that of `head`, so success alone does not
    # prove ufw ran; empty output is treated as "not available" as well.
    my $ufw_result = $ssh_connection->execute_command('sudo ufw status | head -1');
    my $ufw_status = $ufw_result->success ? ($ufw_result->output // '') : '';
    $ufw_status =~ s/\s+\z//;
    $ufw_status = "UFW not available" unless length $ufw_status;

    say " Firewall: $ufw_status";
    STDOUT->flush();

    say " Provisioning completed successfully!";
    say " VM is ready at IP: " . $ssh_connection->host;
    STDOUT->flush();
}
197300
198301sub _print_cloud_init_logs {
@@ -203,16 +306,16 @@ sub _print_cloud_init_logs {
203306 # Print cloud-init-output.log
204307 say " === /var/log/cloud-init-output.log ===" ;
205308 my $output_result = $ssh_connection -> execute_command_with_sudo(' cat /var/log/cloud-init-output.log' );
206- if ($output_result -> { success } ) {
207- print $output_result -> { output } ;
309+ if ($output_result -> success) {
310+ print $output_result -> output;
208311 } else {
209312 say " Cloud-init output log not available" ;
210313 }
211314
212315 say " === /var/log/cloud-init.log ===" ;
213316 my $main_result = $ssh_connection -> execute_command_with_sudo(' cat /var/log/cloud-init.log' );
214- if ($main_result -> { success } ) {
215- print $main_result -> { output } ;
317+ if ($main_result -> success) {
318+ print $main_result -> output;
216319 } else {
217320 say " Cloud-init main log not available" ;
218321 }
@@ -222,6 +325,7 @@ sub _verify_ssh_key_auth {
222325 my ($self , $ssh_connection ) = @_ ;
223326
224327 say " 🔑 Checking SSH key authentication..." ;
328+ STDOUT -> flush();
225329
226330 # SSH authentication might need time to fully stabilize after cloud-init reboot
227331 # Try with progressive delays: immediate, 5s, 10s, 15s
@@ -231,6 +335,7 @@ sub _verify_ssh_key_auth {
231335 if ($attempt > 0) {
232336 my $delay = $retry_delays [$attempt ];
233337 say " ⏳ Waiting ${delay} s before retry attempt " . ($attempt + 1) . " ..." ;
338+ STDOUT -> flush();
234339 sleep $delay ;
235340 }
236341
@@ -243,16 +348,19 @@ sub _verify_ssh_key_auth {
243348 if ($fresh_ssh -> test_key_connection()) {
244349 say " ✅ SSH key authentication is working correctly!" ;
245350 say " You can now connect using: ssh -i " . $fresh_ssh -> ssh_key_path . " " . $fresh_ssh -> username . " @" . $fresh_ssh -> host;
351+ STDOUT -> flush();
246352 return ;
247353 }
248354
249355 if ($attempt < $#retry_delays ) {
250356 say " ⚠️ SSH key authentication failed, will retry..." ;
357+ STDOUT -> flush();
251358 }
252359 }
253360
254361 # All retries failed
255362 say " ❌ SSH key authentication failed after all retries" ;
363+ STDOUT -> flush();
256364 $self -> _print_cloud_init_logs($ssh_connection );
257365 die " SSH key authentication failed" ;
258366}
0 commit comments