Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lisa/features/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
from .nested_virtualization import NestedVirtualization
from .network_interface import NetworkInterface, Sriov, Synthetic
from .nfs import Nfs
from .non_ssh_executor import NonSshExecutor
from .nvme import Nvme, NvmeSettings
from .password_extension import PasswordExtension
from .resize import Resize, ResizeAction
Expand Down Expand Up @@ -73,4 +74,5 @@
"VMStatus",
"Synthetic",
"StartStop",
"NonSshExecutor",
]
107 changes: 107 additions & 0 deletions lisa/features/non_ssh_executor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
import re
from typing import List

from lisa.feature import Feature
from lisa.features.serial_console import SerialConsole
from lisa.util import LisaException


class NonSshExecutor(Feature):
    """
    NonSshExecutor is used to run commands on the node when SSH is not available.
    Lisa by default uses SSH for connection, but this feature provides an alternative
    execution method for scenarios where SSH connectivity is not possible or desired.

    This base implementation sends the commands through the node's SerialConsole
    feature.
    """

    # Matches a (stripped) console line that ends with a shell prompt.
    # Two layouts are accepted, both followed by optional whitespace and one
    # of `$`, `#` or `>`:
    # - "user@host:path"   (word chars, @, hyphens, dots, a colon, then a path)
    # - "[user@host dir]"  (bracketed layout, e.g. the RHEL-family bash default)
    # Compiled once at class level instead of on every prompt check.
    _PROMPT_PATTERN = re.compile(
        r"(?:"
        r"[a-zA-Z0-9_@.-]+:[~/a-zA-Z0-9_./-]*"
        r"|"
        r"\[[^\[\]\s]+@[^\[\]\s]+ [^\[\]]*\]"
        r")"
        r"\s*[\$#>]\s*$"
    )

    @classmethod
    def name(cls) -> str:
        return "NonSshExecutor"

    def enabled(self) -> bool:
        return True

    def execute(self, commands: List[str]) -> List[str]:
        """
        Executes a list of commands on the node and returns their outputs.

        :param commands: A list of shell commands to execute.
        :return: A list with one entry per command, holding what was read from
            the serial console right after that command was written.
        :raises NotImplementedError: if the node does not support SerialConsole.
        :raises LisaException: if the serial console interaction fails.
        """

        if not self._node.features.is_supported(SerialConsole):
            raise NotImplementedError(
                "NonSshExecutor requires SerialConsole feature to be supported."
            )
        return self._execute(commands)

    def _execute(self, commands: List[str]) -> List[str]:
        """
        Run the commands one by one over the serial console.

        Any failure (including a missing shell prompt) is re-raised as a
        LisaException, and the serial console is always closed afterwards.
        """
        out: List[str] = []
        serial_console = self._node.features[SerialConsole]
        try:
            serial_console.ensure_login()
            # clear the console before executing commands
            _ = serial_console.read()
            # write a newline and read to make sure serial console has the prompt
            serial_console.write("\n")
            response = serial_console.read()

            # Check for full prompt pattern instead of individual characters
            if not self._is_valid_prompt(response):
                raise LisaException(
                    f"Valid shell prompt not found in output. "
                    f"Expected a shell prompt ending with $, #, or >, "
                    f"but got: {response.strip()}"
                )

            for command in commands:
                serial_console.write(self._add_newline(command))
                out.append(serial_console.read())
            collected_info = "\n\n".join(out)
            self._log.info(
                f"Collected information using NonSshExecutor:\n{collected_info}"
            )
            return out
        except Exception as e:
            raise LisaException(f"Failed to execute commands: {e}") from e
        finally:
            serial_console.close()

    def _is_valid_prompt(self, response: str) -> bool:
        """
        Check if the response contains a valid shell prompt pattern.

        :param response: The response from the serial console
        :return: True if a valid prompt is found, False otherwise
        """
        if not response:
            return False

        # Check each line in the response for the prompt pattern
        for line in response.split("\n"):
            line = line.strip()
            if self._PROMPT_PATTERN.search(line):
                self._log.debug(f"Valid prompt found: '{line}'")
                return True

        self._log.debug(f"No valid prompt found in response: '{response.strip()}'")
        return False

    def _add_newline(self, command: str) -> str:
        """
        Adds a newline character to the command if it does not already end with one.
        newline is required to run the command in serial console.
        """
        if not command.endswith("\n"):
            return f"{command}\n"
        return command
29 changes: 29 additions & 0 deletions lisa/features/serial_console.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
get_datetime_path,
get_matched_str,
)
from lisa.util.constants import ENVIRONMENTS_NODES_REMOTE_USERNAME

FEATURE_NAME_SERIAL_CONSOLE = "SerialConsole"
NAME_SERIAL_CONSOLE_LOG = "serial_console.log"
Expand Down Expand Up @@ -190,6 +191,34 @@ def check_initramfs(
f"{initramfs_logs} {filesystem_exception_logs}"
)

def ensure_login(self) -> None:
    """
    Log in on the serial console if it is currently showing a login prompt.

    The console is read once to clear it, a newline is written, and the
    following output is inspected. When it does not contain "login" the
    console is treated as already logged in and nothing else is done.
    Otherwise the node's username is written, followed by the password when
    a password prompt shows up.

    :raises LisaException: if a login is needed but the node is not a
        RemoteNode.
    """
    # Clear the serial console and try to get the login prompt
    self.read()
    self.write("\n")
    serial_output = self.read()

    if "login" not in serial_output:
        self._log.debug(
            "No login prompt found, serial console is already logged in."
        )
        return

    # Imported inside the method rather than at module level
    # (presumably to avoid a circular import with lisa.node - TODO confirm).
    from lisa.node import RemoteNode

    if not isinstance(self._node, RemoteNode):
        raise LisaException(
            "SerialConsole login is only implemented for RemoteNode"
        )

    username = self._node.connection_info[ENVIRONMENTS_NODES_REMOTE_USERNAME]
    password = self._node.get_password()

    self.write(f"{username}\n")
    password_prompt = self.read()

    if "password" in password_prompt.lower():
        self.write(f"{password}\n")
    else:
        # Make the skipped step visible instead of returning silently, so a
        # later "prompt not found" failure can be traced back to the login.
        self._log.debug(
            "No password prompt found after sending the username, "
            "the password was not sent."
        )

def read(self) -> str:
    """
    Return output read from the serial console.

    This base version only raises NotImplementedError; a concrete
    implementation is expected to be supplied elsewhere.
    """
    raise NotImplementedError()

Expand Down
60 changes: 59 additions & 1 deletion lisa/node.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
InitializableMixin,
LisaException,
RequireUserPasswordException,
TcpConnectionException,
constants,
fields_to_dict,
generate_strong_password,
Expand Down Expand Up @@ -693,7 +694,16 @@ def set_connection_info(

def _initialize(self, *args: Any, **kwargs: Any) -> None:
    """
    Initialize the remote node and gather diagnostics if the TCP connection
    cannot be established.

    The parent initialization runs exactly once, inside the ``try``. When it
    raises TcpConnectionException, logs are collected through the non-SSH
    executor on a best-effort basis and the original exception is re-raised.
    """
    assert self._connection_info, "call setConnectionInfo before use remote node"
    try:
        super()._initialize(*args, **kwargs)
    except TcpConnectionException:
        # Diagnostics must never hide the connection failure: any error
        # raised while collecting them is only logged.
        try:
            self._collect_logs_using_non_ssh_executor()
        except Exception as log_error:
            self.log.debug(
                f"Failed to collect logs using non-ssh executor: {log_error}"
            )
        raise

def get_working_path(self) -> PurePath:
    """Return the working path produced by ``_get_remote_working_path``."""
    remote_working_path = self._get_remote_working_path()
    return remote_working_path
Expand Down Expand Up @@ -737,6 +747,34 @@ def check_sudo_password_required(self) -> None:
raise RequireUserPasswordException("Reset password failed")
self._check_password_and_store_prompt()

def _collect_logs_using_non_ssh_executor(self) -> None:
    """
    Run a fixed set of network and service diagnostic commands through the
    NonSshExecutor feature.

    When the node does not support NonSshExecutor, a debug message is logged
    and nothing is executed. The outputs returned by the executor are not
    used here.
    """
    from lisa.features import NonSshExecutor

    # Guard clause: nothing to do when the feature is unavailable.
    if not self.features.is_supported(NonSshExecutor):
        self.log.debug(
            f"NonSshExecutor is not supported on {self.name}, "
            "cannot collect logs using non-ssh executor."
        )
        return

    diagnostic_commands = [
        "ip addr show",
        "ip link show",
        "ip neigh",
        "ping -c 3 -n 8.8.8.8",
        "cat /var/log/waagent.log | tail -n 50",
        "journalctl -n 50 --no-pager",
        "systemctl status NetworkManager --no-pager --plain",
        "systemctl status network --no-pager --plain",
        "systemctl status systemd-networkd --no-pager --plain",
    ]
    executor = self.features[NonSshExecutor]
    executor.execute(commands=diagnostic_commands)

def _check_password_and_store_prompt(self) -> None:
# self.shell.is_sudo_required_password is true, so running sudo command
# will input password in process.wait_result. Check running sudo again
Expand Down Expand Up @@ -784,6 +822,26 @@ def _check_bash_prompt(self) -> None:
ssh_shell.bash_prompt = bash_prompt
self.has_checked_bash_prompt = True

def get_password(self, generate: bool = True) -> str:
    """
    Return the password stored in the node's connection info.

    When no password is stored and ``generate`` is true, ``_reset_password``
    is called and the password is read from the connection info again.

    :param generate: whether a missing password may be created via
        ``_reset_password``.
    :raises RequireUserPasswordException: if the password is missing and
        generation is disabled, if the reset fails, or if no password is
        stored after the reset.
    """
    existing = self._connection_info.password
    if existing:
        return existing

    if not generate:
        raise RequireUserPasswordException(
            "The password is not set and generation is disabled."
        )

    self.log.debug("password is not set, generating a strong password.")
    reset_succeeded = self._reset_password()
    if not reset_succeeded:
        raise RequireUserPasswordException("Reset password failed")

    # Read from the connection info again to pick up the value stored
    # during the reset.
    generated = self._connection_info.password
    if generated:
        return generated
    raise RequireUserPasswordException(
        "The password has neither been set nor generated."
    )

def _reset_password(self) -> bool:
from lisa.features import PasswordExtension

Expand Down
76 changes: 76 additions & 0 deletions lisa/sut_orchestrator/azure/features.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@
DiskCreateOptionTypes,
HardwareProfile,
NetworkInterfaceReference,
RunCommandInput,
RunCommandResult,
VirtualMachineExtension,
VirtualMachineUpdate,
)
Expand Down Expand Up @@ -3741,3 +3743,77 @@ def _prepare_azure_file_share(
sudo=True,
append=True,
)


class RunCommand(AzureFeatureMixin, Feature):
    """
    Feature that runs shell commands on an Azure VM through the compute
    client's ``virtual_machines.begin_run_command`` operation, so it does not
    depend on an SSH connection to the node.
    """

    @classmethod
    def create_setting(
        cls, *args: Any, **kwargs: Any
    ) -> Optional[schema.FeatureSettings]:
        return schema.FeatureSettings.create(cls.name())

    @classmethod
    def can_disable(cls) -> bool:
        return False

    def enabled(self) -> bool:
        # RunCommand is always enabled for Azure. `enabled` is the method
        # name the other Feature subclasses (e.g. NonSshExecutor) override.
        return True

    def is_enabled(self) -> bool:
        # Kept for backward compatibility; same answer as `enabled`.
        return self.enabled()

    def execute(self, commands: List[str]) -> str:
        """
        Executes a list of commands on the Azure VM using RunCommand.

        :param commands: A list of shell commands to execute.
        :return: The ``message`` of the first entry in the operation result's
            ``value`` list.
        :raises LisaException: if the result has no ``value[0]['message']``.
        """
        context = get_node_context(self._node)
        platform: AzurePlatform = self._platform  # type: ignore
        compute_client = get_compute_client(platform)

        # Prepare the RunCommandInput for Azure
        # NOTE(review): "RunShellScript" looks Linux specific. Consider whether
        # Windows can be supported; if not, think about how to exclude this
        # feature for non-Posix systems.
        run_command_input = RunCommandInput(
            command_id="RunShellScript",
            script=self._add_echo_before_command(commands),
        )

        # Execute the command on the VM
        operation = compute_client.virtual_machines.begin_run_command(
            resource_group_name=context.resource_group_name,
            vm_name=context.vm_name,
            parameters=run_command_input,
        )
        result = wait_operation(operation=operation, failure_identity="run command")
        value = result.get("value")
        if value and value[0].get("message"):
            message = value[0]["message"]
        else:
            raise LisaException(
                "RunCommand did not run successfully. "
                f"Got response: '{value}'. "
                "Expected response to contain `value[0]['message']`"
            )

        return message

    def _add_echo_before_command(self, commands: List[str]) -> List[str]:
        """
        Adds an echo command before each command in the list to ensure
        that the output of each command is captured in the logs.

        The echoed banner is shell-quoted, so a command that itself contains
        single quotes no longer breaks the generated script line.
        """
        import shlex

        return [
            "echo " + shlex.quote(f"Running command: {cmd}") + f" && {cmd}"
            for cmd in commands
        ]


class NonSshExecutor(AzureFeatureMixin, features.NonSshExecutor):
    """
    Azure flavor of NonSshExecutor: tries the RunCommand feature first and
    falls back to the parent implementation when RunCommand fails.
    """

    def execute(self, commands: List[str]) -> List[str]:
        """
        Execute the commands and return one output string per command.

        :param commands: A list of shell commands to execute.
        :return: The outputs in command order, from RunCommand when it
            succeeds for every command, otherwise from the parent executor.
        """
        # RunCommand does not require password login, hence attempt to use it first.
        # RunCommand has a limitation on 4KB of output.
        try:
            result = []
            for command in commands:
                out = self._node.features[RunCommand].execute([command])
                result.append(out)
        except Exception as e:
            self._log.info(f"RunCommand failed: {e}")
            # Fallback to the default non-SSH executor behavior
            return super().execute(commands)

        # Log the collected output here as well: callers that only want the
        # diagnostics in the log discard the return value, and the fallback
        # path above already logs what it collects.
        collected_info = "\n\n".join(result)
        self._log.info(
            f"Collected information using NonSshExecutor:\n{collected_info}"
        )
        return result
2 changes: 2 additions & 0 deletions lisa/sut_orchestrator/azure/platform_.py
Original file line number Diff line number Diff line change
Expand Up @@ -501,6 +501,8 @@ def supported_features(cls) -> List[Type[feature.Feature]]:
features.Availability,
features.Infiniband,
features.Hibernation,
features.RunCommand,
features.NonSshExecutor,
]

def _prepare_environment(self, environment: Environment, log: Logger) -> bool:
Expand Down
Loading