Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -183,3 +183,11 @@ _version.py

# NeMo Run
.nemo_run/

# Local dev artifacts
configs/
local/
scripts/
log.txt
torchrun_job.sh
test/run/core/
3 changes: 3 additions & 0 deletions nemo_run/core/execution/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,9 @@ class ResourceRequest:
#: Template name to use for Ray jobs (e.g., "ray.sub.j2" or "ray_enroot.sub.j2")
ray_template: str = "ray.sub.j2"

print_script: bool = False
"""If True, print the generated sbatch script to stdout before submission."""

#: Set by the executor; cannot be initialized
job_name: str = field(init=False, default="nemo-job")
stderr_to_stdout: bool = field(init=False, default=True)
Expand Down
3 changes: 3 additions & 0 deletions nemo_run/run/torchx_backend/schedulers/slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,9 @@ def _submit_dryrun(self, app: AppDef, cfg: Executor) -> AppDryRunInfo[Any]: # t
path = os.path.join(sbatch_dir, f"{executor.job_name}_sbatch.sh")
script = req.materialize()

if executor.print_script:
print(script)

with open(path, "w") as f:
f.write(script)

Expand Down
35 changes: 35 additions & 0 deletions test/run/torchx_backend/schedulers/test_slurm.py
Original file line number Diff line number Diff line change
Expand Up @@ -726,3 +726,38 @@ def test_non_heterogeneous_ray_cluster(slurm_scheduler, temp_dir):
# Verify run_as_group was NOT set
assert not hasattr(executor, "run_as_group") or not executor.run_as_group
assert isinstance(dryrun_info.request, SlurmRayRequest)


@mock.patch("nemo_run.core.execution.slurm.fill_template")
def test_submit_dryrun_print_script(
    mock_fill_template, slurm_scheduler, mock_app_def, slurm_executor, capsys
):
    """Enabling ``print_script`` echoes the rendered sbatch script on stdout."""
    # Opt in to printing and make the rendered template deterministic.
    slurm_executor.print_script = True
    mock_fill_template.return_value = "#!/bin/bash\n# Mock script content"

    # Patchers are only constructed here; they take effect once entered below.
    package_patch = mock.patch.object(SlurmExecutor, "package")
    open_patch = mock.patch("builtins.open", mock.mock_open())

    with mock.patch.object(SlurmTunnelScheduler, "_initialize_tunnel"):
        slurm_scheduler.tunnel = mock.MagicMock()
        # Keep the dry run off the real filesystem and packaging path.
        with package_patch, open_patch:
            slurm_scheduler._submit_dryrun(mock_app_def, slurm_executor)

    stdout, _ = capsys.readouterr()
    assert "#!/bin/bash" in stdout


@mock.patch("nemo_run.core.execution.slurm.fill_template")
def test_submit_dryrun_no_print_by_default(
    mock_fill_template, slurm_scheduler, mock_app_def, slurm_executor, capsys
):
    """Leaving ``print_script`` at its default keeps the dry run silent on stdout."""
    mock_fill_template.return_value = "#!/bin/bash\n# Mock script content"

    # Patchers are only constructed here; they take effect once entered below.
    package_patch = mock.patch.object(SlurmExecutor, "package")
    open_patch = mock.patch("builtins.open", mock.mock_open())

    with mock.patch.object(SlurmTunnelScheduler, "_initialize_tunnel"):
        slurm_scheduler.tunnel = mock.MagicMock()
        # Keep the dry run off the real filesystem and packaging path.
        with package_patch, open_patch:
            slurm_scheduler._submit_dryrun(mock_app_def, slurm_executor)

    stdout, _ = capsys.readouterr()
    assert stdout == ""
Loading