-
Notifications
You must be signed in to change notification settings - Fork 43
Do not run CommandShell check during object creation #841
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
216bf6a
0fb93cd
e75edc7
1567b71
65e7761
4fbd785
7b90e2d
e8e453c
33dd1aa
3f61208
0bbebb5
638a0c1
1d9aed4
fb24acd
d549240
5493528
e298085
b30a194
17b5d1f
d09d889
a7e3dab
6559932
44904a0
f3488ae
d2aa80b
84f4f56
df4ce24
72676cd
99e6c23
c42033e
7b4fdb6
ed1cd2f
abeb9b1
fd50fce
b17a2f5
f34a541
9f95bf8
9ab843a
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES | ||
| # Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
| # SPDX-License-Identifier: Apache-2.0 | ||
| # | ||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||
| # you may not use this file except in compliance with the License. | ||
| # You may obtain a copy of the License at | ||
| # | ||
| # http://www.apache.org/licenses/LICENSE-2.0 | ||
| # | ||
| # Unless required by applicable law or agreed to in writing, software | ||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| # See the License for the specific language governing permissions and | ||
| # limitations under the License. | ||
|
|
||
| name = "megatron_bridge_qwen_30b" | ||
|
|
||
| [[Tests]] | ||
| id = "megatron_bridge_qwen_30b" | ||
| test_name = "megatron_bridge_qwen_30b" | ||
| num_nodes = "2" | ||
|
|
||
| [[Tests.git_repos]] | ||
| url = "https://github.com/NVIDIA-NeMo/Megatron-Bridge.git" | ||
| commit = "r0.3.0" | ||
| mount_as = "/opt/Megatron-Bridge" | ||
| init_submodules = true | ||
|
|
||
| [Tests.extra_env_vars] | ||
| PYTHONPATH = "/opt/Megatron-Bridge/3rdparty/Megatron-LM:${PYTHONPATH}" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -209,11 +209,23 @@ def _install_one_git_repo(self, item: GitRepo) -> InstallStatusResult: | |
| verify_res = self._verify_commit(item.commit, repo_path) | ||
| if not verify_res.success: | ||
| return verify_res | ||
| if item.init_submodules: | ||
| res = self._init_submodules(repo_path) | ||
| if not res.success: | ||
| return res | ||
| item.installed_path = repo_path | ||
| msg = f"Git repository already exists at {repo_path}." | ||
| logging.debug(msg) | ||
| return InstallStatusResult(True, msg) | ||
|
|
||
| res = self._clone_and_setup_repo(item, repo_path) | ||
| if not res.success: | ||
| return res | ||
|
|
||
| item.installed_path = repo_path | ||
| return InstallStatusResult(True) | ||
|
Comment on lines
+212
to
+226
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🧩 Analysis chain🏁 Script executed: #!/bin/bash
set -euo pipefail
echo "== Base installer orchestration =="
BASE_INSTALLER_FILE="$(fd 'base_installer\.py$' src | head -n 1 || true)"
if [ -n "${BASE_INSTALLER_FILE}" ]; then
sed -n '1,260p' "${BASE_INSTALLER_FILE}"
fi
echo
echo "== Git repo installed checks =="
rg -n -C3 'def (_is_git_repo_installed|is_installed_one)|is_installed_one\(' \
src/cloudai/systems/slurm/slurm_installer.py \
src/cloudai/systems/kubernetes/kubernetes_installer.pyRepository: NVIDIA/cloudai Length of output: 13058 🏁 Script executed: # Get full implementation of is_installed_one and _is_git_repo_installed in slurm_installer.py
echo "=== slurm_installer.py: is_installed_one() ==="
sed -n '139,170p' src/cloudai/systems/slurm/slurm_installer.py
echo
echo "=== slurm_installer.py: _is_git_repo_installed() ==="
sed -n '446,485p' src/cloudai/systems/slurm/slurm_installer.py
echo
echo "=== kubernetes_installer.py: is_installed_one() ==="
sed -n '110,145p' src/cloudai/systems/kubernetes/kubernetes_installer.py
echo
echo "=== Check if init_submodules is validated in status checks ==="
rg -n 'init_submodules' src/cloudai/systems/slurm/slurm_installer.py src/cloudai/systems/kubernetes/kubernetes_installer.pyRepository: NVIDIA/cloudai Length of output: 6552 🏁 Script executed: # Verify if _verify_commit checks submodules state
echo "=== _verify_commit implementation ==="
rg -n -A10 'def _verify_commit' src/cloudai/systems/slurm/slurm_installer.py | head -20Repository: NVIDIA/cloudai Length of output: 700 Add The Update 🧰 Tools🪛 Ruff (0.15.6)[warning] 218-218: (LOG015) [warning] 219-219: Boolean positional value in function call (FBT003) [warning] 226-226: Boolean positional value in function call (FBT003) 🤖 Prompt for AI Agents |
||
|
|
||
| def _clone_and_setup_repo(self, item: GitRepo, repo_path: Path) -> InstallStatusResult: | ||
| res = self._clone_repository(item.url, repo_path) | ||
| if not res.success: | ||
| return res | ||
|
|
@@ -225,7 +237,14 @@ def _install_one_git_repo(self, item: GitRepo) -> InstallStatusResult: | |
| rmtree(repo_path) | ||
| return res | ||
|
|
||
| item.installed_path = repo_path | ||
| if item.init_submodules: | ||
| res = self._init_submodules(repo_path) | ||
| if not res.success: | ||
| logging.error(f"Submodule init failed, removing cloned repository at {repo_path}") | ||
| if repo_path.exists(): | ||
| rmtree(repo_path) | ||
| return res | ||
|
|
||
| return InstallStatusResult(True) | ||
|
|
||
| def _install_python_executable(self, item: PythonExecutable) -> InstallStatusResult: | ||
|
|
@@ -290,6 +309,14 @@ def _checkout_commit(self, commit_hash: str, path: Path) -> InstallStatusResult: | |
| return InstallStatusResult(False, f"Failed to checkout commit {commit_hash}: {result.stderr}") | ||
| return InstallStatusResult(True) | ||
|
|
||
| def _init_submodules(self, path: Path) -> InstallStatusResult: | ||
| logging.debug(f"Initializing submodules in {path}") | ||
| submodule_cmd = ["git", "submodule", "update", "--init", "--recursive"] | ||
| result = subprocess.run(submodule_cmd, cwd=str(path), capture_output=True, text=True) | ||
| if result.returncode != 0: | ||
| return InstallStatusResult(False, f"Failed to initialize submodules: {result.stderr}") | ||
| return InstallStatusResult(True) | ||
|
|
||
| def _verify_commit(self, ref: str, path: Path) -> InstallStatusResult: | ||
| try: | ||
| result = subprocess.run(["git", "rev-parse", "HEAD"], cwd=str(path), capture_output=True, text=True) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Persist
exclude_nodesin the stored run spec.#SBATCH --exclude=only exists in the generated batch script.store_test_run()still dumps the rawsruncommand, andTestRunDetailshas noexclude_nodesfield, so the saved run metadata can no longer reconstruct the actual allocation request.🤖 Prompt for AI Agents