From 02defd3af48478c3624d2f35eff94499e7955564 Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 24 Feb 2026 13:01:10 +0000 Subject: [PATCH] fix: redact credentials from URLs in logs, exceptions, and persisted state This addresses security issue #2152 by preventing credential leakage: - Add redact_url_credentials() utility function to sanitize URLs with embedded credentials (e.g., https://oauth2:token@host/path becomes https://****@host/path) - Update git/utils.py:run_git_command() to redact credentials from: - Error log messages - Debug log messages - GitCommandError exception messages and command fields - Update git/cached_repo.py to redact credentials in clone log messages - Update plugin/fetch.py to redact credentials in: - PluginFetchError exception messages - Warning log messages - Update plugin/types.py:ResolvedPluginSource.from_plugin_source() to redact credentials before persistence (credentials only needed at fetch time) - Export redact_url_credentials from openhands.sdk public API for cross-repo usage (related: OpenHands/OpenHands#12959) - Add comprehensive tests for credential redaction Co-authored-by: openhands --- openhands-sdk/openhands/sdk/__init__.py | 2 + .../openhands/sdk/git/cached_repo.py | 4 +- openhands-sdk/openhands/sdk/git/utils.py | 65 +++++- openhands-sdk/openhands/sdk/plugin/fetch.py | 19 +- openhands-sdk/openhands/sdk/plugin/types.py | 11 +- tests/sdk/git/test_credential_redaction.py | 186 ++++++++++++++++++ 6 files changed, 273 insertions(+), 14 deletions(-) create mode 100644 tests/sdk/git/test_credential_redaction.py diff --git a/openhands-sdk/openhands/sdk/__init__.py b/openhands-sdk/openhands/sdk/__init__.py index 7cb34c7e3a..486a34bdf7 100644 --- a/openhands-sdk/openhands/sdk/__init__.py +++ b/openhands-sdk/openhands/sdk/__init__.py @@ -24,6 +24,7 @@ from openhands.sdk.conversation.conversation_stats import ConversationStats from openhands.sdk.event import Event, LLMConvertibleEvent from openhands.sdk.event.llm_convertible import MessageEvent +from openhands.sdk.git.utils import redact_url_credentials from openhands.sdk.io import FileStore, LocalFileStore from openhands.sdk.llm import ( LLM, @@ -116,5 +117,6 @@ "load_project_skills", "load_skills_from_dir", "load_user_skills", + "redact_url_credentials", "__version__", ] diff --git a/openhands-sdk/openhands/sdk/git/cached_repo.py b/openhands-sdk/openhands/sdk/git/cached_repo.py index d6e910cb07..7f2acc4079 100644 --- a/openhands-sdk/openhands/sdk/git/cached_repo.py +++ b/openhands-sdk/openhands/sdk/git/cached_repo.py @@ -12,7 +12,7 @@ from filelock import FileLock, Timeout from openhands.sdk.git.exceptions import GitCommandError -from openhands.sdk.git.utils import run_git_command +from openhands.sdk.git.utils import redact_url_credentials, run_git_command from openhands.sdk.logger import get_logger @@ -285,7 +285,7 @@ def _do_clone_or_update( else: logger.debug(f"Using cached repository at {repo_path}") else: - logger.info(f"Cloning repository from {url}") + logger.info(f"Cloning repository from {redact_url_credentials(url)}") _clone_repository(url, repo_path, ref, git) return repo_path diff --git a/openhands-sdk/openhands/sdk/git/utils.py b/openhands-sdk/openhands/sdk/git/utils.py index bc5bebf610..fa902356d9 100644 --- a/openhands-sdk/openhands/sdk/git/utils.py +++ b/openhands-sdk/openhands/sdk/git/utils.py @@ -14,6 +14,56 @@ GIT_EMPTY_TREE_HASH = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" +def redact_url_credentials(url: str) -> str: + """Redact credentials from a URL for safe logging. + + Handles various URL formats with embedded credentials: + - https://user:password@host/path + - https://oauth2:token@host/path + - https://token@host/path + + Args: + url: URL string that may contain credentials. + + Returns: + URL with credentials replaced by '****', or the original URL + if no credentials are detected. + + Examples: + >>> redact_url_credentials("https://oauth2:SECRET@gitlab.com/org/repo") + "https://****@gitlab.com/org/repo" + >>> redact_url_credentials("https://user:pass@github.com/owner/repo.git") + "https://****@github.com/owner/repo.git" + >>> redact_url_credentials("https://github.com/owner/repo.git") + "https://github.com/owner/repo.git" + >>> redact_url_credentials("git@github.com:owner/repo.git") + "git@github.com:owner/repo.git" + """ + # Pattern matches: scheme://[user[:password]@]host + # We want to replace user:password@ or user@ with ****@ + # Only for http:// and https:// URLs (not ssh git@ URLs) + pattern = r"^(https?://)([^@]+)@" + match = re.match(pattern, url) + if match: + return f"{match.group(1)}****@{url[match.end() :]}" + return url + + +def _redact_git_args(args: list[str]) -> list[str]: + """Redact credentials from git command arguments for safe logging. + + Applies credential redaction to any argument that looks like a URL + with embedded credentials. + + Args: + args: List of git command arguments. + + Returns: + New list with credentials redacted from URL arguments. + """ + return [redact_url_credentials(arg) for arg in args] + + def run_git_command( args: list[str], cwd: str | Path | None = None, @@ -43,28 +93,31 @@ def run_git_command( ) if result.returncode != 0: - cmd_str = shlex.join(args) + # Redact credentials from command for logging and error messages + redacted_args = _redact_git_args(args) + cmd_str = shlex.join(redacted_args) error_msg = f"Git command failed: {cmd_str}" logger.error( f"{error_msg}. Exit code: {result.returncode}. Stderr: {result.stderr}" ) raise GitCommandError( message=error_msg, - command=args, + command=redacted_args, exit_code=result.returncode, stderr=result.stderr.strip(), ) - logger.debug(f"Git command succeeded: {shlex.join(args)}") + logger.debug(f"Git command succeeded: {shlex.join(_redact_git_args(args))}") return result.stdout.strip() except subprocess.TimeoutExpired as e: - cmd_str = shlex.join(args) + redacted_args = _redact_git_args(args) + cmd_str = shlex.join(redacted_args) error_msg = f"Git command timed out: {cmd_str}" logger.error(error_msg) raise GitCommandError( message=error_msg, - command=args, + command=redacted_args, exit_code=-1, stderr="Command timed out", ) from e @@ -73,7 +126,7 @@ def run_git_command( logger.error(error_msg) raise GitCommandError( message=error_msg, - command=args, + command=_redact_git_args(args), exit_code=-1, stderr="Git executable not found", ) from e diff --git a/openhands-sdk/openhands/sdk/plugin/fetch.py b/openhands-sdk/openhands/sdk/plugin/fetch.py index 592236dfd7..b3ac256d08 100644 --- a/openhands-sdk/openhands/sdk/plugin/fetch.py +++ b/openhands-sdk/openhands/sdk/plugin/fetch.py @@ -6,7 +6,12 @@ from pathlib import Path from openhands.sdk.git.cached_repo import GitHelper, try_cached_clone_or_update -from openhands.sdk.git.utils import extract_repo_name, is_git_url, normalize_git_url +from openhands.sdk.git.utils import ( + extract_repo_name, + is_git_url, + normalize_git_url, + redact_url_credentials, +) from openhands.sdk.logger import get_logger @@ -160,7 +165,9 @@ def _fetch_remote_source( ) if result is None: - raise PluginFetchError(f"Failed to fetch plugin from {source}") + raise PluginFetchError( + f"Failed to fetch plugin from {redact_url_credentials(source)}" + ) return _apply_subpath(plugin_path, subpath, "plugin repository") @@ -321,13 +328,17 @@ def _fetch_remote_source_with_resolution( ) if result is None: - raise PluginFetchError(f"Failed to fetch plugin from {source}") + raise PluginFetchError( + f"Failed to fetch plugin from {redact_url_credentials(source)}" + ) # Get the actual commit SHA that was checked out try: resolved_ref = git_helper.get_head_commit(repo_cache_path) except Exception as e: - logger.warning(f"Could not get commit SHA for {source}: {e}") + logger.warning( + f"Could not get commit SHA for {redact_url_credentials(source)}: {e}" + ) # Fall back to the requested ref if we can't get the SHA resolved_ref = ref or "HEAD" diff --git a/openhands-sdk/openhands/sdk/plugin/types.py b/openhands-sdk/openhands/sdk/plugin/types.py index 60624d79ac..1cecb343f8 100644 --- a/openhands-sdk/openhands/sdk/plugin/types.py +++ b/openhands-sdk/openhands/sdk/plugin/types.py @@ -10,6 +10,8 @@ import frontmatter from pydantic import BaseModel, Field, field_validator, model_validator +from openhands.sdk.git.utils import redact_url_credentials + # Directories to check for marketplace manifest MARKETPLACE_MANIFEST_DIRS = [".plugin", ".claude-plugin"] @@ -105,9 +107,14 @@ class ResolvedPluginSource(BaseModel): def from_plugin_source( cls, plugin_source: PluginSource, resolved_ref: str | None ) -> ResolvedPluginSource: - """Create a ResolvedPluginSource from a PluginSource and resolved ref.""" + """Create a ResolvedPluginSource from a PluginSource and resolved ref. + + The source URL is redacted to remove any embedded credentials before + persistence. Credentials are only needed at fetch time and should not + be stored in conversation state. + """ return cls( - source=plugin_source.source, + source=redact_url_credentials(plugin_source.source), resolved_ref=resolved_ref, repo_path=plugin_source.repo_path, original_ref=plugin_source.ref, diff --git a/tests/sdk/git/test_credential_redaction.py b/tests/sdk/git/test_credential_redaction.py new file mode 100644 index 0000000000..1b9b510342 --- /dev/null +++ b/tests/sdk/git/test_credential_redaction.py @@ -0,0 +1,186 @@ +"""Tests for credential redaction in git utilities.""" + +from openhands.sdk.git.utils import redact_url_credentials +from openhands.sdk.plugin.types import PluginSource, ResolvedPluginSource + + +class TestRedactUrlCredentials: + """Tests for redact_url_credentials function.""" + + def test_https_url_with_user_password(self) -> None: + """Should redact user:password credentials in HTTPS URLs.""" + url = "https://user:password@github.com/owner/repo.git" + result = redact_url_credentials(url) + assert result == "https://****@github.com/owner/repo.git" + assert "password" not in result + + def test_https_url_with_oauth2_token(self) -> None: + """Should redact oauth2:token credentials in HTTPS URLs.""" + url = "https://oauth2:SECRET_TOKEN@gitlab.com/org/repo.git" + result = redact_url_credentials(url) + assert result == "https://****@gitlab.com/org/repo.git" + assert "SECRET_TOKEN" not in result + assert "oauth2" not in result + + def test_https_url_with_token_only(self) -> None: + """Should redact token-only credentials in HTTPS URLs.""" + url = "https://ghp_supersecrettoken@github.com/owner/repo.git" + result = redact_url_credentials(url) + assert result == "https://****@github.com/owner/repo.git" + assert "ghp_supersecrettoken" not in result + + def test_http_url_with_credentials(self) -> None: + """Should redact credentials in HTTP URLs.""" + url = "http://user:pass@example.com/repo.git" + result = redact_url_credentials(url) + assert result == "http://****@example.com/repo.git" + assert "pass" not in result + + def test_https_url_without_credentials(self) -> None: + """Should not modify URLs without credentials.""" + url = "https://github.com/owner/repo.git" + result = redact_url_credentials(url) + assert result == url + + def test_ssh_url_not_modified(self) -> None: + """Should not modify SSH-style git URLs (they don't use embedded creds).""" + url = "git@github.com:owner/repo.git" + result = redact_url_credentials(url) + assert result == url + + def test_git_protocol_url(self) -> None: + """Should not modify git:// protocol URLs.""" + url = "git://github.com/owner/repo.git" + result = redact_url_credentials(url) + assert result == url + + def test_local_path_not_modified(self) -> None: + """Should not modify local paths.""" + path = "/local/path/to/repo" + result = redact_url_credentials(path) + assert result == path + + def test_github_shorthand_not_modified(self) -> None: + """Should not modify github: shorthand syntax.""" + source = "github:owner/repo" + result = redact_url_credentials(source) + assert result == source + + def test_empty_string(self) -> None: + """Should handle empty string gracefully.""" + result = redact_url_credentials("") + assert result == "" + + def test_complex_url_with_port(self) -> None: + """Should handle URLs with port numbers.""" + url = "https://user:pass@gitlab.example.com:8443/org/repo.git" + result = redact_url_credentials(url) + assert result == "https://****@gitlab.example.com:8443/org/repo.git" + assert "pass" not in result + + def test_url_with_special_chars_in_password(self) -> None: + """Should handle special characters in credentials.""" + # Password with special chars like @, :, etc. + url = "https://user:p%40ss%3Aword@github.com/owner/repo.git" + result = redact_url_credentials(url) + assert result == "https://****@github.com/owner/repo.git" + assert "p%40ss%3Aword" not in result + + +class TestPublicAPIExport: + """Tests for public API export of redact_url_credentials.""" + + def test_import_from_sdk(self) -> None: + """Should be importable from openhands.sdk.""" + from openhands.sdk import redact_url_credentials as sdk_redact + + # Verify it's the same function + assert sdk_redact is redact_url_credentials + + def test_function_works_via_sdk_import(self) -> None: + """Should work correctly when imported from SDK.""" + from openhands.sdk import redact_url_credentials as sdk_redact + + url = "https://token@github.com/owner/repo.git" + result = sdk_redact(url) + assert result == "https://****@github.com/owner/repo.git" + + +class TestResolvedPluginSourceCredentialRedaction: + """Tests for credential redaction in ResolvedPluginSource persistence.""" + + def test_from_plugin_source_redacts_credentials(self) -> None: + """Should redact credentials when creating ResolvedPluginSource.""" + plugin_source = PluginSource( + source="https://oauth2:SECRET_TOKEN@gitlab.com/org/private-repo.git", + ref="main", + repo_path="plugins/my-plugin", + ) + + resolved = ResolvedPluginSource.from_plugin_source( + plugin_source, resolved_ref="abc123def456" + ) + + # Source should be redacted + assert resolved.source == "https://****@gitlab.com/org/private-repo.git" + assert "SECRET_TOKEN" not in resolved.source + assert "oauth2" not in resolved.source + + # Other fields should be preserved + assert resolved.resolved_ref == "abc123def456" + assert resolved.repo_path == "plugins/my-plugin" + assert resolved.original_ref == "main" + + def test_from_plugin_source_preserves_url_without_credentials(self) -> None: + """Should not modify URLs that don't have credentials.""" + plugin_source = PluginSource( + source="https://github.com/owner/repo.git", + ref="v1.0.0", + ) + + resolved = ResolvedPluginSource.from_plugin_source( + plugin_source, resolved_ref="def456" + ) + + assert resolved.source == "https://github.com/owner/repo.git" + assert resolved.resolved_ref == "def456" + + def test_from_plugin_source_handles_local_paths(self) -> None: + """Should not modify local paths.""" + plugin_source = PluginSource(source="/local/path/to/plugin") + + resolved = ResolvedPluginSource.from_plugin_source( + plugin_source, resolved_ref=None + ) + + assert resolved.source == "/local/path/to/plugin" + assert resolved.resolved_ref is None + + def test_from_plugin_source_handles_github_shorthand(self) -> None: + """Should not modify github: shorthand syntax.""" + plugin_source = PluginSource(source="github:owner/repo", ref="main") + + resolved = ResolvedPluginSource.from_plugin_source( + plugin_source, resolved_ref="abc123" + ) + + # github: shorthand doesn't contain credentials + assert resolved.source == "github:owner/repo" + + def test_to_plugin_source_uses_redacted_url(self) -> None: + """Converted PluginSource should use the redacted URL.""" + plugin_source = PluginSource( + source="https://token@github.com/owner/repo.git", + ref="main", + ) + + resolved = ResolvedPluginSource.from_plugin_source( + plugin_source, resolved_ref="abc123" + ) + + converted = resolved.to_plugin_source() + + # Source should still be redacted in converted object + assert converted.source == "https://****@github.com/owner/repo.git" + # Should use resolved_ref, not original_ref + assert converted.ref == "abc123"