Skip to content

Commit 3c73e3d

Browse files
CM-57660-Remove PAT token from repository URL (#375)
1 parent 46cdd9e commit 3c73e3d

File tree

5 files changed

+172
-4
lines changed

5 files changed

+172
-4
lines changed

cycode/cli/apps/report/sbom/repository_url/repository_url_command.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88
from cycode.cli.utils.get_api_client import get_report_cycode_client
99
from cycode.cli.utils.progress_bar import SbomReportProgressBarSection
1010
from cycode.cli.utils.sentry import add_breadcrumb
11+
from cycode.cli.utils.url_utils import sanitize_repository_url
12+
from cycode.logger import get_logger
13+
14+
logger = get_logger('Repository URL Command')
1115

1216

1317
def repository_url_command(
@@ -28,8 +32,13 @@ def repository_url_command(
2832
start_scan_time = time.time()
2933
report_execution_id = -1
3034

35+
# Sanitize repository URL to remove any embedded credentials/tokens before sending to API
36+
sanitized_uri = sanitize_repository_url(uri)
37+
if sanitized_uri != uri:
38+
logger.debug('Sanitized repository URL to remove credentials')
39+
3140
try:
32-
report_execution = client.request_sbom_report_execution(report_parameters, repository_url=uri)
41+
report_execution = client.request_sbom_report_execution(report_parameters, repository_url=sanitized_uri)
3342
report_execution_id = report_execution.id
3443

3544
create_sbom_report(progress_bar, client, report_execution_id, output_file, output_format)

cycode/cli/apps/scan/remote_url_resolver.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
from cycode.cli import consts
44
from cycode.cli.utils.git_proxy import git_proxy
55
from cycode.cli.utils.shell_executor import shell
6+
from cycode.cli.utils.url_utils import sanitize_repository_url
67
from cycode.logger import get_logger
78

89
logger = get_logger('Remote URL Resolver')
@@ -102,7 +103,11 @@ def _try_get_git_remote_url(path: str) -> Optional[str]:
102103
repo = git_proxy.get_repo(path, search_parent_directories=True)
103104
remote_url = repo.remotes[0].config_reader.get('url')
104105
logger.debug('Found Git remote URL, %s', {'remote_url': remote_url, 'repo_path': repo.working_dir})
105-
return remote_url
106+
# Sanitize URL to remove any embedded credentials/tokens before returning
107+
sanitized_url = sanitize_repository_url(remote_url)
108+
if sanitized_url != remote_url:
109+
logger.debug('Sanitized repository URL to remove credentials')
110+
return sanitized_url
106111
except Exception as e:
107112
logger.debug('Failed to get Git remote URL. Probably not a Git repository', exc_info=e)
108113
return None
@@ -124,7 +129,9 @@ def get_remote_url_scan_parameter(paths: tuple[str, ...]) -> Optional[str]:
124129
# - len(paths)*2 Plastic SCM subprocess calls
125130
remote_url = _try_get_any_remote_url(path)
126131
if remote_url:
127-
remote_urls.add(remote_url)
132+
# URLs are already sanitized in _try_get_git_remote_url, but sanitize again as safety measure
133+
sanitized_url = sanitize_repository_url(remote_url)
134+
remote_urls.add(sanitized_url)
128135

129136
if len(remote_urls) == 1:
130137
# we are resolving remote_url only if all paths belong to the same repo (identical remote URLs),

cycode/cli/utils/url_utils.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
from typing import Optional
2+
from urllib.parse import urlparse, urlunparse
3+
4+
from cycode.logger import get_logger
5+
6+
logger = get_logger('URL Utils')
7+
8+
9+
def _strip_secret_userinfo(scheme: str, netloc: str) -> str:
    """Return *netloc* without the secret part of its ``user[:password]@`` prefix.

    For ``ssh`` the login name is part of the address (``git@host``), so only the
    password is dropped. For every other scheme the whole userinfo is removed,
    because a lone "username" in an HTTP(S) remote is commonly an access token.
    """
    # Split on the LAST '@': a password may contain an unescaped '@', a host cannot.
    userinfo, at_sign, host_and_port = netloc.rpartition('@')
    if not at_sign:
        return netloc

    if scheme.lower() == 'ssh':
        login = userinfo.partition(':')[0]
        if login:
            return f'{login}@{host_and_port}'

    return host_and_port


def _sanitize_unparsable_url(url: str) -> str:
    """Strip credentials from a URL that ``urlparse`` rejected, using plain string splitting."""
    scheme, marker, remainder = url.partition('://')
    if not marker:
        return url

    # The authority component ends at the first '/', '?' or '#'.
    delimiter_positions = [pos for pos in map(remainder.find, '/?#') if pos != -1]
    authority_end = min(delimiter_positions, default=len(remainder))
    authority = _strip_secret_userinfo(scheme, remainder[:authority_end])
    return f'{scheme}://{authority}{remainder[authority_end:]}'


def sanitize_repository_url(url: Optional[str]) -> Optional[str]:
    """Remove credentials (passwords, tokens) from a repository URL.

    Prevents PAT tokens or other secrets embedded in a Git remote URL from being
    sent to the API. The host (including IPv6 brackets and its original casing),
    port, path, query and fragment are preserved.

    The function never falls back to returning a URL that still carries
    credentials: a URL that ``urlparse`` rejects is sanitized textually instead.
    The raw URL is deliberately never logged here, since it may contain a secret.

    Args:
        url: Repository URL that may contain credentials
            (e.g., https://token@github.com/user/repo.git).

    Returns:
        The URL without credentials (e.g., https://github.com/user/repo.git).
        ``None`` and the empty string are returned unchanged, as are URLs that
        carry no credentials (scp-style ``git@host:path``, ``ssh://git@host/path``,
        local paths). For ``ssh://user:password@host/path`` only the password is
        removed, because the login name is required to connect.

    Examples:
        >>> sanitize_repository_url('https://token@github.com/user/repo.git')
        'https://github.com/user/repo.git'
        >>> sanitize_repository_url('https://user:token@github.com/user/repo.git')
        'https://github.com/user/repo.git'
        >>> sanitize_repository_url('git@github.com:user/repo.git')
        'git@github.com:user/repo.git'
        >>> sanitize_repository_url('ssh://git:secret@github.com/user/repo.git')
        'ssh://git@github.com/user/repo.git'
        >>> sanitize_repository_url(None) is None
        True
    """
    if not url:
        return url

    try:
        parsed = urlparse(url)
    except ValueError:
        # Malformed authority (e.g. unbalanced IPv6 bracket). Fail closed: strip
        # the userinfo textually rather than handing back the original secret.
        return _sanitize_unparsable_url(url)

    # Work on the raw netloc instead of hostname/port: `parsed.port` raises on a
    # malformed port and `parsed.hostname` lowercases and drops IPv6 brackets.
    sanitized_netloc = _strip_secret_userinfo(parsed.scheme, parsed.netloc)
    if sanitized_netloc == parsed.netloc:
        # Nothing to remove (also covers scp-style remotes and local paths, which
        # have no netloc); return the input untouched to avoid re-normalizing it.
        return url

    return urlunparse(parsed._replace(netloc=sanitized_netloc))

cycode/cyclient/report_client.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,12 @@
66

77
from cycode.cli.exceptions.custom_exceptions import CycodeError
88
from cycode.cli.files_collector.models.in_memory_zip import InMemoryZip
9+
from cycode.cli.utils.url_utils import sanitize_repository_url
910
from cycode.cyclient import models
1011
from cycode.cyclient.cycode_client_base import CycodeClientBase
12+
from cycode.logger import get_logger
13+
14+
logger = get_logger('Report Client')
1115

1216

1317
@dataclasses.dataclass
@@ -49,7 +53,11 @@ def request_sbom_report_execution(
4953
# entity type required only for zipped-file
5054
request_data = {'report_parameters': params.to_json(without_entity_type=zip_file is None)}
5155
if repository_url:
52-
request_data['repository_url'] = repository_url
56+
# Sanitize repository URL to remove any embedded credentials/tokens before sending to API
57+
sanitized_url = sanitize_repository_url(repository_url)
58+
if sanitized_url != repository_url:
59+
logger.debug('Sanitized repository URL to remove credentials')
60+
request_data['repository_url'] = sanitized_url
5361

5462
request_args = {
5563
'url_path': url_path,

tests/utils/test_url_utils.py

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
from cycode.cli.utils.url_utils import sanitize_repository_url
2+
3+
4+
def test_sanitize_repository_url_with_token() -> None:
    """A bare PAT used as the userinfo of an HTTPS URL is stripped."""
    sanitized = sanitize_repository_url('https://token@github.com/user/repo.git')
    assert sanitized == 'https://github.com/user/repo.git'


def test_sanitize_repository_url_with_username_and_token() -> None:
    """A ``user:token`` pair in an HTTPS URL is stripped entirely."""
    sanitized = sanitize_repository_url('https://user:token@github.com/user/repo.git')
    assert sanitized == 'https://github.com/user/repo.git'


def test_sanitize_repository_url_with_port() -> None:
    """An explicit port survives credential removal."""
    sanitized = sanitize_repository_url('https://token@github.com:443/user/repo.git')
    assert sanitized == 'https://github.com:443/user/repo.git'


def test_sanitize_repository_url_ssh_format() -> None:
    """An scp-style SSH remote is handed back untouched."""
    scp_style_remote = 'git@github.com:user/repo.git'
    assert sanitize_repository_url(scp_style_remote) == scp_style_remote


def test_sanitize_repository_url_ssh_protocol() -> None:
    """An ``ssh://`` remote is handed back untouched."""
    ssh_remote = 'ssh://git@github.com/user/repo.git'
    assert sanitize_repository_url(ssh_remote) == ssh_remote


def test_sanitize_repository_url_no_credentials() -> None:
    """A URL that carries no credentials comes back unchanged."""
    clean_remote = 'https://github.com/user/repo.git'
    assert sanitize_repository_url(clean_remote) == clean_remote


def test_sanitize_repository_url_none() -> None:
    """``None`` in, ``None`` out."""
    assert sanitize_repository_url(None) is None


def test_sanitize_repository_url_empty_string() -> None:
    """The empty string is passed through as-is."""
    assert sanitize_repository_url('') == ''


def test_sanitize_repository_url_gitlab() -> None:
    """GitLab-style ``oauth2:token`` credentials are stripped."""
    sanitized = sanitize_repository_url('https://oauth2:token@gitlab.com/user/repo.git')
    assert sanitized == 'https://gitlab.com/user/repo.git'


def test_sanitize_repository_url_bitbucket() -> None:
    """Bitbucket-style ``x-token-auth:token`` credentials are stripped."""
    sanitized = sanitize_repository_url('https://x-token-auth:token@bitbucket.org/user/repo.git')
    assert sanitized == 'https://bitbucket.org/user/repo.git'


def test_sanitize_repository_url_with_path_and_query() -> None:
    """Path, query string and fragment are kept while credentials are removed."""
    sanitized = sanitize_repository_url('https://token@github.com/user/repo.git?ref=main#section')
    assert sanitized == 'https://github.com/user/repo.git?ref=main#section'


def test_sanitize_repository_url_invalid_url() -> None:
    """A string that is not a URL comes back unchanged and no exception is raised."""
    not_a_url = 'not-a-valid-url'
    outcome = sanitize_repository_url(not_a_url)
    # There is no scheme or host here, hence nothing to strip: the input is returned.
    assert outcome == not_a_url

0 commit comments

Comments
 (0)