Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 8 additions & 3 deletions api/data_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,17 +102,19 @@ def download_repo(repo_url: str, local_path: str, repo_type: str = None, access_
clone_url = repo_url
if access_token:
parsed = urlparse(repo_url)
# URL-encode the token to handle special characters
encoded_token = quote(access_token, safe='')
# Determine the repository type and format the URL accordingly
if repo_type == "github":
# Format: https://{token}@{domain}/owner/repo.git
# Works for both github.com and enterprise GitHub domains
clone_url = urlunparse((parsed.scheme, f"{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
clone_url = urlunparse((parsed.scheme, f"{encoded_token}@{parsed.netloc}", parsed.path, '', '', ''))
elif repo_type == "gitlab":
# Format: https://oauth2:{token}@gitlab.com/owner/repo.git
clone_url = urlunparse((parsed.scheme, f"oauth2:{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
clone_url = urlunparse((parsed.scheme, f"oauth2:{encoded_token}@{parsed.netloc}", parsed.path, '', '', ''))
elif repo_type == "bitbucket":
# Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git
clone_url = urlunparse((parsed.scheme, f"x-token-auth:{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
clone_url = urlunparse((parsed.scheme, f"x-token-auth:{encoded_token}@{parsed.netloc}", parsed.path, '', '', ''))
Comment on lines 108 to +117
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

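The logic for constructing the clone URL is repeated for each repository type; the branches differ only in the username prefix placed before the token (none for GitHub, `oauth2:` for GitLab, `x-token-auth:` for Bitbucket). This can be refactored to reduce code duplication and improve maintainability by using a dictionary that maps each repository type to its authentication prefix. With the mapping in place, the URL is built in a single statement, and supporting a new repository type only requires adding one dictionary entry.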

Suggested change
if repo_type == "github":
# Format: https://{token}@{domain}/owner/repo.git
# Works for both github.com and enterprise GitHub domains
clone_url = urlunparse((parsed.scheme, f"{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
clone_url = urlunparse((parsed.scheme, f"{encoded_token}@{parsed.netloc}", parsed.path, '', '', ''))
elif repo_type == "gitlab":
# Format: https://oauth2:{token}@gitlab.com/owner/repo.git
clone_url = urlunparse((parsed.scheme, f"oauth2:{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
clone_url = urlunparse((parsed.scheme, f"oauth2:{encoded_token}@{parsed.netloc}", parsed.path, '', '', ''))
elif repo_type == "bitbucket":
# Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git
clone_url = urlunparse((parsed.scheme, f"x-token-auth:{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
clone_url = urlunparse((parsed.scheme, f"x-token-auth:{encoded_token}@{parsed.netloc}", parsed.path, '', '', ''))
auth_user_map = {
"github": "",
"gitlab": "oauth2:",
"bitbucket": "x-token-auth:",
}
if repo_type in auth_user_map:
user_prefix = auth_user_map[repo_type]
clone_url = urlunparse((parsed.scheme, f"{user_prefix}{encoded_token}@{parsed.netloc}", parsed.path, '', '', ''))


logger.info("Using access token for authentication")

Expand Down Expand Up @@ -780,6 +782,9 @@ def _create_repo(self, repo_url_or_path: str, repo_type: str = None, access_toke
logger.info(f"Preparing repo storage for {repo_url_or_path}...")

try:
# Strip whitespace to handle URLs with leading/trailing spaces
repo_url_or_path = repo_url_or_path.strip()

root_path = get_adalflow_default_root_path()

os.makedirs(root_path, exist_ok=True)
Expand Down