diff --git a/api/data_pipeline.py b/api/data_pipeline.py index 67b2dcbf..a6c859f7 100644 --- a/api/data_pipeline.py +++ b/api/data_pipeline.py @@ -102,17 +102,19 @@ def download_repo(repo_url: str, local_path: str, repo_type: str = None, access_ clone_url = repo_url if access_token: parsed = urlparse(repo_url) + # URL-encode the token to handle special characters + encoded_token = quote(access_token, safe='') # Determine the repository type and format the URL accordingly if repo_type == "github": # Format: https://{token}@{domain}/owner/repo.git # Works for both github.com and enterprise GitHub domains - clone_url = urlunparse((parsed.scheme, f"{access_token}@{parsed.netloc}", parsed.path, '', '', '')) + clone_url = urlunparse((parsed.scheme, f"{encoded_token}@{parsed.netloc}", parsed.path, '', '', '')) elif repo_type == "gitlab": # Format: https://oauth2:{token}@gitlab.com/owner/repo.git - clone_url = urlunparse((parsed.scheme, f"oauth2:{access_token}@{parsed.netloc}", parsed.path, '', '', '')) + clone_url = urlunparse((parsed.scheme, f"oauth2:{encoded_token}@{parsed.netloc}", parsed.path, '', '', '')) elif repo_type == "bitbucket": # Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git - clone_url = urlunparse((parsed.scheme, f"x-token-auth:{access_token}@{parsed.netloc}", parsed.path, '', '', '')) + clone_url = urlunparse((parsed.scheme, f"x-token-auth:{encoded_token}@{parsed.netloc}", parsed.path, '', '', '')) logger.info("Using access token for authentication") @@ -780,6 +782,9 @@ def _create_repo(self, repo_url_or_path: str, repo_type: str = None, access_toke logger.info(f"Preparing repo storage for {repo_url_or_path}...") try: + # Strip whitespace to handle URLs with leading/trailing spaces + repo_url_or_path = repo_url_or_path.strip() + root_path = get_adalflow_default_root_path() os.makedirs(root_path, exist_ok=True)