Skip to content

Commit 722b011

Browse files
fix: Detection of repository type for self hosted instances (#381)
* fix repository type selection and usage
* Update src/app/[owner]/[repo]/page.tsx
  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
* Update api/data_pipeline.py
  Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
---------
Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent ac35556 commit 722b011

File tree

2 files changed

+34
-21
lines changed

2 files changed

+34
-21
lines changed

api/data_pipeline.py

Lines changed: 22 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import tiktoken
88
import logging
99
import base64
10-
import re
1110
import glob
1211
from adalflow.utils import get_adalflow_default_root_path
1312
from adalflow.core.db import LocalDB
@@ -67,11 +66,12 @@ def count_tokens(text: str, embedder_type: str = None, is_ollama_embedder: bool
6766
# Rough approximation: 4 characters per token
6867
return len(text) // 4
6968

70-
def download_repo(repo_url: str, local_path: str, type: str = "github", access_token: str = None) -> str:
69+
def download_repo(repo_url: str, local_path: str, repo_type: str = None, access_token: str = None) -> str:
7170
"""
7271
Downloads a Git repository (GitHub, GitLab, or Bitbucket) to a specified local path.
7372
7473
Args:
74+
repo_type(str): Type of repository
7575
repo_url (str): The URL of the Git repository to clone.
7676
local_path (str): The local directory where the repository will be cloned.
7777
access_token (str, optional): Access token for private repositories.
@@ -103,14 +103,14 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
103103
if access_token:
104104
parsed = urlparse(repo_url)
105105
# Determine the repository type and format the URL accordingly
106-
if type == "github":
106+
if repo_type == "github":
107107
# Format: https://{token}@{domain}/owner/repo.git
108108
# Works for both github.com and enterprise GitHub domains
109109
clone_url = urlunparse((parsed.scheme, f"{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
110-
elif type == "gitlab":
110+
elif repo_type == "gitlab":
111111
# Format: https://oauth2:{token}@gitlab.com/owner/repo.git
112112
clone_url = urlunparse((parsed.scheme, f"oauth2:{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
113-
elif type == "bitbucket":
113+
elif repo_type == "bitbucket":
114114
# Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git
115115
clone_url = urlunparse((parsed.scheme, f"x-token-auth:{access_token}@{parsed.netloc}", parsed.path, '', '', ''))
116116

@@ -675,11 +675,12 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str
675675
raise ValueError(f"Failed to get file content: {str(e)}")
676676

677677

678-
def get_file_content(repo_url: str, file_path: str, type: str = "github", access_token: str = None) -> str:
678+
def get_file_content(repo_url: str, file_path: str, repo_type: str = None, access_token: str = None) -> str:
679679
"""
680680
Retrieves the content of a file from a Git repository (GitHub or GitLab).
681681
682682
Args:
683+
repo_type (str): Type of repository
683684
repo_url (str): The URL of the repository
684685
file_path (str): The path to the file within the repository
685686
access_token (str, optional): Access token for private repositories
@@ -690,14 +691,14 @@ def get_file_content(repo_url: str, file_path: str, type: str = "github", access
690691
Raises:
691692
ValueError: If the file cannot be fetched or if the URL is not valid
692693
"""
693-
if type == "github":
694+
if repo_type == "github":
694695
return get_github_file_content(repo_url, file_path, access_token)
695-
elif type == "gitlab":
696+
elif repo_type == "gitlab":
696697
return get_gitlab_file_content(repo_url, file_path, access_token)
697-
elif type == "bitbucket":
698+
elif repo_type == "bitbucket":
698699
return get_bitbucket_file_content(repo_url, file_path, access_token)
699700
else:
700-
raise ValueError("Unsupported repository URL. Only GitHub and GitLab are supported.")
701+
raise ValueError("Unsupported repository type. Only GitHub, GitLab, and Bitbucket are supported.")
701702

702703
class DatabaseManager:
703704
"""
@@ -709,14 +710,15 @@ def __init__(self):
709710
self.repo_url_or_path = None
710711
self.repo_paths = None
711712

712-
def prepare_database(self, repo_url_or_path: str, type: str = "github", access_token: str = None,
713-
embedder_type: str = None, is_ollama_embedder: bool = None,
714-
excluded_dirs: List[str] = None, excluded_files: List[str] = None,
715-
included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]:
713+
def prepare_database(self, repo_url_or_path: str, repo_type: str = None, access_token: str = None,
714+
embedder_type: str = None, is_ollama_embedder: bool = None,
715+
excluded_dirs: List[str] = None, excluded_files: List[str] = None,
716+
included_dirs: List[str] = None, included_files: List[str] = None) -> List[Document]:
716717
"""
717718
Create a new database from the repository.
718719
719720
Args:
721+
repo_type(str): Type of repository
720722
repo_url_or_path (str): The URL or local path of the repository
721723
access_token (str, optional): Access token for private repositories
722724
embedder_type (str, optional): Embedder type to use ('openai', 'google', 'ollama').
@@ -736,7 +738,7 @@ def prepare_database(self, repo_url_or_path: str, type: str = "github", access_t
736738
embedder_type = 'ollama' if is_ollama_embedder else None
737739

738740
self.reset_database()
739-
self._create_repo(repo_url_or_path, type, access_token)
741+
self._create_repo(repo_url_or_path, repo_type, access_token)
740742
return self.prepare_db_index(embedder_type=embedder_type, excluded_dirs=excluded_dirs, excluded_files=excluded_files,
741743
included_dirs=included_dirs, included_files=included_files)
742744

@@ -763,14 +765,15 @@ def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) ->
763765
repo_name = url_parts[-1].replace(".git", "")
764766
return repo_name
765767

766-
def _create_repo(self, repo_url_or_path: str, repo_type: str = "github", access_token: str = None) -> None:
768+
def _create_repo(self, repo_url_or_path: str, repo_type: str = None, access_token: str = None) -> None:
767769
"""
768770
Download and prepare all paths.
769771
Paths:
770772
~/.adalflow/repos/{owner}_{repo_name} (for url, local path will be the same)
771773
~/.adalflow/databases/{owner}_{repo_name}.pkl
772774
773775
Args:
776+
repo_type(str): Type of repository
774777
repo_url_or_path (str): The URL or local path of the repository
775778
access_token (str, optional): Access token for private repositories
776779
"""
@@ -866,16 +869,17 @@ def prepare_db_index(self, embedder_type: str = None, is_ollama_embedder: bool =
866869
logger.info(f"Total transformed documents: {len(transformed_docs)}")
867870
return transformed_docs
868871

869-
def prepare_retriever(self, repo_url_or_path: str, type: str = "github", access_token: str = None):
872+
def prepare_retriever(self, repo_url_or_path: str, repo_type: str = None, access_token: str = None):
870873
"""
871874
Prepare the retriever for a repository.
872875
This is a compatibility method for the isolated API.
873876
874877
Args:
878+
repo_type(str): Type of repository
875879
repo_url_or_path (str): The URL or local path of the repository
876880
access_token (str, optional): Access token for private repositories
877881
878882
Returns:
879883
List[Document]: List of Document objects
880884
"""
881-
return self.prepare_database(repo_url_or_path, type, access_token)
885+
return self.prepare_database(repo_url_or_path, repo_type, access_token)

src/app/[owner]/[repo]/page.tsx

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -192,11 +192,20 @@ export default function RepoWikiPage() {
192192
const isCustomModelParam = searchParams.get('is_custom_model') === 'true';
193193
const customModelParam = searchParams.get('custom_model') || '';
194194
const language = searchParams.get('language') || 'en';
195-
const repoType = repoUrl?.includes('bitbucket.org')
195+
const repoHost = (() => {
196+
if (!repoUrl) return '';
197+
try {
198+
return new URL(repoUrl).hostname.toLowerCase();
199+
} catch (e) {
200+
console.warn(`Invalid repoUrl provided: ${repoUrl}`);
201+
return '';
202+
}
203+
})();
204+
const repoType = repoHost?.includes('bitbucket')
196205
? 'bitbucket'
197-
: repoUrl?.includes('gitlab.com')
206+
: repoHost?.includes('gitlab')
198207
? 'gitlab'
199-
: repoUrl?.includes('github.com')
208+
: repoHost?.includes('github')
200209
? 'github'
201210
: searchParams.get('type') || 'github';
202211

0 commit comments

Comments
 (0)