77import tiktoken
88import logging
99import base64
10- import re
1110import glob
1211from adalflow .utils import get_adalflow_default_root_path
1312from adalflow .core .db import LocalDB
@@ -67,11 +66,12 @@ def count_tokens(text: str, embedder_type: str = None, is_ollama_embedder: bool
6766 # Rough approximation: 4 characters per token
6867 return len (text ) // 4
6968
70- def download_repo (repo_url : str , local_path : str , type : str = "github" , access_token : str = None ) -> str :
69+ def download_repo (repo_url : str , local_path : str , repo_type : str = None , access_token : str = None ) -> str :
7170 """
7271 Downloads a Git repository (GitHub, GitLab, or Bitbucket) to a specified local path.
7372
7473 Args:
74+ repo_type (str, optional): Type of repository ("github", "gitlab", or "bitbucket").
7575 repo_url (str): The URL of the Git repository to clone.
7676 local_path (str): The local directory where the repository will be cloned.
7777 access_token (str, optional): Access token for private repositories.
@@ -103,14 +103,14 @@ def download_repo(repo_url: str, local_path: str, type: str = "github", access_t
103103 if access_token :
104104 parsed = urlparse (repo_url )
105105 # Determine the repository type and format the URL accordingly
106- if type == "github" :
106+ if repo_type == "github" :
107107 # Format: https://{token}@{domain}/owner/repo.git
108108 # Works for both github.com and enterprise GitHub domains
109109 clone_url = urlunparse ((parsed .scheme , f"{ access_token } @{ parsed .netloc } " , parsed .path , '' , '' , '' ))
110- elif type == "gitlab" :
110+ elif repo_type == "gitlab" :
111111 # Format: https://oauth2:{token}@gitlab.com/owner/repo.git
112112 clone_url = urlunparse ((parsed .scheme , f"oauth2:{ access_token } @{ parsed .netloc } " , parsed .path , '' , '' , '' ))
113- elif type == "bitbucket" :
113+ elif repo_type == "bitbucket" :
114114 # Format: https://x-token-auth:{token}@bitbucket.org/owner/repo.git
115115 clone_url = urlunparse ((parsed .scheme , f"x-token-auth:{ access_token } @{ parsed .netloc } " , parsed .path , '' , '' , '' ))
116116
@@ -675,11 +675,12 @@ def get_bitbucket_file_content(repo_url: str, file_path: str, access_token: str
675675 raise ValueError (f"Failed to get file content: { str (e )} " )
676676
677677
678- def get_file_content (repo_url : str , file_path : str , type : str = "github" , access_token : str = None ) -> str :
678+ def get_file_content (repo_url : str , file_path : str , repo_type : str = None , access_token : str = None ) -> str :
679679 """
680680 Retrieves the content of a file from a Git repository (GitHub, GitLab, or Bitbucket).
681681
682682 Args:
683+ repo_type (str): Type of repository ("github", "gitlab", or "bitbucket"); any other value raises ValueError.
683684 repo_url (str): The URL of the repository
684685 file_path (str): The path to the file within the repository
685686 access_token (str, optional): Access token for private repositories
@@ -690,14 +691,14 @@ def get_file_content(repo_url: str, file_path: str, type: str = "github", access
690691 Raises:
691692 ValueError: If the file cannot be fetched or if the URL is not valid
692693 """
693- if type == "github" :
694+ if repo_type == "github" :
694695 return get_github_file_content (repo_url , file_path , access_token )
695- elif type == "gitlab" :
696+ elif repo_type == "gitlab" :
696697 return get_gitlab_file_content (repo_url , file_path , access_token )
697- elif type == "bitbucket" :
698+ elif repo_type == "bitbucket" :
698699 return get_bitbucket_file_content (repo_url , file_path , access_token )
699700 else :
700- raise ValueError ("Unsupported repository URL . Only GitHub and GitLab are supported." )
701+ raise ValueError ("Unsupported repository type . Only GitHub, GitLab, and Bitbucket are supported." )
701702
702703class DatabaseManager :
703704 """
@@ -709,14 +710,15 @@ def __init__(self):
709710 self .repo_url_or_path = None
710711 self .repo_paths = None
711712
712- def prepare_database (self , repo_url_or_path : str , type : str = "github" , access_token : str = None ,
713- embedder_type : str = None , is_ollama_embedder : bool = None ,
714- excluded_dirs : List [str ] = None , excluded_files : List [str ] = None ,
715- included_dirs : List [str ] = None , included_files : List [str ] = None ) -> List [Document ]:
713+ def prepare_database (self , repo_url_or_path : str , repo_type : str = None , access_token : str = None ,
714+ embedder_type : str = None , is_ollama_embedder : bool = None ,
715+ excluded_dirs : List [str ] = None , excluded_files : List [str ] = None ,
716+ included_dirs : List [str ] = None , included_files : List [str ] = None ) -> List [Document ]:
716717 """
717718 Create a new database from the repository.
718719
719720 Args:
721+ repo_type (str, optional): Type of repository
720722 repo_url_or_path (str): The URL or local path of the repository
721723 access_token (str, optional): Access token for private repositories
722724 embedder_type (str, optional): Embedder type to use ('openai', 'google', 'ollama').
@@ -736,7 +738,7 @@ def prepare_database(self, repo_url_or_path: str, type: str = "github", access_t
736738 embedder_type = 'ollama' if is_ollama_embedder else None
737739
738740 self .reset_database ()
739- self ._create_repo (repo_url_or_path , type , access_token )
741+ self ._create_repo (repo_url_or_path , repo_type , access_token )
740742 return self .prepare_db_index (embedder_type = embedder_type , excluded_dirs = excluded_dirs , excluded_files = excluded_files ,
741743 included_dirs = included_dirs , included_files = included_files )
742744
@@ -763,14 +765,15 @@ def _extract_repo_name_from_url(self, repo_url_or_path: str, repo_type: str) ->
763765 repo_name = url_parts [- 1 ].replace (".git" , "" )
764766 return repo_name
765767
766- def _create_repo (self , repo_url_or_path : str , repo_type : str = "github" , access_token : str = None ) -> None :
768+ def _create_repo (self , repo_url_or_path : str , repo_type : str = None , access_token : str = None ) -> None :
767769 """
768770 Download and prepare all paths.
769771 Paths:
770772 ~/.adalflow/repos/{owner}_{repo_name} (for url, local path will be the same)
771773 ~/.adalflow/databases/{owner}_{repo_name}.pkl
772774
773775 Args:
776+ repo_type (str, optional): Type of repository
774777 repo_url_or_path (str): The URL or local path of the repository
775778 access_token (str, optional): Access token for private repositories
776779 """
@@ -866,16 +869,17 @@ def prepare_db_index(self, embedder_type: str = None, is_ollama_embedder: bool =
866869 logger .info (f"Total transformed documents: { len (transformed_docs )} " )
867870 return transformed_docs
868871
869- def prepare_retriever (self , repo_url_or_path : str , type : str = "github" , access_token : str = None ):
872+ def prepare_retriever (self , repo_url_or_path : str , repo_type : str = None , access_token : str = None ):
870873 """
871874 Prepare the retriever for a repository.
872875 This is a compatibility method for the isolated API.
873876
874877 Args:
878+ repo_type (str, optional): Type of repository
875879 repo_url_or_path (str): The URL or local path of the repository
876880 access_token (str, optional): Access token for private repositories
877881
878882 Returns:
879883 List[Document]: List of Document objects
880884 """
881- return self .prepare_database (repo_url_or_path , type , access_token )
885+ return self .prepare_database (repo_url_or_path , repo_type , access_token )
0 commit comments