@@ -20,20 +20,20 @@ def __init__(self, ingest_service: IngestService) -> None:
2020
2121 self ._files_under_root_folder : list [Path ] = list ()
2222
23- def _find_all_files_in_folder (self , root_path : Path ) -> None :
23+ def _find_all_files_in_folder (self , root_path : Path , ignored : list [ str ] ) -> None :
2424 """Search all files under the root folder recursively.
2525 Count them at the same time
2626 """
2727 for file_path in root_path .iterdir ():
28- if file_path .is_file ():
28+ if file_path .is_file () and file_path . name not in ignored :
2929 self .total_documents += 1
3030 self ._files_under_root_folder .append (file_path )
31- elif file_path .is_dir ():
32- self ._find_all_files_in_folder (file_path )
31+ elif file_path .is_dir () and file_path . name not in ignored :
32+ self ._find_all_files_in_folder (file_path , ignored )
3333
def ingest_folder(
    self, folder_path: Path, ignored: "list[str] | None" = None
) -> None:
    """Discover every file under ``folder_path`` and ingest them all.

    Args:
        folder_path: Root directory to scan.
        ignored: File or directory names to leave out of the scan. ``None``
            (the default) ignores nothing, so callers that pass only
            ``folder_path`` keep working.
    """
    # Count total documents before ingestion. An empty list (never None) is
    # handed down so the scan can always run its membership test.
    self._find_all_files_in_folder(
        folder_path, ignored if ignored is not None else []
    )
    self._ingest_all(self._files_under_root_folder)
3838
3939 def _ingest_all (self , files_to_ingest : list [Path ]) -> None :
@@ -64,12 +64,19 @@ def _do_ingest_one(self, changed_path: Path) -> None:
6464 action = argparse .BooleanOptionalAction ,
6565 default = False ,
6666)
# Zero or more names may follow --ignored; omitting the flag yields [].
parser.add_argument(
    "--ignored",
    default=[],
    nargs="*",
    help="List of files/directories to ignore",
)
# With no --log-file the value stays None.
parser.add_argument(
    "--log-file",
    default=None,
    type=str,
    help="Optional path to a log file. If provided, logs will be written to this file.",
)

args = parser.parse_args()
7481
7582# Set up logging to a file if a path is provided
@@ -91,9 +98,17 @@ def _do_ingest_one(self, changed_path: Path) -> None:
9198
# Announce what will be skipped before ingestion starts, so the message is
# visible while the run is in progress rather than after it has finished.
if args.ignored:
    logger.info(f"Skipping following files and directories: {args.ignored}")

ingest_service = global_injector.get(IngestService)
worker = LocalIngestWorker(ingest_service)
worker.ingest_folder(root_path, args.ignored)

if args.watch:
    logger.info(f"Watching {args.folder} for changes, press Ctrl+C to stop...")
    # NOTE(review): the watcher is handed the whole folder, so changes inside
    # ignored entries presumably still reach worker.ingest_on_watch — confirm,
    # and filter there if ignored paths must also be excluded while watching.
    watcher = IngestWatcher(args.folder, worker.ingest_on_watch)
    watcher.start()
0 commit comments