@@ -44,6 +44,53 @@ def find_python_files(directory: str) -> List[str]:
4444def analyze_imports (nemo_root : str , file_path : str ) -> Set [str ]:
4545 """Analyze a Python file and return its NeMo package dependencies using AST parsing."""
4646 imports = set ()
# State shared by every get_init_imports() call made while analysing one file.
visited = set()  # Packages whose __init__.py is being analysed right now (cycle guard)
init_import_cache: Dict[str, Dict[str, str]] = {}  # module_path -> finished import map


def get_init_imports(module_path: str, depth: int = 0) -> Dict[str, str]:
    """Map the names re-exported by a package's ``__init__.py`` to where they come from.

    Only absolute ``from nemo.<...> import <name>`` statements are considered;
    star imports and relative imports are ignored. When the imported module is
    itself a package, its ``__init__.py`` is analysed recursively so that a
    chain of re-exports is followed as far as possible.

    Args:
        module_path: Directory of the package whose ``__init__.py`` is analysed.
        depth: Current recursion depth; callers normally leave this at 0.

    Returns:
        A dict mapping each exported name (the alias, if ``as`` was used) to the
        dotted path it resolves to. Empty if the directory has no
        ``__init__.py``, the file cannot be read or parsed, the depth limit is
        exceeded, or the package is already being analysed further up the call
        stack (circular import). The returned dict is cached and shared between
        calls, so callers must treat it as read-only.

    Note:
        ``nemo_root`` is taken from the enclosing scope. Results are memoised per
        ``module_path``; a map computed while a circular import or the depth
        limit cut the recursion short is cached in that truncated form.
    """
    # Finished results are reused. The caller asks once per imported name, so
    # without this every request after the first would see an empty map.
    if module_path in init_import_cache:
        return init_import_cache[module_path]

    # Prevent infinite recursion: limit depth to 10 levels and never re-enter a
    # package that is still being analysed.
    if depth > 10 or module_path in visited:
        return {}

    init_path = os.path.join(module_path, '__init__.py')
    if not os.path.exists(init_path):
        init_import_cache[module_path] = {}
        return init_import_cache[module_path]

    import_map: Dict[str, str] = {}
    visited.add(module_path)
    try:
        with open(init_path, 'r', encoding='utf-8') as f:
            init_tree = ast.parse(f.read(), filename=init_path)

        for node in ast.walk(init_tree):
            if not (isinstance(node, ast.ImportFrom) and node.module and node.module.startswith('nemo.')):
                continue

            # Resolve the imported module once per statement, not once per name.
            module_dir = os.path.join(nemo_root, *node.module.split('.'))
            is_package = os.path.exists(os.path.join(module_dir, '__init__.py'))
            sub_imports = get_init_imports(module_dir, depth + 1) if is_package else {}

            for alias in node.names:
                if alias.name == '*':
                    continue

                # Importers see the alias when one is given (``import a as b``).
                exported_name = alias.asname or alias.name
                # Follow the re-export chain if the sub-package forwards the
                # name; otherwise the name lives in the imported module itself.
                import_map[exported_name] = sub_imports.get(alias.name, f"{node.module}.{alias.name}")
    except Exception as e:
        # Best effort: an unreadable or unparsable __init__.py must not abort
        # the analysis of the importing file.
        print(f"Error analyzing {init_path}: {e}")
        import_map = {}
    finally:
        visited.discard(module_path)

    init_import_cache[module_path] = import_map
    return import_map
4794
4895 try :
4996 with open (file_path , 'r' , encoding = 'utf-8' ) as f :
@@ -68,14 +115,31 @@ def analyze_imports(nemo_root: str, file_path: str) -> Set[str]:
68115 if name .name == '*' :
69116 continue
70117
71- imports .add (f"{ node .module } .{ name .name } " )
118+ # Check if this is an __init__ import
119+ module_path = os .path .join (nemo_root , * parts )
120+ init_imports = get_init_imports (module_path )
121+
122+ if name .name in init_imports :
123+ # Use the mapped import path
124+ imports .add (init_imports [name .name ])
125+ else :
126+ imports .add (f"{ node .module } .{ name .name } " )
72127
73128 elif module_type in find_top_level_packages (nemo_root ):
74129 if node .names :
75130 for name in node .names :
76131 if name .name == '*' :
77132 continue
78- imports .add (f"{ node .module } .{ name .name } " )
133+
134+ # Check if this is an __init__ import
135+ module_path = os .path .join (nemo_root , * parts )
136+ init_imports = get_init_imports (module_path )
137+
138+ if name .name in init_imports :
139+ # Use the mapped import path
140+ imports .add (init_imports [name .name ])
141+ else :
142+ imports .add (f"{ node .module } .{ name .name } " )
79143
80144 except Exception as e :
81145 print (f"Error analyzing { file_path } : { e } " )
@@ -87,12 +151,16 @@ def find_top_level_packages(nemo_root: str) -> List[str]:
87151 """Find all top-level packages under nemo directory."""
88152 packages : List [str ] = []
89153 nemo_dir = os .path .join (nemo_root , 'nemo' )
154+ tests_dir = os .path .join (nemo_root , 'tests' )
90155
91156 if not os .path .exists (nemo_dir ):
92157 print (f"Warning: nemo directory not found at { nemo_dir } " )
93158 return packages
159+ if not os .path .exists (tests_dir ):
160+ print (f"Warning: nemo directory not found at { nemo_dir } " )
161+ return packages
94162
95- for item in os .listdir (nemo_dir ):
163+ for item in os .listdir (nemo_dir ) + os . listdir ( tests_dir ) :
96164 item_path = os .path .join (nemo_dir , item )
97165 if os .path .isdir (item_path ) and not item .startswith ('__' ):
98166 packages .append (item )
@@ -125,17 +193,19 @@ def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
125193
126194 dependencies : Dict [str , List [str ]] = {}
127195
128- # Second pass: analyze imports and build reverse dependencies
129196 for file_path in find_python_files (nemo_root ):
130197 relative_path = os .path .relpath (file_path , nemo_root )
198+
131199 parts = relative_path .split (os .sep )
132200
133- if len (parts ) == 1 or parts [- 1 ] == "__init__.py" or parts [0 ] != "nemo" :
201+ if len (parts ) == 1 or ( parts [0 ] != "nemo" and parts [0 ] != "tests" ) :
134202 continue
135203
136204 module_path = relative_path .replace (".py" , "" ).replace ("/" , "." )
137- if parts [1 ] in top_level_packages and parts [1 ] != 'collections' :
205+ if parts [1 ] in top_level_packages and parts [1 ] != 'collections' and parts [ 0 ] != 'tests' :
138206 dependencies [module_path ] = list (set (analyze_imports (nemo_root , file_path )))
207+ elif parts [0 ] == 'tests' :
208+ dependencies [module_path ] = [relative_path .replace ("/" , "." ).replace (".py" , "" )]
139209 elif parts [1 ] == 'collections' :
140210 dependencies [module_path ] = list (set (analyze_imports (nemo_root , file_path )))
141211
@@ -181,8 +251,10 @@ def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
181251 simplified_dependencies : Dict [str , List [str ]] = {}
182252 for package , deps in dependencies .items ():
183253 package_parts = package .split ('.' )
184- print (f"{ os .path .join (* package_parts [:- 1 ])} .py" )
185- if os .path .isfile ((file_path := f"{ os .path .join (* package_parts [:- 1 ])} .py" )):
254+
255+ if package_parts [0 ] == "tests" :
256+ simplified_package_path = f"{ os .path .join (* package_parts )} .py"
257+ elif os .path .isfile ((file_path := f"{ os .path .join (* package_parts [:- 1 ])} .py" )):
186258 simplified_package_path = file_path
187259 elif os .path .isdir ((file_path := f"{ os .path .join (* package_parts [:- 1 ])} " )):
188260 simplified_package_path = file_path
@@ -197,13 +269,14 @@ def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
197269
198270 if (
199271 len (dep_parts ) >= 2
200- and dep_parts [1 ] in find_top_level_packages (nemo_root )
272+ and ( dep_parts [1 ] in find_top_level_packages (nemo_root ) )
201273 and dep_parts [1 ] != 'collections'
202274 ):
203275 simplified_dependencies [simplified_package_path ].append (f"{ dep_parts [0 ]} .{ dep_parts [1 ]} " )
204-
276+ elif dep_parts [0 ] == "tests" :
277+ simplified_dependencies [simplified_package_path ].append ("." .join (dep_parts ))
205278 elif len (dep_parts ) >= 3 and (
206- simplified_name := f"{ dep_parts [ 0 ] } .{ dep_parts [1 ]} .{ dep_parts [2 ]} "
279+ simplified_name := f"nemo .{ dep_parts [1 ]} .{ dep_parts [2 ]} "
207280 ) in find_collection_modules (nemo_root ):
208281 simplified_dependencies [simplified_package_path ].append (simplified_name )
209282
@@ -218,22 +291,90 @@ def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
218291 for package , deps in dependencies .items ():
219292 new_deps = []
220293 for dep in deps :
221- if "asr" in dep or "tts" in dep or "speechlm" in dep or "audio" in dep :
294+ if (
295+ "nemo.collections.asr" in dep
296+ or "nemo.collections.tts" in dep
297+ or "nemo.collections.speechlm" in dep
298+ or "nemo.collections.audio" in dep
299+ or "tests.collections.asr" in dep
300+ or "tests.collections.tts" in dep
301+ or "tests.collections.speechlm" in dep
302+ or "tests.collections.audio" in dep
303+ ):
222304 new_deps .append ("speech" )
305+ new_deps .append ("unit-tests" )
223306
224- if "export" in dep or "deploy" in dep :
307+ if "nemo. export" in dep or "nemo.deploy" in dep or "tests.export" in dep or "tests. deploy" in dep :
225308 new_deps .append ("export-deploy" )
309+ new_deps .append ("unit-tests" )
226310
227- if "llm" in dep or "vlm" in dep or "automodel" in dep :
311+ if (
312+ "nemo.collections.llm" in dep
313+ or "nemo.collections.vlm" in dep
314+ or "nemo.automodel" in dep
315+ or "tests.collections.llm" in dep
316+ or "tests.collections.vlm" in dep
317+ or "tests.automodel" in dep
318+ ):
228319 new_deps .append ("automodel" )
320+ new_deps .append ("unit-tests" )
321+
322+ if "tests" in dep and "tests.functional_tests" not in dep :
323+ new_deps .append ("unit-tests" )
229324
230- if "collections" in dep and not ("asr" in dep or "tts" in dep or "speechlm" in dep or "audio" in dep ):
325+ if (
326+ "nemo.collections" in dep
327+ and "nemo.collections.asr" not in dep
328+ and "nemo.collections.tts" not in dep
329+ and "nemo.collections.speechlm" not in dep
330+ and "nemo.collections.audio" not in dep
331+ and "tests.collections.asr" not in dep
332+ and "tests.collections.tts" not in dep
333+ and "tests.collections.speechlm" not in dep
334+ and "tests.collections.audio" not in dep
335+ ):
231336 new_deps .append ("nemo2" )
337+ new_deps .append ("unit-tests" )
232338
233339 bucket_deps [package ] = sorted (list (set (new_deps )))
234340
235341 dependencies = bucket_deps
236342
343+ # Additional dependencies
344+ # Add all files in requirements/ directory
345+ requirements_dir = os .path .join (nemo_root , "requirements" )
346+ if os .path .exists (requirements_dir ):
347+ for filename in os .listdir (requirements_dir ):
348+ filepath = os .path .join ("requirements" , filename )
349+ relative_path = os .path .relpath (filepath , nemo_root )
350+
351+ dependencies [relative_path ] = [
352+ "nemo2" ,
353+ "unit-tests" ,
354+ "speech" ,
355+ "automodel" ,
356+ "export-deploy" ,
357+ ]
358+
359+ # Add all Dockerfile files
360+ for root , _ , files in os .walk (nemo_root ):
361+ for file_path in files :
362+ full_path = os .path .join (root , file_path )
363+ relative_path = os .path .relpath (full_path , nemo_root )
364+
365+ if "cicd-main-export-deploy" in file_path :
366+ dependencies [relative_path ] = ["export-deploy" ]
367+ if "cicd-main-nemo2" in file_path :
368+ dependencies [relative_path ] = ["nemo2" ]
369+ if "cicd-main-speech" in file_path :
370+ dependencies [relative_path ] = ["speech" ]
371+ if "cicd-main-automodel" in file_path :
372+ dependencies [relative_path ] = ["automodel" ]
373+ if "cicd-main-unit-tests" in file_path :
374+ dependencies [relative_path ] = ["unit-tests" ]
375+ if "Dockerfile" in file_path :
376+ dependencies [relative_path ] = ["nemo2" , "unit-tests" , "speech" , "automodel" , "export-deploy" ]
377+
237378 # Sort dependencies by length of values (number of dependencies)
238379 dependencies = dict (sorted (dependencies .items (), key = lambda x : len (x [1 ]), reverse = True ))
239380
@@ -250,7 +391,7 @@ def main():
250391
251392 # Output as JSON
252393 data = json .dumps (dependencies , indent = 4 )
253- # print(data)
394+
254395 with open ('nemo_dependencies.json' , 'w' , encoding = 'utf-8' ) as f :
255396 f .write (data )
256397
0 commit comments