Skip to content

Commit f2133d8

Browse files
Merge branch 'main' of https://github.com/NVIDIA/NeMo into lgrigoryan/fix-eval_beamsearch_ngram_ctc
2 parents 47c2f45 + 70b2ddf commit f2133d8

File tree

1,869 files changed

+66676
-73415
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

1,869 files changed

+66676
-73415
lines changed

.coveragerc

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,29 @@ omit =
44
/tmp/*
55
/home/TestData/*
66
/workspace/Megatron-LM/*
7-
nemo/collections/nlp/*
87
nemo/collections/multimodal/*
8+
nemo/collections/multimodal_autoregressive/*
99
nemo/collections/vision/*
10+
nemo/collections/diffusion/*
11+
nemo/collections/nlp/*
12+
13+
nemo/collections/asr/*
14+
nemo/collections/speechlm/*
15+
nemo/collections/tts/*
16+
17+
# omit from audio
18+
nemo/collections/audio/data/data_simulation.py
19+
nemo/collections/audio/metrics/squim.py
20+
nemo/collections/audio/losses/maxine/*
21+
nemo/collections/audio/models/maxine/*
22+
nemo/collections/audio/parts/utils/maxine.py
23+
24+
nemo/core/*
25+
nemo/collections/common/*
26+
27+
/workspace/config-3.12.py
28+
/workspace/config-3.py
29+
/workspace/config.py
1030

1131
[paths]
1232
source =

.github/CODEOWNERS

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
11
.github/ @pablo-garay @ko3n1g @thomasdhc @chtruong814
2-
Dockerfile.ci @pablo-garay @ko3n1g @thomasdhc @chtruong814
2+
docker/Dockerfile.ci @pablo-garay @ko3n1g @thomasdhc @chtruong814
33
.pylintrc.* @pablo-garay @ko3n1g @thomasdhc @chtruong814
4-
.flake8.* @pablo-garay @ko3n1g @thomasdhc @chtruong814
4+
.flake8.* @pablo-garay @ko3n1g @thomasdhc @chtruong814

.github/actions/test-template/action.yml

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -72,11 +72,6 @@ runs:
7272
shell: bash
7373
run: |
7474
75-
- name: Install jq
76-
shell: bash
77-
run: |
78-
curl -sS https://webi.sh/jq | sh
79-
8075
- name: Create UUID
8176
id: uuid
8277
shell: bash

.github/scripts/components_to_run.py

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,9 +76,6 @@ def main(source_sha: str, target_sha: str):
7676

7777
test_modules = list(set(test_modules))
7878

79-
if len(test_modules) == 0:
80-
test_modules = ["nemo2", "automodel", "export-deploy", "speech"]
81-
8279
with open("test_modules.json", "w", encoding="utf-8") as f:
8380
json.dump(test_modules, f)
8481

.github/scripts/nemo_dependencies.py

Lines changed: 157 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,53 @@ def find_python_files(directory: str) -> List[str]:
4444
def analyze_imports(nemo_root: str, file_path: str) -> Set[str]:
4545
"""Analyze a Python file and return its NeMo package dependencies using AST parsing."""
4646
imports = set()
47+
visited = set()  # Module directories whose __init__.py analysis has started (cycle guard)
init_import_cache = {}  # Module directory -> finished {imported name: dotted path} map

def get_init_imports(module_path: str, depth: int = 0) -> Dict[str, str]:
    """Map names re-exported by a package's ``__init__.py`` to dotted import paths.

    Parses ``<module_path>/__init__.py`` and, for every ``from nemo.<...> import <name>``
    statement (star imports are skipped), records ``name -> dotted path``. When the
    imported module is itself a package (has its own ``__init__.py``), that package is
    analyzed recursively so the name can be mapped to where it is re-exported from.

    Results are cached per module directory. Callers invoke this once per imported
    name, so without the cache every call after the first for the same package hit
    the ``visited`` guard and got an empty map, leaving all but the first name of
    ``from nemo.pkg import A, B`` unresolved.

    Args:
        module_path: Directory of the package whose ``__init__.py`` is analyzed.
        depth: Current recursion depth; analysis stops beyond 10 levels.

    Returns:
        Mapping from imported name to dotted path. Empty when the package has no
        ``__init__.py``, when the depth limit or an import cycle is hit, or when the
        file cannot be read or parsed (the error is printed, not raised).
    """
    # Reuse a finished analysis instead of treating "already seen" as "no imports".
    if module_path in init_import_cache:
        return init_import_cache[module_path]

    # Prevent infinite recursion: depth cap plus in-progress (cyclic) packages.
    if depth > 10 or module_path in visited:
        return {}

    visited.add(module_path)
    init_path = os.path.join(module_path, '__init__.py')
    if not os.path.exists(init_path):
        return {}

    try:
        with open(init_path, 'r', encoding='utf-8') as f:
            init_tree = ast.parse(f.read(), filename=init_path)

        import_map = {}
        for node in ast.walk(init_tree):
            if not (isinstance(node, ast.ImportFrom) and node.module and node.module.startswith('nemo.')):
                continue

            # Filesystem location of the module being imported from (same for every name).
            module_parts = node.module.split('.')
            module_dir = os.path.join(nemo_root, *module_parts)
            is_package = os.path.exists(os.path.join(module_dir, '__init__.py'))

            for name in node.names:
                if name.name == '*':
                    continue

                if not is_package:
                    # Direct module import
                    import_map[name.name] = f"{node.module}.{name.name}"
                    continue

                # The imported module is a package: follow its own re-exports.
                sub_imports = get_init_imports(module_dir, depth + 1)
                if name.name in sub_imports:
                    import_map[name.name] = sub_imports[name.name]
                else:
                    # Not re-exported by the sub-package; only kept when a file named
                    # after the package exists inside it (e.g. pkg/pkg.py).
                    # NOTE(review): this looks for <pkg>/<pkg>.py, not <pkg>/<name>.py — confirm intent.
                    module_file = os.path.join(module_dir, f"{module_parts[-1]}.py")
                    if os.path.exists(module_file):
                        import_map[name.name] = f"{node.module}.{name.name}"

        init_import_cache[module_path] = import_map
        return import_map
    except Exception as e:
        # Best-effort: an unreadable or unparsable __init__.py must not abort the whole scan.
        print(f"Error analyzing {init_path}: {e}")
        return {}
4794

4895
try:
4996
with open(file_path, 'r', encoding='utf-8') as f:
@@ -68,14 +115,31 @@ def analyze_imports(nemo_root: str, file_path: str) -> Set[str]:
68115
if name.name == '*':
69116
continue
70117

71-
imports.add(f"{node.module}.{name.name}")
118+
# Check if this is an __init__ import
119+
module_path = os.path.join(nemo_root, *parts)
120+
init_imports = get_init_imports(module_path)
121+
122+
if name.name in init_imports:
123+
# Use the mapped import path
124+
imports.add(init_imports[name.name])
125+
else:
126+
imports.add(f"{node.module}.{name.name}")
72127

73128
elif module_type in find_top_level_packages(nemo_root):
74129
if node.names:
75130
for name in node.names:
76131
if name.name == '*':
77132
continue
78-
imports.add(f"{node.module}.{name.name}")
133+
134+
# Check if this is an __init__ import
135+
module_path = os.path.join(nemo_root, *parts)
136+
init_imports = get_init_imports(module_path)
137+
138+
if name.name in init_imports:
139+
# Use the mapped import path
140+
imports.add(init_imports[name.name])
141+
else:
142+
imports.add(f"{node.module}.{name.name}")
79143

80144
except Exception as e:
81145
print(f"Error analyzing {file_path}: {e}")
@@ -87,12 +151,16 @@ def find_top_level_packages(nemo_root: str) -> List[str]:
87151
"""Find all top-level packages under nemo directory."""
88152
packages: List[str] = []
89153
nemo_dir = os.path.join(nemo_root, 'nemo')
154+
tests_dir = os.path.join(nemo_root, 'tests')
90155

91156
if not os.path.exists(nemo_dir):
92157
print(f"Warning: nemo directory not found at {nemo_dir}")
93158
return packages
159+
if not os.path.exists(tests_dir):
160+
print(f"Warning: nemo directory not found at {nemo_dir}")
161+
return packages
94162

95-
for item in os.listdir(nemo_dir):
163+
for item in os.listdir(nemo_dir) + os.listdir(tests_dir):
96164
item_path = os.path.join(nemo_dir, item)
97165
if os.path.isdir(item_path) and not item.startswith('__'):
98166
packages.append(item)
@@ -125,17 +193,19 @@ def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
125193

126194
dependencies: Dict[str, List[str]] = {}
127195

128-
# Second pass: analyze imports and build reverse dependencies
129196
for file_path in find_python_files(nemo_root):
130197
relative_path = os.path.relpath(file_path, nemo_root)
198+
131199
parts = relative_path.split(os.sep)
132200

133-
if len(parts) == 1 or parts[-1] == "__init__.py" or parts[0] != "nemo":
201+
if len(parts) == 1 or (parts[0] != "nemo" and parts[0] != "tests"):
134202
continue
135203

136204
module_path = relative_path.replace(".py", "").replace("/", ".")
137-
if parts[1] in top_level_packages and parts[1] != 'collections':
205+
if parts[1] in top_level_packages and parts[1] != 'collections' and parts[0] != 'tests':
138206
dependencies[module_path] = list(set(analyze_imports(nemo_root, file_path)))
207+
elif parts[0] == 'tests':
208+
dependencies[module_path] = [relative_path.replace("/", ".").replace(".py", "")]
139209
elif parts[1] == 'collections':
140210
dependencies[module_path] = list(set(analyze_imports(nemo_root, file_path)))
141211

@@ -181,8 +251,10 @@ def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
181251
simplified_dependencies: Dict[str, List[str]] = {}
182252
for package, deps in dependencies.items():
183253
package_parts = package.split('.')
184-
print(f"{os.path.join(*package_parts[:-1])}.py")
185-
if os.path.isfile((file_path := f"{os.path.join(*package_parts[:-1])}.py")):
254+
255+
if package_parts[0] == "tests":
256+
simplified_package_path = f"{os.path.join(*package_parts)}.py"
257+
elif os.path.isfile((file_path := f"{os.path.join(*package_parts[:-1])}.py")):
186258
simplified_package_path = file_path
187259
elif os.path.isdir((file_path := f"{os.path.join(*package_parts[:-1])}")):
188260
simplified_package_path = file_path
@@ -197,13 +269,14 @@ def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
197269

198270
if (
199271
len(dep_parts) >= 2
200-
and dep_parts[1] in find_top_level_packages(nemo_root)
272+
and (dep_parts[1] in find_top_level_packages(nemo_root))
201273
and dep_parts[1] != 'collections'
202274
):
203275
simplified_dependencies[simplified_package_path].append(f"{dep_parts[0]}.{dep_parts[1]}")
204-
276+
elif dep_parts[0] == "tests":
277+
simplified_dependencies[simplified_package_path].append(".".join(dep_parts))
205278
elif len(dep_parts) >= 3 and (
206-
simplified_name := f"{dep_parts[0]}.{dep_parts[1]}.{dep_parts[2]}"
279+
simplified_name := f"nemo.{dep_parts[1]}.{dep_parts[2]}"
207280
) in find_collection_modules(nemo_root):
208281
simplified_dependencies[simplified_package_path].append(simplified_name)
209282

@@ -218,22 +291,90 @@ def build_dependency_graph(nemo_root: str) -> Dict[str, List[str]]:
218291
for package, deps in dependencies.items():
219292
new_deps = []
220293
for dep in deps:
221-
if "asr" in dep or "tts" in dep or "speechlm" in dep or "audio" in dep:
294+
if (
295+
"nemo.collections.asr" in dep
296+
or "nemo.collections.tts" in dep
297+
or "nemo.collections.speechlm" in dep
298+
or "nemo.collections.audio" in dep
299+
or "tests.collections.asr" in dep
300+
or "tests.collections.tts" in dep
301+
or "tests.collections.speechlm" in dep
302+
or "tests.collections.audio" in dep
303+
):
222304
new_deps.append("speech")
305+
new_deps.append("unit-tests")
223306

224-
if "export" in dep or "deploy" in dep:
307+
if "nemo.export" in dep or "nemo.deploy" in dep or "tests.export" in dep or "tests.deploy" in dep:
225308
new_deps.append("export-deploy")
309+
new_deps.append("unit-tests")
226310

227-
if "llm" in dep or "vlm" in dep or "automodel" in dep:
311+
if (
312+
"nemo.collections.llm" in dep
313+
or "nemo.collections.vlm" in dep
314+
or "nemo.automodel" in dep
315+
or "tests.collections.llm" in dep
316+
or "tests.collections.vlm" in dep
317+
or "tests.automodel" in dep
318+
):
228319
new_deps.append("automodel")
320+
new_deps.append("unit-tests")
321+
322+
if "tests" in dep and "tests.functional_tests" not in dep:
323+
new_deps.append("unit-tests")
229324

230-
if "collections" in dep and not ("asr" in dep or "tts" in dep or "speechlm" in dep or "audio" in dep):
325+
if (
326+
"nemo.collections" in dep
327+
and "nemo.collections.asr" not in dep
328+
and "nemo.collections.tts" not in dep
329+
and "nemo.collections.speechlm" not in dep
330+
and "nemo.collections.audio" not in dep
331+
and "tests.collections.asr" not in dep
332+
and "tests.collections.tts" not in dep
333+
and "tests.collections.speechlm" not in dep
334+
and "tests.collections.audio" not in dep
335+
):
231336
new_deps.append("nemo2")
337+
new_deps.append("unit-tests")
232338

233339
bucket_deps[package] = sorted(list(set(new_deps)))
234340

235341
dependencies = bucket_deps
236342

343+
# Additional dependencies
344+
# Add all files in requirements/ directory
345+
requirements_dir = os.path.join(nemo_root, "requirements")
346+
if os.path.exists(requirements_dir):
347+
for filename in os.listdir(requirements_dir):
348+
filepath = os.path.join("requirements", filename)
349+
relative_path = os.path.relpath(filepath, nemo_root)
350+
351+
dependencies[relative_path] = [
352+
"nemo2",
353+
"unit-tests",
354+
"speech",
355+
"automodel",
356+
"export-deploy",
357+
]
358+
359+
# Add all Dockerfile files
360+
for root, _, files in os.walk(nemo_root):
361+
for file_path in files:
362+
full_path = os.path.join(root, file_path)
363+
relative_path = os.path.relpath(full_path, nemo_root)
364+
365+
if "cicd-main-export-deploy" in file_path:
366+
dependencies[relative_path] = ["export-deploy"]
367+
if "cicd-main-nemo2" in file_path:
368+
dependencies[relative_path] = ["nemo2"]
369+
if "cicd-main-speech" in file_path:
370+
dependencies[relative_path] = ["speech"]
371+
if "cicd-main-automodel" in file_path:
372+
dependencies[relative_path] = ["automodel"]
373+
if "cicd-main-unit-tests" in file_path:
374+
dependencies[relative_path] = ["unit-tests"]
375+
if "Dockerfile" in file_path:
376+
dependencies[relative_path] = ["nemo2", "unit-tests", "speech", "automodel", "export-deploy"]
377+
237378
# Sort dependencies by length of values (number of dependencies)
238379
dependencies = dict(sorted(dependencies.items(), key=lambda x: len(x[1]), reverse=True))
239380

@@ -250,7 +391,7 @@ def main():
250391

251392
# Output as JSON
252393
data = json.dumps(dependencies, indent=4)
253-
# print(data)
394+
254395
with open('nemo_dependencies.json', 'w', encoding='utf-8') as f:
255396
f.write(data)
256397

.github/workflows/_build_container.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ jobs:
6868
echo "EOF" >> $GITHUB_OUTPUT
6969
7070
build:
71-
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
71+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
7272
needs: [pre-flight]
7373
with:
7474
image-name: ${{ inputs.image-name }}

.github/workflows/_bump_mcore_tag.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ jobs:
4242
git push -u origin $SOURCE_BRANCH
4343
4444
mcore:
45-
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
45+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
4646
needs: [update-branch]
4747
with:
4848
source-repository: NVIDIA/Megatron-LM

.github/workflows/build-test-publish-wheel.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ defaults:
2626

2727
jobs:
2828
build-test-publish-wheel:
29-
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
29+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
3030
with:
3131
dry-run: true
3232
python-package: nemo

.github/workflows/cherry-pick-release-commit.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ on:
77

88
jobs:
99
cherry-pick:
10-
uses: NVIDIA/NeMo-FW-CI-templates/.github/workflows/[email protected]
10+
uses: NVIDIA-NeMo/FW-CI-templates/.github/workflows/[email protected]
1111
secrets:
1212
PAT: ${{ secrets.PAT }}
1313
SLACK_WEBHOOK_ADMIN: ${{ secrets.SLACK_WEBHOOK_ADMIN }}

0 commit comments

Comments
 (0)