2 changes: 1 addition & 1 deletion 3rdparty/Megatron-LM
Submodule Megatron-LM updated 241 files
2 changes: 1 addition & 1 deletion src/megatron/bridge/recipes/gemma/gemma2.py
@@ -247,7 +247,7 @@ def _gemma2_common(
reset_attention_mask=False,
reset_position_ids=False,
eod_mask_loss=False,
sequence_length=seq_length,
seq_length=seq_length,
num_dataset_builder_threads=1,
blend=blend,
blend_per_split=blend_per_split,
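For context, a minimal sketch of why the old keyword failed against the previous `GPTDatasetConfig.__init__` signature (the pre-change signature appears in the config.py hunk below); the stub here is illustrative, not the real class:

# Illustrative stub of the pre-change wrapper: `seq_length` was a required argument.
class GPTDatasetConfigStub:
    def __init__(self, seq_length: int, *args, **kwargs):
        kwargs["sequence_length"] = seq_length  # forwarded to MCoreGPTDatasetConfig

# The recipe passed the Megatron-Core field name instead of the wrapper's keyword:
#   GPTDatasetConfigStub(sequence_length=4096, ...)
# -> TypeError: __init__() missing 1 required positional argument: 'seq_length'
# The one-keyword rename above (seq_length=seq_length) matches the wrapper's signature.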
9 changes: 6 additions & 3 deletions src/megatron/bridge/training/config.py
@@ -316,19 +316,22 @@ class GPTDatasetConfig(MCoreGPTDatasetConfig, DataloaderConfig):

def __init__(
self,
seq_length: int,
seq_length: int | None = None,
skip_getting_attention_mask_from_dataset: bool = True,
*args,
**kwargs,
):
"""
Args:
seq_length (int): the sequence length.
seq_length (int, optional): the sequence length. If not provided, `sequence_length` must be in kwargs.
skip_getting_attention_mask_from_dataset (bool): if set, the dataset will pass a None attention mask
and the attention mask is autogenerated from the attn backend.
"""
self.skip_getting_attention_mask_from_dataset = skip_getting_attention_mask_from_dataset
kwargs["sequence_length"] = seq_length
if seq_length is not None:
kwargs["sequence_length"] = seq_length
elif "sequence_length" not in kwargs:
raise ValueError("Either `seq_length` or `sequence_length` must be provided.")

dataloader_kwargs = {k: kwargs.pop(k) for k in list(kwargs) if k in DataloaderConfig.__dataclass_fields__}
MCoreGPTDatasetConfig.__init__(self, *args, **kwargs)
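As a usage sketch (not part of this PR): with the change above, either keyword ends up populating MCoreGPTDatasetConfig's `sequence_length` field. The `common` fields below are illustrative; the exact set of additional required MCoreGPTDatasetConfig fields depends on the Megatron-LM version.

from megatron.bridge.training.config import GPTDatasetConfig

common = dict(
    random_seed=1234,
    reset_position_ids=False,
    reset_attention_mask=False,
    eod_mask_loss=False,
)

# Bridge-level keyword, forwarded to MCoreGPTDatasetConfig as `sequence_length`:
cfg_a = GPTDatasetConfig(seq_length=4096, **common)

# The Megatron-Core field name is now accepted directly in kwargs as well:
cfg_b = GPTDatasetConfig(sequence_length=4096, **common)

# Omitting both raises:
#   ValueError: Either `seq_length` or `sequence_length` must be provided.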
@@ -21,6 +21,9 @@
from transformers import AutoTokenizer, Qwen3MoeConfig, Qwen3MoeForCausalLM


pytestmark = pytest.mark.pleasefixme(reason="Blocked on upstream quantization dependency issue; re-enable once fixed.")


HF_QWEN3_MOE_TOY_MODEL_CONFIG = {
"architectures": ["Qwen3MoeForCausalLM"],
"attention_bias": False,
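For reference, a minimal sketch of how a module-level `pytestmark` with this custom marker behaves; the marker registration and the CI deselection flag are assumptions, not shown in this diff.

import pytest

# A module-level `pytestmark` applies the marker to every test collected from this module.
pytestmark = pytest.mark.pleasefixme(reason="Blocked on upstream quantization dependency issue; re-enable once fixed.")

def test_placeholder():
    assert True

# Assumed conventions (not part of this diff):
#   * register the marker, e.g. markers = ["pleasefixme: known-broken test"] under
#     [tool.pytest.ini_options] in pyproject.toml, so pytest does not warn about it,
#   * skip these tests in CI with: pytest -m "not pleasefixme"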
3 changes: 3 additions & 0 deletions tests/functional_tests/quantization/test_export_workflow.py
@@ -21,6 +21,9 @@
from safetensors import safe_open


pytestmark = pytest.mark.pleasefixme(reason="Blocked on upstream quantization dependency issue; re-enable once fixed.")


class TestExportWorkflow:
"""
Test complete export workflow: quantize HuggingFace models to Megatron format,
3 changes: 3 additions & 0 deletions tests/functional_tests/quantization/test_qat_workflow.py
@@ -20,6 +20,9 @@

import pytest


pytestmark = pytest.mark.pleasefixme(reason="Blocked on upstream quantization dependency issue; re-enable once fixed.")

from megatron.bridge.training.utils.checkpoint_utils import (
TRACKER_PREFIX,
get_checkpoint_name,
@@ -18,6 +18,9 @@
import pytest


pytestmark = pytest.mark.pleasefixme(reason="Blocked on upstream quantization dependency issue; re-enable once fixed.")


class TestQuantizationWorkflow:
"""
Test complete quantization workflow: quantize HuggingFace models to Megatron format,
1,076 changes: 542 additions & 534 deletions uv.lock

Large diffs are not rendered by default.
