File tree: 1 file changed (+3 -0 lines changed)
nemo/collections/llm/gpt/data Expand file tree Collapse file tree 1 file changed +3
-0
lines changed Original file line number Diff line number Diff line change @@ -163,6 +163,7 @@ class PreTrainingDataModule(pl.LightningDataModule, IOMixin):
163163 init_consumed_samples: (Optional[int]): Number of samples already consumed at initialization.
164164 init_global_step: (Optional[int]): Starting global training step count, used for resuming training.
165165 output_log: (Optional[bool]): Whether to print logging/debug output during sampling.
166+ mmap_bin_files: (Optional[bool]): Whether to memory-map the .bin files (True) or read them through file pointers (False).
166167 object_storage_cache_path: (Optional[str]): Path for caching indices for s3 or msc dataloading.
167168 """
168169
@@ -193,6 +194,7 @@ def __init__(
193194 init_global_step : Optional [int ] = 0 ,
194195 output_log : Optional [bool ] = True ,
195196 dataset_cls : Type [MegatronDataset ] = GPTDataset ,
197+ mmap_bin_files : Optional [bool ] = True ,
196198 object_storage_cache_path : Optional [str ] = None ,
197199 ) -> None :
198200 super ().__init__ ()
@@ -206,6 +208,7 @@ def __init__(
206208 validate_dataset_asset_accessibility (paths )
207209
208210 build_kwargs = {}
211+ build_kwargs ["mmap_bin_files" ] = mmap_bin_files
209212 if isinstance (paths , dict ):
210213 if split is not None :
211214 warnings .warn (
You can’t perform that action at this time.
0 commit comments