Commit 407cd34

lint

Apply isort and black reformatting

Signed-off-by: soluwalana <[email protected]>
Signed-off-by: Sam Oluwalana <[email protected]>

1 parent c6290c5

File tree

4 files changed: +6 −12 lines

nemo/collections/llm/gpt/data/core.py
nemo/collections/llm/gpt/data/packed_sequence.py
nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py
nemo/collections/nlp/modules/common/tokenizer_utils.py

nemo/collections/llm/gpt/data/core.py
Lines changed: 1 addition & 6 deletions

@@ -25,12 +25,7 @@
 from datasets import load_dataset
 
 from nemo.collections.common.tokenizers import TokenizerSpec
-from nemo.collections.llm.gpt.data.utils import (
-    _get_samples_mapping,
-    _JSONLMemMapDataset,
-    _OnlineSampleMapping,
-    _preprocess,
-)
+from nemo.collections.llm.gpt.data.utils import _get_samples_mapping, _JSONLMemMapDataset, _OnlineSampleMapping
 from nemo.core.classes import Dataset
 from nemo.lightning.base import NEMO_DATASETS_CACHE
 
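For context, a minimal sketch of how this collapse can be reproduced through the formatters' Python APIs, assuming isort >= 5 and black >= 22 (which expose isort.code and black.format_str) and a 119-column line length; that line length mirrors common NeMo formatter settings and is an assumption here:

import black
import isort

src = (
    "from nemo.collections.llm.gpt.data.utils import (\n"
    "    _get_samples_mapping,\n"
    "    _JSONLMemMapDataset,\n"
    "    _OnlineSampleMapping,\n"
    ")\n"
)

# isort merges the parenthesized import onto one line once it fits within the
# line-length budget; black then keeps the single-line form as-is.
out = isort.code(src, line_length=119)
out = black.format_str(out, mode=black.Mode(line_length=119))
print(out)
# Prints the single-line import seen in the diff (exact name order depends on isort's configured sort).

Note that the formatters only reflow the import; dropping _preprocess from the list means it was no longer referenced in core.py.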

nemo/collections/llm/gpt/data/packed_sequence.py
Lines changed: 0 additions & 1 deletion

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import json
-import os
 
 from dataclasses import dataclass
 from pathlib import Path

nemo/collections/nlp/models/language_modeling/megatron_gpt_sft_model.py
Lines changed: 3 additions & 3 deletions

@@ -211,7 +211,7 @@ def _build_dataset(self, data_cfg, is_train=True):
         # Determine if we are using a single dataset or a list of datasets.
         is_list_config = isinstance(data_cfg.file_names, ListConfig)
         if not is_list_config:
-            raise ValueError(f"SFT train/validation datasets must be provided as a list of individual JSONL files.")
+            raise ValueError("SFT train/validation datasets must be provided as a list of individual JSONL files.")
 
         if is_train:
             # Construct the data prefix list for `get_datasets_weights_and_num_samples()`
@@ -221,15 +221,15 @@ def _build_dataset(self, data_cfg, is_train=True):
             ):
                 raise ValueError(
                     (
-                        f"concat_sampling_probabilities must be a ListConfig with the same number of files in file_names."
+                        "concat_sampling_probabilities must be a ListConfig with the same number of files in file_names."
                         f"Found: {data_cfg.concat_sampling_probabilities}"
                     )
                 )
 
             if len(data_cfg.get('concat_sampling_probabilities', None)) != len(data_cfg.file_names):
                 raise ValueError(
                     (
-                        f"concat_sampling_probabilities must be of the same size as file_names.",
+                        "concat_sampling_probabilities must be of the same size as file_names.",
                         f"Provided size {len(data_cfg.concat_sampling_probabilities)}, number of datasets {len(data_cfg.file_names)}",
                     )
                 )
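A note on why only the second literal in the first raise keeps its f prefix: Python concatenates adjacent string literals at compile time, so the prefix is needed only on the piece that interpolates a value. A standalone sketch, with a hypothetical stand-in for data_cfg.concat_sampling_probabilities:

# Adjacent string literals concatenate at compile time; only the literal that
# interpolates a value needs the f prefix. Note the missing space before "Found:",
# exactly as in the message above.
found = [0.5, 0.5]  # hypothetical value
msg = (
    "concat_sampling_probabilities must be a ListConfig with the same number of files in file_names."
    f"Found: {found}"
)
print(msg)
# concat_sampling_probabilities must be a ListConfig with the same number of files in file_names.Found: [0.5, 0.5]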

nemo/collections/nlp/modules/common/tokenizer_utils.py
Lines changed: 2 additions & 2 deletions

@@ -217,12 +217,12 @@ def get_nmt_tokenizer(
     elif library == 'byte-level':
         from nemo.collections.common.tokenizers.bytelevel_tokenizers import ByteLevelTokenizer
 
-        logging.info(f'Using byte-level tokenization')
+        logging.info('Using byte-level tokenization')
         return ByteLevelTokenizer(special_tokens_dict)
     elif library == 'regex':
         from nemo.collections.common.tokenizers.regex_tokenizer import RegExTokenizer
 
-        logging.info(f'Using regex tokenization')
+        logging.info('Using regex tokenization')
         return RegExTokenizer().load_tokenizer(regex_file=tokenizer_model, vocab_file=vocab_file)
     elif library == 'megatron':
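Both this file and megatron_gpt_sft_model.py drop the f prefix from strings that contain no placeholders. black and isort do not remove these prefixes themselves, so the likely trigger is a lint rule such as flake8's F541 (f-string without any placeholders); that attribution is an assumption, since the commit message names only the formatters. A minimal sketch of the rule:

import logging

library = 'regex'

logging.info(f'Using regex tokenization')      # F541: no placeholders, the f prefix is inert
logging.info('Using regex tokenization')       # equivalent plain string, as in this commit
logging.info(f'Using {library} tokenization')  # prefix justified by actual interpolation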
