huggingface · albertvillanova · May 31, 2022 · May 31, 2022 · May 31, 2022 · May 31, 2022
diff --git a/datasets/timit_asr/README.md b/datasets/timit_asr/README.md
@@ -170,7 +170,7 @@ A typical data point comprises the path to the audio file, usually called `file`
 
 - speaker_id: Unique id of the speaker. The same speaker id can be found for multiple data samples.
 
-- id: Unique id of the data sample. Contains the <SENTENCE_TYPE><SENTENCE_NUMBER>.  
+- id: ID of the data sample, in the form <SENTENCE_TYPE><SENTENCE_NUMBER>. It is not guaranteed to be unique across data samples.
 
 
 ### Data Splits

diff --git a/datasets/timit_asr/timit_asr.py b/datasets/timit_asr/timit_asr.py
@@ -129,7 +129,7 @@ def _split_generators(self, dl_manager):
     def _generate_examples(self, split, data_dir):
         """Generate examples from TIMIT archive_path based on the test/train csv information."""
         # Iterating the contents of the data to extract the relevant information
-        for wav_path in sorted(Path(data_dir).glob(f"**/{split.upper()}/**/*.WAV")):
+        for key, wav_path in enumerate(sorted(Path(data_dir).glob(f"**/{split.upper()}/**/*.WAV"))):
 
             # extract transcript
             with open(wav_path.with_suffix(".TXT"), encoding="utf-8") as op:
@@ -174,4 +174,4 @@ def _generate_examples(self, split, data_dir):
                 "id": id_,
             }
 
-            yield id_, example
+            yield key, example