I have a large ENG dataset where some of the wav files will cause "EveryVoice" to crash while processing "pitch".
When this crash happens it is also VERY HARD to pinpoint the exact file causing the issue (e.g., I had to run manual diffs comparing the files that were created against the files that were processed...).
Processing pitch on 1 CPU: 67%|██████▋ | 2/3 [00:00<00:00, 91.90it/s]
╭───────────────────── Traceback (most recent call last) ──────────────────────╮
│ /gpfs/fs5/nrc/nrc-fs1/ict/others/u/tes001/TxT2SPEECH/EveryVoice/everyvoice/m │
│ odel/feature_prediction/FastSpeech2_lightning/fs2/cli/preprocess.py:35 in │
│ preprocess │
│ │
│ 32 │ │
│ 33 │ from ..config import FastSpeech2Config │
│ 34 │ │
│ ❱ 35 │ preprocessor, config, processed = preprocess_base_command( │
│ 36 │ │ model_config=FastSpeech2Config, │
│ 37 │ │ steps=[step.name for step in steps], │
│ 38 │ │ **kwargs, │
│ │
│ /gpfs/fs5/nrc/nrc-fs1/ict/others/u/tes001/TxT2SPEECH/EveryVoice/everyvoice/b │
│ ase_cli/helpers.py:85 in preprocess_base_command │
│ │
│ 82 │ preprocessor = Preprocessor(config) │
│ 83 │ if isinstance(config, FastSpeech2Config) and config.model.use_phon │
│ 84 │ │ steps.append("pfs") │
│ ❱ 85 │ preprocessor.preprocess( │
│ 86 │ │ cpus=cpus, │
│ 87 │ │ overwrite=overwrite, │
│ 88 │ │ to_process=steps, │
│ │
│ /gpfs/fs5/nrc/nrc-fs1/ict/others/u/tes001/TxT2SPEECH/EveryVoice/everyvoice/p │
│ reprocessor/preprocessor.py:813 in preprocess │
│ │
│ 810 │ │ │ │ │ │ )(delayed(process_fn)(file) for file in fileli │
│ 811 │ │ │ │ else: │
│ 812 │ │ │ │ │ for f in tqdm(filelist, desc=f"Processing {process │
│ ❱ 813 │ │ │ │ │ │ process_fn(f) │
│ 814 │ │ if "audio" in to_process: │
│ 815 │ │ │ report = f"Here is a report:\n {self.report()}" │
│ 816 │ │ │ if not self.counters.value("duration"): │
│ │
│ /gpfs/fs5/nrc/nrc-fs1/ict/others/u/tes001/TxT2SPEECH/EveryVoice/everyvoice/p │
│ reprocessor/preprocessor.py:563 in process_pitch │
│ │
│ 560 │ │ │ item, "audio", f"audio-{self.input_sampling_rate}.pt" │
│ 561 │ │ ) │
│ 562 │ │ audio = torch.load(audio_path) │
│ ❱ 563 │ │ pitch = self.extract_pitch(audio) │
│ 564 │ │ if ( │
│ 565 │ │ │ isinstance(self.config, FeaturePredictionConfig) │
│ 566 │ │ │ and self.config.model.variance_predictors.pitch.level == " │
│ │
│ /gpfs/fs5/nrc/nrc-fs1/ict/others/u/tes001/TxT2SPEECH/EveryVoice/everyvoice/p │
│ reprocessor/preprocessor.py:244 in extract_pitch │
│ │
│ 241 │ │ │ self.input_sampling_rate, │
│ 242 │ │ ) │
│ 243 │ │ pitch[pitch == 0] = np.nan │
│ ❱ 244 │ │ pitch = self._interpolate(pitch) │
│ 245 │ │ pitch = torch.tensor(pitch).float() │
│ 246 │ │ return pitch │
│ 247 │
│ │
│ /gpfs/fs5/nrc/nrc-fs1/ict/others/u/tes001/TxT2SPEECH/EveryVoice/everyvoice/p │
│ reprocessor/preprocessor.py:206 in _interpolate │
│ │
│ 203 │ │ │ return np.isnan(y), lambda z: z.nonzero()[0] │
│ 204 │ │ │
│ 205 │ │ nans, y = nan_helper(x) │
│ ❱ 206 │ │ x[nans] = np.interp(y(nans), y(~nans), x[~nans]) │
│ 207 │ │ return x │
│ 208 │ │
│ 209 │ def extract_pitch(self, audio_tensor: torch.Tensor): │
│ │
│ /home/tes001/u/TxT2SPEECH/miniconda3_u20/envs/EveryVoice/lib/python3.10/site │
│ -packages/numpy/lib/function_base.py:1599 in interp │
│ │
│ 1596 │ │ xp = np.concatenate((xp[-1:]-period, xp, xp[0:1]+period)) │
│ 1597 │ │ fp = np.concatenate((fp[-1:], fp, fp[0:1])) │
│ 1598 │ │
│ ❱ 1599 │ return interp_func(x, xp, fp, left, right) │
│ 1600 │
│ 1601 │
│ 1602 def _angle_dispatcher(z, deg=None): │
╰──────────────────────────────────────────────────────────────────────────────╯
ValueError: array of sample points is empty
============ Finished job 1950661 on Fri 23 Feb 2024 02:15:09 PM EST with rc=1
I have a large ENG dataset where some of the wav files will cause "EveryVoice" to crash while processing "pitch".
When this crash happens it is also VERY HARD to pinpoint the exact file causing the issue (e.g., I had to run manual diffs comparing the files that were created against the files that were processed...).
See the attached example wav file :
file.zip
zarathustra_72_nietzsche_0015|"hush!|9017|eng