Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
396 changes: 385 additions & 11 deletions deep_speech_2/data_utils/audio.py

Large diffs are not rendered by default.

401 changes: 401 additions & 0 deletions deep_speech_2/data_utils/augmentor/audio_database.py

Large diffs are not rendered by default.

15 changes: 15 additions & 0 deletions deep_speech_2/data_utils/augmentor/augmentation.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,11 @@
import json
import random
from data_utils.augmentor.volume_perturb import VolumePerturbAugmentor
from data_utils.augmentor.resamler import ResamplerAugmentor
from data_utils.augmentor.speed_perturb import SpeedPerturbatioAugmentor
from data_utils.augmentor.online_bayesian_normalization import OnlineBayesianNormalizationAugmentor
from data_utils.augmentor.Impulse_response import ImpulseResponseAugmentor
from data_utils.augmentor.noise_speech import NoiseSpeechAugmentor


class AugmentationPipeline(object):
Expand Down Expand Up @@ -76,5 +81,15 @@ def _get_augmentor(self, augmentor_type, params):
"""Return an augmentation model by the type name, and pass in params."""
if augmentor_type == "volume":
return VolumePerturbAugmentor(self._rng, **params)
if augmentor_type == "resamle":
return ResamplerAugmentor(self._rng, **params)
if augmentor_type == "speed":
return SpeedPerturbatioAugmentor(self._rng, **params)
if augmentor_type == "online_bayesian_normalization":
return OnlineBayesianNormalizationAugmentor(self._rng, **params)
if augmentor_type == "Impulse_response":
return ImpulseResponseAugmentor(self._rng, **params)
if augmentor_type == "noise_speech":
return NoiseSpeechAugmentor(self._rng, **params)
else:
raise ValueError("Unknown augmentor type [%s]." % augmentor_type)
76 changes: 76 additions & 0 deletions deep_speech_2/data_utils/augmentor/implus_response.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
""" Impulse response"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

from . import base
from . import audio_database
from data_utils.speech import SpeechSegment


class ImpulseResponseAugmentor(base.AugmentorBase):
    """Augmentation model that convolves audio with an impulse response.

    An impulse response is sampled from an audio index built from
    ``ir_dir`` and ``index_file`` and applied to each transformed segment.

    :param rng: Random generator object, forwarded to the audio index when
                sampling an impulse response.
    :param ir_dir: Directory containing impulse responses.
    :type ir_dir: basestring
    :param index_file: Index file handed to the audio index together with
                       ``ir_dir`` and ``tags`` when records are refreshed.
    :type index_file: basestring
    :param tags: Optional parameter for specifying what particular impulse
                 responses to apply.
    :type tags: list
    :param tag_distr: Optional distribution forwarded to the audio index
                      when checking for and sampling impulse responses.
    :type tag_distr: dict
    """

    def __init__(self, rng, ir_dir, index_file, tags=None, tag_distr=None):
        # Where the impulse responses live and how they are indexed.
        self.ir_dir = ir_dir
        self.index_file = index_file

        # Optional restrictions on which impulse responses get sampled.
        self.tags = tags
        self.tag_distr = tag_distr

        self.audio_index = audio_database.AudioIndex()
        self.rng = rng

    def _init_data(self):
        """Refresh the audio index records from the index file.

        Preloads information from disk (e.g. the list of files) in an
        attempt to make later loading faster.

        NOTE(review): the index is presumably a no-op when the data
        configuration is unchanged; that logic lives in ``AudioIndex`` and
        is not verified here.
        """
        self.audio_index.refresh_records_from_index_file(
            self.ir_dir, self.index_file, self.tags)

    def transform_audio(self, audio_segment):
        """Convolve the input audio with a sampled impulse response.

        The return value of ``audio_segment.convolve`` is not captured, so
        the segment is expected to be modified in place. If the index
        reports that sampling did not succeed, the segment is left
        untouched.

        :param audio_segment: Input audio.
        :type audio_segment: AudioSegment
        :raises RuntimeError: If the index holds no impulse response that
                              matches the configured tags / distribution.
        """
        # This handles the cases where the data source or directories change.
        self._init_data()

        tag_distr = self.tag_distr
        if not self.audio_index.has_audio(tag_distr):
            # Pick the message that names the constraint which could not be
            # satisfied: distribution first, then tags, then no constraint.
            if tag_distr is not None:
                reason = ("The ir index does not have audio "
                          "files to match the target ir "
                          "distribution.")
            elif self.tags:
                reason = ("The ir index does not have audio "
                          "files of the given tags to sample "
                          "from.")
            else:
                reason = ("The ir index does not have audio "
                          "files to sample from.")
            raise RuntimeError(reason)

        # Querying with a negative duration triggers the index to search
        # from all impulse responses.
        success, record = self.audio_index.sample_audio(
            -1.0, rng=self.rng, distr=tag_distr)
        # Truthiness rather than ``is True`` so that a truthy non-builtin
        # flag (e.g. a numpy bool) does not silently skip the augmentation.
        if not success:
            return

        # Only the file name (third field of the record) is needed here.
        _, _, ir_fname = record
        ir_wav = SpeechSegment.from_file(ir_fname)
        audio_segment.convolve(ir_wav, allow_resampling=True)
Loading