Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions nemo/collections/common/data/lhotse/nemo_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ def __iter__(self) -> Generator[Cut, None, None]:
recording_id=cut.recording_id,
start=0,
duration=cut.duration,
channel=cut.channel,
text=data.get(self.text_field),
language=data.get(self.lang_field),
)
Expand Down
66 changes: 64 additions & 2 deletions tests/collections/common/test_lhotse_nemo_adapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,9 @@

import numpy as np
import pytest
from lhotse import AudioSource, CutSet, MonoCut, Recording, SupervisionSegment
from lhotse import AudioSource, CutSet, MonoCut, MultiCut, Recording, SupervisionSegment
from lhotse.serialization import save_to_jsonl
from lhotse.testing.dummies import DummyManifest
from lhotse.testing.dummies import DummyManifest, dummy_multi_cut, dummy_supervision

from nemo.collections.common.data.lhotse.nemo_adapters import LazyNeMoIterator

Expand All @@ -41,6 +41,29 @@ def nemo_manifest_path(tmp_path_factory):
return p


@pytest.fixture
def nemo_manifest_path_multichannel(tmp_path_factory):
"""2 utterances of length 1s with 3 channels as a NeMo manifest."""
tmpdir = tmp_path_factory.mktemp("nemo_data")
cuts = CutSet.from_cuts(
dummy_multi_cut(idx, supervisions=[dummy_supervision(idx)], channel=[0, 1, 2], with_data=True)
for idx in range(0, 2)
).save_audios(tmpdir, progress_bar=False)
nemo = []
for c in cuts:
nemo.append(
{
"audio_filepath": c.recording.sources[0].source,
"text": "irrelevant",
"duration": c.duration,
"lang": "en",
}
)
p = tmpdir / "nemo_manifest_multichannel.json"
save_to_jsonl(nemo, p)
return p


def test_lazy_nemo_iterator(nemo_manifest_path):
cuts = CutSet(LazyNeMoIterator(nemo_manifest_path))

Expand Down Expand Up @@ -78,6 +101,45 @@ def test_lazy_nemo_iterator(nemo_manifest_path):
assert s.language == "en"


def test_lazy_nemo_iterator_multichannel(nemo_manifest_path_multichannel):
cuts = CutSet(LazyNeMoIterator(nemo_manifest_path_multichannel))

assert len(cuts) == 2

for c in cuts:
assert isinstance(c, MultiCut)
assert c.start == 0.0
assert c.duration == 1.0
assert c.num_channels == 3
assert c.channel == [0, 1, 2] # cuts have three channels
assert c.sampling_rate == 16000
assert c.num_samples == 16000

assert c.has_recording
assert isinstance(c.recording, Recording)
assert c.recording.duration == 1.0
assert c.recording.num_channels == 3
assert c.recording.num_samples == 16000
assert len(c.recording.sources) == 1
assert isinstance(c.recording.sources[0], AudioSource)
assert c.recording.sources[0].type == "file"
assert c.recording.sources[0].channels == c.channel # recording has same channels as the cut

audio = c.load_audio()
assert isinstance(audio, np.ndarray)
assert audio.shape == (c.num_channels, 16000) # audio has same num_channels as the cut
assert audio.dtype == np.float32

assert len(c.supervisions) == 1
s = c.supervisions[0]
assert isinstance(s, SupervisionSegment)
assert s.start == 0
assert s.duration == 1
assert s.channel == c.channel # supervision has same channels as the cut
assert s.text == "irrelevant"
assert s.language == "en"


@pytest.fixture
def nemo_offset_manifest_path(tmp_path_factory):
"""
Expand Down
Loading