Skip to content

Commit 3eeca2c

Browse files
Mark tests that require librosa (#7044)
* Implement test require_librosa
* Mark tests that require librosa
* Mark tests in test_audiofolder with require_librosa
* Mark test in test_upstream_hub with require_librosa
1 parent f26ab28 commit 3eeca2c

File tree

6 files changed, with 49 additions and 6 deletions.

tests/features/test_audio.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
from datasets.features import Audio, Features, Sequence, Value
99

1010
from ..utils import (
11+
require_librosa,
1112
require_sndfile,
1213
)
1314

@@ -57,6 +58,7 @@ def test_audio_feature_type_to_arrow():
5758
assert features.arrow_schema == pa.schema({"sequence_of_audios": pa.list_(Audio().pa_type)})
5859

5960

61+
@require_librosa
6062
@pytest.mark.parametrize(
6163
"build_example",
6264
[
@@ -81,6 +83,7 @@ def test_audio_feature_encode_example(shared_datadir, build_example):
8183
assert decoded_example.keys() == {"path", "array", "sampling_rate"}
8284

8385

86+
@require_librosa
8487
@pytest.mark.parametrize(
8588
"build_example",
8689
[
@@ -101,6 +104,7 @@ def test_audio_feature_encode_example_pcm(shared_datadir, build_example):
101104
assert decoded_example.keys() == {"path", "array", "sampling_rate"}
102105

103106

107+
@require_librosa
104108
@require_sndfile
105109
def test_audio_decode_example(shared_datadir):
106110
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -115,6 +119,7 @@ def test_audio_decode_example(shared_datadir):
115119
Audio(decode=False).decode_example(audio_path)
116120

117121

122+
@require_librosa
118123
@require_sndfile
119124
def test_audio_resampling(shared_datadir):
120125
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -126,6 +131,7 @@ def test_audio_resampling(shared_datadir):
126131
assert decoded_example["sampling_rate"] == 16000
127132

128133

134+
@require_librosa
129135
@require_sndfile
130136
def test_audio_decode_example_mp3(shared_datadir):
131137
audio_path = str(shared_datadir / "test_audio_44100.mp3")
@@ -137,6 +143,7 @@ def test_audio_decode_example_mp3(shared_datadir):
137143
assert decoded_example["sampling_rate"] == 44100
138144

139145

146+
@require_librosa
140147
@require_sndfile
141148
def test_audio_decode_example_opus(shared_datadir):
142149
audio_path = str(shared_datadir / "test_audio_48000.opus")
@@ -148,6 +155,7 @@ def test_audio_decode_example_opus(shared_datadir):
148155
assert decoded_example["sampling_rate"] == 48000
149156

150157

158+
@require_librosa
151159
@pytest.mark.parametrize("sampling_rate", [16_000, 48_000])
152160
def test_audio_decode_example_pcm(shared_datadir, sampling_rate):
153161
audio_path = str(shared_datadir / "test_audio_16000.pcm")
@@ -160,6 +168,7 @@ def test_audio_decode_example_pcm(shared_datadir, sampling_rate):
160168
assert decoded_example["sampling_rate"] == sampling_rate
161169

162170

171+
@require_librosa
163172
@require_sndfile
164173
def test_audio_resampling_mp3_different_sampling_rates(shared_datadir):
165174
audio_path = str(shared_datadir / "test_audio_44100.mp3")
@@ -179,6 +188,7 @@ def test_audio_resampling_mp3_different_sampling_rates(shared_datadir):
179188
assert decoded_example["sampling_rate"] == 48000
180189

181190

191+
@require_librosa
182192
@require_sndfile
183193
def test_dataset_with_audio_feature(shared_datadir):
184194
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -206,6 +216,7 @@ def test_dataset_with_audio_feature(shared_datadir):
206216
assert column[0]["sampling_rate"] == 44100
207217

208218

219+
@require_librosa
209220
@require_sndfile
210221
def test_dataset_with_audio_feature_tar_wav(tar_wav_path):
211222
audio_filename = "test_audio_44100.wav"
@@ -236,6 +247,7 @@ def test_dataset_with_audio_feature_tar_wav(tar_wav_path):
236247
assert column[0]["sampling_rate"] == 44100
237248

238249

250+
@require_librosa
239251
@require_sndfile
240252
def test_dataset_with_audio_feature_tar_mp3(tar_mp3_path):
241253
audio_filename = "test_audio_44100.mp3"
@@ -300,6 +312,7 @@ def test_dataset_with_audio_feature_with_none():
300312
assert item["nested"]["audio"] is None
301313

302314

315+
@require_librosa
303316
@require_sndfile
304317
def test_resampling_at_loading_dataset_with_audio_feature(shared_datadir):
305318
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -327,6 +340,7 @@ def test_resampling_at_loading_dataset_with_audio_feature(shared_datadir):
327340
assert column[0]["sampling_rate"] == 16000
328341

329342

343+
@require_librosa
330344
@require_sndfile
331345
def test_resampling_at_loading_dataset_with_audio_feature_mp3(shared_datadir):
332346
audio_path = str(shared_datadir / "test_audio_44100.mp3")
@@ -354,6 +368,7 @@ def test_resampling_at_loading_dataset_with_audio_feature_mp3(shared_datadir):
354368
assert column[0]["sampling_rate"] == 16000
355369

356370

371+
@require_librosa
357372
@require_sndfile
358373
def test_resampling_after_loading_dataset_with_audio_feature(shared_datadir):
359374
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -384,6 +399,7 @@ def test_resampling_after_loading_dataset_with_audio_feature(shared_datadir):
384399
assert column[0]["sampling_rate"] == 16000
385400

386401

402+
@require_librosa
387403
@require_sndfile
388404
def test_resampling_after_loading_dataset_with_audio_feature_mp3(shared_datadir):
389405
audio_path = str(shared_datadir / "test_audio_44100.mp3")
@@ -414,6 +430,7 @@ def test_resampling_after_loading_dataset_with_audio_feature_mp3(shared_datadir)
414430
assert column[0]["sampling_rate"] == 16000
415431

416432

433+
@require_librosa
417434
@pytest.mark.parametrize(
418435
"build_data",
419436
[
@@ -438,6 +455,7 @@ def test_dataset_cast_to_audio_features(shared_datadir, build_data):
438455
assert item["audio"].keys() == {"path", "array", "sampling_rate"}
439456

440457

458+
@require_librosa
441459
def test_dataset_concatenate_audio_features(shared_datadir):
442460
# we use a different data structure between 1 and 2 to make sure they are compatible with each other
443461
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -451,6 +469,7 @@ def test_dataset_concatenate_audio_features(shared_datadir):
451469
assert concatenated_dataset[1]["audio"]["array"].shape == dset2[0]["audio"]["array"].shape
452470

453471

472+
@require_librosa
454473
def test_dataset_concatenate_nested_audio_features(shared_datadir):
455474
# we use a different data structure between 1 and 2 to make sure they are compatible with each other
456475
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -493,6 +512,7 @@ def process_text(example):
493512
assert item == {"audio": expected_audio, "text": "Hello World!"}
494513

495514

515+
@require_librosa
496516
@require_sndfile
497517
def test_dataset_with_audio_feature_map_is_decoded(shared_datadir):
498518
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -522,6 +542,7 @@ def process_audio_sampling_rate_by_batch(batch):
522542
assert item["double_sampling_rate"] == 88200
523543

524544

545+
@require_librosa
525546
@require_sndfile
526547
def test_formatted_dataset_with_audio_feature(shared_datadir):
527548
audio_path = str(shared_datadir / "test_audio_44100.wav")
@@ -585,6 +606,7 @@ def jsonl_audio_dataset_path(shared_datadir, tmp_path_factory):
585606
return path
586607

587608

609+
@require_librosa
588610
@require_sndfile
589611
@pytest.mark.parametrize("streaming", [False, True])
590612
def test_load_dataset_with_audio_feature(streaming, jsonl_audio_dataset_path, shared_datadir):

tests/packaged_modules/test_audiofolder.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import shutil
22
import textwrap
33

4-
import librosa
54
import numpy as np
65
import pytest
76
import soundfile as sf
@@ -12,7 +11,7 @@
1211
from datasets.download.streaming_download_manager import StreamingDownloadManager
1312
from datasets.packaged_modules.audiofolder.audiofolder import AudioFolder, AudioFolderConfig
1413

15-
from ..utils import require_sndfile
14+
from ..utils import require_librosa, require_sndfile
1615

1716

1817
@pytest.fixture
@@ -195,6 +194,8 @@ def data_files_with_two_splits_and_metadata(request, tmp_path, audio_file):
195194

196195
@pytest.fixture
197196
def data_files_with_zip_archives(tmp_path, audio_file):
197+
import librosa
198+
198199
data_dir = tmp_path / "audiofolder_data_dir_with_zip_archives"
199200
data_dir.mkdir(parents=True, exist_ok=True)
200201
archive_dir = data_dir / "archive"
@@ -242,6 +243,7 @@ def test_config_raises_when_invalid_data_files(data_files) -> None:
242243
_ = AudioFolderConfig(name="name", data_files=data_files)
243244

244245

246+
@require_librosa
245247
@require_sndfile
246248
# check that labels are inferred correctly from dir names
247249
def test_generate_examples_with_labels(data_files_with_labels_no_metadata, cache_dir):
@@ -256,6 +258,7 @@ def test_generate_examples_with_labels(data_files_with_labels_no_metadata, cache
256258
assert dataset[1]["label"] == label_feature._str2int["uk"]
257259

258260

261+
@require_librosa
259262
@require_sndfile
260263
@pytest.mark.parametrize("drop_metadata", [None, True, False])
261264
@pytest.mark.parametrize("drop_labels", [None, True, False])
@@ -385,6 +388,7 @@ def test_generate_examples_with_metadata_that_misses_one_audio(
385388
)
386389

387390

391+
@require_librosa
388392
@require_sndfile
389393
@pytest.mark.parametrize("streaming", [False, True])
390394
def test_data_files_with_metadata_and_single_split(streaming, cache_dir, data_files_with_one_split_and_metadata):
@@ -403,6 +407,7 @@ def test_data_files_with_metadata_and_single_split(streaming, cache_dir, data_fi
403407
assert all(example["text"] is not None for example in dataset)
404408

405409

410+
@require_librosa
406411
@require_sndfile
407412
@pytest.mark.parametrize("streaming", [False, True])
408413
def test_data_files_with_metadata_and_multiple_splits(streaming, cache_dir, data_files_with_two_splits_and_metadata):
@@ -421,6 +426,7 @@ def test_data_files_with_metadata_and_multiple_splits(streaming, cache_dir, data
421426
assert all(example["text"] is not None for example in dataset)
422427

423428

429+
@require_librosa
424430
@require_sndfile
425431
@pytest.mark.parametrize("streaming", [False, True])
426432
def test_data_files_with_metadata_and_archives(streaming, cache_dir, data_files_with_zip_archives):

tests/packaged_modules/test_webdataset.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from datasets import Audio, DownloadManager, Features, Image, Sequence, Value
88
from datasets.packaged_modules.webdataset.webdataset import WebDataset
99

10-
from ..utils import require_pil, require_sndfile, require_torch
10+
from ..utils import require_librosa, require_pil, require_sndfile, require_torch
1111

1212

1313
@pytest.fixture
@@ -159,6 +159,7 @@ def test_image_webdataset_missing_keys(image_wds_file):
159159
assert decoded["txt"] is None
160160

161161

162+
@require_librosa
162163
@require_sndfile
163164
def test_audio_webdataset(audio_wds_file):
164165
data_files = {"train": [audio_wds_file]}

tests/test_formatting.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,15 @@
1818
)
1919
from datasets.table import InMemoryTable
2020

21-
from .utils import require_jax, require_pil, require_polars, require_sndfile, require_tf, require_torch
21+
from .utils import (
22+
require_jax,
23+
require_librosa,
24+
require_pil,
25+
require_polars,
26+
require_sndfile,
27+
require_tf,
28+
require_torch,
29+
)
2230

2331

2432
class AnyArray:
@@ -300,6 +308,7 @@ def test_numpy_formatter_image(self):
300308
self.assertEqual(batch["image"][0].dtype, np.uint8)
301309
self.assertEqual(batch["image"][0].shape, (480, 640, 3))
302310

311+
@require_librosa
303312
@require_sndfile
304313
def test_numpy_formatter_audio(self):
305314
pa_table = pa.table({"audio": [{"bytes": None, "path": str(AUDIO_PATH_1)}]})
@@ -419,6 +428,7 @@ def test_torch_formatter_image(self):
419428
self.assertEqual(batch["image"][0].shape, (3, 480, 640))
420429

421430
@require_torch
431+
@require_librosa
422432
@require_sndfile
423433
def test_torch_formatter_audio(self):
424434
import torch
@@ -602,6 +612,7 @@ def test_jax_formatter_image(self):
602612
self.assertEqual(batch["image"][0].shape, (480, 640, 3))
603613

604614
@require_jax
615+
@require_librosa
605616
@require_sndfile
606617
def test_jax_formatter_audio(self):
607618
import jax.numpy as jnp

tests/test_upstream_hub.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,9 @@
3434
)
3535
from datasets.utils.file_utils import cached_path
3636
from datasets.utils.hub import hf_dataset_url
37-
from tests.fixtures.hub import CI_HUB_ENDPOINT, CI_HUB_USER, CI_HUB_USER_TOKEN
38-
from tests.utils import for_all_test_methods, require_pil, require_sndfile, xfail_if_500_502_http_error
37+
38+
from .fixtures.hub import CI_HUB_ENDPOINT, CI_HUB_USER, CI_HUB_USER_TOKEN
39+
from .utils import for_all_test_methods, require_librosa, require_pil, require_sndfile, xfail_if_500_502_http_error
3940

4041

4142
pytestmark = pytest.mark.integration
@@ -383,6 +384,7 @@ def test_push_dataset_to_hub_custom_features(self, temporary_repo):
383384
assert ds.features == hub_ds.features
384385
assert ds[:] == hub_ds[:]
385386

387+
@require_librosa
386388
@require_sndfile
387389
def test_push_dataset_to_hub_custom_features_audio(self, temporary_repo):
388390
audio_path = os.path.join(os.path.dirname(__file__), "features", "data", "test_audio_44100.wav")

tests/utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def parse_flag_from_env(key, default=False):
4848
require_zstandard = pytest.mark.skipif(not config.ZSTANDARD_AVAILABLE, reason="test requires zstandard")
4949

5050
# Audio
51+
require_librosa = pytest.mark.skipif(find_spec("librosa") is None, reason="test requires librosa")
5152
require_sndfile = pytest.mark.skipif(
5253
# On Windows and OS X, soundfile installs sndfile
5354
find_spec("soundfile") is None or version.parse(importlib.metadata.version("soundfile")) < version.parse("0.12.0"),

0 commit comments

Comments
 (0)