Skip to content
Merged
Changes from 2 commits
Commits
Show all changes
22 commits
Select commit Hold shift + click to select a range
af32b1e
Update: add using pcm bytes
YooSungHyun May 26, 2022
c86c1b9
re make style
YooSungHyun May 26, 2022
40cc82c
Update src/datasets/features/audio.py
YooSungHyun Jun 2, 2022
d4d25ee
Update src/datasets/features/audio.py
YooSungHyun Jun 2, 2022
4e70447
Update src/datasets/features/audio.py
YooSungHyun Jun 2, 2022
e2299db
delete: wrong comment
YooSungHyun Jun 2, 2022
f0f9d1f
Update: sampling_rate usage & test source update
YooSungHyun Jun 14, 2022
0f01966
Update: pcm2wav bytes don`t need path
YooSungHyun Jun 24, 2022
f7e8dc9
Update: we can get wav style bytes to pcm, so we can read to soundfil…
YooSungHyun Jun 24, 2022
9c358bb
Update: pcm doesn`t use path, so check 'None'
YooSungHyun Jun 24, 2022
ebb0bf8
Merge branch 'master' of https://github.com/YooSungHyun/datasets into…
YooSungHyun Jun 24, 2022
c445127
Merge branch 'huggingface:master' into YooSungHyun/features/audio
YooSungHyun Jun 24, 2022
c04f334
Merge branch 'YooSungHyun/features/audio' of https://github.com/YooSu…
YooSungHyun Jun 24, 2022
a19f7c0
Update: not used self`s sampling_rate
YooSungHyun Jun 29, 2022
3d45eca
Update: add sampling_rate
YooSungHyun Jun 29, 2022
93376f3
Update: sampling_rate variable
YooSungHyun Jun 29, 2022
6c0ede9
Update tests/features/test_audio.py
YooSungHyun Jun 29, 2022
b08489e
Update tests/features/test_audio.py
YooSungHyun Jun 29, 2022
f0a1c43
Update tests/features/test_audio.py
YooSungHyun Jun 29, 2022
1d7803e
Merge branch 'YooSungHyun/features/audio' of https://github.com/YooSu…
YooSungHyun Jun 29, 2022
28b26cc
Update: replace get sampling_rate
YooSungHyun Jun 29, 2022
2620c2f
Apply suggestions from code review
lhoestq Jul 7, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 21 additions & 1 deletion src/datasets/features/audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,10 @@
from io import BytesIO
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Union

import numpy as np
import pyarrow as pa
from packaging import version
from scipy.io import wavfile

from .. import config
from ..download.streaming_download_manager import xopen
Expand Down Expand Up @@ -92,7 +94,23 @@ def encode_example(self, value: Union[str, dict]) -> dict:
return {"bytes": buffer.getvalue(), "path": None}
elif value.get("path") is not None and os.path.isfile(value["path"]):
# we set "bytes": None to not duplicate the data if they're already available locally
return {"bytes": None, "path": value.get("path")}
if value.get("path").endswith("pcm"):
# A "PCM" file only contains raw audio data as bytes.
if value.get("sampling_rate") is None:
# To convert PCM bytes to WAV bytes, the sampling rate must be known.
raise KeyError("If you want to use pcm, you have to know your 'sampling_rate'!")
if value.get("bytes"):
# If the PCM bytes are already provided, use them directly instead of reading the file again.
# The int16 -> float32 conversion follows fairseq's handling of PCM data (FileAudioDataset).
bytes_value = np.frombuffer(value["bytes"], dtype=np.int16).astype(np.float32) / 32767
else:
bytes_value = np.memmap(value["path"], dtype="h", mode="r").astype(np.float32) / 32767

buffer = BytesIO(bytes())
wavfile.write(buffer, value["sampling_rate"], bytes_value)
return {"bytes": buffer.getvalue(), "path": value.get("path")}
else:
return {"bytes": None, "path": value.get("path")}
elif value.get("bytes") is not None or value.get("path") is not None:
# store the audio bytes, and path is used to infer the audio format using the file extension
return {"bytes": value.get("bytes"), "path": value.get("path")}
Expand Down Expand Up @@ -131,6 +149,8 @@ def decode_example(self, value: dict, token_per_repo_id=None) -> dict:
array, sampling_rate = self._decode_non_mp3_path_like(
path, "opus", token_per_repo_id=token_per_repo_id
)
elif path is not None and path.endswith("pcm"):
sampling_rate, array = wavfile.read(file)
else:
if file:
array, sampling_rate = self._decode_non_mp3_file_like(file)
Expand Down