Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions src/datasets/features/audio.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from dataclasses import dataclass, field
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union

import numpy as np
Expand All @@ -25,6 +26,7 @@ class Audio:

Input: The Audio feature accepts as input:
- A `str`: Absolute path to the audio file (i.e. random access is allowed).
- A `pathlib.Path`: Path to the audio file; it is converted to an absolute path string when encoded (i.e. random access is allowed).
- A `dict` with the keys:

- `path`: String with relative path of the audio file to the archive file.
Expand Down Expand Up @@ -112,6 +114,8 @@ def encode_example(self, value: Union[str, bytes, bytearray, dict, "AudioDecoder

if isinstance(value, str):
return {"bytes": None, "path": value}
elif isinstance(value, Path):
return {"bytes": None, "path": str(value.absolute())}
elif isinstance(value, (bytes, bytearray)):
return {"bytes": value, "path": None}
elif AudioDecoder is not None and isinstance(value, AudioDecoder):
Expand Down
4 changes: 4 additions & 0 deletions src/datasets/features/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import warnings
from dataclasses import dataclass, field
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Optional, Union

import numpy as np
Expand Down Expand Up @@ -48,6 +49,7 @@ class Image:

Input: The Image feature accepts as input:
- A `str`: Absolute path to the image file (i.e. random access is allowed).
- A `pathlib.Path`: Path to the image file; it is converted to an absolute path string when encoded (i.e. random access is allowed).
- A `dict` with the keys:

- `path`: String with relative path of the image file to the archive file.
Expand Down Expand Up @@ -113,6 +115,8 @@ def encode_example(self, value: Union[str, bytes, bytearray, dict, np.ndarray, "

if isinstance(value, str):
return {"path": value, "bytes": None}
elif isinstance(value, Path):
return {"path": str(value.absolute()), "bytes": None}
elif isinstance(value, (bytes, bytearray)):
return {"path": None, "bytes": value}
elif isinstance(value, np.ndarray):
Expand Down
4 changes: 4 additions & 0 deletions src/datasets/features/pdf.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
from dataclasses import dataclass, field
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Dict, Optional, Union

import pyarrow as pa
Expand Down Expand Up @@ -34,6 +35,7 @@ class Pdf:

Input: The Pdf feature accepts as input:
- A `str`: Absolute path to the pdf file (i.e. random access is allowed).
- A `pathlib.Path`: Path to the pdf file; it is converted to an absolute path string when encoded (i.e. random access is allowed).
- A `dict` with the keys:
- `path`: String with relative path of the pdf file in a dataset repository.
- `bytes`: Bytes of the pdf file.
Expand Down Expand Up @@ -92,6 +94,8 @@ def encode_example(self, value: Union[str, bytes, bytearray, dict, "pdfplumber.p

if isinstance(value, str):
return {"path": value, "bytes": None}
elif isinstance(value, Path):
return {"path": str(value.absolute()), "bytes": None}
elif isinstance(value, (bytes, bytearray)):
return {"path": None, "bytes": value}
elif pdfplumber is not None and isinstance(value, pdfplumber.pdf.PDF):
Expand Down
4 changes: 4 additions & 0 deletions src/datasets/features/video.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import os
from dataclasses import dataclass, field
from pathlib import Path
from typing import TYPE_CHECKING, Any, ClassVar, Literal, Optional, TypedDict, Union

import numpy as np
Expand Down Expand Up @@ -31,6 +32,7 @@ class Video:

Input: The Video feature accepts as input:
- A `str`: Absolute path to the video file (i.e. random access is allowed).
- A `pathlib.Path`: Path to the video file; it is converted to an absolute path string when encoded (i.e. random access is allowed).
- A `dict` with the keys:

- `path`: String with relative path of the video file in a dataset repository.
Expand Down Expand Up @@ -125,6 +127,8 @@ def encode_example(self, value: Union[str, bytes, bytearray, Example, np.ndarray

if isinstance(value, str):
return {"path": value, "bytes": None}
elif isinstance(value, Path):
return {"path": str(value.absolute()), "bytes": None}
elif isinstance(value, (bytes, bytearray)):
return {"path": None, "bytes": value}
elif isinstance(value, np.ndarray):
Expand Down
2 changes: 2 additions & 0 deletions tests/features/test_audio.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import os
import tarfile
from itertools import product
from pathlib import Path

import numpy as np
import pyarrow as pa
Expand Down Expand Up @@ -64,6 +65,7 @@ def test_audio_feature_type_to_arrow():
"build_example",
[
lambda audio_path: audio_path,
lambda audio_path: Path(audio_path),
lambda audio_path: open(audio_path, "rb").read(),
lambda audio_path: {"path": audio_path},
lambda audio_path: {"path": audio_path, "bytes": None},
Expand Down
2 changes: 2 additions & 0 deletions tests/features/test_image.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import tarfile
import warnings
from io import BytesIO
from pathlib import Path

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -54,6 +55,7 @@ def test_image_feature_type_to_arrow():
"build_example",
[
lambda image_path: image_path,
lambda image_path: Path(image_path),
lambda image_path: open(image_path, "rb").read(),
lambda image_path: {"path": image_path},
lambda image_path: {"path": image_path, "bytes": None},
Expand Down
3 changes: 3 additions & 0 deletions tests/features/test_pdf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import pytest

from datasets import Dataset, Features, Pdf
Expand All @@ -10,6 +12,7 @@
"build_example",
[
lambda pdf_path: pdf_path,
lambda pdf_path: Path(pdf_path),
lambda pdf_path: open(pdf_path, "rb").read(),
lambda pdf_path: {"path": pdf_path},
lambda pdf_path: {"path": pdf_path, "bytes": None},
Expand Down
3 changes: 3 additions & 0 deletions tests/features/test_video.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from pathlib import Path

import pytest

from datasets import Column, Dataset, Features, Value, Video, load_dataset
Expand All @@ -10,6 +12,7 @@
"build_example",
[
lambda video_path: video_path,
lambda video_path: Path(video_path),
lambda video_path: open(video_path, "rb").read(),
lambda video_path: {"path": video_path},
lambda video_path: {"path": video_path, "bytes": None},
Expand Down
Loading