Skip to content

Commit 4f940cb

Browse files
authored
fix: 🐛 allow streaming=False in get_rows (#207)
Fixes #206.
1 parent 623606d commit 4f940cb

3 files changed

Lines changed: 21 additions & 2 deletions

File tree

src/datasets_preview_backend/config.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
DEFAULT_LOG_LEVEL,
1313
DEFAULT_MAX_AGE_LONG_SECONDS,
1414
DEFAULT_MAX_AGE_SHORT_SECONDS,
15+
DEFAULT_MAX_SIZE_FALLBACK,
1516
DEFAULT_MONGO_CACHE_DATABASE,
1617
DEFAULT_MONGO_QUEUE_DATABASE,
1718
DEFAULT_MONGO_URL,
@@ -50,6 +51,7 @@
5051
os.environ["HF_SCRIPTS_VERSION"] = DATASETS_REVISION
5152

5253
# for tests - to be removed
54+
MAX_SIZE_FALLBACK = get_int_value(os.environ, "MAX_SIZE_FALLBACK", DEFAULT_MAX_SIZE_FALLBACK)
5355
ROWS_MAX_BYTES = get_int_value(d=os.environ, key="ROWS_MAX_BYTES", default=DEFAULT_ROWS_MAX_BYTES)
5456
ROWS_MAX_NUMBER = get_int_value(d=os.environ, key="ROWS_MAX_NUMBER", default=DEFAULT_ROWS_MAX_NUMBER)
5557
ROWS_MIN_NUMBER = get_int_value(d=os.environ, key="ROWS_MIN_NUMBER", default=DEFAULT_ROWS_MIN_NUMBER)

src/datasets_preview_backend/models/row.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def get_rows(
2828
dataset_name,
2929
name=config_name,
3030
split=split_name,
31-
streaming=True,
31+
streaming=streaming,
3232
download_mode=DownloadMode.FORCE_REDOWNLOAD,
3333
use_auth_token=hf_token,
3434
)

tests/models/test_split.py

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from datasets_preview_backend.config import HF_TOKEN, ROWS_MAX_NUMBER
1+
from datasets_preview_backend.config import HF_TOKEN, MAX_SIZE_FALLBACK, ROWS_MAX_NUMBER
22
from datasets_preview_backend.models.split import get_split
33

44
# TODO: test fallback
@@ -24,4 +24,21 @@ def test_gated() -> None:
2424
assert split["rows_response"]["rows"][0]["row"]["year"] == "1855"
2525

2626

27+
def test_fallback() -> None:
28+
# https://github.com/huggingface/datasets/issues/3185
29+
dataset_name = "samsum"
30+
config_name = "samsum"
31+
split_name = "train"
32+
split = get_split(
33+
dataset_name,
34+
config_name,
35+
split_name,
36+
HF_TOKEN,
37+
rows_max_number=ROWS_MAX_NUMBER,
38+
max_size_fallback=MAX_SIZE_FALLBACK,
39+
)
40+
41+
assert len(split["rows_response"]["rows"]) == ROWS_MAX_NUMBER
42+
43+
2744
# TODO: test the truncation

0 commit comments

Comments
 (0)