File tree Expand file tree Collapse file tree
src/datasets_preview_backend Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1212 DEFAULT_LOG_LEVEL ,
1313 DEFAULT_MAX_AGE_LONG_SECONDS ,
1414 DEFAULT_MAX_AGE_SHORT_SECONDS ,
15+ DEFAULT_MAX_SIZE_FALLBACK ,
1516 DEFAULT_MONGO_CACHE_DATABASE ,
1617 DEFAULT_MONGO_QUEUE_DATABASE ,
1718 DEFAULT_MONGO_URL ,
5051os .environ ["HF_SCRIPTS_VERSION" ] = DATASETS_REVISION
5152
5253# for tests - to be removed
54+ MAX_SIZE_FALLBACK = get_int_value (os .environ , "MAX_SIZE_FALLBACK" , DEFAULT_MAX_SIZE_FALLBACK )
5355ROWS_MAX_BYTES = get_int_value (d = os .environ , key = "ROWS_MAX_BYTES" , default = DEFAULT_ROWS_MAX_BYTES )
5456ROWS_MAX_NUMBER = get_int_value (d = os .environ , key = "ROWS_MAX_NUMBER" , default = DEFAULT_ROWS_MAX_NUMBER )
5557ROWS_MIN_NUMBER = get_int_value (d = os .environ , key = "ROWS_MIN_NUMBER" , default = DEFAULT_ROWS_MIN_NUMBER )
Original file line number Diff line number Diff line change @@ -28,7 +28,7 @@ def get_rows(
2828 dataset_name ,
2929 name = config_name ,
3030 split = split_name ,
31- streaming = True ,
31+ streaming = streaming ,
3232 download_mode = DownloadMode .FORCE_REDOWNLOAD ,
3333 use_auth_token = hf_token ,
3434 )
Original file line number Diff line number Diff line change 1- from datasets_preview_backend .config import HF_TOKEN , ROWS_MAX_NUMBER
1+ from datasets_preview_backend .config import HF_TOKEN , MAX_SIZE_FALLBACK , ROWS_MAX_NUMBER
22from datasets_preview_backend .models .split import get_split
33
44# TODO: test fallback
@@ -24,4 +24,21 @@ def test_gated() -> None:
2424 assert split ["rows_response" ]["rows" ][0 ]["row" ]["year" ] == "1855"
2525
2626
27+ def test_fallback () -> None :
28+ # https://github.com/huggingface/datasets/issues/3185
29+ dataset_name = "samsum"
30+ config_name = "samsum"
31+ split_name = "train"
32+ split = get_split (
33+ dataset_name ,
34+ config_name ,
35+ split_name ,
36+ HF_TOKEN ,
37+ rows_max_number = ROWS_MAX_NUMBER ,
38+ max_size_fallback = MAX_SIZE_FALLBACK ,
39+ )
40+
41+ assert len (split ["rows_response" ]["rows" ]) == ROWS_MAX_NUMBER
42+
43+
2744# TODO: test the truncation
You can’t perform that action at this time.
0 commit comments