@@ -81,17 +81,17 @@ def _generate_examples(self, filepath, **kwargs):
                 yield i, {"text": line.strip()}
 """
 
-SAMPLE_DATASET_IDENTIFIER = "lhoestq/test"  # has dataset script
-SAMPLE_DATASET_IDENTIFIER2 = "lhoestq/test2"  # only has data files
-SAMPLE_DATASET_IDENTIFIER3 = "mariosasko/test_multi_dir_dataset"  # has multiple data directories
-SAMPLE_DATASET_IDENTIFIER4 = "mariosasko/test_imagefolder_with_metadata"  # imagefolder with a metadata file outside of the train/test directories
-SAMPLE_NOT_EXISTING_DATASET_IDENTIFIER = "lhoestq/_dummy"
+SAMPLE_DATASET_IDENTIFIER = "hf-internal-testing/dataset_with_script"  # has dataset script
+SAMPLE_DATASET_IDENTIFIER2 = "hf-internal-testing/dataset_with_data_files"  # only has data files
+SAMPLE_DATASET_IDENTIFIER3 = "hf-internal-testing/multi_dir_dataset"  # has multiple data directories
+SAMPLE_DATASET_IDENTIFIER4 = "hf-internal-testing/imagefolder_with_metadata"  # imagefolder with a metadata file outside of the train/test directories
+SAMPLE_NOT_EXISTING_DATASET_IDENTIFIER = "hf-internal-testing/_dummy"
 SAMPLE_DATASET_NAME_THAT_DOESNT_EXIST = "_dummy"
-SAMPLE_DATASET_NO_CONFIGS_IN_METADATA = "datasets-maintainers/audiofolder_no_configs_in_metadata"
-SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA = "datasets-maintainers/audiofolder_single_config_in_metadata"
-SAMPLE_DATASET_TWO_CONFIG_IN_METADATA = "datasets-maintainers/audiofolder_two_configs_in_metadata"
+SAMPLE_DATASET_NO_CONFIGS_IN_METADATA = "hf-internal-testing/audiofolder_no_configs_in_metadata"
+SAMPLE_DATASET_SINGLE_CONFIG_IN_METADATA = "hf-internal-testing/audiofolder_single_config_in_metadata"
+SAMPLE_DATASET_TWO_CONFIG_IN_METADATA = "hf-internal-testing/audiofolder_two_configs_in_metadata"
 SAMPLE_DATASET_TWO_CONFIG_IN_METADATA_WITH_DEFAULT = (
-    "datasets-maintainers/audiofolder_two_configs_in_metadata_with_default"
+    "hf-internal-testing/audiofolder_two_configs_in_metadata_with_default"
 )
 
 
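For context, the constants above name sample repos on the Hugging Face Hub that the tests load. A minimal sketch of how such an identifier is consumed (illustrative, not part of this diff; assumes network access and that the repo stays public):

import datasets

# Load the data-files-only sample repo renamed above; a "train" split is assumed to exist.
ds = datasets.load_dataset("hf-internal-testing/dataset_with_data_files", split="train")
print(ds[0])
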
@@ -876,18 +876,18 @@ def test_load_dataset_from_hub(self):
             str(context.exception),
         )
 
-    def test_load_dataset_users(self):
+    def test_load_dataset_namespace(self):
         with self.assertRaises(FileNotFoundError) as context:
-            datasets.load_dataset("lhoestq/_dummy")
+            datasets.load_dataset("hf-internal-testing/_dummy")
         self.assertIn(
-            "lhoestq/_dummy",
+            "hf-internal-testing/_dummy",
             str(context.exception),
         )
         for offline_simulation_mode in list(OfflineSimulationMode):
             with offline(offline_simulation_mode):
                 with self.assertRaises(ConnectionError) as context:
-                    datasets.load_dataset("lhoestq/_dummy")
-                self.assertIn("lhoestq/_dummy", str(context.exception), msg=offline_simulation_mode)
+                    datasets.load_dataset("hf-internal-testing/_dummy")
+                self.assertIn("hf-internal-testing/_dummy", str(context.exception), msg=offline_simulation_mode)
 
 
 @pytest.mark.integration
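`offline()` and `OfflineSimulationMode` are helpers from the test suite's own utilities. A minimal sketch of the general technique they rely on (forcing the HTTP layer to fail so `load_dataset` surfaces a `ConnectionError`), using a hypothetical stand-in named `fake_offline` rather than the actual helper:

from contextlib import contextmanager
from unittest.mock import patch

import requests


@contextmanager
def fake_offline():
    # Illustrative only: make every HTTP request raise, as if there were no connectivity.
    with patch("requests.Session.request", side_effect=requests.ConnectionError("Offline mode is enabled.")):
        yield
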
@@ -1064,7 +1064,7 @@ def test_load_dataset_streaming_gz_json(jsonl_gz_path):
10641064 "path" , ["sample.jsonl" , "sample.jsonl.gz" , "sample.tar" , "sample.jsonl.xz" , "sample.zip" , "sample.jsonl.zst" ]
10651065)
10661066def test_load_dataset_streaming_compressed_files (path ):
1067- repo_id = "albertvillanova/datasets-tests-compression "
1067+ repo_id = "hf-internal-testing/compressed_files "
10681068 data_files = f"https://huggingface.co/datasets/{ repo_id } /resolve/main/{ path } "
10691069 if data_files [- 3 :] in ("zip" , "tar" ): # we need to glob "*" inside archives
10701070 data_files = data_files [- 3 :] + "://*::" + data_files
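The `"://*::"` concatenation above builds a chained fsspec URL: prefixing the archive's URL with its extension plus `://*::` tells the streaming backend to open the remote archive and glob every member file inside it. A minimal sketch of the resulting call (assumes network access; the repo and file names are the ones used in the test):

from datasets import load_dataset

url = "https://huggingface.co/datasets/hf-internal-testing/compressed_files/resolve/main/sample.zip"
# "zip://*::<url>" opens <url> as a zip archive and matches every file inside it
ds = load_dataset("json", data_files="zip://*::" + url, split="train", streaming=True)
print(next(iter(ds)))
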
@@ -1394,7 +1394,7 @@ def test_load_from_disk_with_default_in_memory(
 
 @pytest.mark.integration
 def test_remote_data_files():
-    repo_id = "albertvillanova/tests-raw-jsonl"
+    repo_id = "hf-internal-testing/raw_jsonl"
     filename = "wikiann-bn-validation.jsonl"
     data_files = f"https://huggingface.co/datasets/{repo_id}/resolve/main/{filename}"
     ds = load_dataset("json", split="train", data_files=data_files, streaming=True)