@@ -1488,7 +1488,7 @@ def save_to_disk(
         parent_cache_files_paths = {
             Path(cache_filename["filename"]).resolve().parent for cache_filename in self.cache_files
         }
-        # Check that the dataset doesn't overwrite iself. It can cause a permission error on Windows and a segfault on linux.
+        # Check that the dataset doesn't overwrite itself. It can cause a permission error on Windows and a segfault on linux.
         if Path(dataset_path).expanduser().resolve() in parent_cache_files_paths:
             raise PermissionError(
                 f"Tried to overwrite {Path(dataset_path).expanduser().resolve()} but a dataset can't overwrite itself."
@@ -2867,7 +2867,7 @@ def map(
         Note that the last batch may have less than `n` examples.
         A batch is a dictionary, e.g. a batch of `n` examples is `{"text": ["Hello there !"] * n}`.

-        If the function is asynchronous, then `map` will run your function in parallel, with up to one thousand simulatenous calls.
+        If the function is asynchronous, then `map` will run your function in parallel, with up to one thousand simultaneous calls.
         It is recommended to use an `asyncio.Semaphore` in your function if you want to set a maximum number of operations that can run at the same time.

         Args:
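The async behavior this docstring describes can be exercised as follows; a minimal sketch, where the semaphore limit and the function body are illustrative, not library defaults:

```python
import asyncio
from datasets import Dataset

sem = asyncio.Semaphore(8)  # cap concurrency well below the ~1000-call ceiling

async def annotate(example):
    async with sem:
        await asyncio.sleep(0)  # stand-in for a real async call, e.g. an HTTP request
        return {"text": example["text"] + " (processed)"}

ds = Dataset.from_dict({"text": ["Hello there !"] * 4})
ds = ds.map(annotate)  # map detects the coroutine function and runs calls concurrently
```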
@@ -3475,7 +3475,7 @@ def iter_outputs(shard_iterable):
                 yield i, apply_function(example, i, offset=offset)

         num_examples_progress_update = 0
-        # If `update_data` is True after processing the first example/batch, initalize these resources with `init_buffer_and_writer`
+        # If `update_data` is True after processing the first example/batch, initialize these resources with `init_buffer_and_writer`
         buf_writer, writer, tmp_file = None, None, None

         # Check if Polars is available and import it if so
@@ -3659,7 +3659,7 @@ def filter(
         """Apply a filter function to all the elements in the table in batches
         and update the table so that the dataset only includes examples according to the filter function.

-        If the function is asynchronous, then `filter` will run your function in parallel, with up to one thousand simulatenous calls (configurable).
+        If the function is asynchronous, then `filter` will run your function in parallel, with up to one thousand simultaneous calls (configurable).
         It is recommended to use an `asyncio.Semaphore` in your function if you want to set a maximum number of operations that can run at the same time.

         Args:
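`filter` supports the same async pattern; a minimal sketch under the same assumptions (the predicate and semaphore limit are illustrative):

```python
import asyncio
from datasets import Dataset

sem = asyncio.Semaphore(8)

async def is_long_enough(example):
    async with sem:
        await asyncio.sleep(0)  # stand-in for an async check, e.g. a moderation API call
        return len(example["text"]) > 5

ds = Dataset.from_dict({"text": ["hi", "Hello there !"]})
ds = ds.filter(is_long_enough)  # keeps only examples whose predicate returns True
```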
@@ -4277,7 +4277,7 @@ def sort(
                 f"Column '{column}' not found in the dataset. Please provide a column selected in: {self._data.column_names}"
             )

-        # Change null_placement to conform to pyarrow's sort_indices() while ensuring backwards compatability
+        # Change null_placement to conform to pyarrow's sort_indices() while ensuring backwards compatibility
         if null_placement not in ["at_start", "at_end"]:
             if null_placement == "first":
                 null_placement = "at_start"
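The rewrite this hunk touches means both pyarrow-style and legacy spellings are accepted by `Dataset.sort`; a minimal sketch (column name and values are illustrative):

```python
from datasets import Dataset

ds = Dataset.from_dict({"score": [2, None, 1]})
# Legacy "first"/"last" are mapped to pyarrow's "at_start"/"at_end" internally,
# so these two calls are equivalent:
a = ds.sort("score", null_placement="first")
b = ds.sort("score", null_placement="at_start")
```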
@@ -5345,7 +5345,7 @@ def _push_parquet_shards_to_hub(
         Returns:
             additions (`List[CommitOperation]`): list of the `CommitOperationAdd` of the uploaded shards
             uploaded_size (`int`): number of uploaded bytes to the repository
-            dataset_nbytes (`int`): approximate size in bytes of the uploaded dataset afer uncompression
+            dataset_nbytes (`int`): approximate size in bytes of the uploaded dataset after uncompression
         """
         # Find decodable columns, because if there are any, we need to:
         # embed the bytes from the files in the shards
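`_push_parquet_shards_to_hub` is an internal helper; users reach it through `Dataset.push_to_hub`, which uploads the dataset as parquet shards. A minimal sketch, assuming you are authenticated with the Hub (the repo id is illustrative):

```python
from datasets import Dataset

ds = Dataset.from_dict({"text": ["Hello there !"] * 4})
ds.push_to_hub("username/my-dataset")  # uploads the dataset as parquet shards
```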
@@ -6178,7 +6178,7 @@ def _concatenate_map_style_datasets(
         # Return first dataset if all datasets are empty
         return dsets[0]

-    # Perform checks (and a potentional cast if axis=0)
+    # Perform checks (and a potential cast if axis=0)
     if axis == 0:
         _check_if_features_can_be_aligned([dset.features for dset in dsets])
     else:
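The axis branching this hunk documents is observable through the public `concatenate_datasets`; a minimal sketch (the datasets are illustrative):

```python
from datasets import Dataset, concatenate_datasets

ds1 = Dataset.from_dict({"a": [1, 2]})
ds2 = Dataset.from_dict({"a": [3, 4]})
rows = concatenate_datasets([ds1, ds2], axis=0)  # axis=0: features are checked and aligned

ds3 = Dataset.from_dict({"b": [5, 6]})
cols = concatenate_datasets([ds1, ds3], axis=1)  # axis=1: datasets must have the same number of rows
```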