From 484a51382d8f7a0f14c629be07709d57e6a0bba2 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Mon, 23 Jun 2025 22:30:52 +0200 Subject: [PATCH 1/9] no more sequence --- src/datasets/arrow_dataset.py | 15 +- src/datasets/dataset_dict.py | 11 +- src/datasets/features/__init__.py | 3 +- src/datasets/features/features.py | 212 ++++++------------ src/datasets/features/translation.py | 6 +- .../folder_based_builder.py | 8 +- src/datasets/table.py | 68 ++---- tests/commands/test_test.py | 10 +- tests/features/test_array_xd.py | 4 +- tests/features/test_audio.py | 6 +- tests/features/test_features.py | 168 +++++++------- tests/features/test_image.py | 6 +- tests/fixtures/files.py | 14 +- tests/io/test_parquet.py | 4 +- tests/packaged_modules/test_webdataset.py | 4 +- tests/test_arrow_dataset.py | 90 ++++---- tests/test_dataset_dict.py | 8 +- tests/test_dataset_list.py | 4 +- tests/test_table.py | 80 +++---- 19 files changed, 294 insertions(+), 427 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index 37170a51bfa..d0169ea5b3b 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -76,7 +76,7 @@ from .arrow_writer import ArrowWriter, OptimizedTypedSequence from .data_files import sanitize_patterns from .download.streaming_download_manager import xgetsize -from .features import Audio, ClassLabel, Features, Image, Sequence, Value, Video +from .features import Audio, ClassLabel, Features, Image, List, Value, Video from .features.features import ( FeatureType, _align_features, @@ -2028,11 +2028,12 @@ def flatten(self, new_fingerprint: Optional[str] = None, max_depth=16) -> "Datas >>> from datasets import load_dataset >>> ds = load_dataset("rajpurkar/squad", split="train") >>> ds.features - {'answers': Sequence(feature={'text': Value(dtype='string', id=None), 'answer_start': Value(dtype='int32', id=None)}, length=-1, id=None), - 'context': Value(dtype='string', id=None), - 'id': Value(dtype='string', id=None), 
- 'question': Value(dtype='string', id=None), - 'title': Value(dtype='string', id=None)} + {'id': Value(dtype='string'), + 'title': Value(dtype='string'), + 'context': Value(dtype='string'), + 'question': Value(dtype='string'), + 'answers': {'text': List(feature=Value(dtype='string'), length=-1), + 'answer_start': List(feature=Value(dtype='int32'), length=-1)}} >>> ds.flatten() Dataset({ features: ['id', 'title', 'context', 'question', 'answers.text', 'answers.answer_start'], @@ -6350,7 +6351,7 @@ def process_label_ids(batch): features[label_column] = ( ClassLabel(num_classes=len(label_names), names=label_names) if isinstance(label_feature, ClassLabel) - else Sequence(ClassLabel(num_classes=len(label_names), names=label_names)) + else List(ClassLabel(num_classes=len(label_names), names=label_names)) ) return self.map(process_label_ids, features=features, batched=True, desc="Aligning the labels") diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 8f259c93c72..4d79f95620e 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -201,11 +201,12 @@ def flatten(self, max_depth=16) -> "DatasetDict": >>> from datasets import load_dataset >>> ds = load_dataset("rajpurkar/squad") >>> ds["train"].features - {'answers': Sequence(feature={'text': Value(dtype='string', id=None), 'answer_start': Value(dtype='int32', id=None)}, length=-1, id=None), - 'context': Value(dtype='string', id=None), - 'id': Value(dtype='string', id=None), - 'question': Value(dtype='string', id=None), - 'title': Value(dtype='string', id=None)} + {'id': Value(dtype='string'), + 'title': Value(dtype='string'), + 'context': Value(dtype='string'), + 'question': Value(dtype='string'), + 'answers.text': List(feature=Value(dtype='string'), length=-1), + 'answers.answer_start': List(feature=Value(dtype='int32'), length=-1)} >>> ds.flatten() DatasetDict({ train: Dataset({ diff --git a/src/datasets/features/__init__.py b/src/datasets/features/__init__.py index 
95bb1cf1080..36133ce5e5a 100644 --- a/src/datasets/features/__init__.py +++ b/src/datasets/features/__init__.py @@ -7,6 +7,7 @@ "ClassLabel", "Features", "LargeList", + "List", "Sequence", "Value", "Image", @@ -16,7 +17,7 @@ "Pdf", ] from .audio import Audio -from .features import Array2D, Array3D, Array4D, Array5D, ClassLabel, Features, LargeList, Sequence, Value +from .features import Array2D, Array3D, Array4D, Array5D, ClassLabel, Features, LargeList, List, Sequence, Value from .image import Image from .pdf import Pdf from .translation import Translation, TranslationVariableLanguages diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index e69947fa61b..99222d8c32a 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -1157,34 +1157,30 @@ def _load_names_from_file(names_filepath): return [name.strip() for name in f.read().split("\n") if name.strip()] # Filter empty names +def Sequence(feature, length=-1): + if isinstance(feature, dict): + return {key: List(value, length=length) for key, value in feature.items()} + else: + return List(feature, length=length) + + @dataclass -class Sequence: - """Construct a list of feature from a single type or a dict of types. - Mostly here for compatiblity with tfds. +class List: + """Feature type for large list data composed of child feature data type. + + It is backed by `pyarrow.ListType`, which uses 32-bit offsets or a fixed length. Args: feature ([`FeatureType`]): - A list of features of a single type or a dictionary of types. - length (`int`): - Length of the sequence. 
- - Example: - - ```py - >>> from datasets import Features, Sequence, Value, ClassLabel - >>> features = Features({'post': Sequence(feature={'text': Value(dtype='string'), 'upvotes': Value(dtype='int32'), 'label': ClassLabel(num_classes=2, names=['hot', 'cold'])})}) - >>> features - {'post': Sequence(feature={'text': Value(dtype='string', id=None), 'upvotes': Value(dtype='int32', id=None), 'label': ClassLabel(names=['hot', 'cold'], id=None)}, length=-1, id=None)} - ``` + Child feature data type of each item within the large list. """ feature: Any length: int = -1 id: Optional[str] = field(default=None, repr=False) # Automatically constructed - dtype: ClassVar[str] = "list" pa_type: ClassVar[Any] = None - _type: str = field(default="Sequence", init=False, repr=False) + _type: str = field(default="List", init=False, repr=False) @dataclass @@ -1214,7 +1210,7 @@ class LargeList: Translation, TranslationVariableLanguages, LargeList, - Sequence, + List, Array2D, Array3D, Array4D, @@ -1233,7 +1229,7 @@ def _check_non_null_non_empty_recursive(obj, schema: Optional[FeatureType] = Non """ if obj is None: return False - elif isinstance(obj, (list, tuple)) and (schema is None or isinstance(schema, (list, tuple, LargeList, Sequence))): + elif isinstance(obj, (list, tuple)) and (schema is None or isinstance(schema, (list, tuple, LargeList, List))): if len(obj) > 0: if schema is None: pass @@ -1273,14 +1269,9 @@ def get_nested_type(schema: FeatureType) -> pa.DataType: elif isinstance(schema, LargeList): value_type = get_nested_type(schema.feature) return pa.large_list(value_type) - elif isinstance(schema, Sequence): + elif isinstance(schema, List): value_type = get_nested_type(schema.feature) - # We allow to reverse list of dict => dict of list for compatibility with tfds - if isinstance(schema.feature, dict): - data_type = pa.struct({f.name: pa.list_(f.type, schema.length) for f in value_type}) - else: - data_type = pa.list_(value_type, schema.length) - return data_type + return 
pa.list_(value_type, schema.length) # Other objects are callable which returns their data type (ClassLabel, Array2D, Translation, Arrow datatype creation methods) return schema() @@ -1317,7 +1308,7 @@ def encode_nested_example(schema, obj, level=0): if encode_nested_example(sub_schema, first_elmt, level=level + 1) != first_elmt: return [encode_nested_example(sub_schema, o, level=level + 1) for o in obj] return list(obj) - elif isinstance(schema, LargeList): + elif isinstance(schema, (LargeList, List)): if obj is None: return None else: @@ -1329,42 +1320,6 @@ def encode_nested_example(schema, obj, level=0): if encode_nested_example(sub_schema, first_elmt, level=level + 1) != first_elmt: return [encode_nested_example(sub_schema, o, level=level + 1) for o in obj] return list(obj) - elif isinstance(schema, Sequence): - if obj is None: - return None - # We allow to reverse list of dict => dict of list for compatibility with tfds - if isinstance(schema.feature, dict): - # dict of list to fill - list_dict = {} - if isinstance(obj, (list, tuple)): - # obj is a list of dict - for k in schema.feature: - list_dict[k] = [encode_nested_example(schema.feature[k], o.get(k), level=level + 1) for o in obj] - return list_dict - else: - # obj is a single dict - for k in schema.feature: - list_dict[k] = ( - [encode_nested_example(schema.feature[k], o, level=level + 1) for o in obj[k]] - if k in obj - else None - ) - return list_dict - # schema.feature is not a dict - if isinstance(obj, str): # don't interpret a string as a list - raise ValueError(f"Got a string but expected a list instead: '{obj}'") - else: - if len(obj) > 0: - for first_elmt in obj: - if _check_non_null_non_empty_recursive(first_elmt, schema.feature): - break - # be careful when comparing tensors here - if ( - not (isinstance(first_elmt, list) or np.isscalar(first_elmt)) - or encode_nested_example(schema.feature, first_elmt, level=level + 1) != first_elmt - ): - return [encode_nested_example(schema.feature, o, 
level=level + 1) for o in obj] - return list(obj) # Object with special encoding: # ClassLabel will convert from string to int, TranslationVariableLanguages does some checks elif hasattr(schema, "encode_example"): @@ -1399,7 +1354,7 @@ def decode_nested_example(schema, obj, token_per_repo_id: Optional[dict[str, Uni if decode_nested_example(sub_schema, first_elmt) != first_elmt: return [decode_nested_example(sub_schema, o) for o in obj] return list(obj) - elif isinstance(schema, LargeList): + elif isinstance(schema, (LargeList, List)): if obj is None: return None else: @@ -1411,12 +1366,6 @@ def decode_nested_example(schema, obj, token_per_repo_id: Optional[dict[str, Uni if decode_nested_example(sub_schema, first_elmt) != first_elmt: return [decode_nested_example(sub_schema, o) for o in obj] return list(obj) - elif isinstance(schema, Sequence): - # We allow to reverse list of dict => dict of list for compatibility with tfds - if isinstance(schema.feature, dict): - return {k: decode_nested_example([schema.feature[k]], obj[k]) for k in schema.feature} - else: - return decode_nested_example([schema.feature], obj) # Object with special decoding: elif hasattr(schema, "decode_example") and getattr(schema, "decode", True): # we pass the token to read and decode files from private repositories in streaming mode @@ -1430,7 +1379,7 @@ def decode_nested_example(schema, obj, token_per_repo_id: Optional[dict[str, Uni Translation.__name__: Translation, TranslationVariableLanguages.__name__: TranslationVariableLanguages, LargeList.__name__: LargeList, - Sequence.__name__: Sequence, + List.__name__: List, Array2D.__name__: Array2D, Array3D.__name__: Array3D, Array4D.__name__: Array4D, @@ -1485,7 +1434,10 @@ def generate_from_dict(obj: Any): if class_type == LargeList: feature = obj.pop("feature") return LargeList(feature=generate_from_dict(feature), **obj) - if class_type == Sequence: + if class_type == List: + feature = obj.pop("feature") + return 
List(feature=generate_from_dict(feature), **obj) + if class_type == Sequence: # backward compatibility, this translates to a List or a dict feature = obj.pop("feature") return Sequence(feature=generate_from_dict(feature), **obj) @@ -1506,15 +1458,11 @@ def generate_from_arrow_type(pa_type: pa.DataType) -> FeatureType: if isinstance(pa_type, pa.StructType): return {field.name: generate_from_arrow_type(field.type) for field in pa_type} elif isinstance(pa_type, pa.FixedSizeListType): - return Sequence(feature=generate_from_arrow_type(pa_type.value_type), length=pa_type.list_size) + return List(feature=generate_from_arrow_type(pa_type.value_type), length=pa_type.list_size) elif isinstance(pa_type, pa.ListType): - feature = generate_from_arrow_type(pa_type.value_type) - if isinstance(feature, (dict, tuple, list)): - return [feature] - return Sequence(feature=feature) + return List(feature=generate_from_arrow_type(pa_type.value_type)) elif isinstance(pa_type, pa.LargeListType): - feature = generate_from_arrow_type(pa_type.value_type) - return LargeList(feature=feature) + return LargeList(feature=generate_from_arrow_type(pa_type.value_type)) elif isinstance(pa_type, _ArrayXDExtensionType): array_feature = [None, None, Array2D, Array3D, Array4D, Array5D][pa_type.ndims] return array_feature(shape=pa_type.shape, dtype=pa_type.value_type) @@ -1596,7 +1544,7 @@ def to_pyarrow_listarray(data: Any, pa_type: _ArrayXDExtensionType) -> pa.Array: """Convert to PyArrow ListArray. Args: - data (Any): Sequence, iterable, np.ndarray or pd.Series. + data (Any): List, iterable, np.ndarray or pd.Series. pa_type (_ArrayXDExtensionType): Any of the ArrayNDExtensionType. 
Returns: @@ -1624,8 +1572,8 @@ def _visit(feature: FeatureType, func: Callable[[FeatureType], Optional[FeatureT out = func([_visit(feature[0], func)]) elif isinstance(feature, LargeList): out = func(LargeList(_visit(feature.feature, func))) - elif isinstance(feature, Sequence): - out = func(Sequence(_visit(feature.feature, func), length=feature.length)) + elif isinstance(feature, List): + out = func(List(_visit(feature.feature, func), length=feature.length)) else: out = func(feature) return feature if out is None else out @@ -1653,19 +1601,12 @@ def _visit_with_path( Returns: `FeatureType`: the visited feature. """ - if isinstance(feature, Sequence) and isinstance(feature.feature, dict): - feature = {k: [f] for k, f in feature.feature.items()} - # ^ Sequence of dicts is special, it must be converted to a dict of lists (see https://huggingface.co/docs/datasets/v2.16.1/en/package_reference/main_classes#datasets.Features) if isinstance(feature, Features): out = func(Features({k: _visit_with_path(f, func, visit_path + [k]) for k, f in feature.items()}), visit_path) elif isinstance(feature, dict): out = func({k: _visit_with_path(f, func, visit_path + [k]) for k, f in feature.items()}, visit_path) - elif isinstance(feature, (list, tuple)): - out = func([_visit_with_path(feature[0], func, visit_path + [0])], visit_path) - elif isinstance(feature, Sequence): - out = func( - Sequence(_visit_with_path(feature.feature, func, visit_path + [0]), length=feature.length), visit_path - ) + elif isinstance(feature, List): + out = func(List(_visit_with_path(feature.feature, func, visit_path + [0]), length=feature.length), visit_path) elif isinstance(feature, LargeList): out = func(LargeList(_visit_with_path(feature.feature, func, visit_path + [0])), visit_path) else: @@ -1689,7 +1630,7 @@ def require_decoding(feature: FeatureType, ignore_decode_attribute: bool = False return require_decoding(feature[0]) elif isinstance(feature, LargeList): return require_decoding(feature.feature) - 
elif isinstance(feature, Sequence): + elif isinstance(feature, List): return require_decoding(feature.feature) else: return hasattr(feature, "decode_example") and ( @@ -1707,11 +1648,9 @@ def require_storage_cast(feature: FeatureType) -> bool: """ if isinstance(feature, dict): return any(require_storage_cast(f) for f in feature.values()) - elif isinstance(feature, (list, tuple)): - return require_storage_cast(feature[0]) elif isinstance(feature, LargeList): return require_storage_cast(feature.feature) - elif isinstance(feature, Sequence): + elif isinstance(feature, List): return require_storage_cast(feature.feature) else: return hasattr(feature, "cast_storage") @@ -1727,11 +1666,9 @@ def require_storage_embed(feature: FeatureType) -> bool: """ if isinstance(feature, dict): return any(require_storage_cast(f) for f in feature.values()) - elif isinstance(feature, (list, tuple)): - return require_storage_cast(feature[0]) elif isinstance(feature, LargeList): return require_storage_cast(feature.feature) - elif isinstance(feature, Sequence): + elif isinstance(feature, List): return require_storage_cast(feature.feature) else: return hasattr(feature, "embed_storage") @@ -1771,14 +1708,14 @@ class Features(dict): will be stored as integers in the dataset. - Python `dict` specifies a composite feature containing a mapping of sub-fields to sub-features. It's possible to have nested fields of nested fields in an arbitrary manner. - - Python `list`, [`LargeList`] or [`Sequence`] specifies a composite feature containing a sequence of + - [`List`] or [`LargeList`] specifies a composite feature containing a sequence of sub-features, all of the same feature type. - A [`Sequence`] with an internal dictionary feature will be automatically converted into a dictionary of + A `Sequence` is deprecated and automatically converts internal dictionary feature into a dictionary of lists. 
This behavior is implemented to have a compatibility layer with the TensorFlow Datasets library but may be - un-wanted in some cases. If you don't want this behavior, you can use a Python `list` or a [`LargeList`] + un-wanted in some cases. If you don't want this behavior, you can use a [`List`] or a [`LargeList`] instead of the [`Sequence`]. @@ -1944,9 +1881,9 @@ def to_yaml_inner(obj: Union[dict, list]) -> dict: if _type == "LargeList": _feature = obj.pop("feature") return simplify({"large_list": to_yaml_inner(_feature), **obj}) - elif _type == "Sequence": + elif _type == "List": _feature = obj.pop("feature") - return simplify({"sequence": to_yaml_inner(_feature), **obj}) + return simplify({"list": to_yaml_inner(_feature), **obj}) elif _type == "Value": return obj elif _type and not obj: @@ -2013,13 +1950,20 @@ def from_yaml_inner(obj: Union[dict, list]) -> Union[dict, list]: return {} _type = next(iter(obj)) if _type == "large_list": - _feature = unsimplify(obj).pop(_type) - return {"feature": from_yaml_inner(_feature), **obj, "_type": "LargeList"} + _feature = from_yaml_inner(unsimplify(obj).pop(_type)) + return {"feature": _feature, **obj, "_type": "LargeList"} if _type == "sequence": - _feature = unsimplify(obj).pop(_type) - return {"feature": from_yaml_inner(_feature), **obj, "_type": "Sequence"} + _feature = from_yaml_inner(unsimplify(obj).pop(_type)) + if isinstance(_feature, dict): + return { + name: {"feature": _subfeature, **obj, "_type": "List"} + for name, _subfeature in _feature.items() + } + else: + return {"feature": _feature, **obj, "_type": "List"} if _type == "list": - return [from_yaml_inner(unsimplify(obj)[_type])] + _feature = from_yaml_inner(unsimplify(obj).pop(_type)) + return {"feature": _feature, **obj, "_type": "List"} if _type == "struct": return from_yaml_inner(obj["struct"]) elif _type == "dtype": @@ -2203,37 +2147,20 @@ def reorder_fields_as(self, other: "Features") -> "Features": Example:: - >>> from datasets import Features, 
Sequence, Value + >>> from datasets import Features, List, Value >>> # let's say we have two features with a different order of nested fields (for a and b for example) - >>> f1 = Features({"root": Sequence({"a": Value("string"), "b": Value("string")})}) - >>> f2 = Features({"root": {"b": Sequence(Value("string")), "a": Sequence(Value("string"))}}) + >>> f1 = Features({"root": {"a": Value("string"), "b": Value("string")}}) + >>> f2 = Features({"root": {"b": Value("string"), "a": Value("string")}}) >>> assert f1.type != f2.type - >>> # re-ordering keeps the base structure (here Sequence is defined at the root level), but makes the fields order match + >>> # re-ordering keeps the base structure (here List is defined at the root level), but makes the fields order match >>> f1.reorder_fields_as(f2) - {'root': Sequence(feature={'b': Value(dtype='string', id=None), 'a': Value(dtype='string', id=None)}, length=-1, id=None)} + {'root': List(feature={'b': Value(dtype='string'), 'a': Value(dtype='string')}, length=-1)} >>> assert f1.reorder_fields_as(f2).type == f2.type """ def recursive_reorder(source, target, stack=""): stack_position = " at " + stack[1:] if stack else "" - if isinstance(target, Sequence): - target = target.feature - if isinstance(target, dict): - target = {k: [v] for k, v in target.items()} - else: - target = [target] - if isinstance(source, Sequence): - sequence_kwargs = vars(source).copy() - source = sequence_kwargs.pop("feature") - if isinstance(source, dict): - source = {k: [v] for k, v in source.items()} - reordered = recursive_reorder(source, target, stack) - return Sequence({k: v[0] for k, v in reordered.items()}, **sequence_kwargs) - else: - source = [source] - reordered = recursive_reorder(source, target, stack) - return Sequence(reordered[0], **sequence_kwargs) - elif isinstance(source, dict): + if isinstance(source, dict): if not isinstance(target, dict): raise ValueError(f"Type mismatch: between {source} and {target}" + stack_position) if 
sorted(source) != sorted(target): @@ -2244,16 +2171,14 @@ def recursive_reorder(source, target, stack=""): ) raise ValueError(message) return {key: recursive_reorder(source[key], target[key], stack + f".{key}") for key in target} - elif isinstance(source, list): - if not isinstance(target, list): + elif isinstance(source, List): + if not isinstance(target, List): raise ValueError(f"Type mismatch: between {source} and {target}" + stack_position) - if len(source) != len(target): - raise ValueError(f"Length mismatch: between {source} and {target}" + stack_position) - return [recursive_reorder(source[i], target[i], stack + ".") for i in range(len(target))] + return List(recursive_reorder(source.feature, target.feature, stack + "."), length=source.length) elif isinstance(source, LargeList): if not isinstance(target, LargeList): raise ValueError(f"Type mismatch: between {source} and {target}" + stack_position) - return LargeList(recursive_reorder(source.feature, target.feature, stack)) + return LargeList(recursive_reorder(source.feature, target.feature, stack + ".")) else: return source @@ -2277,8 +2202,8 @@ def flatten(self, max_depth=16) -> "Features": >>> from datasets import load_dataset >>> ds = load_dataset("rajpurkar/squad", split="train") >>> ds.features.flatten() - {'answers.answer_start': Sequence(feature=Value(dtype='int32', id=None), length=-1, id=None), - 'answers.text': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None), + {'answers.answer_start': List(feature=Value(dtype='int32', id=None), length=-1, id=None), + 'answers.text': List(feature=Value(dtype='string', id=None), length=-1, id=None), 'context': Value(dtype='string', id=None), 'id': Value(dtype='string', id=None), 'question': Value(dtype='string', id=None), @@ -2293,15 +2218,6 @@ def flatten(self, max_depth=16) -> "Features": no_change = False flattened.update({f"{column_name}.{k}": v for k, v in subfeature.items()}) del flattened[column_name] - elif isinstance(subfeature, 
Sequence) and isinstance(subfeature.feature, dict): - no_change = False - flattened.update( - { - f"{column_name}.{k}": Sequence(v) if not isinstance(v, dict) else [v] - for k, v in subfeature.feature.items() - } - ) - del flattened[column_name] elif hasattr(subfeature, "flatten") and subfeature.flatten() != subfeature: no_change = False flattened.update({f"{column_name}.{k}": v for k, v in subfeature.flatten().items()}) diff --git a/src/datasets/features/translation.py b/src/datasets/features/translation.py index bb91a7cfb7b..9bee3aa2c11 100644 --- a/src/datasets/features/translation.py +++ b/src/datasets/features/translation.py @@ -121,9 +121,9 @@ def encode_example(self, translation_dict): def flatten(self) -> Union["FeatureType", dict[str, "FeatureType"]]: """Flatten the TranslationVariableLanguages feature into a dictionary.""" - from .features import Sequence, Value + from .features import List, Value return { - "language": Sequence(Value("string")), - "translation": Sequence(Value("string")), + "language": List(Value("string")), + "translation": List(Value("string")), } diff --git a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py index 50788eecff0..f1ec638ab3b 100644 --- a/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py +++ b/src/datasets/packaged_modules/folder_based_builder/folder_based_builder.py @@ -212,11 +212,11 @@ def _set_feature(feature): key = key[: -len("_file_name")] or self.BASE_COLUMN_NAME out[key] = self.BASE_FEATURE() feature_not_found = False - elif (key == "file_names" or key.endswith("_file_names")) and feature[ - key - ] == datasets.Sequence(datasets.Value("string")): + elif (key == "file_names" or key.endswith("_file_names")) and feature[key] == datasets.List( + datasets.Value("string") + ): key = key[: -len("_file_names")] or (self.BASE_COLUMN_NAME + "s") - out[key] = datasets.Sequence(self.BASE_FEATURE()) + 
out[key] = datasets.List(self.BASE_FEATURE()) feature_not_found = False elif (key == "file_names" or key.endswith("_file_names")) and feature[key] == [ datasets.Value("string") diff --git a/src/datasets/table.py b/src/datasets/table.py index 2e616a84688..8d301afea7a 100644 --- a/src/datasets/table.py +++ b/src/datasets/table.py @@ -8,7 +8,6 @@ import numpy as np import pyarrow as pa import pyarrow.compute as pc -import pyarrow.types from .utils.logging import get_logger @@ -1982,7 +1981,7 @@ def cast_array_to_feature( Returns: array (`pyarrow.Array`): the casted array """ - from .features.features import LargeList, Sequence, get_nested_type + from .features.features import LargeList, List, get_nested_type _c = partial( cast_array_to_feature, @@ -1995,12 +1994,8 @@ def cast_array_to_feature( if hasattr(feature, "cast_storage"): return feature.cast_storage(array) - elif pa.types.is_struct(array.type): - # feature must be a dict or Sequence(subfeatures_dict) - if isinstance(feature, Sequence) and isinstance(feature.feature, dict): - sequence_kwargs = vars(feature).copy() - feature = sequence_kwargs.pop("feature") - feature = {name: Sequence(subfeature, **sequence_kwargs) for name, subfeature in feature.items()} + if pa.types.is_struct(array.type): + # feature must be a dict if isinstance(feature, dict) and (array_fields := {field.name for field in array.type}) <= set(feature): null_array = pa.array([None] * len(array)) arrays = [ @@ -2009,17 +2004,8 @@ def cast_array_to_feature( ] return pa.StructArray.from_arrays(arrays, names=list(feature), mask=array.is_null()) elif pa.types.is_list(array.type) or pa.types.is_large_list(array.type): - # feature must be either [subfeature] or LargeList(subfeature) or Sequence(subfeature) - if isinstance(feature, list): - casted_array_values = _c(array.values, feature[0]) - if pa.types.is_list(array.type) and casted_array_values.type == array.values.type: - # Both array and feature have equal list type and values (within the list) 
type - return array - else: - # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError - array_offsets = _combine_list_array_offsets_with_mask(array) - return pa.ListArray.from_arrays(array_offsets, casted_array_values) - elif isinstance(feature, LargeList): + # feature must be either List(subfeature) or LargeList(subfeature) + if isinstance(feature, LargeList): casted_array_values = _c(array.values, feature.feature) if pa.types.is_large_list(array.type) and casted_array_values.type == array.values.type: # Both array and feature have equal large_list type and values (within the list) type @@ -2028,7 +2014,7 @@ def cast_array_to_feature( # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError array_offsets = _combine_list_array_offsets_with_mask(array) return pa.LargeListArray.from_arrays(array_offsets, casted_array_values) - elif isinstance(feature, Sequence): + elif isinstance(feature, List): if feature.length > -1: if _are_list_values_of_length(array, feature.length): if array.null_count > 0: @@ -2072,16 +2058,13 @@ def cast_array_to_feature( array_offsets = _combine_list_array_offsets_with_mask(array) return pa.ListArray.from_arrays(array_offsets, casted_array_values) elif pa.types.is_fixed_size_list(array.type): - # feature must be either [subfeature] or Sequence(subfeature) - if isinstance(feature, list): - array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size - return pa.ListArray.from_arrays(array_offsets, _c(array.values, feature[0]), mask=array.is_null()) - elif isinstance(feature, LargeList): + # feature must be List(subfeature) + if isinstance(feature, LargeList): array_offsets = (np.arange(len(array) + 1) + array.offset) * array.type.list_size return pa.LargeListArray.from_arrays( array_offsets, _c(array.values, feature.feature), mask=array.is_null() ) - elif isinstance(feature, Sequence): + elif 
isinstance(feature, List): if feature.length > -1: if feature.length == array.type.list_size: array_values = array.values[ @@ -2099,7 +2082,7 @@ def cast_array_to_feature( allow_primitive_to_str=allow_primitive_to_str, allow_decimal_to_str=allow_decimal_to_str, ) - elif not isinstance(feature, (Sequence, dict, list, tuple)): + elif not isinstance(feature, (List, LargeList, dict)): return array_cast( array, feature(), @@ -2131,7 +2114,7 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType", token_per_repo_ Returns: array (`pyarrow.Array`): the casted array """ - from .features import Sequence + from .features import LargeList, List _e = partial(embed_array_storage, token_per_repo_id=token_per_repo_id) @@ -2140,21 +2123,15 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType", token_per_repo_ if hasattr(feature, "embed_storage"): return feature.embed_storage(array, token_per_repo_id=token_per_repo_id) elif pa.types.is_struct(array.type): - # feature must be a dict or Sequence(subfeatures_dict) - if isinstance(feature, Sequence) and isinstance(feature.feature, dict): - feature = { - name: Sequence(subfeature, length=feature.length) for name, subfeature in feature.feature.items() - } + # feature must be a dict if isinstance(feature, dict): arrays = [_e(array.field(name), subfeature) for name, subfeature in feature.items()] return pa.StructArray.from_arrays(arrays, names=list(feature), mask=array.is_null()) elif pa.types.is_list(array.type): - # feature must be either [subfeature] or Sequence(subfeature) + # feature must be either List(subfeature) # Merge offsets with the null bitmap to avoid the "Null bitmap with offsets slice not supported" ArrowNotImplementedError array_offsets = _combine_list_array_offsets_with_mask(array) - if isinstance(feature, list): - return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature[0])) - if isinstance(feature, Sequence) and feature.length == -1: + if isinstance(feature, List) and feature.length 
== -1: return pa.ListArray.from_arrays(array_offsets, _e(array.values, feature.feature)) elif pa.types.is_large_list(array.type): # feature must be LargeList(subfeature) @@ -2162,14 +2139,14 @@ def embed_array_storage(array: pa.Array, feature: "FeatureType", token_per_repo_ array_offsets = _combine_list_array_offsets_with_mask(array) return pa.LargeListArray.from_arrays(array_offsets, _e(array.values, feature.feature)) elif pa.types.is_fixed_size_list(array.type): - # feature must be Sequence(subfeature) - if isinstance(feature, Sequence) and feature.length > -1: + # feature must be List(subfeature) + if isinstance(feature, List) and feature.length > -1: array_values = array.values[ array.offset * array.type.list_size : (array.offset + len(array)) * array.type.list_size ] embedded_array_values = _e(array_values, feature.feature) return pa.FixedSizeListArray.from_arrays(embedded_array_values, feature.length, mask=array.is_null()) - if not isinstance(feature, (Sequence, dict, list, tuple)): + if not isinstance(feature, (List, LargeList, dict)): return array raise TypeError(f"Couldn't embed array of type\n{_short_str(array.type)}\nwith\n{_short_str(feature)}") @@ -2350,7 +2327,7 @@ def table_visitor(table: pa.Table, function: Callable[[pa.Array], None]): function (`Callable[[pa.Array], None]`): Function to apply to each array. 
""" - from .features import Features, Sequence + from .features import Features, LargeList, List features = Features.from_arrow_schema(table.schema) @@ -2363,17 +2340,10 @@ def _visit(array, feature): array = array.storage function(array, feature) if pa.types.is_struct(array.type) and not hasattr(feature, "cast_storage"): - if isinstance(feature, Sequence) and isinstance(feature.feature, dict): - feature = { - name: Sequence(subfeature, length=feature.length) - for name, subfeature in feature.feature.items() - } for name, subfeature in feature.items(): _visit(array.field(name), subfeature) elif pa.types.is_list(array.type): - if isinstance(feature, list): - _visit(array.values, feature[0]) - elif isinstance(feature, Sequence): + if isinstance(feature, (LargeList, List)): _visit(array.values, feature.feature) for name, feature in features.items(): diff --git a/tests/commands/test_test.py b/tests/commands/test_test.py index 396f7e84611..f8935a8c025 100644 --- a/tests/commands/test_test.py +++ b/tests/commands/test_test.py @@ -3,7 +3,7 @@ import pytest -from datasets import ClassLabel, Features, Sequence, Value +from datasets import ClassLabel, Features, Value from datasets.commands.test import TestCommand from datasets.info import DatasetInfo, DatasetInfosDict @@ -43,12 +43,12 @@ def test_test_command(dataset_dir): "default": DatasetInfo( features=Features( { - "tokens": Sequence(Value("string")), - "ner_tags": Sequence( + "tokens": List(Value("string")), + "ner_tags": List( ClassLabel(names=["O", "B-PER", "I-PER", "B-ORG", "I-ORG", "B-LOC", "I-LOC"]) ), - "langs": Sequence(Value("string")), - "spans": Sequence(Value("string")), + "langs": List(Value("string")), + "spans": List(Value("string")), } ), splits=[ diff --git a/tests/features/test_array_xd.py b/tests/features/test_array_xd.py index 8eb9e4e0242..4b30387386a 100644 --- a/tests/features/test_array_xd.py +++ b/tests/features/test_array_xd.py @@ -421,11 +421,11 @@ def test_array_xd_with_np(seq_type, dtype, 
shape, feature_class): data = np.zeros(shape, dtype=dtype) expected = data.tolist() if seq_type == "sequence": - feature = datasets.Sequence(feature) + feature = datasets.List(feature) data = [data] expected = [expected] elif seq_type == "sequence_of_sequence": - feature = datasets.Sequence(datasets.Sequence(feature)) + feature = datasets.List(datasets.List(feature)) data = [[data]] expected = [[expected]] ds = datasets.Dataset.from_dict({"col": [data]}, features=datasets.Features({"col": feature})) diff --git a/tests/features/test_audio.py b/tests/features/test_audio.py index 38999e64b4e..f959458777c 100644 --- a/tests/features/test_audio.py +++ b/tests/features/test_audio.py @@ -7,7 +7,7 @@ import pytest from datasets import Column, Dataset, concatenate_datasets, load_dataset -from datasets.features import Audio, Features, Sequence, Value +from datasets.features import Audio, Features, Value from ..utils import require_sndfile, require_torchcodec @@ -54,7 +54,7 @@ def test_audio_feature_type_to_arrow(): assert features.arrow_schema == pa.schema({"audio": Audio().pa_type}) features = Features({"struct_containing_an_audio": {"audio": Audio()}}) assert features.arrow_schema == pa.schema({"struct_containing_an_audio": pa.struct({"audio": Audio().pa_type})}) - features = Features({"sequence_of_audios": Sequence(Audio())}) + features = Features({"sequence_of_audios": List(Audio())}) assert features.arrow_schema == pa.schema({"sequence_of_audios": pa.list_(Audio().pa_type)}) @@ -375,7 +375,7 @@ def test_dataset_with_audio_feature_with_none(): # nested tests data = {"audio": [[None]]} - features = Features({"audio": Sequence(Audio())}) + features = Features({"audio": List(Audio())}) dset = Dataset.from_dict(data, features=features) item = dset[0] assert item.keys() == {"audio"} diff --git a/tests/features/test_features.py b/tests/features/test_features.py index 3e2d36cc77a..c9d98c9f001 100644 --- a/tests/features/test_features.py +++ b/tests/features/test_features.py @@ 
-53,7 +53,7 @@ def test_from_arrow_schema_simple(self): def test_from_arrow_schema_with_sequence(self): data = {"a": [{"b": {"c": ["text"]}}] * 10, "foo": [1] * 10} - original_features = Features({"a": {"b": Sequence({"c": Value("string")})}, "foo": Value("int64")}) + original_features = Features({"a": {"b": {"c": List(Value("string"))}}, "foo": Value("int64")}) dset = Dataset.from_dict(data, features=original_features) new_features = dset.features new_dset = Dataset.from_dict(data, features=new_features) @@ -145,41 +145,41 @@ def test_reorder_fields_as(self): "title": Value("string"), "url": Value("string"), "html": Value("string"), - "tokens": Sequence({"token": Value("string"), "is_html": Value("bool")}), + "tokens": {"token": List(Value("string")), "is_html": List(Value("bool"))}, }, "question": { "text": Value("string"), - "tokens": Sequence(Value("string")), + "tokens": List(Value("string")), }, - "annotations": Sequence( - { - "id": Value("string"), - "long_answer": { + "annotations": { + "id": List(Value("string")), + "long_answer": List( + { "start_token": Value("int64"), "end_token": Value("int64"), "start_byte": Value("int64"), "end_byte": Value("int64"), - }, - "short_answers": Sequence( - { - "start_token": Value("int64"), - "end_token": Value("int64"), - "start_byte": Value("int64"), - "end_byte": Value("int64"), - "text": Value("string"), - } - ), - "yes_no_answer": ClassLabel(names=["NO", "YES"]), - } - ), + } + ), + "short_answers": List( + { + "start_token": List(Value("int64")), + "end_token": List(Value("int64")), + "start_byte": List(Value("int64")), + "end_byte": List(Value("int64")), + "text": List(Value("string")), + } + ), + "yes_no_answer": List(ClassLabel(names=["NO", "YES"])), + }, } ) - other = Features( # same but with [] instead of sequences, and with a shuffled fields order + other = Features( # same but with a shuffled fields order { "id": Value("string"), "document": { - "tokens": Sequence({"token": Value("string"), "is_html": 
Value("bool")}), + "tokens": {"token": List(Value("string")), "is_html": List(Value("bool"))}, "title": Value("string"), "url": Value("string"), "html": Value("string"), @@ -189,27 +189,25 @@ def test_reorder_fields_as(self): "tokens": [Value("string")], }, "annotations": { - "yes_no_answer": [ClassLabel(names=["NO", "YES"])], - "id": [Value("string")], - "long_answer": [ + "yes_no_answer": List(ClassLabel(names=["NO", "YES"])), + "id": List(Value("string")), + "long_answer": List( { "end_byte": Value("int64"), "start_token": Value("int64"), "end_token": Value("int64"), "start_byte": Value("int64"), } - ], - "short_answers": [ - Sequence( - { - "text": Value("string"), - "start_token": Value("int64"), - "end_token": Value("int64"), - "start_byte": Value("int64"), - "end_byte": Value("int64"), - } - ) - ], + ), + "short_answers": List( + { + "text": List(Value("string")), + "start_token": List(Value("int64")), + "end_token": List(Value("int64")), + "start_byte": List(Value("int64")), + "end_byte": List(Value("int64")), + } + ), }, } ) @@ -218,36 +216,36 @@ def test_reorder_fields_as(self): { "id": Value("string"), "document": { - "tokens": Sequence({"token": Value("string"), "is_html": Value("bool")}), + "tokens": {"token": List(Value("string")), "is_html": List(Value("bool"))}, "title": Value("string"), "url": Value("string"), "html": Value("string"), }, "question": { "text": Value("string"), - "tokens": Sequence(Value("string")), + "tokens": List(Value("string")), }, - "annotations": Sequence( - { - "yes_no_answer": ClassLabel(names=["NO", "YES"]), - "id": Value("string"), - "long_answer": { + "annotations": { + "yes_no_answer": List(ClassLabel(names=["NO", "YES"])), + "id": List(Value("string")), + "long_answer": List( + { "end_byte": Value("int64"), "start_token": Value("int64"), "end_token": Value("int64"), "start_byte": Value("int64"), - }, - "short_answers": Sequence( - { - "text": Value("string"), - "start_token": Value("int64"), - "end_token": 
Value("int64"), - "start_byte": Value("int64"), - "end_byte": Value("int64"), - } - ), - } - ), + } + ), + "short_answers": List( + { + "text": List(Value("string")), + "start_token": List(Value("int64")), + "end_token": List(Value("int64")), + "start_byte": List(Value("int64")), + "end_byte": List(Value("int64")), + } + ), + }, } ) @@ -265,7 +263,7 @@ def test_flatten(self): assert features == _features, "calling flatten shouldn't alter the current features" def test_flatten_with_sequence(self): - features = Features({"foo": Sequence({"bar": {"my_value": Value("int32")}})}) + features = Features({"foo": {"bar": List({"my_value": Value("int32")})}}) _features = features.copy() flattened_features = features.flatten() assert flattened_features == {"foo.bar": [{"my_value": Value("int32")}]} @@ -278,7 +276,7 @@ def assert_features_dicts_are_synced(features: Features): and features.keys() == features._column_requires_decoding.keys() ) - features = Features({"foo": Sequence({"bar": {"my_value": Value("int32")}})}) + features = Features({"foo": {"bar": List({"my_value": Value("int32")})}}) assert_features_dicts_are_synced(features) features["barfoo"] = Image() assert_features_dicts_are_synced(features) @@ -400,7 +398,7 @@ def test_class_label_to_and_from_dict(class_label_arg, tmp_path_factory): @pytest.mark.parametrize( "schema", - [[Audio()], LargeList(Audio()), Sequence(Audio())], + [[Audio()], LargeList(Audio()), List(Audio())], ) def test_decode_nested_example_with_list_types(schema, monkeypatch): mock_decode_example = MagicMock() @@ -413,7 +411,7 @@ def test_decode_nested_example_with_list_types(schema, monkeypatch): @pytest.mark.parametrize( "schema", - [[ClassLabel(names=["a", "b"])], LargeList(ClassLabel(names=["a", "b"])), Sequence(ClassLabel(names=["a", "b"]))], + [[ClassLabel(names=["a", "b"])], LargeList(ClassLabel(names=["a", "b"])), List(ClassLabel(names=["a", "b"]))], ) def test_encode_nested_example_with_list_types(schema): result = 
encode_nested_example(schema, ["b"]) @@ -422,7 +420,7 @@ def test_encode_nested_example_with_list_types(schema): @pytest.mark.parametrize("inner_type", [Value("int32"), {"subcolumn": Value("int32")}]) def test_encode_nested_example_sequence_with_none(inner_type): - schema = Sequence(inner_type) + schema = List(inner_type) obj = None result = encode_nested_example(schema, obj) assert result is None @@ -434,7 +432,7 @@ def test_encode_nested_example_sequence_with_none(inner_type): ({"col_1": ClassLabel(names=["a", "b"])}, {"col_1": "b"}, {"col_1": 1}), ({"col_1": [ClassLabel(names=["a", "b"])]}, {"col_1": ["b"]}, {"col_1": [1]}), ({"col_1": LargeList(ClassLabel(names=["a", "b"]))}, {"col_1": ["b"]}, {"col_1": [1]}), - ({"col_1": Sequence(ClassLabel(names=["a", "b"]))}, {"col_1": ["b"]}, {"col_1": [1]}), + ({"col_1": List(ClassLabel(names=["a", "b"]))}, {"col_1": ["b"]}, {"col_1": [1]}), ], ) def test_encode_example(features_dict, example, expected_encoded_example): @@ -446,7 +444,7 @@ def test_encode_example(features_dict, example, expected_encoded_example): def test_encode_batch_with_example_with_empty_first_elem(): features = Features( { - "x": Sequence(Sequence(ClassLabel(names=["a", "b"]))), + "x": List(List(ClassLabel(names=["a", "b"]))), } ) encoded_batch = features.encode_batch( @@ -497,7 +495,7 @@ def test_dataset_feature_with_none(feature): # nested tests data = {"col": [[None]]} - features = Features({"col": Sequence(feature)}) + features = Features({"col": List(feature)}) dset = Dataset.from_dict(data, features=features) item = dset[0] assert item.keys() == {"col"} @@ -668,21 +666,17 @@ def test_dont_iterate_over_each_element_in_a_list(self, mocked_cast): Features({"foo": {}}), Features({"foo": {"bar": Value("int32")}}), Features({"foo": {"bar1": Value("int32"), "bar2": Value("float64")}}), - Features({"foo": Sequence(Value("int32"))}), - Features({"foo": Sequence({})}), - Features({"foo": Sequence({"bar": Value("int32")})}), - Features({"foo": 
[Value("int32")]}), - Features({"foo": [{"bar": Value("int32")}]}), + Features({"foo": List(Value("int32"))}), + Features({"foo": {"bar": List(Value("int32"))}}), + Features({"foo": List({"bar": Value("int32")})}), Features({"foo": LargeList(Value("int32"))}), Features({"foo": LargeList({"bar": Value("int32")})}), ] NESTED_CUSTOM_FEATURES = [ Features({"foo": {"bar": ClassLabel(names=["negative", "positive"])}}), - Features({"foo": Sequence(ClassLabel(names=["negative", "positive"]))}), - Features({"foo": Sequence({"bar": ClassLabel(names=["negative", "positive"])})}), - Features({"foo": [ClassLabel(names=["negative", "positive"])]}), - Features({"foo": [{"bar": ClassLabel(names=["negative", "positive"])}]}), + Features({"foo": List(ClassLabel(names=["negative", "positive"]))}), + Features({"foo": List({"bar": ClassLabel(names=["negative", "positive"])})}), Features({"foo": LargeList(ClassLabel(names=["negative", "positive"]))}), Features({"foo": LargeList({"bar": ClassLabel(names=["negative", "positive"])})}), ] @@ -709,7 +703,7 @@ def test_features_to_yaml_list(features: Features): [ ({"col": [{"sub_col": Value("int32")}]}, {"col": [{"sub_col": Value("int32")}]}), ({"col": LargeList({"sub_col": Value("int32")})}, {"col": LargeList({"sub_col": Value("int32")})}), - ({"col": Sequence({"sub_col": Value("int32")})}, {"col.sub_col": Sequence(Value("int32"))}), + ({"col": {"sub_col": List(Value("int32"))}}, {"col.sub_col": List(Value("int32"))}), ], ) def test_features_flatten_with_list_types(features_dict, expected_features_dict): @@ -731,7 +725,7 @@ def test_features_flatten_with_list_types(features_dict, expected_features_dict) ), ( {"col": {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "Sequence"}}, - {"col": Sequence(Value("int32"))}, + {"col": List(Value("int32"))}, ), ( {"col": [{"sub_col": {"dtype": "int32", "_type": "Value"}}]}, @@ -743,7 +737,7 @@ def test_features_flatten_with_list_types(features_dict, expected_features_dict) ), ( {"col": 
{"feature": {"sub_col": {"dtype": "int32", "_type": "Value"}}, "_type": "Sequence"}}, - {"col": Sequence({"sub_col": Value("int32")})}, + {"col": {"sub_col": List(Value("int32"))}}, ), ], ) @@ -765,7 +759,7 @@ def test_features_from_dict_with_list_types(deserialized_features_dict, expected ), ( {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "Sequence"}, - Sequence(Value("int32")), + List(Value("int32")), ), ( [{"sub_col": {"dtype": "int32", "_type": "Value"}}], @@ -777,7 +771,7 @@ def test_features_from_dict_with_list_types(deserialized_features_dict, expected ), ( {"feature": {"sub_col": {"dtype": "int32", "_type": "Value"}}, "_type": "Sequence"}, - Sequence({"sub_col": Value("int32")}), + {"sub_col": List(Value("int32"))}, ), ], ) @@ -882,7 +876,7 @@ def test_features_from_arrow_schema_list_data_type(list_dtype, scalar_dtype): [ ([Value("int64")], [Value("int64")]), (LargeList(Value("int64")), LargeList(Value("int64"))), - (Sequence(Value("int64")), Sequence(Value("int64"))), + (List(Value("int64")), List(Value("int64"))), ( [{"sub_col_1": Value("int64"), "sub_col_2": Value("int64")}], [{"sub_col_2": Value("int64"), "sub_col_1": Value("int64")}], @@ -892,8 +886,8 @@ def test_features_from_arrow_schema_list_data_type(list_dtype, scalar_dtype): LargeList({"sub_col_2": Value("int64"), "sub_col_1": Value("int64")}), ), ( - Sequence({"sub_col_1": Value("int64"), "sub_col_2": Value("int64")}), - Sequence({"sub_col_2": Value("int64"), "sub_col_1": Value("int64")}), + {"sub_col_1": List(Value("int64")), "sub_col_2": List(Value("int64"))}, + {"sub_col_2": List(Value("int64")), "sub_col_1": List(Value("int64"))}, ), ], ) @@ -953,7 +947,7 @@ def test_generate_from_arrow_type_with_arrow_nested_data_type( @pytest.mark.parametrize( "schema", - [[ClassLabel(names=["a", "b"])], LargeList(ClassLabel(names=["a", "b"])), Sequence(ClassLabel(names=["a", "b"]))], + [[ClassLabel(names=["a", "b"])], LargeList(ClassLabel(names=["a", "b"])), List(ClassLabel(names=["a", 
"b"]))], ) def test_check_non_null_non_empty_recursive_with_list_types(schema): assert _check_non_null_non_empty_recursive([], schema) is False @@ -964,31 +958,31 @@ def test_check_non_null_non_empty_recursive_with_list_types(schema): [ [[ClassLabel(names=["a", "b"])]], LargeList(LargeList(ClassLabel(names=["a", "b"]))), - Sequence(Sequence(ClassLabel(names=["a", "b"]))), + List(List(ClassLabel(names=["a", "b"]))), ], ) def test_check_non_null_non_empty_recursive_with_nested_list_types(schema): assert _check_non_null_non_empty_recursive([[]], schema) is False -@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), Sequence(Audio())]) +@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), List(Audio())]) def test_require_decoding_with_list_types(feature): assert require_decoding(feature) -@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), Sequence(Audio())]) +@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), List(Audio())]) def test_require_storage_cast_with_list_types(feature): assert require_storage_cast(feature) -@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), Sequence(Audio())]) +@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), List(Audio())]) def test_require_storage_embed_with_list_types(feature): assert require_storage_embed(feature) @pytest.mark.parametrize( "feature, expected", - [([Value("int32")], [1]), (LargeList(Value("int32")), LargeList(1)), (Sequence(Value("int32")), Sequence(1))], + [([Value("int32")], [1]), (LargeList(Value("int32")), LargeList(1)), (List(Value("int32")), List(1))], ) def test_visit_with_list_types(feature, expected): def func(x): diff --git a/tests/features/test_image.py b/tests/features/test_image.py index d639bf84ac5..57559b57a1e 100644 --- a/tests/features/test_image.py +++ b/tests/features/test_image.py @@ -9,7 +9,7 @@ import pyarrow as pa import pytest -from datasets import Column, Dataset, Features, Image, Sequence, Value, 
concatenate_datasets, load_dataset +from datasets import Column, Dataset, Features, Image, Value, concatenate_datasets, load_dataset from datasets.features.image import encode_np_array, image_to_bytes from ..utils import require_pil @@ -45,7 +45,7 @@ def test_image_feature_type_to_arrow(): assert features.arrow_schema == pa.schema({"image": Image().pa_type}) features = Features({"struct_containing_an_image": {"image": Image()}}) assert features.arrow_schema == pa.schema({"struct_containing_an_image": pa.struct({"image": Image().pa_type})}) - features = Features({"sequence_of_images": Sequence(Image())}) + features = Features({"sequence_of_images": List(Image())}) assert features.arrow_schema == pa.schema({"sequence_of_images": pa.list_(Image().pa_type)}) @@ -276,7 +276,7 @@ def test_dataset_with_image_feature_with_none(): # nested tests data = {"images": [[None]]} - features = Features({"images": Sequence(Image())}) + features = Features({"images": List(Image())}) dset = Dataset.from_dict(data, features=features) item = dset[0] assert item.keys() == {"images"} diff --git a/tests/fixtures/files.py b/tests/fixtures/files.py index 25b1448ae46..b1c6f7768a2 100644 --- a/tests/fixtures/files.py +++ b/tests/fixtures/files.py @@ -24,14 +24,12 @@ def dataset(): n = 10 features = datasets.Features( { - "tokens": datasets.Sequence(datasets.Value("string")), - "labels": datasets.Sequence(datasets.ClassLabel(names=["negative", "positive"])), - "answers": datasets.Sequence( - { - "text": datasets.Value("string"), - "answer_start": datasets.Value("int32"), - } - ), + "tokens": datasets.List(datasets.Value("string")), + "labels": datasets.List(datasets.ClassLabel(names=["negative", "positive"])), + "answers": { + "text": datasets.List(datasets.Value("string")), + "answer_start": datasets.List(datasets.Value("int32")), + }, "id": datasets.Value("int64"), } ) diff --git a/tests/io/test_parquet.py b/tests/io/test_parquet.py index cdc55c9e18e..5062b88a60c 100644 --- 
a/tests/io/test_parquet.py +++ b/tests/io/test_parquet.py @@ -2,7 +2,7 @@ import pyarrow.parquet as pq import pytest -from datasets import Audio, Dataset, DatasetDict, Features, IterableDatasetDict, NamedSplit, Sequence, Value, config +from datasets import Audio, Dataset, DatasetDict, Features, IterableDatasetDict, NamedSplit, Value, config from datasets.features.image import Image from datasets.info import DatasetInfo from datasets.io.parquet import ParquetDatasetReader, ParquetDatasetWriter, get_writer_batch_size @@ -219,7 +219,7 @@ def test_dataset_to_parquet_keeps_features(shared_datadir, tmp_path): [ (Features({"foo": Value("int32")}), None), (Features({"image": Image(), "foo": Value("int32")}), config.PARQUET_ROW_GROUP_SIZE_FOR_IMAGE_DATASETS), - (Features({"nested": Sequence(Audio())}), config.PARQUET_ROW_GROUP_SIZE_FOR_AUDIO_DATASETS), + (Features({"nested": List(Audio())}), config.PARQUET_ROW_GROUP_SIZE_FOR_AUDIO_DATASETS), ], ) def test_get_writer_batch_size(feature, expected): diff --git a/tests/packaged_modules/test_webdataset.py b/tests/packaged_modules/test_webdataset.py index 12aa6275382..d65915fe6ea 100644 --- a/tests/packaged_modules/test_webdataset.py +++ b/tests/packaged_modules/test_webdataset.py @@ -3,7 +3,7 @@ import pytest -from datasets import Audio, DownloadManager, Features, Image, Sequence, Value +from datasets import Audio, DownloadManager, Features, Image, List, Value from datasets.packaged_modules.webdataset.webdataset import WebDataset from ..utils import ( @@ -245,7 +245,7 @@ def test_tensor_webdataset(tensor_wds_file): "__key__": Value("string"), "__url__": Value("string"), "json": {"text": Value("string")}, - "pth": Sequence(Value("float32")), + "pth": List(Value("float32")), } ) assert len(split_generators) == 1 diff --git a/tests/test_arrow_dataset.py b/tests/test_arrow_dataset.py index 8e365462197..317bf6273b2 100644 --- a/tests/test_arrow_dataset.py +++ b/tests/test_arrow_dataset.py @@ -34,7 +34,7 @@ Features, Image, LargeList, 
- Sequence, + List, Translation, TranslationVariableLanguages, Value, @@ -143,13 +143,13 @@ def _create_dummy_dataset( data = { "col_1": [[[True, False], [False, True]]] * 4, # 2D "col_2": [[[["a", "b"], ["c", "d"]], [["e", "f"], ["g", "h"]]]] * 4, # 3D array - "col_3": [[3, 2, 1, 0]] * 4, # Sequence + "col_3": [[3, 2, 1, 0]] * 4, # List } features = Features( { "col_1": Array2D(shape=(2, 2), dtype="bool"), "col_2": Array3D(shape=(2, 2, 2), dtype="string"), - "col_3": Sequence(feature=Value("int64")), + "col_3": List(feature=Value("int64")), } ) dset = Dataset.from_dict(data, features=features) @@ -205,7 +205,7 @@ def test_dummy_dataset(self, in_memory): { "col_1": Array2D(shape=(2, 2), dtype="bool"), "col_2": Array3D(shape=(2, 2, 2), dtype="string"), - "col_3": Sequence(feature=Value("int64")), + "col_3": List(feature=Value("int64")), } ), ) @@ -913,7 +913,7 @@ def test_flatten(self, in_memory): with tempfile.TemporaryDirectory() as tmp_dir: with Dataset.from_dict( {"a": [{"b": {"c": ["text"]}}] * 10, "foo": [1] * 10}, - features=Features({"a": {"b": Sequence({"c": Value("string")})}, "foo": Value("int64")}), + features=Features({"a": {"b": {"c": List(Value("string"))}}, "foo": Value("int64")}), ) as dset: with self._to(in_memory, tmp_dir, dset) as dset: fingerprint = dset._fingerprint @@ -921,7 +921,7 @@ def test_flatten(self, in_memory): self.assertListEqual(sorted(dset.column_names), ["a.b.c", "foo"]) self.assertListEqual(sorted(dset.features.keys()), ["a.b.c", "foo"]) self.assertDictEqual( - dset.features, Features({"a.b.c": Sequence(Value("string")), "foo": Value("int64")}) + dset.features, Features({"a.b.c": List(Value("string")), "foo": Value("int64")}) ) self.assertNotEqual(dset._fingerprint, fingerprint) assert_arrow_metadata_are_synced_with_dataset_features(dset) @@ -962,8 +962,8 @@ def test_flatten(self, in_memory): dset.features, Features( { - "a.language": Sequence(Value("string")), - "a.translation": Sequence(Value("string")), + "a.language": 
List(Value("string")), + "a.translation": List(Value("string")), "foo": Value("int64"), } ), @@ -1729,7 +1729,7 @@ def func(example): self.assertEqual(len(dset_test), 30) self.assertDictEqual( dset_test.features, - Features({"filename": Value("string"), "tensor": Sequence(Value("float32"))}), + Features({"filename": Value("string"), "tensor": List(Value("float32"))}), ) self.assertListEqual(dset_test[0]["tensor"], [1, 2, 3]) @@ -1746,7 +1746,7 @@ def func(example): self.assertEqual(len(dset_test), 30) self.assertDictEqual( dset_test.features, - Features({"filename": Value("string"), "tensor": Sequence(Value("float32"))}), + Features({"filename": Value("string"), "tensor": List(Value("float32"))}), ) self.assertListEqual(dset_test[0]["tensor"], [1, 2, 3]) @@ -1763,7 +1763,7 @@ def func(example): self.assertEqual(len(dset_test), 30) self.assertDictEqual( dset_test.features, - Features({"filename": Value("string"), "tensor": Sequence(Value("float32"))}), + Features({"filename": Value("string"), "tensor": List(Value("float32"))}), ) self.assertListEqual(dset_test[0]["tensor"], [1, 2, 3]) @@ -1777,7 +1777,7 @@ def func(example): self.assertEqual(len(dset_test), 30) self.assertDictEqual( dset_test.features, - Features({"filename": Value("string"), "tensor": Sequence(Value("float64"))}), + Features({"filename": Value("string"), "tensor": List(Value("float64"))}), ) self.assertListEqual(dset_test[0]["tensor"], [1, 2, 3]) @@ -1795,7 +1795,7 @@ def func(batch): self.assertEqual(len(dset_test), 30) self.assertDictEqual( dset_test.features, - Features({"filename": Value("string"), "tensor": Sequence(Value("float32"))}), + Features({"filename": Value("string"), "tensor": List(Value("float32"))}), ) self.assertListEqual(dset_test[0]["tensor"], [1, 2, 3]) @@ -2019,8 +2019,8 @@ def test_filter_caching(self, in_memory): def test_keep_features_after_transform_specified(self, in_memory): features = Features( { - "tokens": Sequence(Value("string")), - "labels": 
Sequence(ClassLabel(names=["negative", "positive"])), + "tokens": List(Value("string")), + "labels": List(ClassLabel(names=["negative", "positive"])), } ) @@ -2040,8 +2040,8 @@ def invert_labels(x): def test_keep_features_after_transform_unspecified(self, in_memory): features = Features( { - "tokens": Sequence(Value("string")), - "labels": Sequence(ClassLabel(names=["negative", "positive"])), + "tokens": List(Value("string")), + "labels": List(ClassLabel(names=["negative", "positive"])), } ) @@ -2061,8 +2061,8 @@ def invert_labels(x): def test_keep_features_after_transform_to_file(self, in_memory): features = Features( { - "tokens": Sequence(Value("string")), - "labels": Sequence(ClassLabel(names=["negative", "positive"])), + "tokens": List(Value("string")), + "labels": List(ClassLabel(names=["negative", "positive"])), } ) @@ -2083,8 +2083,8 @@ def invert_labels(x): def test_keep_features_after_transform_to_memory(self, in_memory): features = Features( { - "tokens": Sequence(Value("string")), - "labels": Sequence(ClassLabel(names=["negative", "positive"])), + "tokens": List(Value("string")), + "labels": List(ClassLabel(names=["negative", "positive"])), } ) @@ -2103,8 +2103,8 @@ def invert_labels(x): def test_keep_features_after_loading_from_cache(self, in_memory): features = Features( { - "tokens": Sequence(Value("string")), - "labels": Sequence(ClassLabel(names=["negative", "positive"])), + "tokens": List(Value("string")), + "labels": List(ClassLabel(names=["negative", "positive"])), } ) @@ -2129,8 +2129,8 @@ def invert_labels(x): def test_keep_features_with_new_features(self, in_memory): features = Features( { - "tokens": Sequence(Value("string")), - "labels": Sequence(ClassLabel(names=["negative", "positive"])), + "tokens": List(Value("string")), + "labels": List(ClassLabel(names=["negative", "positive"])), } ) @@ -2139,9 +2139,9 @@ def invert_labels(x): expected_features = Features( { - "tokens": Sequence(Value("string")), - "labels": 
Sequence(ClassLabel(names=["negative", "positive"])), - "labels2": Sequence(Value("int64")), + "tokens": List(Value("string")), + "labels": List(ClassLabel(names=["negative", "positive"])), + "labels2": List(Value("int64")), } ) @@ -2875,9 +2875,7 @@ def test_format_vectors(self, in_memory): for col in columns: self.assertIsInstance(dset[0][col], (str, list)) self.assertIsInstance(dset[:2][col], list) - self.assertDictEqual( - dset.features, Features({"filename": Value("string"), "vec": Sequence(Value("float64"))}) - ) + self.assertDictEqual(dset.features, Features({"filename": Value("string"), "vec": List(Value("float64"))})) dset.set_format("tensorflow") self.assertIsNotNone(dset[0]) @@ -2930,9 +2928,7 @@ def test_format_ragged_vectors(self, in_memory): for col in columns: self.assertIsInstance(dset[0][col], (str, list)) self.assertIsInstance(dset[:2][col], list) - self.assertDictEqual( - dset.features, Features({"filename": Value("string"), "vec": Sequence(Value("float64"))}) - ) + self.assertDictEqual(dset.features, Features({"filename": Value("string"), "vec": List(Value("float64"))})) dset.set_format("tensorflow") self.assertIsNotNone(dset[0]) @@ -2986,7 +2982,7 @@ def test_format_nested(self, in_memory): dset.map(lambda ex: {"nested": [{"foo": np.ones(3)}] * len(ex["filename"])}, batched=True) as dset, ): self.assertDictEqual( - dset.features, Features({"filename": Value("string"), "nested": {"foo": Sequence(Value("float64"))}}) + dset.features, Features({"filename": Value("string"), "nested": {"foo": List(Value("float64"))}}) ) dset.set_format("tensorflow") @@ -3293,7 +3289,7 @@ def test_from_pandas(self): self.assertListEqual(list(dset.features.keys()), ["col_1", "col_2"]) self.assertDictEqual(dset.features, Features({"col_1": Value("int64"), "col_2": Value("string")})) - features = Features({"col_1": Sequence(Value("string")), "col_2": Value("string")}) + features = Features({"col_1": List(Value("string")), "col_2": Value("string")}) 
self.assertRaises(TypeError, Dataset.from_pandas, df, features=features) @require_polars @@ -3322,7 +3318,7 @@ def test_from_polars(self): self.assertListEqual(list(dset.features.keys()), ["col_1", "col_2"]) self.assertDictEqual(dset.features, Features({"col_1": Value("int64"), "col_2": Value("large_string")})) - features = Features({"col_1": Sequence(Value("string")), "col_2": Value("large_string")}) + features = Features({"col_1": List(Value("string")), "col_2": Value("large_string")}) self.assertRaises(TypeError, Dataset.from_polars, df, features=features) def test_from_dict(self): @@ -3417,8 +3413,8 @@ def test_tf_string_encoding(self): def test_cast_with_sliced_list(): - old_features = Features({"foo": Sequence(Value("int64"))}) - new_features = Features({"foo": Sequence(Value("int32"))}) + old_features = Features({"foo": List(Value("int64"))}) + new_features = Features({"foo": List(Value("int32"))}) dataset = Dataset.from_dict({"foo": [[i] * (i % 3) for i in range(20)]}, features=old_features) casted_dataset = dataset.cast(new_features, batch_size=2) # small batch size to slice the ListArray assert dataset["foo"] == casted_dataset["foo"] @@ -4263,14 +4259,12 @@ def test_dataset_to_json(dataset, tmp_path): { "features": Features( { - "tokens": Sequence(Value("string")), - "labels": Sequence(Value("int16")), - "answers": Sequence( - { - "text": Value("string"), - "answer_start": Value("int32"), - } - ), + "tokens": List(Value("string")), + "labels": List(Value("int16")), + "answers": { + "text": List(Value("string")), + "answer_start": List(Value("int32")), + }, "id": Value("int32"), } ) @@ -4436,7 +4430,7 @@ def test_dataset_format_with_unformatted_image(): ds = Dataset.from_dict( {"a": [np.arange(4 * 4 * 3).reshape(4, 4, 3)] * 10, "b": [[0, 1]] * 10}, - Features({"a": Image(), "b": Sequence(Value("int64"))}), + Features({"a": Image(), "b": List(Value("int64"))}), ) ds.set_format("np", columns=["b"], output_all_columns=True) assert isinstance(ds[0]["a"], 
PIL.Image.Image) diff --git a/tests/test_dataset_dict.py b/tests/test_dataset_dict.py index 651f2ec822e..afc0ae8b417 100644 --- a/tests/test_dataset_dict.py +++ b/tests/test_dataset_dict.py @@ -9,7 +9,7 @@ from datasets import load_from_disk from datasets.arrow_dataset import Dataset from datasets.dataset_dict import DatasetDict, IterableDatasetDict -from datasets.features import ClassLabel, Features, Sequence, Value +from datasets.features import ClassLabel, Features, List, Value from datasets.iterable_dataset import IterableDataset from datasets.splits import NamedSplit @@ -71,15 +71,13 @@ def _create_dummy_iterable_dataset_dict(self, multiple_columns=False) -> Iterabl def test_flatten(self): dset_split = Dataset.from_dict( {"a": [{"b": {"c": ["text"]}}] * 10, "foo": [1] * 10}, - features=Features({"a": {"b": Sequence({"c": Value("string")})}, "foo": Value("int64")}), + features=Features({"a": {"b": {"c": List(Value("string"))}}, "foo": Value("int64")}), ) dset = DatasetDict({"train": dset_split, "test": dset_split}) dset = dset.flatten() self.assertDictEqual(dset.column_names, {"train": ["a.b.c", "foo"], "test": ["a.b.c", "foo"]}) self.assertListEqual(sorted(dset["train"].features.keys()), ["a.b.c", "foo"]) - self.assertDictEqual( - dset["train"].features, Features({"a.b.c": Sequence(Value("string")), "foo": Value("int64")}) - ) + self.assertDictEqual(dset["train"].features, Features({"a.b.c": List(Value("string")), "foo": Value("int64")})) del dset def test_set_format_numpy(self): diff --git a/tests/test_dataset_list.py b/tests/test_dataset_list.py index 1004ae3cd68..642eb2c5736 100644 --- a/tests/test_dataset_list.py +++ b/tests/test_dataset_list.py @@ -1,6 +1,6 @@ from unittest import TestCase -from datasets import Sequence, Value +from datasets import List, Value from datasets.arrow_dataset import Dataset @@ -39,7 +39,7 @@ def test_uneven_records(self): # checks what happens with missing columns def test_variable_list_records(self): # checks if the type can 
be inferred from the second record list_records = [{"col_1": []}, {"col_1": [1, 2]}] dset = Dataset.from_list(list_records) - self.assertEqual(dset.info.features["col_1"], Sequence(Value("int64"))) + self.assertEqual(dset.info.features["col_1"], List(Value("int64"))) def test_create_empty(self): dset = Dataset.from_list([]) diff --git a/tests/test_table.py b/tests/test_table.py index 3d3db09e5d6..7ca740e8fde 100644 --- a/tests/test_table.py +++ b/tests/test_table.py @@ -9,7 +9,7 @@ import pyarrow as pa import pytest -from datasets.features import Array2D, ClassLabel, Features, Image, LargeList, Sequence, Value +from datasets.features import Array2D, ClassLabel, Features, Image, LargeList, Value from datasets.features.features import Array2DExtensionType, get_nested_type from datasets.table import ( ConcatenationTable, @@ -1105,42 +1105,36 @@ def test_indexed_table_mixin(): def test_cast_integer_array_to_features(): arr = pa.array([[0, 1]]) - assert cast_array_to_feature(arr, Sequence(Value("string"))).type == pa.list_(pa.string()) - assert cast_array_to_feature(arr, Sequence(Value("string")), allow_decimal_to_str=False).type == pa.list_( - pa.string() - ) + assert cast_array_to_feature(arr, List(Value("string"))).type == pa.list_(pa.string()) + assert cast_array_to_feature(arr, List(Value("string")), allow_decimal_to_str=False).type == pa.list_(pa.string()) with pytest.raises(TypeError): - cast_array_to_feature(arr, Sequence(Value("string")), allow_primitive_to_str=False) + cast_array_to_feature(arr, List(Value("string")), allow_primitive_to_str=False) def test_cast_float_array_to_features(): arr = pa.array([[0.0, 1.0]]) - assert cast_array_to_feature(arr, Sequence(Value("string"))).type == pa.list_(pa.string()) - assert cast_array_to_feature(arr, Sequence(Value("string")), allow_decimal_to_str=False).type == pa.list_( - pa.string() - ) + assert cast_array_to_feature(arr, List(Value("string"))).type == pa.list_(pa.string()) + assert cast_array_to_feature(arr, 
List(Value("string")), allow_decimal_to_str=False).type == pa.list_(pa.string()) with pytest.raises(TypeError): - cast_array_to_feature(arr, Sequence(Value("string")), allow_primitive_to_str=False) + cast_array_to_feature(arr, List(Value("string")), allow_primitive_to_str=False) def test_cast_boolean_array_to_features(): arr = pa.array([[False, True]]) - assert cast_array_to_feature(arr, Sequence(Value("string"))).type == pa.list_(pa.string()) - assert cast_array_to_feature(arr, Sequence(Value("string")), allow_decimal_to_str=False).type == pa.list_( - pa.string() - ) + assert cast_array_to_feature(arr, List(Value("string"))).type == pa.list_(pa.string()) + assert cast_array_to_feature(arr, List(Value("string")), allow_decimal_to_str=False).type == pa.list_(pa.string()) with pytest.raises(TypeError): - cast_array_to_feature(arr, Sequence(Value("string")), allow_primitive_to_str=False) + cast_array_to_feature(arr, List(Value("string")), allow_primitive_to_str=False) def test_cast_decimal_array_to_features(): arr = pa.array([[Decimal(0), Decimal(1)]]) - assert cast_array_to_feature(arr, Sequence(Value("string"))).type == pa.list_(pa.string()) - assert cast_array_to_feature(arr, Sequence(Value("string")), allow_primitive_to_str=False).type == pa.list_( + assert cast_array_to_feature(arr, List(Value("string"))).type == pa.list_(pa.string()) + assert cast_array_to_feature(arr, List(Value("string")), allow_primitive_to_str=False).type == pa.list_( pa.string() ) with pytest.raises(TypeError): - cast_array_to_feature(arr, Sequence(Value("string")), allow_decimal_to_str=False) + cast_array_to_feature(arr, List(Value("string")), allow_decimal_to_str=False) @pytest.mark.parametrize( @@ -1160,7 +1154,7 @@ def test_cast_array_to_feature_with_struct_with_missing_fields(array_list, expec def test_cast_array_to_features_nested(): arr = pa.array([[{"foo": [0]}]]) - assert cast_array_to_feature(arr, [{"foo": Sequence(Value("string"))}]).type == pa.list_( + assert 
cast_array_to_feature(arr, [{"foo": List(Value("string"))}]).type == pa.list_( pa.struct({"foo": pa.list_(pa.string())}) ) @@ -1187,12 +1181,12 @@ def test_cast_array_to_features_nested_with_nulls(): def test_cast_array_to_features_to_null_type(): # same type arr = pa.array([[None, None]]) - assert cast_array_to_feature(arr, Sequence(Value("null"))).type == pa.list_(pa.null()) + assert cast_array_to_feature(arr, List(Value("null"))).type == pa.list_(pa.null()) # different type arr = pa.array([[None, 1]]) with pytest.raises(TypeError): - cast_array_to_feature(arr, Sequence(Value("null"))) + cast_array_to_feature(arr, List(Value("null"))) def test_cast_array_to_features_array_xd(): @@ -1207,26 +1201,26 @@ def test_cast_array_to_features_array_xd(): def test_cast_array_to_features_sequence_classlabel(): arr = pa.array([[], [1], [0, 1]], pa.list_(pa.int64())) - assert cast_array_to_feature(arr, Sequence(ClassLabel(names=["foo", "bar"]))).type == pa.list_(pa.int64()) + assert cast_array_to_feature(arr, List(ClassLabel(names=["foo", "bar"]))).type == pa.list_(pa.int64()) arr = pa.array([[], ["bar"], ["foo", "bar"]], pa.list_(pa.string())) - assert cast_array_to_feature(arr, Sequence(ClassLabel(names=["foo", "bar"]))).type == pa.list_(pa.int64()) + assert cast_array_to_feature(arr, List(ClassLabel(names=["foo", "bar"]))).type == pa.list_(pa.int64()) # Test empty arrays arr = pa.array([[], []], pa.list_(pa.int64())) - assert cast_array_to_feature(arr, Sequence(ClassLabel(names=["foo", "bar"]))).type == pa.list_(pa.int64()) + assert cast_array_to_feature(arr, List(ClassLabel(names=["foo", "bar"]))).type == pa.list_(pa.int64()) arr = pa.array([[], []], pa.list_(pa.string())) - assert cast_array_to_feature(arr, Sequence(ClassLabel(names=["foo", "bar"]))).type == pa.list_(pa.int64()) + assert cast_array_to_feature(arr, List(ClassLabel(names=["foo", "bar"]))).type == pa.list_(pa.int64()) # Test invalid class labels arr = pa.array([[2]], pa.list_(pa.int64())) with 
pytest.raises(ValueError): - assert cast_array_to_feature(arr, Sequence(ClassLabel(names=["foo", "bar"]))) + assert cast_array_to_feature(arr, List(ClassLabel(names=["foo", "bar"]))) arr = pa.array([["baz"]], pa.list_(pa.string())) with pytest.raises(ValueError): - assert cast_array_to_feature(arr, Sequence(ClassLabel(names=["foo", "bar"]))) + assert cast_array_to_feature(arr, List(ClassLabel(names=["foo", "bar"]))) @pytest.mark.parametrize( @@ -1240,14 +1234,14 @@ def test_cast_array_to_features_sequence_classlabel(): def test_cast_fixed_size_list_array_to_features_sequence(arr, slice, target_value_feature): arr = arr if slice is None else arr[slice] # Fixed size list - casted_array = cast_array_to_feature(arr, Sequence(target_value_feature, length=arr.type.list_size)) - assert casted_array.type == get_nested_type(Sequence(target_value_feature, length=arr.type.list_size)) + casted_array = cast_array_to_feature(arr, List(target_value_feature, length=arr.type.list_size)) + assert casted_array.type == get_nested_type(List(target_value_feature, length=arr.type.list_size)) assert casted_array.to_pylist() == arr.to_pylist() with pytest.raises(TypeError): - cast_array_to_feature(arr, Sequence(target_value_feature, length=arr.type.list_size + 1)) + cast_array_to_feature(arr, List(target_value_feature, length=arr.type.list_size + 1)) # Variable size list - casted_array = cast_array_to_feature(arr, Sequence(target_value_feature)) - assert casted_array.type == get_nested_type(Sequence(target_value_feature)) + casted_array = cast_array_to_feature(arr, List(target_value_feature)) + assert casted_array.type == get_nested_type(List(target_value_feature)) assert casted_array.to_pylist() == arr.to_pylist() casted_array = cast_array_to_feature(arr, [target_value_feature]) assert casted_array.type == get_nested_type([target_value_feature]) @@ -1265,16 +1259,16 @@ def test_cast_fixed_size_list_array_to_features_sequence(arr, slice, target_valu def 
test_cast_list_array_to_features_sequence(arr, slice, target_value_feature): arr = arr if slice is None else arr[slice] # Variable size list - casted_array = cast_array_to_feature(arr, Sequence(target_value_feature)) - assert casted_array.type == get_nested_type(Sequence(target_value_feature)) + casted_array = cast_array_to_feature(arr, List(target_value_feature)) + assert casted_array.type == get_nested_type(List(target_value_feature)) assert casted_array.to_pylist() == arr.to_pylist() casted_array = cast_array_to_feature(arr, [target_value_feature]) assert casted_array.type == get_nested_type([target_value_feature]) assert casted_array.to_pylist() == arr.to_pylist() # Fixed size list list_size = arr.value_lengths().drop_null()[0].as_py() if arr.value_lengths().drop_null() else 2 - casted_array = cast_array_to_feature(arr, Sequence(target_value_feature, length=list_size)) - assert casted_array.type == get_nested_type(Sequence(target_value_feature, length=list_size)) + casted_array = cast_array_to_feature(arr, List(target_value_feature, length=list_size)) + assert casted_array.type == get_nested_type(List(target_value_feature, length=list_size)) assert casted_array.to_pylist() == arr.to_pylist() @@ -1303,7 +1297,7 @@ def test_cast_array_to_feature_with_list_array_and_sequence_feature( array_type = pa.struct({"col_1": array_type}) sequence_feature = {"col_1": sequence_feature} expected_array_type = pa.struct({"col_1": expected_array_type}) - feature = Sequence(sequence_feature) + feature = List(sequence_feature) array = pa.array([array_data], type=array_type) cast_array = cast_array_to_feature(array, feature) assert cast_array.type == expected_array_type @@ -1337,12 +1331,12 @@ def test_cast_array_xd_to_features_sequence(): arr = Array2DExtensionType(shape=(2, 3), dtype="int64").wrap_array(pa.array(arr, pa.list_(pa.list_(pa.int64())))) arr = pa.ListArray.from_arrays([0, None, 4, 8], arr) # Variable size list - casted_array = cast_array_to_feature(arr, 
Sequence(Array2D(shape=(2, 3), dtype="int32"))) - assert casted_array.type == get_nested_type(Sequence(Array2D(shape=(2, 3), dtype="int32"))) + casted_array = cast_array_to_feature(arr, List(Array2D(shape=(2, 3), dtype="int32"))) + assert casted_array.type == get_nested_type(List(Array2D(shape=(2, 3), dtype="int32"))) assert casted_array.to_pylist() == arr.to_pylist() # Fixed size list - casted_array = cast_array_to_feature(arr, Sequence(Array2D(shape=(2, 3), dtype="int32"), length=4)) - assert casted_array.type == get_nested_type(Sequence(Array2D(shape=(2, 3), dtype="int32"), length=4)) + casted_array = cast_array_to_feature(arr, List(Array2D(shape=(2, 3), dtype="int32"), length=4)) + assert casted_array.type == get_nested_type(List(Array2D(shape=(2, 3), dtype="int32"), length=4)) assert casted_array.to_pylist() == arr.to_pylist() @@ -1380,7 +1374,7 @@ def test_embed_array_storage_nested(image_file): ), ( pa.array([[{"path": "image_path"}]], type=pa.list_(Image.pa_type)), - Sequence(Image()), + List(Image()), pa.types.is_list, ), ], From dc33788f577bbc9362e41296025b952c249e0f8a Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Mon, 23 Jun 2025 22:34:17 +0200 Subject: [PATCH 2/9] docs --- docs/source/about_dataset_features.mdx | 15 ++++++++------- docs/source/package_reference/main_classes.mdx | 2 ++ docs/source/process.mdx | 11 ++++++----- src/datasets/features/features.py | 1 + 4 files changed, 17 insertions(+), 12 deletions(-) diff --git a/docs/source/about_dataset_features.mdx b/docs/source/about_dataset_features.mdx index d575e28065d..6df80ce72a1 100644 --- a/docs/source/about_dataset_features.mdx +++ b/docs/source/about_dataset_features.mdx @@ -32,20 +32,21 @@ Refer to [`Value`] for a full list of supported data types. The [`ClassLabel`] feature informs 🤗 Datasets the `label` column contains two classes. The classes are labeled `not_equivalent` and `equivalent`. Labels are stored as integers in the dataset. 
When you retrieve the labels, [`ClassLabel.int2str`] and [`ClassLabel.str2int`] carries out the conversion from integer value to label name, and vice versa. -If your data type contains a list of objects, then you want to use the [`Sequence`] feature. Remember the SQuAD dataset? +If your data type contains a list of objects, then you want to use the [`List`] feature. Remember the SQuAD dataset? ```py >>> from datasets import load_dataset >>> dataset = load_dataset('rajpurkar/squad', split='train') >>> dataset.features -{'answers': Sequence(feature={'text': Value(dtype='string'), 'answer_start': Value(dtype='int32')}, length=-1), -'context': Value(dtype='string'), -'id': Value(dtype='string'), -'question': Value(dtype='string'), -'title': Value(dtype='string')} +{'id': Value(dtype='string'), + 'title': Value(dtype='string'), + 'context': Value(dtype='string'), + 'question': Value(dtype='string'), + 'answers': {'text': List(feature=Value(dtype='string'), length=-1), + 'answer_start': List(feature=Value(dtype='int32'), length=-1)}} ``` -The `answers` field is constructed using the [`Sequence`] feature because it contains two subfields, `text` and `answer_start`, which are lists of `string` and `int32`, respectively. +The `answers` field is constructed using a dict of features because it contains two subfields, `text` and `answer_start`, which are lists of `string` and `int32`, respectively. 
diff --git a/docs/source/package_reference/main_classes.mdx b/docs/source/package_reference/main_classes.mdx index 41ec8051f44..299dd765d13 100644 --- a/docs/source/package_reference/main_classes.mdx +++ b/docs/source/package_reference/main_classes.mdx @@ -235,6 +235,8 @@ Dictionary with split names as keys ('train', 'test' for example), and `Iterable [[autodoc]] datasets.LargeList +[[autodoc]] datasets.List + [[autodoc]] datasets.Sequence ### Translation diff --git a/docs/source/process.mdx b/docs/source/process.mdx index bdc7e33caf5..ec86f41dab2 100644 --- a/docs/source/process.mdx +++ b/docs/source/process.mdx @@ -265,11 +265,12 @@ Sometimes a column can be a nested structure of several types. Take a look at th >>> from datasets import load_dataset >>> dataset = load_dataset("rajpurkar/squad", split="train") >>> dataset.features -{'answers': Sequence(feature={'text': Value(dtype='string'), 'answer_start': Value(dtype='int32')}, length=-1), -'context': Value(dtype='string'), -'id': Value(dtype='string'), -'question': Value(dtype='string'), -'title': Value(dtype='string')} +{'id': Value(dtype='string'), + 'title': Value(dtype='string'), + 'context': Value(dtype='string'), + 'question': Value(dtype='string'), + 'answers': {'text': List(feature=Value(dtype='string'), length=-1), + 'answer_start': List(feature=Value(dtype='int32'), length=-1)}} ``` The `answers` field contains two subfields: `text` and `answer_start`. 
Use the [`~Dataset.flatten`] function to extract the subfields into their own separate columns: diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 99222d8c32a..148face482d 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -1158,6 +1158,7 @@ def _load_names_from_file(names_filepath): def Sequence(feature, length=-1): + """deprecated, please use List instead""" if isinstance(feature, dict): return {key: List(value, length=length) for key, value in feature.items()} else: From 2d90521b157fb531039c2e1c0ab25a1d39dab2b5 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 24 Jun 2025 16:44:26 +0200 Subject: [PATCH 3/9] fix tests --- src/datasets/features/features.py | 21 +++---------- tests/features/test_features.py | 51 +++++++++++++------------------ tests/io/test_parquet.py | 2 +- tests/test_builder.py | 20 ++++++------ tests/test_iterable_dataset.py | 3 +- tests/test_table.py | 43 ++++++++++++++------------ 6 files changed, 62 insertions(+), 78 deletions(-) diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 148face482d..97af496614f 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -1294,21 +1294,6 @@ def encode_nested_example(schema, obj, level=0): if obj is not None else None ) - - elif isinstance(schema, (list, tuple)): - sub_schema = schema[0] - if obj is None: - return None - elif isinstance(obj, np.ndarray): - return encode_nested_example(schema, obj.tolist()) - else: - if len(obj) > 0: - for first_elmt in obj: - if _check_non_null_non_empty_recursive(first_elmt, sub_schema): - break - if encode_nested_example(sub_schema, first_elmt, level=level + 1) != first_elmt: - return [encode_nested_example(sub_schema, o, level=level + 1) for o in obj] - return list(obj) elif isinstance(schema, (LargeList, List)): if obj is None: return None @@ -1318,7 +1303,11 @@ def encode_nested_example(schema, obj, level=0): for 
first_elmt in obj: if _check_non_null_non_empty_recursive(first_elmt, sub_schema): break - if encode_nested_example(sub_schema, first_elmt, level=level + 1) != first_elmt: + try: + changed = bool(encode_nested_example(sub_schema, first_elmt, level=level + 1) != first_elmt) + except ValueError: # can happen when comparing arrays + changed = False + if changed: return [encode_nested_example(sub_schema, o, level=level + 1) for o in obj] return list(obj) # Object with special encoding: diff --git a/tests/features/test_features.py b/tests/features/test_features.py index c9d98c9f001..1ead81f8e3d 100644 --- a/tests/features/test_features.py +++ b/tests/features/test_features.py @@ -1,5 +1,4 @@ import datetime -from typing import List, Tuple from unittest import TestCase from unittest.mock import MagicMock, patch @@ -10,7 +9,7 @@ from datasets import Array2D from datasets.arrow_dataset import Column, Dataset -from datasets.features import Audio, ClassLabel, Features, Image, LargeList, Sequence, Value +from datasets.features import Audio, ClassLabel, Features, Image, LargeList, List, Sequence, Value from datasets.features.features import ( _align_features, _arrow_to_datasets_dtype, @@ -186,7 +185,7 @@ def test_reorder_fields_as(self): }, "question": { "text": Value("string"), - "tokens": [Value("string")], + "tokens": List(Value("string")), }, "annotations": { "yes_no_answer": List(ClassLabel(names=["NO", "YES"])), @@ -266,7 +265,7 @@ def test_flatten_with_sequence(self): features = Features({"foo": {"bar": List({"my_value": Value("int32")})}}) _features = features.copy() flattened_features = features.flatten() - assert flattened_features == {"foo.bar": [{"my_value": Value("int32")}]} + assert flattened_features == {"foo.bar": List({"my_value": Value("int32")})} assert features == _features, "calling flatten shouldn't alter the current features" def test_features_dicts_are_synced(self): @@ -411,7 +410,7 @@ def test_decode_nested_example_with_list_types(schema, monkeypatch): 
@pytest.mark.parametrize( "schema", - [[ClassLabel(names=["a", "b"])], LargeList(ClassLabel(names=["a", "b"])), List(ClassLabel(names=["a", "b"]))], + [List(ClassLabel(names=["a", "b"])), LargeList(ClassLabel(names=["a", "b"]))], ) def test_encode_nested_example_with_list_types(schema): result = encode_nested_example(schema, ["b"]) @@ -430,7 +429,7 @@ def test_encode_nested_example_sequence_with_none(inner_type): "features_dict, example, expected_encoded_example", [ ({"col_1": ClassLabel(names=["a", "b"])}, {"col_1": "b"}, {"col_1": 1}), - ({"col_1": [ClassLabel(names=["a", "b"])]}, {"col_1": ["b"]}, {"col_1": [1]}), + ({"col_1": List(ClassLabel(names=["a", "b"]))}, {"col_1": ["b"]}, {"col_1": [1]}), ({"col_1": LargeList(ClassLabel(names=["a", "b"]))}, {"col_1": ["b"]}, {"col_1": [1]}), ({"col_1": List(ClassLabel(names=["a", "b"]))}, {"col_1": ["b"]}, {"col_1": [1]}), ], @@ -716,20 +715,16 @@ def test_features_flatten_with_list_types(features_dict, expected_features_dict) "deserialized_features_dict, expected_features_dict", [ ( - {"col": [{"dtype": "int32", "_type": "Value"}]}, - {"col": [Value("int32")]}, + {"col": {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "List"}}, + {"col": List(Value("int32"))}, ), ( {"col": {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "LargeList"}}, {"col": LargeList(Value("int32"))}, ), ( - {"col": {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "Sequence"}}, - {"col": List(Value("int32"))}, - ), - ( - {"col": [{"sub_col": {"dtype": "int32", "_type": "Value"}}]}, - {"col": [{"sub_col": Value("int32")}]}, + {"col": {"feature": {"sub_col": {"dtype": "int32", "_type": "Value"}}, "_type": "List"}}, + {"col": List({"sub_col": Value("int32")})}, ), ( {"col": {"feature": {"sub_col": {"dtype": "int32", "_type": "Value"}}, "_type": "LargeList"}}, @@ -749,28 +744,24 @@ def test_features_from_dict_with_list_types(deserialized_features_dict, expected @pytest.mark.parametrize( "deserialized_feature_dict, 
expected_feature", [ - ( - [{"dtype": "int32", "_type": "Value"}], - [Value("int32")], - ), ( {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "LargeList"}, LargeList(Value("int32")), ), ( - {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "Sequence"}, + {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "List"}, List(Value("int32")), ), ( - [{"sub_col": {"dtype": "int32", "_type": "Value"}}], - [{"sub_col": Value("int32")}], + {"feature": {"sub_col": {"dtype": "int32", "_type": "Value"}}, "_type": "List"}, + List({"sub_col": Value("int32")}), ), ( {"feature": {"sub_col": {"dtype": "int32", "_type": "Value"}}, "_type": "LargeList"}, LargeList({"sub_col": Value("int32")}), ), ( - {"feature": {"sub_col": {"dtype": "int32", "_type": "Value"}}, "_type": "Sequence"}, + {"sub_col": {"feature": {"dtype": "int32", "_type": "Value"}, "_type": "List"}}, {"sub_col": List(Value("int32"))}, ), ], @@ -852,7 +843,7 @@ def test_features_to_arrow_schema(features: Features): @pytest.mark.parametrize("features", NESTED_COMPARISON) -def test_features_alignment(features: Tuple[List[Features], Features]): +def test_features_alignment(features: tuple[list[Features], list[Features]]): inputs, expected = features _check_if_features_can_be_aligned(inputs) # Check that we can align, will raise otherwise. 
assert _align_features(inputs) == expected @@ -874,12 +865,12 @@ def test_features_from_arrow_schema_list_data_type(list_dtype, scalar_dtype): @pytest.mark.parametrize( "feature, other_feature", [ - ([Value("int64")], [Value("int64")]), + (List(Value("int64")), List(Value("int64"))), (LargeList(Value("int64")), LargeList(Value("int64"))), (List(Value("int64")), List(Value("int64"))), ( - [{"sub_col_1": Value("int64"), "sub_col_2": Value("int64")}], - [{"sub_col_2": Value("int64"), "sub_col_1": Value("int64")}], + List({"sub_col_1": Value("int64"), "sub_col_2": Value("int64")}), + List({"sub_col_2": Value("int64"), "sub_col_1": Value("int64")}), ), ( LargeList({"sub_col_1": Value("int64"), "sub_col_2": Value("int64")}), @@ -965,24 +956,24 @@ def test_check_non_null_non_empty_recursive_with_nested_list_types(schema): assert _check_non_null_non_empty_recursive([[]], schema) is False -@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), List(Audio())]) +@pytest.mark.parametrize("feature", [LargeList(Audio()), List(Audio())]) def test_require_decoding_with_list_types(feature): assert require_decoding(feature) -@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), List(Audio())]) +@pytest.mark.parametrize("feature", [LargeList(Audio()), List(Audio())]) def test_require_storage_cast_with_list_types(feature): assert require_storage_cast(feature) -@pytest.mark.parametrize("feature", [[Audio()], LargeList(Audio()), List(Audio())]) +@pytest.mark.parametrize("feature", [LargeList(Audio()), List(Audio())]) def test_require_storage_embed_with_list_types(feature): assert require_storage_embed(feature) @pytest.mark.parametrize( "feature, expected", - [([Value("int32")], [1]), (LargeList(Value("int32")), LargeList(1)), (List(Value("int32")), List(1))], + [(List(Value("int32")), List(1)), (LargeList(Value("int32")), LargeList(1)), (List(Value("int32")), List(1))], ) def test_visit_with_list_types(feature, expected): def func(x): diff --git 
a/tests/io/test_parquet.py b/tests/io/test_parquet.py index 5062b88a60c..c01781972f5 100644 --- a/tests/io/test_parquet.py +++ b/tests/io/test_parquet.py @@ -2,7 +2,7 @@ import pyarrow.parquet as pq import pytest -from datasets import Audio, Dataset, DatasetDict, Features, IterableDatasetDict, NamedSplit, Value, config +from datasets import Audio, Dataset, DatasetDict, Features, IterableDatasetDict, List, NamedSplit, Value, config from datasets.features.image import Image from datasets.info import DatasetInfo from datasets.io.parquet import ParquetDatasetReader, ParquetDatasetWriter, get_writer_batch_size diff --git a/tests/test_builder.py b/tests/test_builder.py index c87bf030edc..4c8c949b6c8 100644 --- a/tests/test_builder.py +++ b/tests/test_builder.py @@ -26,7 +26,7 @@ from datasets.data_files import DataFilesList from datasets.dataset_dict import DatasetDict, IterableDatasetDict from datasets.download.download_manager import DownloadMode -from datasets.features import Features, Value +from datasets.features import Features, List, Value from datasets.info import DatasetInfo, PostProcessedInfo from datasets.iterable_dataset import IterableDataset from datasets.load import configure_builder_class @@ -346,7 +346,7 @@ def _post_processing_resources(self, split): with tempfile.TemporaryDirectory() as tmp_dir: builder = DummyBuilder(cache_dir=tmp_dir) builder.info.post_processed = PostProcessedInfo( - features=Features({"text": Value("string"), "tokens": [Value("string")]}) + features=Features({"text": Value("string"), "tokens": List(Value("string"))}) ) builder._post_process = types.MethodType(_post_process, builder) builder._post_processing_resources = types.MethodType(_post_processing_resources, builder) @@ -366,7 +366,7 @@ def _post_processing_resources(self, split): with ArrowWriter( path=os.path.join(builder.cache_dir, f"tokenized_dataset-{split}.arrow"), - features=Features({"text": Value("string"), "tokens": [Value("string")]}), + features=Features({"text": 
Value("string"), "tokens": List(Value("string"))}), ) as writer: writer.write_batch({"text": ["foo"] * 10, "tokens": [list("foo")] * 10}) writer.finalize() @@ -377,10 +377,10 @@ def _post_processing_resources(self, split): self.assertEqual(len(dsets["train"]), 10) self.assertEqual(len(dsets["test"]), 10) self.assertDictEqual( - dsets["train"].features, Features({"text": Value("string"), "tokens": [Value("string")]}) + dsets["train"].features, Features({"text": Value("string"), "tokens": List(Value("string"))}) ) self.assertDictEqual( - dsets["test"].features, Features({"text": Value("string"), "tokens": [Value("string")]}) + dsets["test"].features, Features({"text": Value("string"), "tokens": List(Value("string"))}) ) self.assertListEqual(dsets["train"].column_names, ["text", "tokens"]) self.assertListEqual(dsets["test"].column_names, ["text", "tokens"]) @@ -390,7 +390,7 @@ def _post_processing_resources(self, split): self.assertIsInstance(dset, Dataset) self.assertEqual(dset.split, "train") self.assertEqual(len(dset), 10) - self.assertDictEqual(dset.features, Features({"text": Value("string"), "tokens": [Value("string")]})) + self.assertDictEqual(dset.features, Features({"text": Value("string"), "tokens": List(Value("string"))})) self.assertListEqual(dset.column_names, ["text", "tokens"]) self.assertGreater(builder.info.post_processing_size, 0) self.assertGreater( @@ -402,7 +402,7 @@ def _post_processing_resources(self, split): self.assertIsInstance(dset, Dataset) self.assertEqual(dset.split, "train+test[:30%]") self.assertEqual(len(dset), 13) - self.assertDictEqual(dset.features, Features({"text": Value("string"), "tokens": [Value("string")]})) + self.assertDictEqual(dset.features, Features({"text": Value("string"), "tokens": List(Value("string"))})) self.assertListEqual(dset.column_names, ["text", "tokens"]) del dset @@ -410,7 +410,7 @@ def _post_processing_resources(self, split): self.assertIsInstance(dset, Dataset) self.assertEqual(dset.split, "train+test") 
self.assertEqual(len(dset), 20) - self.assertDictEqual(dset.features, Features({"text": Value("string"), "tokens": [Value("string")]})) + self.assertDictEqual(dset.features, Features({"text": Value("string"), "tokens": List(Value("string"))})) self.assertListEqual(dset.column_names, ["text", "tokens"]) del dset @@ -555,7 +555,7 @@ def _post_processing_resources(self, split): with tempfile.TemporaryDirectory() as tmp_dir: builder = DummyBuilder(cache_dir=tmp_dir) builder.info.post_processed = PostProcessedInfo( - features=Features({"text": Value("string"), "tokens": [Value("string")]}) + features=Features({"text": Value("string"), "tokens": List(Value("string"))}) ) builder._post_process = types.MethodType(_post_process, builder) builder._post_processing_resources = types.MethodType(_post_processing_resources, builder) @@ -570,7 +570,7 @@ def _post_processing_resources(self, split): self.assertDictEqual(builder.info.features, Features({"text": Value("string")})) self.assertDictEqual( builder.info.post_processed.features, - Features({"text": Value("string"), "tokens": [Value("string")]}), + Features({"text": Value("string"), "tokens": List(Value("string"))}), ) self.assertEqual(builder.info.splits["train"].num_examples, 100) self.assertTrue( diff --git a/tests/test_iterable_dataset.py b/tests/test_iterable_dataset.py index 855903fd8c2..1bca866bdf8 100644 --- a/tests/test_iterable_dataset.py +++ b/tests/test_iterable_dataset.py @@ -19,6 +19,7 @@ ClassLabel, Features, Image, + List, Value, ) from datasets.formatting import Formatter, get_format_type_from_alias @@ -1766,7 +1767,7 @@ def test_iterable_dataset_features_cast_to_python(): { "id": Value("int64"), "timestamp": Value("timestamp[us]"), - "array": [Value("int64")], + "array": List(Value("int64")), } ) dataset = IterableDataset(ex_iterable, info=DatasetInfo(features=features)) diff --git a/tests/test_table.py b/tests/test_table.py index 7ca740e8fde..b7bf75cc803 100644 --- a/tests/test_table.py +++ 
b/tests/test_table.py @@ -2,14 +2,14 @@ import pickle from decimal import Decimal from functools import partial -from typing import List, Union +from typing import Union from unittest.mock import MagicMock import numpy as np import pyarrow as pa import pytest -from datasets.features import Array2D, ClassLabel, Features, Image, LargeList, Value +from datasets.features import Array2D, ClassLabel, Features, Image, LargeList, List, Value from datasets.features.features import Array2DExtensionType, get_nested_type from datasets.table import ( ConcatenationTable, @@ -40,7 +40,7 @@ def in_memory_pa_table(arrow_file) -> pa.Table: return pa.ipc.open_stream(arrow_file).read_all() -def _to_testing_blocks(table: TableBlock) -> List[List[TableBlock]]: +def _to_testing_blocks(table: TableBlock) -> list[list[TableBlock]]: assert len(table) > 2 blocks = [ [table.slice(0, 2)], @@ -1049,7 +1049,7 @@ def test_concat_tables(arrow_file, in_memory_pa_table): assert isinstance(concatenated_table.blocks[0][2], InMemoryTable) -def _interpolation_search_ground_truth(arr: List[int], x: int) -> Union[int, IndexError]: +def _interpolation_search_ground_truth(arr: list[int], x: int) -> Union[int, IndexError]: for i in range(len(arr) - 1): if arr[i] <= x < arr[i + 1]: return i @@ -1154,7 +1154,7 @@ def test_cast_array_to_feature_with_struct_with_missing_fields(array_list, expec def test_cast_array_to_features_nested(): arr = pa.array([[{"foo": [0]}]]) - assert cast_array_to_feature(arr, [{"foo": List(Value("string"))}]).type == pa.list_( + assert cast_array_to_feature(arr, List({"foo": List(Value("string"))})).type == pa.list_( pa.struct({"foo": pa.list_(pa.string())}) ) @@ -1168,12 +1168,12 @@ def test_cast_array_to_features_to_nested_with_no_fields(): def test_cast_array_to_features_nested_with_nulls(): # same type arr = pa.array([{"foo": [None, [0]]}], pa.struct({"foo": pa.list_(pa.list_(pa.int64()))})) - casted_array = cast_array_to_feature(arr, {"foo": [[Value("int64")]]}) + casted_array = 
cast_array_to_feature(arr, {"foo": List(List(Value("int64")))}) assert casted_array.type == pa.struct({"foo": pa.list_(pa.list_(pa.int64()))}) assert casted_array.to_pylist() == arr.to_pylist() # different type arr = pa.array([{"foo": [None, [0]]}], pa.struct({"foo": pa.list_(pa.list_(pa.int64()))})) - casted_array = cast_array_to_feature(arr, {"foo": [[Value("int32")]]}) + casted_array = cast_array_to_feature(arr, {"foo": List(List(Value("int32")))}) assert casted_array.type == pa.struct({"foo": pa.list_(pa.list_(pa.int32()))}) assert casted_array.to_pylist() == [{"foo": [None, [0]]}] @@ -1243,8 +1243,8 @@ def test_cast_fixed_size_list_array_to_features_sequence(arr, slice, target_valu casted_array = cast_array_to_feature(arr, List(target_value_feature)) assert casted_array.type == get_nested_type(List(target_value_feature)) assert casted_array.to_pylist() == arr.to_pylist() - casted_array = cast_array_to_feature(arr, [target_value_feature]) - assert casted_array.type == get_nested_type([target_value_feature]) + casted_array = cast_array_to_feature(arr, List(target_value_feature)) + assert casted_array.type == get_nested_type(List(target_value_feature)) assert casted_array.to_pylist() == arr.to_pylist() @@ -1262,8 +1262,8 @@ def test_cast_list_array_to_features_sequence(arr, slice, target_value_feature): casted_array = cast_array_to_feature(arr, List(target_value_feature)) assert casted_array.type == get_nested_type(List(target_value_feature)) assert casted_array.to_pylist() == arr.to_pylist() - casted_array = cast_array_to_feature(arr, [target_value_feature]) - assert casted_array.type == get_nested_type([target_value_feature]) + casted_array = cast_array_to_feature(arr, List(target_value_feature)) + assert casted_array.type == get_nested_type(List(target_value_feature)) assert casted_array.to_pylist() == arr.to_pylist() # Fixed size list list_size = arr.value_lengths().drop_null()[0].as_py() if arr.value_lengths().drop_null() else 2 @@ -1278,6 +1278,11 @@ def 
test_cast_list_array_to_features_sequence(arr, slice, target_value_feature): def test_cast_array_to_feature_with_list_array_and_sequence_feature( list_within_struct, from_list_type, sequence_feature_dtype ): + list_feature = { + "list": List, + "fixed_size_list": partial(List, length=2), + "large_list": LargeList, + } list_type = { "list": pa.list_, "fixed_size_list": partial(pa.list_, list_size=2), @@ -1290,14 +1295,17 @@ def test_cast_array_to_feature_with_list_array_and_sequence_feature( to_type = "list" array_data = [0, 1] array_type = list_type[from_list_type](pa.int64()) - sequence_feature = Value(sequence_feature_dtype) - expected_array_type = list_type[to_type](primitive_type[sequence_feature_dtype]) + sequence_feature = list_feature[from_list_type](Value(sequence_feature_dtype)) + expected_array_type = list_type[from_list_type](primitive_type[sequence_feature_dtype]) if list_within_struct: array_data = {"col_1": array_data} array_type = pa.struct({"col_1": array_type}) sequence_feature = {"col_1": sequence_feature} expected_array_type = pa.struct({"col_1": expected_array_type}) - feature = List(sequence_feature) + array_data = [array_data] * 2 + array_type = list_type[from_list_type](array_type) + feature = list_feature[to_type](sequence_feature) + expected_array_type = list_type[to_type](expected_array_type) array = pa.array([array_data], type=array_type) cast_array = cast_array_to_feature(array, feature) assert cast_array.type == expected_array_type @@ -1364,7 +1372,7 @@ def test_embed_array_storage_nested(image_file): [ ( pa.array([[{"path": "image_path"}]], type=pa.list_(Image.pa_type)), - [Image()], + List(Image()), pa.types.is_list, ), ( @@ -1372,11 +1380,6 @@ def test_embed_array_storage_nested(image_file): LargeList(Image()), pa.types.is_large_list, ), - ( - pa.array([[{"path": "image_path"}]], type=pa.list_(Image.pa_type)), - List(Image()), - pa.types.is_list, - ), ], ) def test_embed_array_storage_with_list_types(array, feature, 
expected_embedded_array_type, monkeypatch): From 52e04d520864f31be761fc2ed5548230f43eeefc Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 24 Jun 2025 20:19:32 +0200 Subject: [PATCH 4/9] fix tests and add backward compatibility utilities --- src/datasets/arrow_dataset.py | 6 ++++ src/datasets/features/features.py | 52 ++++++++++++++++++++++--------- src/datasets/iterable_dataset.py | 5 +++ src/datasets/load.py | 12 +++---- tests/features/test_audio.py | 2 +- tests/features/test_image.py | 4 +-- tests/utils.py | 6 ++-- 7 files changed, 59 insertions(+), 28 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index d0169ea5b3b..f5d4d5878c5 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -81,6 +81,7 @@ FeatureType, _align_features, _check_if_features_can_be_aligned, + _fix_for_backward_compatible_features, generate_from_arrow_type, pandas_types_mapper, require_decoding, @@ -2118,6 +2119,7 @@ def cast( f"as the columns in the dataset: {self._data.column_names}" ) + features = _fix_for_backward_compatible_features(features) schema = features.arrow_schema format = self.format dataset = self.with_format("arrow") @@ -2167,6 +2169,7 @@ def cast_column(self, column: str, feature: FeatureType, new_fingerprint: Option 'text': Value(dtype='string', id=None)} ``` """ + feature = _fix_for_backward_compatible_features(feature) if hasattr(feature, "decode_example"): dataset = copy.deepcopy(self) dataset._info.features[column] = feature @@ -3083,6 +3086,9 @@ def map( if fn_kwargs is None: fn_kwargs = {} + if features is not None: + features = _fix_for_backward_compatible_features(features) + if num_proc is not None and num_proc > len(self): num_proc = len(self) logger.warning( diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 97af496614f..3e329e791f7 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -1158,7 +1158,24 @@ def 
_load_names_from_file(names_filepath): def Sequence(feature, length=-1): - """deprecated, please use List instead""" + """ + A `Sequence` is a utility that automatically converts internal dictionary feature into a dictionary of + lists. This behavior is implemented to have a compatibility layer with the TensorFlow Datasets library but may be + un-wanted in some cases. If you don't want this behavior, you can use a [`List`] or a [`LargeList`] + instead of the [`Sequence`]. + + Args: + feature ([`FeatureType`]): + Child feature data type of each item within the large list. + length (optional `int`, default to -1): + Length of the list if it is fixed. + Defaults to -1 which means an arbitrary length. + + Returns: + [`List`] of the specified feature, except `dict` of sub-features + which are converted to `dict` of lists of sub-features for compatibility with TFDS. + + """ if isinstance(feature, dict): return {key: List(value, length=length) for key, value in feature.items()} else: @@ -1174,6 +1191,9 @@ class List: Args: feature ([`FeatureType`]): Child feature data type of each item within the large list. + length (optional `int`, default to -1): + Length of the list if it is fixed. + Defaults to -1 which means an arbitrary length. """ feature: Any @@ -1558,8 +1578,6 @@ def _visit(feature: FeatureType, func: Callable[[FeatureType], Optional[FeatureT out = func(Features({k: _visit(f, func) for k, f in feature.items()})) elif isinstance(feature, dict): out = func({k: _visit(f, func) for k, f in feature.items()}) - elif isinstance(feature, (list, tuple)): - out = func([_visit(feature[0], func)]) elif isinstance(feature, LargeList): out = func(LargeList(_visit(feature.feature, func))) elif isinstance(feature, List): @@ -1700,22 +1718,19 @@ class Features(dict): It's possible to have nested fields of nested fields in an arbitrary manner. - [`List`] or [`LargeList`] specifies a composite feature containing a sequence of sub-features, all of the same feature type. 
- - - - A `Sequence` is deprecated and automatically converts internal dictionary feature into a dictionary of - lists. This behavior is implemented to have a compatibility layer with the TensorFlow Datasets library but may be - un-wanted in some cases. If you don't want this behavior, you can use a [`List`] or a [`LargeList`] - instead of the [`Sequence`]. - - - - [`Array2D`], [`Array3D`], [`Array4D`] or [`Array5D`] feature for multidimensional arrays. - [`Audio`] feature to store the absolute path to an audio file or a dictionary with the relative path - to an audio file ("path" key) and its bytes content ("bytes" key). This feature extracts the audio data. + to an audio file ("path" key) and its bytes content ("bytes" key). + This feature loads the audio lazily with a decoder. - [`Image`] feature to store the absolute path to an image file, an `np.ndarray` object, a `PIL.Image.Image` object or a dictionary with the relative path to an image file ("path" key) and its bytes content ("bytes" key). This feature extracts the image data. + - [`Video`] feature to store the absolute path to a video file, a `torchcodec.decoders.VideoDecoder` object + or a dictionary with the relative path to a video file ("path" key) and its bytes content ("bytes" key). + This feature loads the video lazily with a decoder. + - [`Pdf`] feature to store the absolute path to a PDF file, a `pdfplumber.pdf.PDF` object + or a dictionary with the relative path to a PDF file ("path" key) and its bytes content ("bytes" key). + This feature loads the PDF lazily with a PDF reader. - [`Translation`] or [`TranslationVariableLanguages`] feature specific to Machine Translation. """ @@ -2252,3 +2267,12 @@ def _check_if_features_can_be_aligned(features_list: list[Features]): raise ValueError( f'The features can\'t be aligned because the key {k} of features {features} has unexpected type - {v} (expected either {name2feature[k]} or Value("null").' 
) + + +def _fix_for_backward_compatible_features(feature: Any) -> FeatureType: + def _fix_old_list(feature): + if isinstance(feature, list): + return List(_fix_for_backward_compatible_features(feature[0])) + return feature + + return _visit(feature, _fix_old_list) diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py index c8ccdea7c1e..8031c140ef4 100644 --- a/src/datasets/iterable_dataset.py +++ b/src/datasets/iterable_dataset.py @@ -37,6 +37,7 @@ Value, _align_features, _check_if_features_can_be_aligned, + _fix_for_backward_compatible_features, _visit, cast_to_python_objects, require_decoding, @@ -2661,6 +2662,8 @@ def map( function = identity_func if fn_kwargs is None: fn_kwargs = {} + if features is not None: + features = _fix_for_backward_compatible_features(features) ex_iterable = self._ex_iterable # no need to apply features if ex_iterable is typed and if there was no cast_column() @@ -3244,6 +3247,7 @@ def cast_column(self, column: str, feature: FeatureType) -> "IterableDataset": 'transcription': Value(dtype='string', id=None)} ``` """ + feature = _fix_for_backward_compatible_features(feature) info = self._info.copy() info.features[column] = feature return IterableDataset( @@ -3290,6 +3294,7 @@ def cast( 'text': Value(dtype='large_string', id=None)} ``` """ + features = _fix_for_backward_compatible_features(features) info = self._info.copy() info.features = features return IterableDataset( diff --git a/src/datasets/load.py b/src/datasets/load.py index 36e4840f5bb..6c51dabd0b3 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -23,7 +23,6 @@ import posixpath from collections import Counter from collections.abc import Mapping, Sequence -from contextlib import nullcontext from dataclasses import dataclass, field from pathlib import Path from typing import Any, Optional, Union @@ -60,6 +59,7 @@ from .download.streaming_download_manager import StreamingDownloadManager, xbasename, xglob, xjoin from .exceptions import 
DataFilesNotFoundError, DatasetNotFoundError from .features import Features +from .features.features import _fix_for_backward_compatible_features from .fingerprint import Hasher from .info import DatasetInfo, DatasetInfosDict from .iterable_dataset import IterableDataset @@ -171,12 +171,7 @@ def import_main_class(module_path) -> Optional[type[DatasetBuilder]]: def get_dataset_builder_class( dataset_module: "DatasetModule", dataset_name: Optional[str] = None ) -> type[DatasetBuilder]: - with ( - lock_importable_file(dataset_module.importable_file_path) - if dataset_module.importable_file_path - else nullcontext() - ): - builder_cls = import_main_class(dataset_module.module_path) + builder_cls = import_main_class(dataset_module.module_path) if dataset_module.builder_configs_parameters.builder_configs: dataset_name = dataset_name or dataset_module.builder_kwargs.get("dataset_name") if dataset_name is None: @@ -388,7 +383,6 @@ class DatasetModule: builder_kwargs: dict builder_configs_parameters: BuilderConfigsParameters = field(default_factory=BuilderConfigsParameters) dataset_infos: Optional[DatasetInfosDict] = None - importable_file_path: Optional[str] = None class _DatasetModuleFactory: @@ -1133,6 +1127,8 @@ def load_dataset_builder( if storage_options is not None: download_config = download_config.copy() if download_config else DownloadConfig() download_config.storage_options.update(storage_options) + if features is not None: + features = _fix_for_backward_compatible_features(features) dataset_module = dataset_module_factory( path, revision=revision, diff --git a/tests/features/test_audio.py b/tests/features/test_audio.py index f959458777c..dae082429ed 100644 --- a/tests/features/test_audio.py +++ b/tests/features/test_audio.py @@ -7,7 +7,7 @@ import pytest from datasets import Column, Dataset, concatenate_datasets, load_dataset -from datasets.features import Audio, Features, Value +from datasets.features import Audio, Features, List, Value from ..utils import 
require_sndfile, require_torchcodec diff --git a/tests/features/test_image.py b/tests/features/test_image.py index 57559b57a1e..0b6774330b5 100644 --- a/tests/features/test_image.py +++ b/tests/features/test_image.py @@ -9,7 +9,7 @@ import pyarrow as pa import pytest -from datasets import Column, Dataset, Features, Image, Value, concatenate_datasets, load_dataset +from datasets import Column, Dataset, Features, Image, List, Value, concatenate_datasets, load_dataset from datasets.features.image import encode_np_array, image_to_bytes from ..utils import require_pil @@ -336,7 +336,7 @@ def test_dataset_concatenate_image_features(shared_datadir): def test_dataset_concatenate_nested_image_features(shared_datadir): # we use a different data structure between 1 and 2 to make sure they are compatible with each other image_path = str(shared_datadir / "test_image_rgb.jpg") - features = Features({"list_of_structs_of_images": [{"image": Image()}]}) + features = Features({"list_of_structs_of_images": List({"image": Image()})}) data1 = {"list_of_structs_of_images": [[{"image": image_path}]]} dset1 = Dataset.from_dict(data1, features=features) data2 = {"list_of_structs_of_images": [[{"image": {"bytes": open(image_path, "rb").read()}}]]} diff --git a/tests/utils.py b/tests/utils.py index 66341e70220..be159bae21f 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -191,13 +191,13 @@ def require_torchvision(test_case): def require_torchcodec(test_case): """ - Decorator marking a test that requires torchvision. + Decorator marking a test that requires torchcodec. - These tests are skipped when torchvision isn't installed. + These tests are skipped when torchcodec isn't installed. 
""" if not config.TORCHCODEC_AVAILABLE: - test_case = unittest.skip("test requires torchvision")(test_case) + test_case = unittest.skip("test requires torchcodec")(test_case) return test_case From 31c778066c8fe282623dd20cb9939aad8c9a2d9b Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Tue, 24 Jun 2025 20:45:36 +0200 Subject: [PATCH 5/9] fix tests --- src/datasets/arrow_dataset.py | 6 ++++++ src/datasets/features/features.py | 7 ++++--- src/datasets/search.py | 4 ++-- tests/commands/test_test.py | 2 +- tests/test_inspect.py | 4 ++-- tests/test_upstream_hub.py | 3 ++- 6 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index f5d4d5878c5..cd5b5e52d1b 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -898,6 +898,8 @@ def from_pandas( f"Features specified in `features` and `info.features` can't be different:\n{features}\n{info.features}" ) features = features if features is not None else info.features if info is not None else None + if features is not None: + features = _fix_for_backward_compatible_features(features) if info is None: info = DatasetInfo() info.features = features @@ -943,6 +945,8 @@ def from_polars( f"Features specified in `features` and `info.features` can't be different:\n{features}\n{info.features}" ) features = features if features is not None else info.features if info is not None else None + if features is not None: + features = _fix_for_backward_compatible_features(features) if info is None: info = DatasetInfo() info.features = features @@ -988,6 +992,8 @@ def from_dict( f"Features specified in `features` and `info.features` can't be different:\n{features}\n{info.features}" ) features = features if features is not None else info.features if info is not None else None + if features is not None: + features = _fix_for_backward_compatible_features(features) arrow_typed_mapping = {} for col, data in mapping.items(): if isinstance(data, 
(pa.Array, pa.ChunkedArray)): diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 3e329e791f7..5aa9c722267 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -1957,14 +1957,15 @@ def from_yaml_inner(obj: Union[dict, list]) -> Union[dict, list]: if _type == "large_list": _feature = from_yaml_inner(unsimplify(obj).pop(_type)) return {"feature": _feature, **obj, "_type": "LargeList"} - if _type == "sequence": - _feature = from_yaml_inner(unsimplify(obj).pop(_type)) - if isinstance(_feature, dict): + if _type == "sequence": # backward compatibility + if isinstance(obj[_type], list): + _feature = from_yaml_inner(unsimplify(obj).pop(_type)) return { name: {"feature": _subfeature, **obj, "_type": "List"} for name, _subfeature in _feature.items() } else: + _feature = from_yaml_inner(unsimplify(obj).pop(_type)) return {"feature": _feature, **obj, "_type": "List"} if _type == "list": _feature = from_yaml_inner(unsimplify(obj).pop(_type)) diff --git a/src/datasets/search.py b/src/datasets/search.py index 07ec6c93bc1..cb994d24e41 100644 --- a/src/datasets/search.py +++ b/src/datasets/search.py @@ -7,7 +7,7 @@ import fsspec import numpy as np -from .features import Sequence +from .features import List from .utils import logging from .utils import tqdm as hf_tqdm @@ -266,7 +266,7 @@ def add_vectors( """ import faiss # noqa: F811 - if column and not isinstance(vectors.features[column], Sequence): + if column and not isinstance(vectors.features[column], List): raise ValueError( f"Wrong feature type for column '{column}'. 
Expected 1d array, got {vectors.features[column]}" ) diff --git a/tests/commands/test_test.py b/tests/commands/test_test.py index f8935a8c025..9c47cdefc8d 100644 --- a/tests/commands/test_test.py +++ b/tests/commands/test_test.py @@ -3,7 +3,7 @@ import pytest -from datasets import ClassLabel, Features, Value +from datasets import ClassLabel, Features, List, Value from datasets.commands.test import TestCommand from datasets.info import DatasetInfo, DatasetInfosDict diff --git a/tests/test_inspect.py b/tests/test_inspect.py index fd227670253..9855efc3159 100644 --- a/tests/test_inspect.py +++ b/tests/test_inspect.py @@ -52,7 +52,7 @@ def test_get_dataset_config_info_raises(path, config_name, expected_exception): @pytest.mark.parametrize( "path, expected", [ - ("acronym_identification", ["default"]), + ("amirveyseh/acronym_identification", ["default"]), ("rajpurkar/squad", ["plain_text"]), ("dalle-mini/wit", ["default"]), ("hf-internal-testing/librispeech_asr_dummy", ["clean"]), @@ -69,7 +69,7 @@ def test_get_dataset_config_names(path, expected): @pytest.mark.parametrize( "path, expected", [ - ("acronym_identification", "default"), + ("amirveyseh/acronym_identification", "default"), ("rajpurkar/squad", "plain_text"), ("dalle-mini/wit", "default"), ("hf-internal-testing/librispeech_asr_dummy", "clean"), diff --git a/tests/test_upstream_hub.py b/tests/test_upstream_hub.py index b118f174264..d549a22f2b8 100644 --- a/tests/test_upstream_hub.py +++ b/tests/test_upstream_hub.py @@ -23,6 +23,7 @@ Features, Image, IterableDatasetDict, + List, Value, load_dataset, load_dataset_builder, @@ -441,7 +442,7 @@ def test_push_dataset_to_hub_custom_features_image(self, temporary_repo): def test_push_dataset_to_hub_custom_features_image_list(self, temporary_repo): image_path = os.path.join(os.path.dirname(__file__), "features", "data", "test_image_rgb.jpg") data = {"x": [[image_path], [image_path, image_path]], "y": [0, -1]} - features = Features({"x": [Image()], "y": Value("int32")}) 
+ features = Features({"x": List(Image()), "y": Value("int32")}) ds = Dataset.from_dict(data, features=features) for embed_external_files in [True, False]: From c8441c599ea2ac68d8fcaf1dd7a421c732ba3df6 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 25 Jun 2025 15:17:53 +0200 Subject: [PATCH 6/9] last fix --- tests/test_table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_table.py b/tests/test_table.py index b7bf75cc803..e5cebb793de 100644 --- a/tests/test_table.py +++ b/tests/test_table.py @@ -1358,7 +1358,7 @@ def test_embed_array_storage(image_file): def test_embed_array_storage_nested(image_file): array = pa.array([[{"bytes": None, "path": image_file}]], type=pa.list_(Image.pa_type)) - embedded_images_array = embed_array_storage(array, [Image()]) + embedded_images_array = embed_array_storage(array, List(Image())) assert isinstance(embedded_images_array.to_pylist()[0][0]["path"], str) assert isinstance(embedded_images_array.to_pylist()[0][0]["bytes"], bytes) array = pa.array([{"foo": {"bytes": None, "path": image_file}}], type=pa.struct({"foo": Image.pa_type})) From dcd1f6ee1880fa7a294f9b34af569334fd28021a Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 25 Jun 2025 15:20:51 +0200 Subject: [PATCH 7/9] last --- src/datasets/iterable_dataset.py | 3 ++- tests/features/test_features.py | 2 +- tests/test_upstream_hub.py | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py index 8031c140ef4..c70381542c3 100644 --- a/src/datasets/iterable_dataset.py +++ b/src/datasets/iterable_dataset.py @@ -34,6 +34,7 @@ from .features import Features from .features.features import ( FeatureType, + List, Value, _align_features, _check_if_features_can_be_aligned, @@ -3450,7 +3451,7 @@ def batch_fn(unbatched): return {k: [v] for k, v in unbatched.items()} if self.features: - features = Features({col: [feature] for col, feature in self.features.items()}) + 
features = Features({col: List(feature) for col, feature in self.features.items()}) else: features = None return self.map( diff --git a/tests/features/test_features.py b/tests/features/test_features.py index 1ead81f8e3d..9b0d924c631 100644 --- a/tests/features/test_features.py +++ b/tests/features/test_features.py @@ -397,7 +397,7 @@ def test_class_label_to_and_from_dict(class_label_arg, tmp_path_factory): @pytest.mark.parametrize( "schema", - [[Audio()], LargeList(Audio()), List(Audio())], + [LargeList(Audio()), List(Audio())], ) def test_decode_nested_example_with_list_types(schema, monkeypatch): mock_decode_example = MagicMock() diff --git a/tests/test_upstream_hub.py b/tests/test_upstream_hub.py index d549a22f2b8..fd35308fe1f 100644 --- a/tests/test_upstream_hub.py +++ b/tests/test_upstream_hub.py @@ -454,7 +454,7 @@ def test_push_dataset_to_hub_custom_features_image_list(self, temporary_repo): assert list(ds.features.keys()) == list(hub_ds.features.keys()) assert ds.features == hub_ds.features assert ds[:] == hub_ds[:] - hub_ds = hub_ds.cast_column("x", [Image(decode=False)]) + hub_ds = hub_ds.cast_column("x", List(Image(decode=False))) elem = hub_ds[0]["x"][0] path, bytes_ = elem["path"], elem["bytes"] assert isinstance(path, str) From c4f3c2a6681d3104a3cd43ed8212b505a5d6117d Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 25 Jun 2025 15:58:15 +0200 Subject: [PATCH 8/9] fix docstrings --- src/datasets/arrow_dataset.py | 18 +++++++++--------- src/datasets/builder.py | 4 ++-- src/datasets/dataset_dict.py | 26 +++++++++++++------------- src/datasets/features/features.py | 18 +++++++++--------- src/datasets/iterable_dataset.py | 18 +++++++++--------- src/datasets/load.py | 2 +- 6 files changed, 43 insertions(+), 43 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index cd5b5e52d1b..e3c4f5c50cf 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -1957,14 +1957,14 @@ def 
class_encode_column(self, column: str, include_nulls: bool = False) -> "Data >>> from datasets import load_dataset >>> ds = load_dataset("boolq", split="validation") >>> ds.features - {'answer': Value(dtype='bool', id=None), - 'passage': Value(dtype='string', id=None), - 'question': Value(dtype='string', id=None)} + {'answer': Value(dtype='bool'), + 'passage': Value(dtype='string'), + 'question': Value(dtype='string')} >>> ds = ds.class_encode_column('answer') >>> ds.features {'answer': ClassLabel(num_classes=2, names=['False', 'True'], id=None), - 'passage': Value(dtype='string', id=None), - 'question': Value(dtype='string', id=None)} + 'passage': Value(dtype='string'), + 'question': Value(dtype='string')} ``` """ # Sanity checks @@ -2109,14 +2109,14 @@ def cast( >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation") >>> ds.features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} >>> new_features = ds.features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = ds.cast(new_features) >>> ds.features {'label': ClassLabel(names=['bad', 'good'], id=None), - 'text': Value(dtype='large_string', id=None)} + 'text': Value(dtype='large_string')} ``` """ if sorted(features) != sorted(self._data.column_names): @@ -2168,11 +2168,11 @@ def cast_column(self, column: str, feature: FeatureType, new_fingerprint: Option >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation") >>> ds.features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds.features {'label': ClassLabel(names=['bad', 'good'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} ``` """ feature = 
_fix_for_backward_compatible_features(feature) diff --git a/src/datasets/builder.py b/src/datasets/builder.py index de79c71995e..118f77a354a 100644 --- a/src/datasets/builder.py +++ b/src/datasets/builder.py @@ -513,7 +513,7 @@ def get_all_exported_dataset_infos(cls) -> DatasetInfosDict: >>> from datasets import load_dataset_builder >>> ds_builder = load_dataset_builder('vivos') >>> ds_builder.get_all_exported_dataset_infos() - {'default': DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)} + {'default': DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string'), 'path': Value(dtype='string'), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string')}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None)} ``` """ return 
DatasetInfosDict.from_directory(cls.get_imported_module_dir()) @@ -527,7 +527,7 @@ def get_exported_dataset_info(self) -> DatasetInfo: >>> from datasets import load_dataset_builder >>> ds_builder = load_dataset_builder('cornell-movie-review-data/rotten_tomatoes') >>> ds_builder.get_exported_dataset_info() - DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string', id=None), 'path': Value(dtype='string', id=None), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string', id=None)}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None) + DatasetInfo(description='', citation='', homepage='', license='', features={'speaker_id': Value(dtype='string'), 'path': Value(dtype='string'), 'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'sentence': Value(dtype='string')}, post_processed=None, supervised_keys=None, builder_name=None, dataset_name=None, config_name='default', version=None, splits={'train': SplitInfo(name='train', num_bytes=1722002133, num_examples=11660, shard_lengths=None, dataset_name=None), 'test': SplitInfo(name='test', num_bytes=86120227, num_examples=760, shard_lengths=None, dataset_name=None)}, download_checksums=None, download_size=1475540500, post_processing_size=None, dataset_size=1808122360, size_in_bytes=None) ``` """ return self.get_all_exported_dataset_infos().get(self.config.name, DatasetInfo()) diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 4d79f95620e..1b523ea10da 100644 --- 
a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -290,14 +290,14 @@ def cast(self, features: Features) -> "DatasetDict": >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes") >>> ds["train"].features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} >>> new_features = ds["train"].features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = ds.cast(new_features) >>> ds["train"].features {'label': ClassLabel(names=['bad', 'good'], id=None), - 'text': Value(dtype='large_string', id=None)} + 'text': Value(dtype='large_string')} ``` """ self._check_values_type() @@ -322,11 +322,11 @@ def cast_column(self, column: str, feature) -> "DatasetDict": >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes") >>> ds["train"].features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds["train"].features {'label': ClassLabel(names=['bad', 'good'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} ``` """ self._check_values_type() @@ -513,14 +513,14 @@ def class_encode_column(self, column: str, include_nulls: bool = False) -> "Data >>> from datasets import load_dataset >>> ds = load_dataset("boolq") >>> ds["train"].features - {'answer': Value(dtype='bool', id=None), - 'passage': Value(dtype='string', id=None), - 'question': Value(dtype='string', id=None)} + {'answer': Value(dtype='bool'), + 'passage': Value(dtype='string'), + 'question': Value(dtype='string')} >>> ds = ds.class_encode_column("answer") >>> ds["train"].features {'answer': ClassLabel(num_classes=2, names=['False', 'True'], id=None), - 'passage': Value(dtype='string', id=None), - 'question': Value(dtype='string', id=None)} + 'passage': 
Value(dtype='string'), + 'question': Value(dtype='string')} ``` """ self._check_values_type() @@ -2381,11 +2381,11 @@ def cast_column(self, column: str, feature: FeatureType) -> "IterableDatasetDict >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", streaming=True) >>> ds["train"].features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds["train"].features {'label': ClassLabel(names=['bad', 'good'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} ``` """ return IterableDatasetDict( @@ -2417,14 +2417,14 @@ def cast( >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", streaming=True) >>> ds["train"].features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} >>> new_features = ds["train"].features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = ds.cast(new_features) >>> ds["train"].features {'label': ClassLabel(names=['bad', 'good'], id=None), - 'text': Value(dtype='large_string', id=None)} + 'text': Value(dtype='large_string')} ``` """ return IterableDatasetDict({k: dataset.cast(features=features) for k, dataset in self.items()}) diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 5aa9c722267..20dcb8878af 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -519,7 +519,7 @@ class Value: >>> from datasets import Features >>> features = Features({'stars': Value(dtype='int32')}) >>> features - {'stars': Value(dtype='int32', id=None)} + {'stars': Value(dtype='int32')} ``` """ @@ -1834,7 +1834,7 @@ def from_dict(cls, dic) -> "Features": Example:: >>> Features.from_dict({'_type': {'dtype': 'string', 'id': None, '_type': 
'Value'}}) - {'_type': Value(dtype='string', id=None)} + {'_type': Value(dtype='string')} """ obj = generate_from_dict(dic) return cls(**obj) @@ -2132,7 +2132,7 @@ def copy(self) -> "Features": >>> copy_of_features = ds.features.copy() >>> copy_of_features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} ``` """ return copy.deepcopy(self) @@ -2208,12 +2208,12 @@ def flatten(self, max_depth=16) -> "Features": >>> from datasets import load_dataset >>> ds = load_dataset("rajpurkar/squad", split="train") >>> ds.features.flatten() - {'answers.answer_start': List(feature=Value(dtype='int32', id=None), length=-1, id=None), - 'answers.text': List(feature=Value(dtype='string', id=None), length=-1, id=None), - 'context': Value(dtype='string', id=None), - 'id': Value(dtype='string', id=None), - 'question': Value(dtype='string', id=None), - 'title': Value(dtype='string', id=None)} + {'answers.answer_start': List(feature=Value(dtype='int32'), length=-1, id=None), + 'answers.text': List(feature=Value(dtype='string'), length=-1, id=None), + 'context': Value(dtype='string'), + 'id': Value(dtype='string'), + 'question': Value(dtype='string'), + 'title': Value(dtype='string')} ``` """ for depth in range(1, max_depth): diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py index c70381542c3..4bd9f80d47d 100644 --- a/src/datasets/iterable_dataset.py +++ b/src/datasets/iterable_dataset.py @@ -3233,19 +3233,19 @@ def cast_column(self, column: str, feature: FeatureType) -> "IterableDataset": >>> ds = load_dataset("PolyAI/minds14", name="en-US", split="train", streaming=True) >>> ds.features {'audio': Audio(sampling_rate=8000, mono=True, decode=True, id=None), - 'english_transcription': Value(dtype='string', id=None), + 'english_transcription': Value(dtype='string'), 'intent_class': ClassLabel(num_classes=14, names=['abroad', 'address', 'app_error', 'atm_limit', 'balance', 
'business_loan', 'card_issues', 'cash_deposit', 'direct_debit', 'freeze', 'high_value_payment', 'joint_account', 'latest_transactions', 'pay_bill'], id=None), 'lang_id': ClassLabel(num_classes=14, names=['cs-CZ', 'de-DE', 'en-AU', 'en-GB', 'en-US', 'es-ES', 'fr-FR', 'it-IT', 'ko-KR', 'nl-NL', 'pl-PL', 'pt-PT', 'ru-RU', 'zh-CN'], id=None), - 'path': Value(dtype='string', id=None), - 'transcription': Value(dtype='string', id=None)} + 'path': Value(dtype='string'), + 'transcription': Value(dtype='string')} >>> ds = ds.cast_column("audio", Audio(sampling_rate=16000)) >>> ds.features {'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), - 'english_transcription': Value(dtype='string', id=None), + 'english_transcription': Value(dtype='string'), 'intent_class': ClassLabel(num_classes=14, names=['abroad', 'address', 'app_error', 'atm_limit', 'balance', 'business_loan', 'card_issues', 'cash_deposit', 'direct_debit', 'freeze', 'high_value_payment', 'joint_account', 'latest_transactions', 'pay_bill'], id=None), 'lang_id': ClassLabel(num_classes=14, names=['cs-CZ', 'de-DE', 'en-AU', 'en-GB', 'en-US', 'es-ES', 'fr-FR', 'it-IT', 'ko-KR', 'nl-NL', 'pl-PL', 'pt-PT', 'ru-RU', 'zh-CN'], id=None), - 'path': Value(dtype='string', id=None), - 'transcription': Value(dtype='string', id=None)} + 'path': Value(dtype='string'), + 'transcription': Value(dtype='string')} ``` """ feature = _fix_for_backward_compatible_features(feature) @@ -3285,14 +3285,14 @@ def cast( >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="train", streaming=True) >>> ds.features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} >>> new_features = ds.features.copy() >>> new_features["label"] = ClassLabel(names=["bad", "good"]) >>> new_features["text"] = Value("large_string") >>> ds = ds.cast(new_features) >>> ds.features {'label': ClassLabel(names=['bad', 'good'], id=None), - 'text': 
Value(dtype='large_string', id=None)} + 'text': Value(dtype='large_string')} ``` """ features = _fix_for_backward_compatible_features(features) @@ -3348,7 +3348,7 @@ def decode(self, enable: bool = True, num_threads: int = 0) -> "IterableDataset" >>> ds = ds.decode(False) >>> ds.features {'image': Image(mode=None, decode=False, id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} >>> next(iter(ds)) { 'image': { diff --git a/src/datasets/load.py b/src/datasets/load.py index 5743c7d8276..8d1d49dc2ee 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -1117,7 +1117,7 @@ def load_dataset_builder( >>> ds_builder = load_dataset_builder('cornell-movie-review-data/rotten_tomatoes') >>> ds_builder.info.features {'label': ClassLabel(names=['neg', 'pos'], id=None), - 'text': Value(dtype='string', id=None)} + 'text': Value(dtype='string')} ``` """ download_mode = DownloadMode(download_mode or DownloadMode.REUSE_DATASET_IF_EXISTS) From 5b359cf03370f3517c69dcfa1cfcbdfef37cb314 Mon Sep 17 00:00:00 2001 From: Quentin Lhoest Date: Wed, 25 Jun 2025 15:58:47 +0200 Subject: [PATCH 9/9] again --- src/datasets/arrow_dataset.py | 10 +++++----- src/datasets/dataset_dict.py | 18 +++++++++--------- src/datasets/features/features.py | 4 ++-- src/datasets/iterable_dataset.py | 12 ++++++------ src/datasets/load.py | 2 +- 5 files changed, 23 insertions(+), 23 deletions(-) diff --git a/src/datasets/arrow_dataset.py b/src/datasets/arrow_dataset.py index e3c4f5c50cf..cfdf6f2a444 100644 --- a/src/datasets/arrow_dataset.py +++ b/src/datasets/arrow_dataset.py @@ -1962,7 +1962,7 @@ def class_encode_column(self, column: str, include_nulls: bool = False) -> "Data 'question': Value(dtype='string')} >>> ds = ds.class_encode_column('answer') >>> ds.features - {'answer': ClassLabel(num_classes=2, names=['False', 'True'], id=None), + {'answer': ClassLabel(num_classes=2, names=['False', 'True']), 'passage': Value(dtype='string'), 'question': Value(dtype='string')} 
``` @@ -2108,14 +2108,14 @@ def cast( >>> from datasets import load_dataset, ClassLabel, Value >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation") >>> ds.features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} >>> new_features = ds.features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = ds.cast(new_features) >>> ds.features - {'label': ClassLabel(names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good']), 'text': Value(dtype='large_string')} ``` """ @@ -2167,11 +2167,11 @@ def cast_column(self, column: str, feature: FeatureType, new_fingerprint: Option >>> from datasets import load_dataset, ClassLabel >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="validation") >>> ds.features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds.features - {'label': ClassLabel(names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good']), 'text': Value(dtype='string')} ``` """ diff --git a/src/datasets/dataset_dict.py b/src/datasets/dataset_dict.py index 1b523ea10da..9d8c67ae0e2 100644 --- a/src/datasets/dataset_dict.py +++ b/src/datasets/dataset_dict.py @@ -289,14 +289,14 @@ def cast(self, features: Features) -> "DatasetDict": >>> from datasets import load_dataset, ClassLabel, Value >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes") >>> ds["train"].features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} >>> new_features = ds["train"].features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = 
ds.cast(new_features) >>> ds["train"].features - {'label': ClassLabel(names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good']), 'text': Value(dtype='large_string')} ``` """ @@ -321,11 +321,11 @@ def cast_column(self, column: str, feature) -> "DatasetDict": >>> from datasets import load_dataset, ClassLabel >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes") >>> ds["train"].features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds["train"].features - {'label': ClassLabel(names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good']), 'text': Value(dtype='string')} ``` """ @@ -518,7 +518,7 @@ def class_encode_column(self, column: str, include_nulls: bool = False) -> "Data 'question': Value(dtype='string')} >>> ds = ds.class_encode_column("answer") >>> ds["train"].features - {'answer': ClassLabel(num_classes=2, names=['False', 'True'], id=None), + {'answer': ClassLabel(num_classes=2, names=['False', 'True']), 'passage': Value(dtype='string'), 'question': Value(dtype='string')} ``` @@ -2380,11 +2380,11 @@ def cast_column(self, column: str, feature: FeatureType) -> "IterableDatasetDict >>> from datasets import load_dataset, ClassLabel >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", streaming=True) >>> ds["train"].features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} >>> ds = ds.cast_column('label', ClassLabel(names=['bad', 'good'])) >>> ds["train"].features - {'label': ClassLabel(names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good']), 'text': Value(dtype='string')} ``` """ @@ -2416,14 +2416,14 @@ def cast( >>> from datasets import load_dataset >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", streaming=True) >>> 
ds["train"].features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} >>> new_features = ds["train"].features.copy() >>> new_features['label'] = ClassLabel(names=['bad', 'good']) >>> new_features['text'] = Value('large_string') >>> ds = ds.cast(new_features) >>> ds["train"].features - {'label': ClassLabel(names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good']), 'text': Value(dtype='large_string')} ``` """ diff --git a/src/datasets/features/features.py b/src/datasets/features/features.py index 20dcb8878af..676521e6990 100644 --- a/src/datasets/features/features.py +++ b/src/datasets/features/features.py @@ -987,7 +987,7 @@ class ClassLabel: >>> from datasets import Features, ClassLabel >>> features = Features({'label': ClassLabel(num_classes=3, names=['bad', 'ok', 'good'])}) >>> features - {'label': ClassLabel(names=['bad', 'ok', 'good'], id=None)} + {'label': ClassLabel(names=['bad', 'ok', 'good'])} ``` """ @@ -2131,7 +2131,7 @@ def copy(self) -> "Features": >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="train") >>> copy_of_features = ds.features.copy() >>> copy_of_features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} ``` """ diff --git a/src/datasets/iterable_dataset.py b/src/datasets/iterable_dataset.py index 4bd9f80d47d..a5797bfa54c 100644 --- a/src/datasets/iterable_dataset.py +++ b/src/datasets/iterable_dataset.py @@ -3234,16 +3234,16 @@ def cast_column(self, column: str, feature: FeatureType) -> "IterableDataset": >>> ds.features {'audio': Audio(sampling_rate=8000, mono=True, decode=True, id=None), 'english_transcription': Value(dtype='string'), - 'intent_class': ClassLabel(num_classes=14, names=['abroad', 'address', 'app_error', 'atm_limit', 'balance', 'business_loan', 'card_issues', 'cash_deposit', 'direct_debit', 'freeze', 
'high_value_payment', 'joint_account', 'latest_transactions', 'pay_bill'], id=None), - 'lang_id': ClassLabel(num_classes=14, names=['cs-CZ', 'de-DE', 'en-AU', 'en-GB', 'en-US', 'es-ES', 'fr-FR', 'it-IT', 'ko-KR', 'nl-NL', 'pl-PL', 'pt-PT', 'ru-RU', 'zh-CN'], id=None), + 'intent_class': ClassLabel(num_classes=14, names=['abroad', 'address', 'app_error', 'atm_limit', 'balance', 'business_loan', 'card_issues', 'cash_deposit', 'direct_debit', 'freeze', 'high_value_payment', 'joint_account', 'latest_transactions', 'pay_bill']), + 'lang_id': ClassLabel(num_classes=14, names=['cs-CZ', 'de-DE', 'en-AU', 'en-GB', 'en-US', 'es-ES', 'fr-FR', 'it-IT', 'ko-KR', 'nl-NL', 'pl-PL', 'pt-PT', 'ru-RU', 'zh-CN']), 'path': Value(dtype='string'), 'transcription': Value(dtype='string')} >>> ds = ds.cast_column("audio", Audio(sampling_rate=16000)) >>> ds.features {'audio': Audio(sampling_rate=16000, mono=True, decode=True, id=None), 'english_transcription': Value(dtype='string'), - 'intent_class': ClassLabel(num_classes=14, names=['abroad', 'address', 'app_error', 'atm_limit', 'balance', 'business_loan', 'card_issues', 'cash_deposit', 'direct_debit', 'freeze', 'high_value_payment', 'joint_account', 'latest_transactions', 'pay_bill'], id=None), - 'lang_id': ClassLabel(num_classes=14, names=['cs-CZ', 'de-DE', 'en-AU', 'en-GB', 'en-US', 'es-ES', 'fr-FR', 'it-IT', 'ko-KR', 'nl-NL', 'pl-PL', 'pt-PT', 'ru-RU', 'zh-CN'], id=None), + 'intent_class': ClassLabel(num_classes=14, names=['abroad', 'address', 'app_error', 'atm_limit', 'balance', 'business_loan', 'card_issues', 'cash_deposit', 'direct_debit', 'freeze', 'high_value_payment', 'joint_account', 'latest_transactions', 'pay_bill']), + 'lang_id': ClassLabel(num_classes=14, names=['cs-CZ', 'de-DE', 'en-AU', 'en-GB', 'en-US', 'es-ES', 'fr-FR', 'it-IT', 'ko-KR', 'nl-NL', 'pl-PL', 'pt-PT', 'ru-RU', 'zh-CN']), 'path': Value(dtype='string'), 'transcription': Value(dtype='string')} ``` @@ -3284,14 +3284,14 @@ def cast( >>> from datasets import 
load_dataset, ClassLabel, Value >>> ds = load_dataset("cornell-movie-review-data/rotten_tomatoes", split="train", streaming=True) >>> ds.features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} >>> new_features = ds.features.copy() >>> new_features["label"] = ClassLabel(names=["bad", "good"]) >>> new_features["text"] = Value("large_string") >>> ds = ds.cast(new_features) >>> ds.features - {'label': ClassLabel(names=['bad', 'good'], id=None), + {'label': ClassLabel(names=['bad', 'good']), 'text': Value(dtype='large_string')} ``` """ diff --git a/src/datasets/load.py b/src/datasets/load.py index 8d1d49dc2ee..c540e511473 100644 --- a/src/datasets/load.py +++ b/src/datasets/load.py @@ -1116,7 +1116,7 @@ def load_dataset_builder( >>> from datasets import load_dataset_builder >>> ds_builder = load_dataset_builder('cornell-movie-review-data/rotten_tomatoes') >>> ds_builder.info.features - {'label': ClassLabel(names=['neg', 'pos'], id=None), + {'label': ClassLabel(names=['neg', 'pos']), 'text': Value(dtype='string')} ``` """