explosion · svlandeg · May 9, 2022 · Feb 22, 2022 · Feb 23, 2022 · Feb 23, 2022
diff --git a/examples/benchmarks/lstm_tagger.py b/examples/benchmarks/lstm_tagger.py
@@ -11,15 +11,15 @@
 
 So PyTorch is 3x faster currently.
 """
-from typing import List
+from typing import List, cast
 import typer
 import tqdm
 import numpy.random
 from timeit import default_timer as timer
 from thinc.api import Model, Config, registry, chain, list2padded, with_array
 from thinc.api import to_categorical, set_current_ops
 from thinc.api import NumpyOps, CupyOps, fix_random_seed, require_gpu
-from thinc.types import Array2d, Padded
+from thinc.types import Array2d, Padded, List2d
 
 CONFIG = """
 [data]
@@ -59,7 +59,7 @@ def build_tagger(
     embed: Model[Array2d, Array2d],
     encode: Model[Padded, Padded],
     predict: Model[Array2d, Array2d],
-) -> Model[List[Array2d], Padded]:
+) -> Model[List2d, List2d]:
     model = chain(
         list2padded(),
         with_array(embed),

diff --git a/requirements.txt b/requirements.txt
@@ -8,7 +8,7 @@ wasabi>=0.8.1,<1.1.0
 catalogue>=2.0.4,<2.1.0
 ml_datasets>=0.2.0,<0.3.0
 # Third-party dependencies
-pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0
+pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
 numpy>=1.15.0
 # Backports of modern Python features
 dataclasses>=0.6,<1.0; python_version < "3.7"
@@ -22,8 +22,7 @@ pytest-cov>=2.7.0,<2.8.0
 coverage>=5.0.0,<6.0.0
 mock>=2.0.0,<3.0.0
 flake8>=3.5.0,<3.6.0
-# restricting mypy until faster 3.10 wheels are available
-mypy>=0.901,<0.920; python_version < "3.10"
+mypy>=0.901,<=0.931
 types-mock>=0.1.1
 types-contextvars>=0.1.2; python_version < "3.7"
 types-dataclasses>=0.1.3; python_version < "3.7"

diff --git a/setup.cfg b/setup.cfg
@@ -48,7 +48,7 @@ install_requires =
     # Third-party dependencies
     setuptools
     numpy>=1.15.0
-    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.9.0
+    pydantic>=1.7.4,!=1.8,!=1.8.1,<1.10.0
     # Backports of modern Python features
     dataclasses>=0.6,<1.0; python_version < "3.7"
     typing_extensions>=3.7.4.1,<4.0.0.0; python_version < "3.8"

diff --git a/thinc/backends/ops.py b/thinc/backends/ops.py
@@ -5,15 +5,47 @@
 import numpy
 import itertools
 
-from .. import registry
-from ..types import Xp, Shape, DTypes, DTypesInt, DTypesFloat, List2d, ArrayXd
-from ..types import Array3d, Floats1d, Floats2d, Floats3d, Floats4d
-from ..types import FloatsXd, Ints1d, Ints2d, Ints3d, Ints4d, IntsXd, _Floats
+from ..types import (
+    Array1d,
+    Array2d,
+    Array3d,
+    Array4d,
+    ArrayXd,
+)
+from ..types import Floats1d, Floats2d, Floats3d, Floats4d, FloatsXd, _Floats
+from ..types import Ints1d, Ints2d, Ints3d, Ints4d, IntsXd
+from ..types import Xp, Shape, DTypes, DTypesInt, DTypesFloat
 from ..types import DeviceTypes, Generator, Padded, Batchable, SizedGenerator
 from ..util import get_array_module, is_xp_array, to_numpy
 
 
+# TypeVars for the array-typed ops below. A `_co` suffix marks the covariant twin
+# of a TypeVar (declared with covariant=True).
+ArrayT2d = TypeVar("ArrayT2d", bound=Union[Floats2d, Ints2d, Array2d])
+ArrayT2d_co = TypeVar(
+    "ArrayT2d_co", bound=Union[Floats2d, Ints2d, Array2d], covariant=True
+)
 ArrayT = TypeVar("ArrayT", bound=ArrayXd)
+# Same bound as ArrayT; `unflatten` uses ArrayTXd and `flatten` uses ArrayTXd_co.
+ArrayTXd = TypeVar("ArrayTXd", bound=ArrayXd)
+ArrayTXd_co = TypeVar("ArrayTXd_co", bound=ArrayXd, covariant=True)
+# Bound lists the 2d, 3d and 4d float/int arrays and leaves out 1d: `pad` returns
+# this, because stacking the input arrays adds one leading axis.
+ArrayTXNotMind = TypeVar(
+    "ArrayTXNotMind", bound=Union[Floats2d, Floats3d, Floats4d, Ints2d, Ints3d, Ints4d]
+)
+# Bound lists the 1d, 2d and 3d float/int arrays and leaves out 4d: these are the
+# per-sequence arrays that `pad` consumes and `unpad` hands back (in `_co` form).
+ArrayTXNotMaxd = TypeVar(
+    "ArrayTXNotMaxd", bound=Union[Floats1d, Floats2d, Floats3d, Ints1d, Ints2d, Ints3d]
+)
+ArrayTXNotMaxd_co = TypeVar(
+    "ArrayTXNotMaxd_co",
+    bound=Union[Floats1d, Floats2d, Floats3d, Ints1d, Ints2d, Ints3d],
+    covariant=True,
+)
+
+
 FloatsT = TypeVar("FloatsT", bound=_Floats)
 FloatsType = TypeVar("FloatsType", bound=FloatsXd)
 SQRT2PI = math.sqrt(2.0 / math.pi)
@@ -227,11 +259,11 @@ def affine(self, X: Floats2d, W: Floats2d, b: Floats1d) -> Floats2d:
 
     def flatten(
         self,
-        X: Sequence[ArrayT],
+        X: List[ArrayTXd_co],
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> ArrayT:
+    ) -> ArrayTXd_co:
         """Flatten a list of arrays into one large array."""
         if X is None or len(X) == 0:
             return self.alloc((0,) * ndim_if_empty, dtype=dtype or "f")
@@ -252,7 +284,7 @@ def flatten(
             result = xp.asarray(result, dtype=dtype)
         return result
 
-    def unflatten(self, X: Floats2d, lengths: Ints1d, pad: int = 0) -> List[Floats2d]:
+    def unflatten(self, X: ArrayTXd, lengths: Ints1d, pad: int = 0) -> List[ArrayTXd]:
         """The reverse/backward operation of the `flatten` function: unflatten
         a large array into a list of arrays according to the given lengths.
         """
@@ -261,31 +293,20 @@ def unflatten(self, X: Floats2d, lengths: Ints1d, pad: int = 0) -> List[Floats2d
         for length in lengths:
             length = int(length)
             if pad >= 1 and length != 0:
-                X = X[pad:]
+                X = X[pad:]  # type: ignore[assignment]
             unflat.append(X[:length])
-            X = X[length:]
+            X = X[length:]  # type: ignore[assignment]
         if pad >= 1:
-            X = X[pad:]
+            X = X[pad:]  # type: ignore[assignment]
         assert len(X) == 0
         assert len(unflat) == len(lengths)
-        return unflat
-
-    @overload
-    def pad(self, seqs: List[Ints2d], round_to=1) -> Ints3d:
-        ...
+        return cast(List[ArrayTXd], unflat)
 
-    @overload  # noqa: F811
-    def pad(self, seqs: List[Floats2d], round_to=1) -> Floats3d:
-        ...
-
-    def pad(  # noqa: F811
-        self, seqs: Union[List[Ints2d], List[Floats2d]], round_to=1
-    ) -> Array3d:
+    def pad(self, seqs: List[ArrayTXNotMaxd_co], round_to=1) -> ArrayTXNotMind:
         """Perform padding on a list of arrays so that they each have the same
         length, by taking the maximum dimension across each axis. This only
         works on non-empty sequences with the same `ndim` and `dtype`.
         """
-        # TODO: This should be generalized to handle different ranks
         if not seqs:
             raise ValueError("Cannot pad empty sequence")
         if len(set(seq.ndim for seq in seqs)) != 1:
@@ -300,29 +321,31 @@ def pad(  # noqa: F811
         # array sizes.
         length = (length + (round_to - 1)) // round_to * round_to
         final_shape = (len(seqs), length) + seqs[0].shape[1:]
-        output: Array3d = self.alloc(final_shape, dtype=seqs[0].dtype)
+        output: ArrayTXNotMind = self.alloc(final_shape, dtype=seqs[0].dtype)
         for i, arr in enumerate(seqs):
             # It's difficult to convince this that the dtypes will match.
             output[i, : arr.shape[0]] = arr  # type: ignore
         return output
 
-    def unpad(self, padded: Array3d, lengths: List[int]) -> List2d:
+    def unpad(
+        self, padded: ArrayTXNotMind, lengths: List[int]
+    ) -> List[ArrayTXNotMaxd_co]:
         """The reverse/backward operation of the `pad` function: transform an
         array back into a list of arrays, each with their original length.
         """
         output = []
         for i, length in enumerate(lengths):
             output.append(padded[i, :length])
-        return cast(List2d, output)
+        return cast(List[ArrayTXNotMaxd_co], output)
 
-    def list2padded(self, seqs: List[Floats2d]) -> Padded:
+    def list2padded(self, seqs: List[ArrayT2d_co]) -> Padded:
         """Pack a sequence of 2d arrays into a Padded datatype."""
         if not seqs:
             return Padded(
                 self.alloc3f(0, 0, 0), self.alloc1i(0), self.alloc1i(0), self.alloc1i(0)
             )
         elif len(seqs) == 1:
-            data = self.reshape3f(seqs[0], seqs[0].shape[0], 1, seqs[0].shape[1])
+            data = self.reshape3(seqs[0], seqs[0].shape[0], 1, seqs[0].shape[1])
             size_at_t = self.asarray1i([1] * data.shape[0])
             lengths = self.asarray1i([data.shape[0]])
             indices = self.asarray1i([0])
@@ -338,7 +361,7 @@ def list2padded(self, seqs: List[Floats2d]) -> Padded:
         # direction: you're swapping elements between their original and sorted
         # position.
         seqs = [seqs[i] for i in indices_]
-        arr: Floats3d = self.pad(seqs)
+        arr: Array3d = cast(Array3d, self.pad(seqs))
         assert arr.shape == (nB, nS, nO), (nB, nS, nO)
         arr = self.as_contig(arr.transpose((1, 0, 2)))
         assert arr.shape == (nS, nB, nO)
@@ -351,23 +374,23 @@ def list2padded(self, seqs: List[Floats2d]) -> Padded:
             batch_size_at_t_[t] = current_size
         assert sum(lengths_) == sum(batch_size_at_t_)
         return Padded(
-            cast(Floats3d, arr),
+            arr,
             self.asarray1i(batch_size_at_t_),
             self.asarray1i(lengths_),
             self.asarray1i(indices_),
         )
 
-    def padded2list(self, padded: Padded) -> List2d:
+    def padded2list(self, padded: Padded) -> List[Array2d]:
         """Unpack a Padded datatype to a list of 2-dimensional arrays."""
         data = padded.data
         indices = to_numpy(padded.indices)
         lengths = to_numpy(padded.lengths)
-        unpadded: List[Optional[Floats2d]] = [None] * len(lengths)
+        unpadded: List[Optional[Array2d]] = [None] * len(lengths)
         # Transpose from (length, batch, data) to (batch, length, data)
         data = self.as_contig(data.transpose((1, 0, 2)))
         for i in range(data.shape[0]):
             unpadded[indices[i]] = data[i, : int(lengths[i])]
-        return cast(List2d, unpadded)
+        return cast(List[Array2d], unpadded)
 
     def get_dropout_mask(self, shape: Shape, drop: Optional[float]) -> FloatsXd:
         """Create a random mask for applying dropout, with a certain percent of
@@ -445,6 +468,18 @@ def alloc(self, shape: Shape, *, dtype: Optional[DTypes] = "float32") -> ArrayT:
             shape = (shape,)
         return self.xp.zeros(shape, dtype=dtype)
 
+    def reshape1(self, array: ArrayXd, d0: int) -> Array1d:
+        return cast(Array1d, self.reshape(array, (d0,)))
+
+    def reshape2(self, array: ArrayXd, d0: int, d1: int) -> Array2d:
+        return cast(Array2d, self.reshape(array, (d0, d1)))
+
+    def reshape3(self, array: ArrayXd, d0: int, d1: int, d2: int) -> Array3d:
+        return cast(Array3d, self.reshape(array, (d0, d1, d2)))
+
+    def reshape4(self, array: ArrayXd, d0: int, d1: int, d2: int, d3: int) -> Array4d:
+        return cast(Array4d, self.reshape(array, (d0, d1, d2, d3)))
+
     def reshape1f(self, array: FloatsXd, d0: int) -> Floats1d:
         return cast(Floats1d, self.reshape(array, (d0,)))
 
@@ -603,7 +638,7 @@ def dtanh(self, Y: FloatsT, *, inplace: bool = False) -> FloatsT:
             Y += 1.0
             return Y
         else:
-            return 1 - Y ** 2
+            return 1 - Y**2
 
     def softmax(
         self,
@@ -859,7 +894,7 @@ def gelu_approx(self, X: FloatsType, inplace: bool = False) -> FloatsType:
         Y = self.xp.zeros_like(X)
         Y += tmp
         Y *= X
-        return cast(FloatsType, Y)
+        return Y
 
     def backprop_gelu_approx(
         self, dY: FloatsType, X: FloatsType, inplace: bool = False
@@ -924,7 +959,7 @@ def backprop_mish(
         delta = xp.exp(Xsub) + 1.0
         delta *= delta
         delta += 1.0
-        dXsub = dYsub * ((xp.exp(Xsub) * omega) / (delta ** 2))
+        dXsub = dYsub * ((xp.exp(Xsub) * omega) / (delta**2))
         # Gradient when above threshold will ignore softplus.
         if inplace:
             out = dY
@@ -1368,7 +1403,7 @@ def dsigmoid(Y: ArrayT) -> ArrayT:
 
 
 def dtanh(Y: ArrayT) -> ArrayT:
-    return 1 - Y ** 2
+    return 1 - Y**2
 
 
 def gaussian_cdf(ops: Ops, X: FloatsType) -> FloatsType:

diff --git a/thinc/config.py b/thinc/config.py
@@ -1,4 +1,4 @@
-from typing import Union, Dict, Any, Optional, List, Tuple, Callable, Type
+from typing import Union, Dict, Any, Optional, List, Tuple, Callable, Type, Mapping
 from typing import Iterable, Sequence, cast
 from types import GeneratorType
 from dataclasses import dataclass
@@ -550,7 +550,7 @@ def __init__(
         self,
         *,
         config: Optional[Union[Config, Dict[str, Dict[str, Any]], str]] = None,
-        errors: Iterable[Dict[str, Any]] = tuple(),
+        errors: Union[Sequence[Mapping[str, Any]], Iterable[Dict[str, Any]]] = tuple(),
         title: Optional[str] = "Config validation error",
         desc: Optional[str] = None,
         parent: Optional[str] = None,
@@ -560,9 +560,10 @@ def __init__(
 
         config (Union[Config, Dict[str, Dict[str, Any]], str]): The
             config the validation error refers to.
-        errors (Iterable[Dict[str, Any]]): A list of errors as dicts with keys
-            "loc" (list of strings describing the path of the value), "msg"
-            (validation message to show) and optional "type" (mostly internals).
+        errors (Union[Sequence[Mapping[str, Any]], Iterable[Dict[str, Any]]]):
+            A list of errors as dicts with keys "loc" (list of strings
+            describing the path of the value), "msg" (validation message
+            to show) and optional "type" (mostly internals).
             Same format as produced by pydantic's validation error (e.errors()).
         title (str): The error title.
         desc (str): Optional error description, displayed below the title.

diff --git a/thinc/layers/add.py b/thinc/layers/add.py
@@ -7,7 +7,7 @@
 
 
 InT = TypeVar("InT", bound=Any)
-OutT = TypeVar("OutT", bound=ArrayXd)
+OutT = TypeVar("OutT", bound=ArrayXd, covariant=True)
 
 
 @registry.layers("add.v1")

diff --git a/thinc/layers/array_getitem.py b/thinc/layers/array_getitem.py
@@ -1,13 +1,14 @@
-from typing import Union, Sequence, Tuple
+from typing import Union, Sequence, Tuple, TypeVar
 from ..types import ArrayXd, FloatsXd, IntsXd
 from ..model import Model
 
 
 AxisIndex = Union[int, slice, Sequence[int]]
 Index = Union[AxisIndex, Tuple[AxisIndex, ...]]
+ArrayXd_co = TypeVar("ArrayXd_co", bound=ArrayXd, covariant=True)
 
 
-def array_getitem(index: Index) -> Model[ArrayXd, ArrayXd]:
+def array_getitem(index: Index) -> Model[ArrayXd_co, ArrayXd_co]:
     """Index into input arrays, and return the subarrays.
 
     index:

diff --git a/thinc/layers/cauchysimilarity.py b/thinc/layers/cauchysimilarity.py
@@ -30,7 +30,7 @@ def forward(
     X1, X2 = X1_X2
     W = cast(Floats2d, model.get_param("W"))
     diff = X1 - X2
-    square_diff = diff ** 2
+    square_diff = diff**2
     total = (W * square_diff).sum(axis=1)  # type: ignore
     sim, bp_sim = inverse(total)