diff --git a/thinc/backends/ops.py b/thinc/backends/ops.py
index 2386e21cc..a7015d44d 100644
--- a/thinc/backends/ops.py
+++ b/thinc/backends/ops.py
@@ -229,56 +229,56 @@ def affine(self, X: Floats2d, W: Floats2d, b: Floats1d) -> Floats2d:
         Y += b
         return Y
 
-    @overload 
+    @overload
     def flatten(
         self,
         X: List[Floats2d],
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> Floats2d: 
+    ) -> Floats2d:
         ...
 
-    @overload 
+    @overload
     def flatten(
         self,
         X: List[Ints1d],
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> Ints1d: 
+    ) -> Ints1d:
         ...
 
-    @overload 
+    @overload
     def flatten(
         self,
        X: List2d,
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> Array2d: 
+    ) -> Array2d:
         ...
 
     # further specific typed signatures can be added as necessary
 
-    @overload 
+    @overload
     def flatten(
         self,
         X: ListXd,
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> ArrayXd: 
+    ) -> ArrayXd:
         ...
 
-    @overload 
+    @overload
     def flatten(
         self,
         X: Sequence[ArrayXd],
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> ArrayXd: 
+    ) -> ArrayXd:
         ...
 
     def flatten(
diff --git a/thinc/layers/gelu.py b/thinc/layers/gelu.py
index d49ac77a9..cdb0fb6ee 100644
--- a/thinc/layers/gelu.py
+++ b/thinc/layers/gelu.py
@@ -34,8 +34,9 @@ def Gelu(
     return model
 
 
-def forward(model: Model[Floats2d, Floats2d],
-            X: Floats2d, is_train: bool) -> Tuple[Floats2d, Callable]:
+def forward(
+    model: Model[Floats2d, Floats2d], X: Floats2d, is_train: bool
+) -> Tuple[Floats2d, Callable]:
     W = cast(Floats2d, model.get_param("W"))
     b = cast(Floats1d, model.get_param("b"))
     Y_preact = model.ops.affine(X, W, b)
diff --git a/thinc/layers/hard_swish.py b/thinc/layers/hard_swish.py
index 81b1ad8dd..0478fd270 100644
--- a/thinc/layers/hard_swish.py
+++ b/thinc/layers/hard_swish.py
@@ -34,8 +34,9 @@ def HardSwish(
     return model
 
 
-def forward(model: Model[Floats2d, Floats2d],
-            X: Floats2d, is_train: bool) -> Tuple[Floats2d, Callable]:
+def forward(
+    model: Model[Floats2d, Floats2d], X: Floats2d, is_train: bool
+) -> Tuple[Floats2d, Callable]:
     W = cast(Floats2d, model.get_param("W"))
     b = cast(Floats1d, model.get_param("b"))
     Y_preact = model.ops.affine(X, W, b)
diff --git a/thinc/layers/hard_swish_mobilenet.py b/thinc/layers/hard_swish_mobilenet.py
index 38004c848..6a5dce388 100644
--- a/thinc/layers/hard_swish_mobilenet.py
+++ b/thinc/layers/hard_swish_mobilenet.py
@@ -34,17 +34,16 @@ def HardSwishMobilenet(
     return model
 
 
-def forward(model: Model[Floats2d, Floats2d],
-            X: Floats2d, is_train: bool) -> Tuple[Floats2d, Callable]:
+def forward(
+    model: Model[Floats2d, Floats2d], X: Floats2d, is_train: bool
+) -> Tuple[Floats2d, Callable]:
     W = cast(Floats2d, model.get_param("W"))
     b = cast(Floats1d, model.get_param("b"))
     Y_preact = model.ops.affine(X, W, b)
     Y = model.ops.hard_swish_mobilenet(Y_preact)
 
     def backprop(dY: Floats2d) -> Floats2d:
-        dY = model.ops.backprop_hard_swish_mobilenet(dY,
-                                                     Y_preact,
-                                                     inplace=False)
+        dY = model.ops.backprop_hard_swish_mobilenet(dY, Y_preact, inplace=False)
         model.inc_grad("b", dY.sum(axis=0))
         model.inc_grad("W", model.ops.gemm(dY, X, trans1=True))
         return model.ops.gemm(dY, W)
diff --git a/thinc/layers/layernorm.py b/thinc/layers/layernorm.py
index cf22015ed..684489c54 100644
--- a/thinc/layers/layernorm.py
+++ b/thinc/layers/layernorm.py
@@ -17,7 +17,7 @@ def LayerNorm(nI: Optional[int] = None) -> Model[InT, InT]:
         forward,
         init=init,
         dims={"nI": nI, "nO": nI},
-        params={"G": None, "b": None}
+        params={"G": None, "b": None},
     )
 
 
diff --git a/thinc/layers/swish.py b/thinc/layers/swish.py
index ea5444b49..a05a0dc72 100644
--- a/thinc/layers/swish.py
+++ b/thinc/layers/swish.py
@@ -34,8 +34,9 @@ def Swish(
     return model
 
 
-def forward(model: Model[Floats2d, Floats2d],
-            X: Floats2d, is_train: bool) -> Tuple[Floats2d, Callable]:
+def forward(
+    model: Model[Floats2d, Floats2d], X: Floats2d, is_train: bool
+) -> Tuple[Floats2d, Callable]:
     W = cast(Floats2d, model.get_param("W"))
     b = cast(Floats1d, model.get_param("b"))
     Y_preact = model.ops.affine(X, W, b)
diff --git a/thinc/model.py b/thinc/model.py
index 261858658..08366523e 100644
--- a/thinc/model.py
+++ b/thinc/model.py
@@ -464,7 +464,9 @@ def copy(self: SelfT) -> SelfT:
         """
         return self._copy()
 
-    def _copy(self: SelfT, seen: Optional[Dict[int, Union["Model", Shim]]] = None) -> SelfT:
+    def _copy(
+        self: SelfT, seen: Optional[Dict[int, Union["Model", Shim]]] = None
+    ) -> SelfT:
         if seen is None:
             seen = {}
         params = {}
diff --git a/thinc/optimizers.py b/thinc/optimizers.py
index c8e38e84b..f34cd2ff8 100644
--- a/thinc/optimizers.py
+++ b/thinc/optimizers.py
@@ -279,7 +279,7 @@ def _radam(self, ops, weights, grad, lr_scale, key, nr_upd):
 
         # exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
         exp_avg_sq *= beta2
-        exp_avg_sq += (1 - beta2) * (gradient_1D ** 2)
+        exp_avg_sq += (1 - beta2) * (gradient_1D**2)
         # exp_avg.mul_(beta1).add_(1 - beta1, grad)
         exp_avg *= beta1
         exp_avg += (1 - beta1) * gradient_1D
@@ -338,9 +338,9 @@ def _adam(self, ops, weights, gradient, lr_scale, key, nr_upd):
         mom2 = self.mom2[key]
         b1 = self.b1
         b2 = self.b2
-        fix1 = 1.0 - (b1 ** nr_upd)
-        fix2 = 1.0 - (b2 ** nr_upd)
-        lr = self.learn_rate * fix2 ** 0.5 / fix1
+        fix1 = 1.0 - (b1**nr_upd)
+        fix2 = 1.0 - (b2**nr_upd)
+        lr = self.learn_rate * fix2**0.5 / fix1
         eps = self.eps
         # needs to be 1D going into the adam function
         weights_1D, gradient_1D, mom1, mom2 = ops.adam(
diff --git a/thinc/tests/layers/test_combinators.py b/thinc/tests/layers/test_combinators.py
index ed9a2992a..ea5583108 100644
--- a/thinc/tests/layers/test_combinators.py
+++ b/thinc/tests/layers/test_combinators.py
@@ -271,10 +271,7 @@ def test_concatenate():
 def test_map_list():
     nI = 4
     nO = 9
-    Xs = [
-        numpy.zeros((6, nI), dtype="f"),
-        numpy.ones((3, nI), dtype="f")
-    ]
+    Xs = [numpy.zeros((6, nI), dtype="f"), numpy.ones((3, nI), dtype="f")]
     Y_shapes = [(x.shape[0], nO) for x in Xs]
     model = map_list(Linear())
     model.initialize(X=Xs, Y=[numpy.zeros(shape, dtype="f") for shape in Y_shapes])
diff --git a/thinc/tests/layers/test_pytorch_wrapper.py b/thinc/tests/layers/test_pytorch_wrapper.py
index e6f4edfb4..fc4396370 100644
--- a/thinc/tests/layers/test_pytorch_wrapper.py
+++ b/thinc/tests/layers/test_pytorch_wrapper.py
@@ -64,7 +64,9 @@ def test_pytorch_wrapper(nN, nI, nO):
     assert isinstance(model.predict(X), numpy.ndarray)
 
 
-@pytest.mark.skipif(not has_cupy or not has_torch_gpu, reason="needs PyTorch with CUDA-capable GPU")
+@pytest.mark.skipif(
+    not has_cupy or not has_torch_gpu, reason="needs PyTorch with CUDA-capable GPU"
+)
 @pytest.mark.parametrize("nN,nI,nO", [(2, 3, 4)])
 @pytest.mark.parametrize("mixed_precision", TORCH_MIXED_PRECISION)
 def test_pytorch_wrapper_thinc_input(nN, nI, nO, mixed_precision):
diff --git a/thinc/tests/layers/test_reduce.py b/thinc/tests/layers/test_reduce.py
index ba829f779..d26065c4a 100644
--- a/thinc/tests/layers/test_reduce.py
+++ b/thinc/tests/layers/test_reduce.py
@@ -92,6 +92,7 @@ def test_reduce_mean(Xs):
     dX = backprop(Y)
     assert dX.dataXd.shape == X.dataXd.shape
 
+
 def test_reduce_sum(Xs):
     model = reduce_sum()
     lengths = model.ops.asarray([x.shape[0] for x in Xs], dtype="i")
@@ -107,6 +108,7 @@ def test_reduce_sum(Xs):
     dX = backprop(Y)
     assert dX.dataXd.shape == X.dataXd.shape
 
+
 def test_size_mismatch(Xs):
     for reduce in [reduce_first, reduce_last, reduce_max, reduce_mean, reduce_sum]:
         model = reduce()
diff --git a/thinc/tests/layers/test_with_transforms.py b/thinc/tests/layers/test_with_transforms.py
index a01e20567..c23db1463 100644
--- a/thinc/tests/layers/test_with_transforms.py
+++ b/thinc/tests/layers/test_with_transforms.py
@@ -26,8 +26,8 @@ def list_input(shapes):
     for i, x in enumerate(data):
         # Give values that make it easy to see where rows or columns mismatch.
         x += i * 100
-        x += numpy.arange(x.shape[0]).reshape((-1, 1)) * 10 
-        x += numpy.arange(x.shape[1]).reshape((1, -1)) 
+        x += numpy.arange(x.shape[0]).reshape((-1, 1)) * 10
+        x += numpy.arange(x.shape[1]).reshape((1, -1))
     return data
 
 
@@ -68,8 +68,10 @@ def noop_models():
         with_array(noop()),
         with_array2d(noop()),
         with_list(noop()),
-        with_ragged(noop())
+        with_ragged(noop()),
     ]
+
+
 # As an example operation, lets just trim the last dimension. That
 # should catch stuff that confuses the input and output.
 
@@ -180,14 +182,14 @@ def test_noop_transforms(noop_models, ragged_input, padded_input, list_input):
     d_ragged = Ragged(ragged_input.data + 1, ragged_input.lengths)
     d_padded = padded_input.copy()
     d_padded.data += 1
-    d_list = [dx+1 for dx in list_input]
+    d_list = [dx + 1 for dx in list_input]
     for model in noop_models:
         print(model.name)
         check_transform_doesnt_change_noop_values(model, padded_input, d_padded)
         check_transform_doesnt_change_noop_values(model, list_input, d_list)
         check_transform_doesnt_change_noop_values(model, ragged_input, d_ragged)
-    
+
 
 def test_with_array_initialize(ragged_input, padded_input, list_input, array_input):
     for inputs in (ragged_input, padded_input, list_input, array_input):
         check_initialize(get_array_model(), inputs)
diff --git a/thinc/tests/mypy/test_mypy.py b/thinc/tests/mypy/test_mypy.py
index 287043578..e03d1c874 100644
--- a/thinc/tests/mypy/test_mypy.py
+++ b/thinc/tests/mypy/test_mypy.py
@@ -23,6 +23,7 @@ def test_mypy_results(
 ):
     pytest.importorskip("mypy")
     from mypy import api as mypy_api
+
     os.chdir(tmpdir)
     root_dir = Path(__file__).parent
     thinc_root_dir = Path(__file__).parent.parent.parent.parent
diff --git a/thinc/tests/test_loss.py b/thinc/tests/test_loss.py
index 710a88d61..75206d240 100644
--- a/thinc/tests/test_loss.py
+++ b/thinc/tests/test_loss.py
@@ -168,7 +168,9 @@ def test_sequence_categorical_crossentropy(guesses, labels, names):
     assert d_scores1[1][0] == pytest.approx(0.4, eps)
     assert d_scores1[1][1] == pytest.approx(-0.4, eps)
     # The normalization divides the difference (e.g. 0.4) by the number of seqs
-    d_scores = SequenceCategoricalCrossentropy(normalize=True, names=names).get_grad(guesses, labels)
+    d_scores = SequenceCategoricalCrossentropy(normalize=True, names=names).get_grad(
+        guesses, labels
+    )
     d_scores1 = d_scores[0]
     d_scores2 = d_scores[1]
 
@@ -189,7 +191,9 @@ def test_sequence_categorical_crossentropy(guesses, labels, names):
     assert d_scores2[0][0] == pytest.approx(0.1, eps)
     assert d_scores2[0][1] == pytest.approx(-0.35, eps)
 
-    loss = SequenceCategoricalCrossentropy(normalize=True, names=names).get_loss(guesses, labels)
+    loss = SequenceCategoricalCrossentropy(normalize=True, names=names).get_loss(
+        guesses, labels
+    )
     assert loss == pytest.approx(1.09, eps)
 
 
@@ -200,9 +204,9 @@ def test_sequence_categorical_crossentropy(guesses, labels, names):
     ],
 )
 def test_sequence_categorical_missing_negative(guesses, labels, names):
-    d_scores = SequenceCategoricalCrossentropy(normalize=False, names=names, neg_prefix="!", missing_value="").get_grad(
-        guesses, labels
-    )
+    d_scores = SequenceCategoricalCrossentropy(
+        normalize=False, names=names, neg_prefix="!", missing_value=""
+    ).get_grad(guesses, labels)
     d_scores0 = d_scores[0]
 
     # [0.1, 0.5, 0.6] should be A
@@ -292,8 +296,16 @@ def test_cosine_unmatched():
         ("SequenceCategoricalCrossentropy.v1", {}, ([scores0], [labels0])),
         ("CategoricalCrossentropy.v2", {"neg_prefix": "!"}, (scores0, labels0)),
         ("CategoricalCrossentropy.v3", {"neg_prefix": "!"}, (scores0, labels0)),
-        ("SequenceCategoricalCrossentropy.v2", {"neg_prefix": "!"}, ([scores0], [labels0])),
-        ("SequenceCategoricalCrossentropy.v3", {"neg_prefix": "!"}, ([scores0], [labels0])),
+        (
+            "SequenceCategoricalCrossentropy.v2",
+            {"neg_prefix": "!"},
+            ([scores0], [labels0]),
+        ),
+        (
+            "SequenceCategoricalCrossentropy.v3",
+            {"neg_prefix": "!"},
+            ([scores0], [labels0]),
+        ),
         ("L2Distance.v1", {}, (scores0, scores0)),
         (
             "CosineDistance.v1",
diff --git a/thinc/tests/test_serialize.py b/thinc/tests/test_serialize.py
index f3a937c34..b89fc2d94 100644
--- a/thinc/tests/test_serialize.py
+++ b/thinc/tests/test_serialize.py
@@ -55,7 +55,7 @@ def test_simple_model_roundtrip_bytes():
 
 
 def test_simple_model_roundtrip_bytes_length():
-    """ Ensure that serialization of non-initialized weight matrices goes fine """
+    """Ensure that serialization of non-initialized weight matrices goes fine"""
     model1 = Maxout(5, 10, nP=2)
     model2 = Maxout(5, 10, nP=2)
 
@@ -186,7 +186,7 @@ def test_simple_model_can_from_dict():
     assert model.can_from_dict(model_dict)
     # Test check without initialize
     assert Maxout(5, 10, nP=2).can_from_dict(model_dict)
-    # Test not-strict check 
+    # Test not-strict check
     assert not Maxout(10, 5, nP=2).can_from_dict(model_dict)
     assert Maxout(5, nP=2).can_from_dict(model_dict)