20 changes: 10 additions & 10 deletions thinc/backends/ops.py
@@ -229,56 +229,56 @@ def affine(self, X: Floats2d, W: Floats2d, b: Floats1d) -> Floats2d:
         Y += b
         return Y
 
-    @overload
+    @overload
     def flatten(
         self,
         X: List[Floats2d],
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> Floats2d:
+    ) -> Floats2d:
         ...
 
-    @overload
+    @overload
     def flatten(
         self,
         X: List[Ints1d],
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> Ints1d:
+    ) -> Ints1d:
         ...
 
-    @overload
+    @overload
     def flatten(
         self,
         X: List2d,
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> Array2d:
+    ) -> Array2d:
         ...
 
     # further specific typed signatures can be added as necessary
 
-    @overload
+    @overload
     def flatten(
         self,
         X: ListXd,
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> ArrayXd:
+    ) -> ArrayXd:
         ...
 
-    @overload
+    @overload
     def flatten(
         self,
         X: Sequence[ArrayXd],
         dtype: Optional[DTypes] = None,
         pad: int = 0,
         ndim_if_empty: int = 2,
-    ) -> ArrayXd:
+    ) -> ArrayXd:
         ...
 
     def flatten(
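
The flatten stubs above are a standard use of typing.overload: several precisely typed signatures for the type checker, one real implementation at runtime. A minimal self-contained sketch of the same pattern, using plain list types rather than thinc's array types so it runs without thinc installed:

from typing import List, Union, overload


@overload
def flatten(xs: List[List[int]]) -> List[int]:
    ...


@overload
def flatten(xs: List[List[str]]) -> List[str]:
    ...


def flatten(xs: List[List[Union[int, str]]]) -> List[Union[int, str]]:
    # The single runtime implementation that serves every stub above.
    return [item for sub in xs for item in sub]


ints = flatten([[1, 2], [3]])    # mypy infers List[int]
words = flatten([["a"], ["b"]])  # mypy infers List[str]
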
5 changes: 3 additions & 2 deletions thinc/layers/gelu.py
@@ -34,8 +34,9 @@ def Gelu(
     return model
 
 
-def forward(model: Model[Floats2d, Floats2d],
-            X: Floats2d, is_train: bool) -> Tuple[Floats2d, Callable]:
+def forward(
+    model: Model[Floats2d, Floats2d], X: Floats2d, is_train: bool
+) -> Tuple[Floats2d, Callable]:
     W = cast(Floats2d, model.get_param("W"))
     b = cast(Floats1d, model.get_param("b"))
     Y_preact = model.ops.affine(X, W, b)
5 changes: 3 additions & 2 deletions thinc/layers/hard_swish.py
@@ -34,8 +34,9 @@ def HardSwish(
     return model
 
 
-def forward(model: Model[Floats2d, Floats2d],
-            X: Floats2d, is_train: bool) -> Tuple[Floats2d, Callable]:
+def forward(
+    model: Model[Floats2d, Floats2d], X: Floats2d, is_train: bool
+) -> Tuple[Floats2d, Callable]:
     W = cast(Floats2d, model.get_param("W"))
     b = cast(Floats1d, model.get_param("b"))
     Y_preact = model.ops.affine(X, W, b)
9 changes: 4 additions & 5 deletions thinc/layers/hard_swish_mobilenet.py
@@ -34,17 +34,16 @@ def HardSwishMobilenet(
     return model
 
 
-def forward(model: Model[Floats2d, Floats2d],
-            X: Floats2d, is_train: bool) -> Tuple[Floats2d, Callable]:
+def forward(
+    model: Model[Floats2d, Floats2d], X: Floats2d, is_train: bool
+) -> Tuple[Floats2d, Callable]:
     W = cast(Floats2d, model.get_param("W"))
     b = cast(Floats1d, model.get_param("b"))
     Y_preact = model.ops.affine(X, W, b)
     Y = model.ops.hard_swish_mobilenet(Y_preact)
 
     def backprop(dY: Floats2d) -> Floats2d:
-        dY = model.ops.backprop_hard_swish_mobilenet(dY,
-                                                     Y_preact,
-                                                     inplace=False)
+        dY = model.ops.backprop_hard_swish_mobilenet(dY, Y_preact, inplace=False)
         model.inc_grad("b", dY.sum(axis=0))
         model.inc_grad("W", model.ops.gemm(dY, X, trans1=True))
         return model.ops.gemm(dY, W)
2 changes: 1 addition & 1 deletion thinc/layers/layernorm.py
@@ -17,7 +17,7 @@ def LayerNorm(nI: Optional[int] = None) -> Model[InT, InT]:
         forward,
         init=init,
         dims={"nI": nI, "nO": nI},
-        params={"G": None, "b": None}
+        params={"G": None, "b": None},
     )
 
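
For context, the "G" and "b" parameters registered above are layer normalization's gain and bias. A rough numpy sketch of the computation the layer parameterizes (the eps value here is an assumption, not taken from thinc):

import numpy


def layer_norm(X, G, b, eps=1e-5):
    # Normalize each row to zero mean and unit variance, then rescale and shift.
    mu = X.mean(axis=-1, keepdims=True)
    var = X.var(axis=-1, keepdims=True)
    return (X - mu) / numpy.sqrt(var + eps) * G + b
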
5 changes: 3 additions & 2 deletions thinc/layers/swish.py
@@ -34,8 +34,9 @@ def Swish(
     return model
 
 
-def forward(model: Model[Floats2d, Floats2d],
-            X: Floats2d, is_train: bool) -> Tuple[Floats2d, Callable]:
+def forward(
+    model: Model[Floats2d, Floats2d], X: Floats2d, is_train: bool
+) -> Tuple[Floats2d, Callable]:
     W = cast(Floats2d, model.get_param("W"))
     b = cast(Floats1d, model.get_param("b"))
     Y_preact = model.ops.affine(X, W, b)
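
The forward functions reformatted in gelu.py, hard_swish.py, hard_swish_mobilenet.py and swish.py all share one shape: an affine pre-activation, an elementwise activation, and a backprop closure over the intermediate. A numpy sketch of that pattern, with ReLU standing in for the real activations so the gradient stays obvious (thinc delegates both directions to model.ops):

import numpy


def forward(X, W, b):
    Y_preact = X @ W.T + b          # ops.affine
    Y = numpy.maximum(Y_preact, 0)  # stand-in for gelu/swish/hard_swish

    def backprop(dY):
        d_preact = dY * (Y_preact > 0)  # backprop through the activation
        d_b = d_preact.sum(axis=0)      # matches inc_grad("b", dY.sum(axis=0))
        d_W = d_preact.T @ X            # matches gemm(dY, X, trans1=True)
        dX = d_preact @ W               # matches gemm(dY, W)
        return dX, d_W, d_b

    return Y, backprop
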
4 changes: 3 additions & 1 deletion thinc/model.py
@@ -464,7 +464,9 @@ def copy(self: SelfT) -> SelfT:
         """
         return self._copy()
 
-    def _copy(self: SelfT, seen: Optional[Dict[int, Union["Model", Shim]]] = None) -> SelfT:
+    def _copy(
+        self: SelfT, seen: Optional[Dict[int, Union["Model", Shim]]] = None
+    ) -> SelfT:
         if seen is None:
             seen = {}
         params = {}
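
The seen dict threaded through _copy handles layers and shims that are shared between nodes of a network: each shared object must map to exactly one copy. A generic sketch of that memoization pattern with a hypothetical Node class, not thinc's actual traversal:

from typing import Dict, List, Optional


class Node:
    def __init__(self, name: str, children: Optional[List["Node"]] = None):
        self.name = name
        self.children = children or []

    def copy(self, seen: Optional[Dict[int, "Node"]] = None) -> "Node":
        if seen is None:
            seen = {}
        if id(self) in seen:
            return seen[id(self)]  # reuse the copy made for a shared node
        new = Node(self.name)
        seen[id(self)] = new  # record before recursing, in case of cycles
        new.children = [child.copy(seen) for child in self.children]
        return new
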
8 changes: 4 additions & 4 deletions thinc/optimizers.py
@@ -279,7 +279,7 @@ def _radam(self, ops, weights, grad, lr_scale, key, nr_upd):
 
         # exp_avg_sq.mul_(beta2).addcmul_(1 - beta2, grad, grad)
         exp_avg_sq *= beta2
-        exp_avg_sq += (1 - beta2) * (gradient_1D ** 2)
+        exp_avg_sq += (1 - beta2) * (gradient_1D**2)
         # exp_avg.mul_(beta1).add_(1 - beta1, grad)
         exp_avg *= beta1
         exp_avg += (1 - beta1) * gradient_1D
@@ -338,9 +338,9 @@ def _adam(self, ops, weights, gradient, lr_scale, key, nr_upd):
         mom2 = self.mom2[key]
         b1 = self.b1
         b2 = self.b2
-        fix1 = 1.0 - (b1 ** nr_upd)
-        fix2 = 1.0 - (b2 ** nr_upd)
-        lr = self.learn_rate * fix2 ** 0.5 / fix1
+        fix1 = 1.0 - (b1**nr_upd)
+        fix2 = 1.0 - (b2**nr_upd)
+        lr = self.learn_rate * fix2**0.5 / fix1
         eps = self.eps
         # needs to be 1D going into the adam function
         weights_1D, gradient_1D, mom1, mom2 = ops.adam(
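
The fix1/fix2 lines above are Adam's bias correction, folded into the learning rate rather than applied to the moment estimates separately: with m_hat = mom1/fix1 and v_hat = mom2/fix2, the step lr * m_hat / (sqrt(v_hat) + eps) rearranges to (lr * fix2**0.5 / fix1) * mom1 / (sqrt(mom2) + eps'), up to where eps sits. A numpy sketch of the whole step; the hyperparameter defaults are the usual Adam values, not necessarily thinc's, and the real work happens in ops.adam:

import numpy


def adam_step(weights, gradient, mom1, mom2, nr_upd,
              learn_rate=0.001, b1=0.9, b2=0.999, eps=1e-8):
    # Update the biased first and second moment estimates.
    mom1 = b1 * mom1 + (1 - b1) * gradient
    mom2 = b2 * mom2 + (1 - b2) * gradient**2
    # Fold the bias corrections into the learning rate, as in the diff.
    fix1 = 1.0 - b1**nr_upd
    fix2 = 1.0 - b2**nr_upd
    lr = learn_rate * fix2**0.5 / fix1
    weights -= lr * mom1 / (numpy.sqrt(mom2) + eps)
    return weights, mom1, mom2
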
5 changes: 1 addition & 4 deletions thinc/tests/layers/test_combinators.py
@@ -271,10 +271,7 @@ def test_concatenate():
 def test_map_list():
     nI = 4
     nO = 9
-    Xs = [
-        numpy.zeros((6, nI), dtype="f"),
-        numpy.ones((3, nI), dtype="f")
-    ]
+    Xs = [numpy.zeros((6, nI), dtype="f"), numpy.ones((3, nI), dtype="f")]
     Y_shapes = [(x.shape[0], nO) for x in Xs]
     model = map_list(Linear())
     model.initialize(X=Xs, Y=[numpy.zeros(shape, dtype="f") for shape in Y_shapes])
4 changes: 3 additions & 1 deletion thinc/tests/layers/test_pytorch_wrapper.py
@@ -64,7 +64,9 @@ def test_pytorch_wrapper(nN, nI, nO):
     assert isinstance(model.predict(X), numpy.ndarray)
 
 
-@pytest.mark.skipif(not has_cupy or not has_torch_gpu, reason="needs PyTorch with CUDA-capable GPU")
+@pytest.mark.skipif(
+    not has_cupy or not has_torch_gpu, reason="needs PyTorch with CUDA-capable GPU"
+)
 @pytest.mark.parametrize("nN,nI,nO", [(2, 3, 4)])
 @pytest.mark.parametrize("mixed_precision", TORCH_MIXED_PRECISION)
 def test_pytorch_wrapper_thinc_input(nN, nI, nO, mixed_precision):
2 changes: 2 additions & 0 deletions thinc/tests/layers/test_reduce.py
@@ -92,6 +92,7 @@ def test_reduce_mean(Xs):
     dX = backprop(Y)
     assert dX.dataXd.shape == X.dataXd.shape
 
+
 def test_reduce_sum(Xs):
     model = reduce_sum()
     lengths = model.ops.asarray([x.shape[0] for x in Xs], dtype="i")
@@ -107,6 +108,7 @@ def test_reduce_sum(Xs):
     dX = backprop(Y)
     assert dX.dataXd.shape == X.dataXd.shape
 
+
 def test_size_mismatch(Xs):
     for reduce in [reduce_first, reduce_last, reduce_max, reduce_mean, reduce_sum]:
         model = reduce()
12 changes: 7 additions & 5 deletions thinc/tests/layers/test_with_transforms.py
@@ -26,8 +26,8 @@ def list_input(shapes):
     for i, x in enumerate(data):
         # Give values that make it easy to see where rows or columns mismatch.
         x += i * 100
-        x += numpy.arange(x.shape[0]).reshape((-1, 1)) * 10
-        x += numpy.arange(x.shape[1]).reshape((1, -1))
+        x += numpy.arange(x.shape[0]).reshape((-1, 1)) * 10
+        x += numpy.arange(x.shape[1]).reshape((1, -1))
     return data
 
 
@@ -68,8 +68,10 @@ def noop_models():
         with_array(noop()),
         with_array2d(noop()),
         with_list(noop()),
-        with_ragged(noop())
+        with_ragged(noop()),
     ]
+
+
 # As an example operation, lets just trim the last dimension. That
 # should catch stuff that confuses the input and output.
 
@@ -180,14 +182,14 @@ def test_noop_transforms(noop_models, ragged_input, padded_input, list_input):
     d_ragged = Ragged(ragged_input.data + 1, ragged_input.lengths)
     d_padded = padded_input.copy()
     d_padded.data += 1
-    d_list = [dx+1 for dx in list_input]
+    d_list = [dx + 1 for dx in list_input]
     for model in noop_models:
         print(model.name)
         check_transform_doesnt_change_noop_values(model, padded_input, d_padded)
         check_transform_doesnt_change_noop_values(model, list_input, d_list)
         check_transform_doesnt_change_noop_values(model, ragged_input, d_ragged)
 
 
 def test_with_array_initialize(ragged_input, padded_input, list_input, array_input):
     for inputs in (ragged_input, padded_input, list_input, array_input):
         check_initialize(get_array_model(), inputs)
1 change: 1 addition & 0 deletions thinc/tests/mypy/test_mypy.py
@@ -23,6 +23,7 @@ def test_mypy_results(
 ):
     pytest.importorskip("mypy")
     from mypy import api as mypy_api
+
     os.chdir(tmpdir)
     root_dir = Path(__file__).parent
     thinc_root_dir = Path(__file__).parent.parent.parent.parent
26 changes: 19 additions & 7 deletions thinc/tests/test_loss.py
@@ -168,7 +168,9 @@ def test_sequence_categorical_crossentropy(guesses, labels, names):
     assert d_scores1[1][0] == pytest.approx(0.4, eps)
     assert d_scores1[1][1] == pytest.approx(-0.4, eps)
     # The normalization divides the difference (e.g. 0.4) by the number of seqs
-    d_scores = SequenceCategoricalCrossentropy(normalize=True, names=names).get_grad(guesses, labels)
+    d_scores = SequenceCategoricalCrossentropy(normalize=True, names=names).get_grad(
+        guesses, labels
+    )
     d_scores1 = d_scores[0]
     d_scores2 = d_scores[1]
 
@@ -189,7 +191,9 @@ def test_sequence_categorical_crossentropy(guesses, labels, names):
     assert d_scores2[0][0] == pytest.approx(0.1, eps)
     assert d_scores2[0][1] == pytest.approx(-0.35, eps)
 
-    loss = SequenceCategoricalCrossentropy(normalize=True, names=names).get_loss(guesses, labels)
+    loss = SequenceCategoricalCrossentropy(normalize=True, names=names).get_loss(
+        guesses, labels
+    )
     assert loss == pytest.approx(1.09, eps)
 
 
@@ -200,9 +204,9 @@
     ],
 )
 def test_sequence_categorical_missing_negative(guesses, labels, names):
-    d_scores = SequenceCategoricalCrossentropy(normalize=False, names=names, neg_prefix="!", missing_value="").get_grad(
-        guesses, labels
-    )
+    d_scores = SequenceCategoricalCrossentropy(
+        normalize=False, names=names, neg_prefix="!", missing_value=""
+    ).get_grad(guesses, labels)
     d_scores0 = d_scores[0]
 
     # [0.1, 0.5, 0.6] should be A
@@ -292,8 +296,16 @@ def test_cosine_unmatched():
         ("SequenceCategoricalCrossentropy.v1", {}, ([scores0], [labels0])),
         ("CategoricalCrossentropy.v2", {"neg_prefix": "!"}, (scores0, labels0)),
         ("CategoricalCrossentropy.v3", {"neg_prefix": "!"}, (scores0, labels0)),
-        ("SequenceCategoricalCrossentropy.v2", {"neg_prefix": "!"}, ([scores0], [labels0])),
-        ("SequenceCategoricalCrossentropy.v3", {"neg_prefix": "!"}, ([scores0], [labels0])),
+        (
+            "SequenceCategoricalCrossentropy.v2",
+            {"neg_prefix": "!"},
+            ([scores0], [labels0]),
+        ),
+        (
+            "SequenceCategoricalCrossentropy.v3",
+            {"neg_prefix": "!"},
+            ([scores0], [labels0]),
+        ),
         ("L2Distance.v1", {}, (scores0, scores0)),
         (
             "CosineDistance.v1",
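
To make the test's normalization comment concrete: for probability outputs, the crossentropy gradient per row is guesses minus truth, and normalize=True divides it by the number of sequences. A toy numpy check mirroring the 0.4 from the asserts above:

import numpy

guesses = numpy.array([[0.1, 0.9], [0.4, 0.6]])
truths = numpy.array([[0.0, 1.0], [0.0, 1.0]])
n_seqs = 2

d_scores = guesses - truths  # second row: [0.4, -0.4], as in the asserts
d_scores_normalized = d_scores / n_seqs  # [0.2, -0.2] with two sequences
print(d_scores_normalized)
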
4 changes: 2 additions & 2 deletions thinc/tests/test_serialize.py
@@ -55,7 +55,7 @@ def test_simple_model_roundtrip_bytes():
 
 
 def test_simple_model_roundtrip_bytes_length():
-    """ Ensure that serialization of non-initialized weight matrices goes fine """
+    """Ensure that serialization of non-initialized weight matrices goes fine"""
     model1 = Maxout(5, 10, nP=2)
     model2 = Maxout(5, 10, nP=2)
 
@@ -186,7 +186,7 @@ def test_simple_model_can_from_dict():
     assert model.can_from_dict(model_dict)
     # Test check without initialize
     assert Maxout(5, 10, nP=2).can_from_dict(model_dict)
-    # Test not-strict check
+    # Test not-strict check
     assert not Maxout(10, 5, nP=2).can_from_dict(model_dict)
     assert Maxout(5, nP=2).can_from_dict(model_dict)
 
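
A quick sketch of the roundtrip the docstring fix above refers to, using thinc's public API: an uninitialized Maxout can be serialized and loaded back into a second model of the same shape.

from thinc.api import Maxout

model1 = Maxout(5, 10, nP=2)  # weights not initialized yet
data = model1.to_bytes()      # serializing still works
model2 = Maxout(5, 10, nP=2).from_bytes(data)
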