From b6771efaa3ee59cbffbbfccc7c49581bbfef557e Mon Sep 17 00:00:00 2001 From: kadarakos Date: Fri, 24 Jun 2022 13:10:40 +0000 Subject: [PATCH 1/9] correcting label smoothing param constraint --- thinc/util.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/thinc/util.py b/thinc/util.py index e46c62447..01cb5a1bb 100644 --- a/thinc/util.py +++ b/thinc/util.py @@ -212,12 +212,6 @@ def to_categorical( *, label_smoothing: float = 0.0, ) -> FloatsXd: - if not 0.0 <= label_smoothing < 0.5: - raise ValueError( - "label_smoothing should be greater or " - "equal to 0.0 and less than 0.5, " - f"but {label_smoothing} was provided." - ) if n_classes is None: n_classes = int(numpy.max(Y) + 1) # type: ignore @@ -234,6 +228,13 @@ def to_categorical( ) nongold_prob = label_smoothing / (n_classes - 1) + if (1 - label_smoothing) < nongold_prob: + raise ValueError( + f"For {n_classes} number of classes " + "label_smoothing parameter has to be less than " + f"{1 - nongold_prob}, but found {label_smoothing}." + ) + xp = get_array_module(Y) label_distr = xp.full((n_classes, n_classes), nongold_prob, dtype="float32") xp.fill_diagonal(label_distr, 1 - label_smoothing) From 7a70585cba7eb03b8e1d4a1bb3840e4d50f69e79 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Fri, 24 Jun 2022 13:20:21 +0000 Subject: [PATCH 2/9] test new label smooth validation error --- thinc/tests/test_util.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/thinc/tests/test_util.py b/thinc/tests/test_util.py index 715d381d5..4964b54e4 100644 --- a/thinc/tests/test_util.py +++ b/thinc/tests/test_util.py @@ -115,6 +115,14 @@ def test_to_categorical(label_smoothing): ): to_categorical(numpy.asarray([0, 0, 0]), label_smoothing=0.01), + error_msg = "For 5 number of classes " + error_msg += "label_smoothing parameter has to be less than " + error_msg += "0.8, but found 0.8."
+ with pytest.raises( + ValueError, match=error_msg + ): + to_categorical(numpy.asarray([0, 1, 2, 3, 4]), label_smoothing=0.8) + def test_convert_recursive(): is_match = lambda obj: obj == "foo" From 6ccf9eed864669e68a6c27a4df91ed0bf5285d41 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Fri, 24 Jun 2022 13:25:24 +0000 Subject: [PATCH 3/9] less than 0 input validation --- thinc/util.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/thinc/util.py b/thinc/util.py index 01cb5a1bb..33aa3ba4c 100644 --- a/thinc/util.py +++ b/thinc/util.py @@ -216,6 +216,11 @@ def to_categorical( if n_classes is None: n_classes = int(numpy.max(Y) + 1) # type: ignore + if label_smoothing < 0.0: + raise ValueError( + "Label-smoothing parameter has to be greater or equal to 0" + ) + if label_smoothing == 0.0: if n_classes == 0: raise ValueError("n_classes should be at least 1") From 5229bf23a1e919745ffe623c86aa3589e9d15a41 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Fri, 24 Jun 2022 19:48:36 +0000 Subject: [PATCH 4/9] string concat --- thinc/tests/test_util.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/thinc/tests/test_util.py b/thinc/tests/test_util.py index 4964b54e4..d9f12cb6b 100644 --- a/thinc/tests/test_util.py +++ b/thinc/tests/test_util.py @@ -115,9 +115,9 @@ def test_to_categorical(label_smoothing): ): to_categorical(numpy.asarray([0, 0, 0]), label_smoothing=0.01), - error_msg = "For 5 number of classes " - error_msg += "label_smoothing parameter has to be less than " - error_msg += "0.8, but found 0.8." 
+ error_msg = ("For 5 number of classes " + "label_smoothing parameter has to be less than " + "0.8, but found 0.8.") with pytest.raises( ValueError, match=error_msg ): From 975ea3df89ee5bd955e8a99de83820a20769c2b9 Mon Sep 17 00:00:00 2001 From: Sofie Van Landeghem Date: Wed, 29 Jun 2022 17:16:18 +0200 Subject: [PATCH 5/9] small update to error msg --- thinc/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thinc/util.py b/thinc/util.py index 33aa3ba4c..8f89fe8f2 100644 --- a/thinc/util.py +++ b/thinc/util.py @@ -218,7 +218,7 @@ def to_categorical( if label_smoothing < 0.0: raise ValueError( - "Label-smoothing parameter has to be greater or equal to 0" + "Label-smoothing parameter has to be greater than or equal to 0" ) if label_smoothing == 0.0: From 4fc632d3aefc7ca92be00e838ceec9419d280a54 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Thu, 30 Jun 2022 11:28:16 +0000 Subject: [PATCH 6/9] fix max smoothing coefficient --- thinc/util.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/thinc/util.py b/thinc/util.py index 33aa3ba4c..43dc8a650 100644 --- a/thinc/util.py +++ b/thinc/util.py @@ -233,11 +233,12 @@ def to_categorical( ) nongold_prob = label_smoothing / (n_classes - 1) - if (1 - label_smoothing) < nongold_prob: + max_smooth = (n_classes - 1) / n_classes + if n_classes > 1 and label_smoothing >= max_smooth: raise ValueError( f"For {n_classes} number of classes " "label_smoothing parameter has to be less than " - f"{1 - nongold_prob}, but found {label_smoothing}." + f"{max_smooth}, but found {label_smoothing}." 
) xp = get_array_module(Y) From 4f4e7771c8f6c731e3de0018648972f09f9c5a8e Mon Sep 17 00:00:00 2001 From: kadarakos Date: Fri, 1 Jul 2022 08:18:19 +0000 Subject: [PATCH 7/9] double check error message --- thinc/tests/test_util.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/thinc/tests/test_util.py b/thinc/tests/test_util.py index d9f12cb6b..03ade36cc 100644 --- a/thinc/tests/test_util.py +++ b/thinc/tests/test_util.py @@ -123,6 +123,11 @@ def test_to_categorical(label_smoothing): ): to_categorical(numpy.asarray([0, 1, 2, 3, 4]), label_smoothing=0.8) + with pytest.raises( + ValueError, match=error_msg + ): + to_categorical(numpy.asarray([0, 1, 2, 3, 4]), label_smoothing=0.88) + def test_convert_recursive(): is_match = lambda obj: obj == "foo" From 66ade419e64f4a662176706bc9ae87df13714282 Mon Sep 17 00:00:00 2001 From: kadarakos Date: Wed, 6 Jul 2022 17:57:13 +0200 Subject: [PATCH 8/9] Update thinc/util.py Co-authored-by: Adriane Boyd --- thinc/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/thinc/util.py b/thinc/util.py index 3b755cd7a..42c7f07b8 100644 --- a/thinc/util.py +++ b/thinc/util.py @@ -236,7 +236,7 @@ def to_categorical( max_smooth = (n_classes - 1) / n_classes if n_classes > 1 and label_smoothing >= max_smooth: raise ValueError( - f"For {n_classes} number of classes " + f"For {n_classes} classes " "label_smoothing parameter has to be less than " f"{max_smooth}, but found {label_smoothing}." 
) From 499ce4f2e9fafd6471e0235b7caaec4cfcfac03b Mon Sep 17 00:00:00 2001 From: kadarakos Date: Thu, 7 Jul 2022 08:08:54 +0000 Subject: [PATCH 9/9] test error message fix --- thinc/tests/test_util.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/thinc/tests/test_util.py b/thinc/tests/test_util.py index 03ade36cc..aa8ca3c81 100644 --- a/thinc/tests/test_util.py +++ b/thinc/tests/test_util.py @@ -115,16 +115,13 @@ def test_to_categorical(label_smoothing): ): to_categorical(numpy.asarray([0, 0, 0]), label_smoothing=0.01), - error_msg = ("For 5 number of classes " - "label_smoothing parameter has to be less than " - "0.8, but found 0.8.") with pytest.raises( - ValueError, match=error_msg + ValueError, match=r"label_smoothing parameter" ): to_categorical(numpy.asarray([0, 1, 2, 3, 4]), label_smoothing=0.8) with pytest.raises( - ValueError, match=error_msg + ValueError, match=r"label_smoothing parameter" ): to_categorical(numpy.asarray([0, 1, 2, 3, 4]), label_smoothing=0.88)