huggingface · lhoestq · May 6, 2021 · May 6, 2021 · May 6, 2021 · May 6, 2021
diff --git a/metrics/matthews_correlation/matthews_correlation.py b/metrics/matthews_correlation/matthews_correlation.py
@@ -0,0 +1,86 @@
+# coding=utf-8
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Matthews Correlation metric."""
+
+from sklearn.metrics import matthews_corrcoef
+
+import datasets
+
+
+# Long-form description of the metric. Used as MetricInfo.description and passed to the
+# add_start_docstrings decorator on the metric class.
+_DESCRIPTION = """
+Compute the Matthews correlation coefficient (MCC)
+
+The Matthews correlation coefficient is used in machine learning as a
+measure of the quality of binary and multiclass classifications. It takes
+into account true and false positives and negatives and is generally
+regarded as a balanced measure which can be used even if the classes are of
+very different sizes. The MCC is in essence a correlation coefficient value
+between -1 and +1. A coefficient of +1 represents a perfect prediction, 0
+an average random prediction and -1 an inverse prediction.  The statistic
+is also known as the phi coefficient. [source: Wikipedia]
+"""
+
+# Argument/return documentation plus a usage example. Used as MetricInfo.inputs_description
+# and passed to the add_start_docstrings decorator on the metric class.
+_KWARGS_DESCRIPTION = """
+Args:
+    predictions: Predicted labels, as returned by a model.
+    references: Ground truth labels.
+    sample_weight: Sample weights.
+Returns:
+    matthews_correlation: Matthews correlation.
+Examples:
+
+    >>> matthews_metric = datasets.load_metric("matthews_correlation")
+    >>> results = matthews_metric.compute(references=[0, 1], predictions=[0, 1])
+    >>> print(results)
+    {'matthews_correlation': 1.0}
+"""
+
+# BibTeX entry for scikit-learn, the library whose matthews_corrcoef is imported by this file.
+# The backslash right after the opening quotes is a line continuation (this is not a raw
+# string), so the text starts directly at "@article".
+_CITATION = """\
+@article{scikit-learn,
+  title={Scikit-learn: Machine Learning in {P}ython},
+  author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V.
+         and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P.
+         and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and
+         Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.},
+  journal={Journal of Machine Learning Research},
+  volume={12},
+  pages={2825--2830},
+  year={2011}
+}
+"""
+
+
+# NOTE(review): "Corelation" is a typo for "Correlation"; the name is kept so that anything
+# referring to the class by name keeps working.
+@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class MatthewsCorelation(datasets.Metric):
+    """Matthews correlation coefficient (MCC) metric backed by scikit-learn."""
+
+    def _info(self):
+        """Build the metric's metadata.
+
+        Returns:
+            datasets.MetricInfo: description, citation, usage text, reference URL and the
+            input schema (one int32 value per prediction and per reference).
+        """
+        return datasets.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Value("int32"),
+                    "references": datasets.Value("int32"),
+                }
+            ),
+            reference_urls=[
+                "https://scikit-learn.org/stable/modules/generated/sklearn.metrics.matthews_corrcoef.html"
+            ],
+        )
+
+    def _compute(self, predictions, references, sample_weight=None):
+        """Compute the MCC of ``predictions`` against ``references``.
+
+        Args:
+            predictions: Predicted labels.
+            references: Ground-truth labels.
+            sample_weight: Optional sample weights, forwarded to ``matthews_corrcoef``.
+
+        Returns:
+            dict: ``{"matthews_correlation": mcc}`` with ``mcc`` as a built-in ``float``.
+        """
+        mcc = matthews_corrcoef(references, predictions, sample_weight=sample_weight)
+        # Cast to a plain float: scikit-learn may hand back a NumPy scalar, which prints
+        # differently depending on the NumPy version (e.g. "np.float64(1.0)" instead of "1.0").
+        return {"matthews_correlation": float(mcc)}
diff --git a/metrics/pearsonr/pearsonr.py b/metrics/pearsonr/pearsonr.py
@@ -0,0 +1,98 @@
+# coding=utf-8
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Pearson correlation coefficient metric."""
+
+from scipy.stats import pearsonr
+
+import datasets
+
+
+# Long-form description of the metric. Used as MetricInfo.description and passed to the
+# add_start_docstrings decorator on the metric class.
+# NOTE(review): the text discusses a p-value, but the metric's _compute keeps only
+# element [0] of scipy's pearsonr result, so no p-value is reported.
+_DESCRIPTION = """
+Pearson correlation coefficient and p-value for testing non-correlation.
+
+The Pearson correlation coefficient measures the linear relationship
+between two datasets.  The calculation of the p-value relies on the
+assumption that each dataset is normally distributed. Like other correlation
+coefficients, this one varies between -1 and +1 with 0 implying no
+correlation. Correlations of -1 or +1 imply an exact linear relationship.
+Positive correlations imply that as x increases, so does y. Negative
+correlations imply that as x increases, y decreases.
+
+The p-value roughly indicates the probability of an uncorrelated system
+producing datasets that have a Pearson correlation at least as extreme
+as the one computed from these datasets.
+"""
+
+# Argument/return documentation plus a usage example. Used as MetricInfo.inputs_description
+# and passed to the add_start_docstrings decorator on the metric class.
+_KWARGS_DESCRIPTION = """
+Args:
+    predictions: Predicted labels, as returned by a model.
+    references: Ground truth labels.
+Returns:
+    pearsonr: Pearson correlation coefficient.
+Examples:
+
+    >>> pearsonr_metric = datasets.load_metric("pearsonr")
+    >>> results = pearsonr_metric.compute(references=[0, 1], predictions=[0, 1])
+    >>> print(results)
+    {'pearsonr': 1.0}
+"""
+
+# BibTeX entry for SciPy, the library whose pearsonr is imported by this file.
+# Raw string so the LaTeX accent escapes (\', \., \^) are kept verbatim. The entry starts on
+# the opening line because a raw string does NOT treat a backslash before a newline as a
+# line continuation: writing r"""\ would leave a stray backslash and newline in the citation.
+_CITATION = r"""@article{2020SciPy-NMeth,
+  author  = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
+            Haberland, Matt and Reddy, Tyler and Cournapeau, David and
+            Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
+            Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
+            Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
+            Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
+            Kern, Robert and Larson, Eric and Carey, C J and
+            Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
+            {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
+            Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
+            Harris, Charles R. and Archibald, Anne M. and
+            Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
+            {van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
+  title   = {{{SciPy} 1.0: Fundamental Algorithms for Scientific
+            Computing in Python}},
+  journal = {Nature Methods},
+  year    = {2020},
+  volume  = {17},
+  pages   = {261--272},
+  adsurl  = {https://rdcu.be/b08Wh},
+  doi     = {10.1038/s41592-019-0686-2},
+}
+"""
+
+
+@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Pearsonr(datasets.Metric):
+    """Pearson correlation coefficient metric backed by ``scipy.stats.pearsonr``."""
+
+    def _info(self):
+        """Build the metric's metadata.
+
+        Returns:
+            datasets.MetricInfo: description, citation, usage text, reference URL and the
+            input schema (one float64 value per prediction and per reference).
+        """
+        return datasets.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            # float64 rather than int32: an integer schema cannot represent the real-valued
+            # scores a correlation is usually computed on; integer inputs still fit.
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Value("float64"),
+                    "references": datasets.Value("float64"),
+                }
+            ),
+            reference_urls=["https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.pearsonr.html"],
+        )
+
+    def _compute(self, predictions, references):
+        """Compute the Pearson correlation of ``predictions`` against ``references``.
+
+        Args:
+            predictions: Predicted values.
+            references: Ground-truth values.
+
+        Returns:
+            dict: ``{"pearsonr": r}`` with ``r`` as a built-in ``float``.
+        """
+        # Element [0] of scipy's result is the coefficient; the p-value is not reported.
+        coefficient = pearsonr(references, predictions)[0]
+        # Cast to a plain float so the result does not carry a NumPy scalar type.
+        return {"pearsonr": float(coefficient)}
diff --git a/metrics/spearmanr/spearmanr.py b/metrics/spearmanr/spearmanr.py
@@ -0,0 +1,100 @@
+# coding=utf-8
+# Copyright 2020 The HuggingFace Datasets Authors and the current dataset script contributor.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Spearman correlation coefficient metric."""
+
+from scipy.stats import spearmanr
+
+import datasets
+
+
+# Long-form description of the metric. Used as MetricInfo.description and passed to the
+# add_start_docstrings decorator on the metric class.
+# NOTE(review): the text discusses a p-value, but the metric's _compute keeps only
+# element [0] of scipy's spearmanr result, so no p-value is reported.
+_DESCRIPTION = """
+Calculate a Spearman correlation coefficient with associated p-value.
+
+The Spearman rank-order correlation coefficient is a nonparametric measure
+of the monotonicity of the relationship between two datasets. Unlike the
+Pearson correlation, the Spearman correlation does not assume that both
+datasets are normally distributed. Like other correlation coefficients,
+this one varies between -1 and +1 with 0 implying no correlation.
+Correlations of -1 or +1 imply an exact monotonic relationship. Positive
+correlations imply that as x increases, so does y. Negative correlations
+imply that as x increases, y decreases.
+
+The p-value roughly indicates the probability of an uncorrelated system
+producing datasets that have a Spearman correlation at least as extreme
+as the one computed from these datasets. The p-values are not entirely
+reliable but are probably reasonable for datasets larger than 500 or so.
+"""
+
+# Argument/return documentation plus a usage example. Used as MetricInfo.inputs_description
+# and passed to the add_start_docstrings decorator on the metric class.
+_KWARGS_DESCRIPTION = """
+Args:
+    predictions: Predicted labels, as returned by a model.
+    references: Ground truth labels.
+Returns:
+    spearmanr: Spearman correlation coefficient.
+Examples:
+
+    >>> spearmanr_metric = datasets.load_metric("spearmanr")
+    >>> results = spearmanr_metric.compute(references=[0, 1, 1], predictions=[0, 1, 1])
+    >>> print(results)
+    {'spearmanr': 1.0}
+"""
+
+# BibTeX entry for SciPy, the library whose spearmanr is imported by this file.
+# Raw string so the LaTeX accent escapes (\', \., \^) are kept verbatim. The entry starts on
+# the opening line because a raw string does NOT treat a backslash before a newline as a
+# line continuation: writing r"""\ would leave a stray backslash and newline in the citation.
+_CITATION = r"""@article{2020SciPy-NMeth,
+  author  = {Virtanen, Pauli and Gommers, Ralf and Oliphant, Travis E. and
+            Haberland, Matt and Reddy, Tyler and Cournapeau, David and
+            Burovski, Evgeni and Peterson, Pearu and Weckesser, Warren and
+            Bright, Jonathan and {van der Walt}, St{\'e}fan J. and
+            Brett, Matthew and Wilson, Joshua and Millman, K. Jarrod and
+            Mayorov, Nikolay and Nelson, Andrew R. J. and Jones, Eric and
+            Kern, Robert and Larson, Eric and Carey, C J and
+            Polat, {\.I}lhan and Feng, Yu and Moore, Eric W. and
+            {VanderPlas}, Jake and Laxalde, Denis and Perktold, Josef and
+            Cimrman, Robert and Henriksen, Ian and Quintero, E. A. and
+            Harris, Charles R. and Archibald, Anne M. and
+            Ribeiro, Ant{\^o}nio H. and Pedregosa, Fabian and
+            {van Mulbregt}, Paul and {SciPy 1.0 Contributors}},
+  title   = {{{SciPy} 1.0: Fundamental Algorithms for Scientific
+            Computing in Python}},
+  journal = {Nature Methods},
+  year    = {2020},
+  volume  = {17},
+  pages   = {261--272},
+  adsurl  = {https://rdcu.be/b08Wh},
+  doi     = {10.1038/s41592-019-0686-2},
+}
+"""
+
+
+@datasets.utils.file_utils.add_start_docstrings(_DESCRIPTION, _KWARGS_DESCRIPTION)
+class Spearmanr(datasets.Metric):
+    """Spearman rank-order correlation metric backed by ``scipy.stats.spearmanr``."""
+
+    def _info(self):
+        """Build the metric's metadata.
+
+        Returns:
+            datasets.MetricInfo: description, citation, usage text, reference URL and the
+            input schema (one float64 value per prediction and per reference).
+        """
+        return datasets.MetricInfo(
+            description=_DESCRIPTION,
+            citation=_CITATION,
+            inputs_description=_KWARGS_DESCRIPTION,
+            # float64 rather than int32: an integer schema cannot represent the real-valued
+            # scores a rank correlation is usually computed on; integer inputs still fit.
+            features=datasets.Features(
+                {
+                    "predictions": datasets.Value("float64"),
+                    "references": datasets.Value("float64"),
+                }
+            ),
+            reference_urls=["https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.spearmanr.html"],
+        )
+
+    def _compute(self, predictions, references):
+        """Compute the Spearman correlation of ``predictions`` against ``references``.
+
+        Args:
+            predictions: Predicted values.
+            references: Ground-truth values.
+
+        Returns:
+            dict: ``{"spearmanr": rho}`` with ``rho`` as a built-in ``float``.
+        """
+        # Element [0] of scipy's result is the coefficient; the p-value is not reported.
+        coefficient = spearmanr(references, predictions)[0]
+        # Cast to a plain float so the result does not carry a NumPy scalar type.
+        return {"spearmanr": float(coefficient)}