diff --git a/metrics/sari/sari.py b/metrics/sari/sari.py index b271be8455b..2f31205bdb7 100644 --- a/metrics/sari/sari.py +++ b/metrics/sari/sari.py @@ -18,6 +18,7 @@ import sacrebleu import sacremoses +from packaging import version import datasets @@ -239,7 +240,10 @@ def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_s sentence = sentence.lower() if tokenizer in ["13a", "intl"]: - normalized_sent = sacrebleu.TOKENIZERS[tokenizer]()(sentence) + if version.parse(sacrebleu.__version__).major >= 2: + normalized_sent = sacrebleu.metrics.bleu._get_tokenizer(tokenizer)()(sentence) + else: + normalized_sent = sacrebleu.TOKENIZERS[tokenizer]()(sentence) elif tokenizer == "moses": normalized_sent = sacremoses.MosesTokenizer().tokenize(sentence, return_str=True, escape=False) elif tokenizer == "penn": diff --git a/metrics/wiki_split/wiki_split.py b/metrics/wiki_split/wiki_split.py index 856587cd23c..60940dde230 100644 --- a/metrics/wiki_split/wiki_split.py +++ b/metrics/wiki_split/wiki_split.py @@ -20,6 +20,7 @@ import sacrebleu import sacremoses +from packaging import version import datasets @@ -266,7 +267,10 @@ def normalize(sentence, lowercase: bool = True, tokenizer: str = "13a", return_s sentence = sentence.lower() if tokenizer in ["13a", "intl"]: - normalized_sent = sacrebleu.TOKENIZERS[tokenizer]()(sentence) + if version.parse(sacrebleu.__version__).major >= 2: + normalized_sent = sacrebleu.metrics.bleu._get_tokenizer(tokenizer)()(sentence) + else: + normalized_sent = sacrebleu.TOKENIZERS[tokenizer]()(sentence) elif tokenizer == "moses": normalized_sent = sacremoses.MosesTokenizer().tokenize(sentence, return_str=True, escape=False) elif tokenizer == "penn":