From a04520dd2452599eadefd9f84db817b667c760a1 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 22 May 2020 23:11:38 +0200 Subject: [PATCH 1/4] Warn the user about max_len being on the path to be deprecated. --- src/transformers/tokenization_utils.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 5e8d7df00978..0bcdd3251801 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -821,6 +821,12 @@ def __init__(self, model_max_length=None, **kwargs): super().__init__(**kwargs) + if "max_len" in kwargs: + logger.warning( + "Parameter max_len is deprecated and will be removed in a future release. " + "Use model_max_length instead." + ) + # For backward compatibility we fallback to set model_max_length from max_len if provided model_max_length = model_max_length if model_max_length is not None else kwargs.pop("max_len", None) self.model_max_length = model_max_length if model_max_length is not None else VERY_LARGE_INTEGER From 71c46cc8028944a7de01606e423cf78af80b8c31 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 22 May 2020 23:47:34 +0200 Subject: [PATCH 2/4] Ensure better backward compatibility when max_len is provided to a tokenizer. --- src/transformers/tokenization_utils.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index 0bcdd3251801..ca704a2583cf 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -22,6 +22,7 @@ import operator import os import re +import warnings from collections import UserDict, defaultdict from contextlib import contextmanager from typing import Any, Dict, List, NamedTuple, Optional, Sequence, Tuple, Union @@ -821,14 +822,18 @@ def __init__(self, model_max_length=None, **kwargs): super().__init__(**kwargs) + # For backward compatibility we fallback to set model_max_length from max_len if provided if "max_len" in kwargs: - logger.warning( + warnings.warn( "Parameter max_len is deprecated and will be removed in a future release. " - "Use model_max_length instead." + "Use model_max_length instead.", + category=FutureWarning ) - # For backward compatibility we fallback to set model_max_length from max_len if provided - model_max_length = model_max_length if model_max_length is not None else kwargs.pop("max_len", None) + self.model_max_length = kwargs.pop("max_len") + else: + self.model_max_length = model_max_length + self.model_max_length = model_max_length if model_max_length is not None else VERY_LARGE_INTEGER # Padding side is right by default and overridden in subclasses. If specified in the kwargs, it is changed. From 28a9828644abb70f9314d33f7484670964b40a85 Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 22 May 2020 23:55:16 +0200 Subject: [PATCH 3/4] Make sure to override the parameter and not the actual instance value. --- src/transformers/tokenization_utils.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index ca704a2583cf..b339fbc54ea9 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -830,10 +830,7 @@ def __init__(self, model_max_length=None, **kwargs): category=FutureWarning ) - self.model_max_length = kwargs.pop("max_len") - else: - self.model_max_length = model_max_length - + model_max_length = kwargs.pop("max_len") self.model_max_length = model_max_length if model_max_length is not None else VERY_LARGE_INTEGER # Padding side is right by default and overridden in subclasses. If specified in the kwargs, it is changed. From e6abd157a6611c23d6a342670ddd6914ce394daa Mon Sep 17 00:00:00 2001 From: Morgan Funtowicz Date: Fri, 22 May 2020 23:59:44 +0200 Subject: [PATCH 4/4] Format & quality --- src/transformers/tokenization_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/tokenization_utils.py b/src/transformers/tokenization_utils.py index b339fbc54ea9..ef9079540a42 100644 --- a/src/transformers/tokenization_utils.py +++ b/src/transformers/tokenization_utils.py @@ -827,7 +827,7 @@ def __init__(self, model_max_length=None, **kwargs): warnings.warn( "Parameter max_len is deprecated and will be removed in a future release. " "Use model_max_length instead.", - category=FutureWarning + category=FutureWarning, ) model_max_length = kwargs.pop("max_len")