@@ -725,6 +725,130 @@ class Tokenizer:
725725 """
726726 pass
727727
def async_decode_batch(self, sequences, skip_special_tokens=True):
    """
    Asynchronously decode a batch of ids back to their corresponding strings.

    Going by its name and the sibling ``async_encode*`` methods, this is the
    awaitable counterpart of batch decoding, meant to be awaited from async
    Python code. This definition is a documentation stub only: its body is
    ``pass``.

    Example:
        Here is an example of the expected usage::

            await async_decode_batch([[1, 2, 3], [4, 5]])

    Args:
        sequences (:obj:`List` of :obj:`List[int]`):
            The batch of sequences we want to decode

        skip_special_tokens (:obj:`bool`, defaults to :obj:`True`):
            Whether the special tokens should be removed from the decoded strings

    Returns:
        :obj:`List[str]`: A list of decoded strings
    """
    pass
743+
def async_encode(self, sequence, pair=None, is_pretokenized=False, add_special_tokens=True):
    """
    Awaitable counterpart of ``encode``: encodes a single input, optionally
    together with a pair sequence, while keeping track of character offsets.

    Example:
        Call it from async Python code and await the result::

            await async_encode("A single sequence")

    Args:
        sequence (:obj:`~tokenizers.InputSequence`):
            The main sequence to encode. Depending on ``is_pretokenized`` it is
            either raw text or an already pre-tokenized sequence:

            - ``is_pretokenized=False``: :class:`~tokenizers.TextInputSequence`
            - ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedInputSequence`

        pair (:obj:`~tokenizers.InputSequence`, `optional`):
            An optional second sequence, in the same format as ``sequence``.

        is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
            Whether the input has already been pre-tokenized

        add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
            Whether the special tokens should be added

    Returns:
        :class:`~tokenizers.Encoding`: The encoded result
    """
    pass
777+
def async_encode_batch(self, input, is_pretokenized=False, add_special_tokens=True):
    """
    Awaitable counterpart of ``encode_batch``: encodes a whole batch of inputs
    while keeping track of character offsets.

    Example:
        Call it from async Python code and await the result. The accepted
        inputs look like this::

            await async_encode_batch([
                "A single sequence",
                ("A tuple with a sequence", "And its pair"),
                [ "A", "pre", "tokenized", "sequence" ],
                ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
            ])

    Args:
        input (A :obj:`List`/:obj:`Tuple` of :obj:`~tokenizers.EncodeInput`):
            The single sequences or pair sequences to encode. Depending on
            ``is_pretokenized``, each one is either raw text or already
            pre-tokenized:

            - ``is_pretokenized=False``: :class:`~tokenizers.TextEncodeInput`
            - ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedEncodeInput`

        is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
            Whether the input has already been pre-tokenized

        add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
            Whether the special tokens should be added

    Returns:
        A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch
    """
    pass
814+
def async_encode_batch_fast(self, input, is_pretokenized=False, add_special_tokens=True):
    """
    Awaitable counterpart of ``encode_batch_fast``: encodes a whole batch of
    inputs without tracking character offsets.

    Example:
        Call it from async Python code and await the result. The accepted
        inputs look like this::

            await async_encode_batch_fast([
                "A single sequence",
                ("A tuple with a sequence", "And its pair"),
                [ "A", "pre", "tokenized", "sequence" ],
                ([ "A", "pre", "tokenized", "sequence" ], "And its pair")
            ])

    Args:
        input (A :obj:`List`/:obj:`Tuple` of :obj:`~tokenizers.EncodeInput`):
            The single sequences or pair sequences to encode. Depending on
            ``is_pretokenized``, each one is either raw text or already
            pre-tokenized:

            - ``is_pretokenized=False``: :class:`~tokenizers.TextEncodeInput`
            - ``is_pretokenized=True``: :class:`~tokenizers.PreTokenizedEncodeInput`

        is_pretokenized (:obj:`bool`, defaults to :obj:`False`):
            Whether the input has already been pre-tokenized

        add_special_tokens (:obj:`bool`, defaults to :obj:`True`):
            Whether the special tokens should be added

    Returns:
        A :obj:`List` of :class:`~tokenizers.Encoding`: The encoded batch
    """
    pass
851+
728852 def decode (self , ids , skip_special_tokens = True ):
729853 """
730854 Decode the given list of ids back to a string
0 commit comments