From 2615cc2d868fc7d6d75f5a1b52c3425037648c17 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Mon, 3 Jan 2022 20:59:51 +0100 Subject: [PATCH 1/2] fix doc example - TypeError: get_text_features() got an unexpected keyword argument 'token_type_ids' --- .../modeling_vision_text_dual_encoder.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py index f40cb2782f58..953069efc50e 100755 --- a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py @@ -232,6 +232,7 @@ def get_text_features( >>> tokenizer = AutoTokenizer.from_pretrained("clip-italian/clip-italian") >>> inputs = tokenizer(["una foto di un gatto", "una foto di un cane"], padding=True, return_tensors="pt") + >>> inputs.pop("token_type_ids") >>> text_features = model.get_text_features(**inputs) ```""" text_outputs = self.text_model( From eabd491efaccd7708058d10c4e59c4a61144a3c1 Mon Sep 17 00:00:00 2001 From: ydshieh Date: Fri, 7 Jan 2022 18:48:22 +0100 Subject: [PATCH 2/2] add token_type_ids param --- .../modeling_vision_text_dual_encoder.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py index 953069efc50e..58f8376260f7 100755 --- a/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py +++ b/src/transformers/models/vision_text_dual_encoder/modeling_vision_text_dual_encoder.py @@ -214,6 +214,7 @@ def get_text_features( input_ids=None, attention_mask=None, position_ids=None, + token_type_ids=None, output_attentions=None, output_hidden_states=None, return_dict=None, @@ -232,13 +233,13 @@ def get_text_features( >>> tokenizer = AutoTokenizer.from_pretrained("clip-italian/clip-italian") >>> inputs = tokenizer(["una foto di un gatto", "una foto di un cane"], padding=True, return_tensors="pt") - >>> inputs.pop("token_type_ids") >>> text_features = model.get_text_features(**inputs) ```""" text_outputs = self.text_model( input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids, + token_type_ids=token_type_ids, output_attentions=output_attentions, output_hidden_states=output_hidden_states, return_dict=return_dict,