Skip to content

Commit ba853bc

Browse files
committed
Add tests
1 parent 433f4f5 commit ba853bc

File tree

1 file changed

+30
-32
lines changed

1 file changed

+30
-32
lines changed

tests/models/chinese_clip/test_processor_chinese_clip.py

Lines changed: 30 additions & 32 deletions
Original file line number · Diff line number · Diff line change
@@ -30,7 +30,7 @@
3030
if is_vision_available():
3131
from PIL import Image
3232

33-
from transformers import ChineseCLIPFeatureExtractor, ChineseCLIPProcessor
33+
from transformers import ChineseCLIPImageProcessor, ChineseCLIPProcessor
3434

3535

3636
@require_vision
@@ -62,7 +62,7 @@ def setUp(self):
6262
with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
6363
vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
6464

65-
feature_extractor_map = {
65+
image_processor_map = {
6666
"do_resize": True,
6767
"size": {"height": 224, "width": 224},
6868
"do_center_crop": True,
@@ -72,18 +72,18 @@ def setUp(self):
7272
"image_std": [0.26862954, 0.26130258, 0.27577711],
7373
"do_convert_rgb": True,
7474
}
75-
self.feature_extractor_file = os.path.join(self.tmpdirname, FEATURE_EXTRACTOR_NAME)
76-
with open(self.feature_extractor_file, "w", encoding="utf-8") as fp:
77-
json.dump(feature_extractor_map, fp)
75+
self.image_processor_file = os.path.join(self.tmpdirname, FEATURE_EXTRACTOR_NAME)
76+
with open(self.image_processor_file, "w", encoding="utf-8") as fp:
77+
json.dump(image_processor_map, fp)
7878

7979
def get_tokenizer(self, **kwargs):
8080
return BertTokenizer.from_pretrained(self.tmpdirname, **kwargs)
8181

8282
def get_rust_tokenizer(self, **kwargs):
8383
return BertTokenizerFast.from_pretrained(self.tmpdirname, **kwargs)
8484

85-
def get_feature_extractor(self, **kwargs):
86-
return ChineseCLIPFeatureExtractor.from_pretrained(self.tmpdirname, **kwargs)
85+
def get_image_processor(self, **kwargs):
86+
return ChineseCLIPImageProcessor.from_pretrained(self.tmpdirname, **kwargs)
8787

8888
def tearDown(self):
8989
shutil.rmtree(self.tmpdirname)
@@ -102,13 +102,13 @@ def prepare_image_inputs(self):
102102
def test_save_load_pretrained_default(self):
103103
tokenizer_slow = self.get_tokenizer()
104104
tokenizer_fast = self.get_rust_tokenizer()
105-
feature_extractor = self.get_feature_extractor()
105+
image_processor = self.get_image_processor()
106106

107-
processor_slow = ChineseCLIPProcessor(tokenizer=tokenizer_slow, feature_extractor=feature_extractor)
107+
processor_slow = ChineseCLIPProcessor(tokenizer=tokenizer_slow, image_processor=image_processor)
108108
processor_slow.save_pretrained(self.tmpdirname)
109109
processor_slow = ChineseCLIPProcessor.from_pretrained(self.tmpdirname, use_fast=False)
110110

111-
processor_fast = ChineseCLIPProcessor(tokenizer=tokenizer_fast, feature_extractor=feature_extractor)
111+
processor_fast = ChineseCLIPProcessor(tokenizer=tokenizer_fast, image_processor=image_processor)
112112
processor_fast.save_pretrained(self.tmpdirname)
113113
processor_fast = ChineseCLIPProcessor.from_pretrained(self.tmpdirname)
114114

@@ -118,19 +118,17 @@ def test_save_load_pretrained_default(self):
118118
self.assertIsInstance(processor_slow.tokenizer, BertTokenizer)
119119
self.assertIsInstance(processor_fast.tokenizer, BertTokenizerFast)
120120

121-
self.assertEqual(processor_slow.feature_extractor.to_json_string(), feature_extractor.to_json_string())
122-
self.assertEqual(processor_fast.feature_extractor.to_json_string(), feature_extractor.to_json_string())
123-
self.assertIsInstance(processor_slow.feature_extractor, ChineseCLIPFeatureExtractor)
124-
self.assertIsInstance(processor_fast.feature_extractor, ChineseCLIPFeatureExtractor)
121+
self.assertEqual(processor_slow.image_processor.to_json_string(), image_processor.to_json_string())
122+
self.assertEqual(processor_fast.image_processor.to_json_string(), image_processor.to_json_string())
123+
self.assertIsInstance(processor_slow.image_processor, ChineseCLIPImageProcessor)
124+
self.assertIsInstance(processor_fast.image_processor, ChineseCLIPImageProcessor)
125125

126126
def test_save_load_pretrained_additional_features(self):
127-
processor = ChineseCLIPProcessor(
128-
tokenizer=self.get_tokenizer(), feature_extractor=self.get_feature_extractor()
129-
)
127+
processor = ChineseCLIPProcessor(tokenizer=self.get_tokenizer(), image_processor=self.get_image_processor())
130128
processor.save_pretrained(self.tmpdirname)
131129

132130
tokenizer_add_kwargs = self.get_tokenizer(cls_token="(CLS)", sep_token="(SEP)")
133-
feature_extractor_add_kwargs = self.get_feature_extractor(do_normalize=False)
131+
image_processor_add_kwargs = self.get_image_processor(do_normalize=False)
134132

135133
processor = ChineseCLIPProcessor.from_pretrained(
136134
self.tmpdirname, cls_token="(CLS)", sep_token="(SEP)", do_normalize=False
@@ -139,28 +137,28 @@ def test_save_load_pretrained_additional_features(self):
139137
self.assertEqual(processor.tokenizer.get_vocab(), tokenizer_add_kwargs.get_vocab())
140138
self.assertIsInstance(processor.tokenizer, BertTokenizerFast)
141139

142-
self.assertEqual(processor.feature_extractor.to_json_string(), feature_extractor_add_kwargs.to_json_string())
143-
self.assertIsInstance(processor.feature_extractor, ChineseCLIPFeatureExtractor)
140+
self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string())
141+
self.assertIsInstance(processor.image_processor, ChineseCLIPImageProcessor)
144142

145-
def test_feature_extractor(self):
146-
feature_extractor = self.get_feature_extractor()
143+
def test_image_processor(self):
144+
image_processor = self.get_image_processor()
147145
tokenizer = self.get_tokenizer()
148146

149-
processor = ChineseCLIPProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
147+
processor = ChineseCLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)
150148

151149
image_input = self.prepare_image_inputs()
152150

153-
input_feat_extract = feature_extractor(image_input, return_tensors="np")
151+
input_feat_extract = image_processor(image_input, return_tensors="np")
154152
input_processor = processor(images=image_input, return_tensors="np")
155153

156154
for key in input_feat_extract.keys():
157155
self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2)
158156

159157
def test_tokenizer(self):
160-
feature_extractor = self.get_feature_extractor()
158+
image_processor = self.get_image_processor()
161159
tokenizer = self.get_tokenizer()
162160

163-
processor = ChineseCLIPProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
161+
processor = ChineseCLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)
164162

165163
input_str = "Alexandra,T-shirt的价格是15便士。"
166164

@@ -172,10 +170,10 @@ def test_tokenizer(self):
172170
self.assertListEqual(encoded_tok[key], encoded_processor[key])
173171

174172
def test_processor(self):
175-
feature_extractor = self.get_feature_extractor()
173+
image_processor = self.get_image_processor()
176174
tokenizer = self.get_tokenizer()
177175

178-
processor = ChineseCLIPProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
176+
processor = ChineseCLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)
179177

180178
input_str = "Alexandra,T-shirt的价格是15便士。"
181179
image_input = self.prepare_image_inputs()
@@ -189,10 +187,10 @@ def test_processor(self):
189187
processor()
190188

191189
def test_tokenizer_decode(self):
192-
feature_extractor = self.get_feature_extractor()
190+
image_processor = self.get_image_processor()
193191
tokenizer = self.get_tokenizer()
194192

195-
processor = ChineseCLIPProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
193+
processor = ChineseCLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)
196194

197195
predicted_ids = [[1, 4, 5, 8, 1, 0, 8], [3, 4, 3, 1, 1, 8, 9]]
198196

@@ -202,10 +200,10 @@ def test_tokenizer_decode(self):
202200
self.assertListEqual(decoded_tok, decoded_processor)
203201

204202
def test_model_input_names(self):
205-
feature_extractor = self.get_feature_extractor()
203+
image_processor = self.get_image_processor()
206204
tokenizer = self.get_tokenizer()
207205

208-
processor = ChineseCLIPProcessor(tokenizer=tokenizer, feature_extractor=feature_extractor)
206+
processor = ChineseCLIPProcessor(tokenizer=tokenizer, image_processor=image_processor)
209207

210208
input_str = "Alexandra,T-shirt的价格是15便士。"
211209
image_input = self.prepare_image_inputs()

0 commit comments

Comments (0)