Skip to content

Commit 68c8fd5

Browse files
committed
upload fast image processor
1 parent 88f9dcc commit 68c8fd5

File tree

1 file changed

+107
-0
lines changed

1 file changed

+107
-0
lines changed
Lines changed: 107 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,107 @@
1+
# coding=utf-8
2+
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
3+
#
4+
# Licensed under the Apache License, Version 2.0 (the "License");
5+
# you may not use this file except in compliance with the License.
6+
# You may obtain a copy of the License at
7+
#
8+
# http://www.apache.org/licenses/LICENSE-2.0
9+
#
10+
# Unless required by applicable law or agreed to in writing, software
11+
# distributed under the License is distributed on an "AS IS" BASIS,
12+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
# See the License for the specific language governing permissions and
14+
# limitations under the License.
15+
"""Fast Image processor class for DeepseekVL."""
16+
17+
from ...image_processing_utils_fast import BASE_IMAGE_PROCESSOR_FAST_DOCSTRING, BaseImageProcessorFast
18+
from ...image_transforms import get_size_with_aspect_ratio
19+
from ...image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, PILImageResampling, SizeDict, get_image_size
20+
from ...utils import (
21+
add_start_docstrings,
22+
is_torch_available,
23+
is_torchvision_available,
24+
is_torchvision_v2_available,
25+
)
26+
27+
28+
if is_torch_available():
29+
import torch
30+
31+
32+
if is_torchvision_available():
33+
if is_torchvision_v2_available():
34+
from torchvision.transforms.v2 import functional as F
35+
else:
36+
from torchvision.transforms import functional as F
37+
38+
39+
@add_start_docstrings(
    "Constructs a fast DeepseekVL image processor.",
    BASE_IMAGE_PROCESSOR_FAST_DOCSTRING,
)
class DeepseekVLImageProcessorFast(BaseImageProcessorFast):
    # Class-level defaults for the preprocessing pipeline.
    resample = PILImageResampling.BICUBIC
    image_mean = OPENAI_CLIP_MEAN
    image_std = OPENAI_CLIP_STD
    size = 1024
    do_resize = True
    do_rescale = True
    do_normalize = False
    # Per-channel fill value written into the border added by `pad_to_square`.
    # NOTE(review): looks like OPENAI_CLIP_MEAN scaled to the 0-255 range — confirm.
    background_color = [122, 116, 104]

    def pad_to_square(
        self,
        image: "torch.Tensor",
    ) -> "torch.Tensor":
        """
        Pads an image to a square based on the longest edge.

        The image is centred on a square canvas whose side equals its longest edge; when the required padding is
        odd, the extra pixel goes to the right/bottom. The border is filled with `self.background_color` (one value
        per channel). Padding is done with plain tensor ops rather than `F.pad`, because the non-v2 torchvision
        functional API only accepts a scalar `fill` for tensors and would reject the per-channel list.

        Args:
            image (`torch.Tensor`):
                Image to pad. The last two dimensions are treated as `(height, width)`; the dimension right before
                them must match the number of entries in `self.background_color` whenever padding is needed.
        Returns:
            `torch.Tensor`: The padded image. An image that is already square is returned as-is (same tensor
            object, no copy).
        """
        height, width = image.shape[-2:]
        if height == width:
            # Nothing to pad; also avoids requiring a channel count that matches `background_color`.
            return image

        side = max(height, width)
        left = (side - width) // 2
        top = (side - height) // 2

        # Shape (C, 1, 1) so the colour broadcasts over any leading batch dims and over both spatial dims.
        fill = torch.tensor(self.background_color, dtype=image.dtype, device=image.device).reshape(-1, 1, 1)

        canvas = image.new_empty((*image.shape[:-2], side, side))
        canvas[...] = fill
        canvas[..., top : top + height, left : left + width] = image
        return canvas

    def resize(
        self,
        image: "torch.Tensor",
        size: SizeDict,
        interpolation: "F.InterpolationMode" = None,
        antialias: bool = True,
        **kwargs,
    ) -> "torch.Tensor":
        """
        Resize an image while keeping its aspect ratio, then pad the result to a square.

        Args:
            image (`torch.Tensor`):
                Image to resize. The last two dimensions are treated as `(height, width)`.
            size (`SizeDict`):
                Target size. Only `size["height"]` is read: it is passed to `get_size_with_aspect_ratio` both as the
                requested size and as the maximum size.
            interpolation (`InterpolationMode`, *optional*, defaults to `InterpolationMode.BILINEAR`):
                `InterpolationMode` filter to use when resizing the image e.g. `InterpolationMode.BICUBIC`.
            antialias (`bool`, *optional*, defaults to `True`):
                Forwarded unchanged to `F.resize`.
            **kwargs:
                Accepted for signature compatibility and ignored.

        Returns:
            `torch.Tensor`: The resized image, padded to a square with `self.background_color`.
        """
        interpolation = interpolation if interpolation is not None else F.InterpolationMode.BILINEAR

        # `size["height"]` is used for both arguments on purpose; `size["width"]` is never consulted.
        # NOTE(review): presumably this caps the longest edge at `size["height"]` so that the square padding below
        # yields a `height x height` output — confirm against `get_size_with_aspect_ratio`.
        size = get_size_with_aspect_ratio(image.shape[-2:], size["height"], size["height"])

        image = F.resize(image, size, interpolation=interpolation, antialias=antialias)
        image = self.pad_to_square(image)

        return image
105+
106+
107+
# Public API of this module: the only name exported by a star-import.
__all__ = ["DeepseekVLImageProcessorFast"]

0 commit comments

Comments
 (0)