Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions optimum/intel/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,7 @@
"OVModelForCustomTasks",
"OVModelForFeatureExtraction",
"OVModelForImageClassification",
"OVModelForImageToImage",
"OVModelForMaskedLM",
"OVModelForPix2Struct",
"OVModelForQuestionAnswering",
Expand Down Expand Up @@ -125,6 +126,7 @@
"OVModelForCustomTasks",
"OVModelForFeatureExtraction",
"OVModelForImageClassification",
"OVModelForImageToImage",
"OVModelForMaskedLM",
"OVModelForPix2Struct",
"OVModelForQuestionAnswering",
Expand Down
1 change: 1 addition & 0 deletions optimum/intel/openvino/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
OVModelForCustomTasks,
OVModelForFeatureExtraction,
OVModelForImageClassification,
OVModelForImageToImage,
OVModelForMaskedLM,
OVModelForQuestionAnswering,
OVModelForSequenceClassification,
Expand Down
38 changes: 38 additions & 0 deletions optimum/intel/openvino/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,6 +979,44 @@ def forward(self, **kwargs):
return ModelOutput(**model_outputs)


# Usage example injected into OVModelForImageToImage.forward's docstring.
# The {processor_class}/{model_class}/{checkpoint} placeholders are filled via
# str.format() at decoration time, so the literal text below must keep those
# braces intact — do not convert this to an f-string.
IMAGE_TO_IMAGE_EXAMPLE = r"""
Example of image-to-image using `transformers.pipelines`:
```python
>>> from PIL import Image
>>> import requests
>>> from transformers import {processor_class}, pipeline
>>> from optimum.intel import {model_class}
>>> processor = {processor_class}.from_pretrained("{checkpoint}")
>>> model = {model_class}.from_pretrained("{checkpoint}", export=True)
>>> pipe = pipeline("image-to-image", model=model, image_processor=processor)
>>> url = "https://huggingface.co/datasets/hf-internal-testing/dummy_image/resolve/main/colorful_cat.png"
>>> image = Image.open(requests.get(url, stream=True).raw)
>>> outputs = pipe(image)
```
"""


@add_start_docstrings(
    """
    OpenVINO Model for image-to-image tasks.
    """,
    MODEL_START_DOCSTRING,
)
class OVModelForImageToImage(OVModelForCustomTasks):
    # Task name the exporter uses when converting the model to OpenVINO IR.
    export_feature = "image-to-image"

    @add_start_docstrings_to_model_forward(
        IMAGE_INPUTS_DOCSTRING
        + IMAGE_TO_IMAGE_EXAMPLE.format(
            model_class="OVModelForImageToImage",
            checkpoint="caidas/swin2SR-classical-sr-x2-64",
            processor_class=_FEATURE_EXTRACTOR_FOR_DOC,
        )
    )
    def forward(self, pixel_values: Union[torch.Tensor, np.ndarray], **kwargs):
        # Thin wrapper over the generic custom-task forward: it only names the
        # image input explicitly so the docstring decorator can document it.
        return OVModelForCustomTasks.forward(self, pixel_values=pixel_values, **kwargs)


class OVModelForZeroShotImageClassification(OVModel):
auto_model_class = AutoModelForZeroShotImageClassification
export_feature = "zero-shot-image-classification"
Expand Down
1 change: 1 addition & 0 deletions optimum/intel/openvino/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,6 +129,7 @@
"token-classification": "OVModelForTokenClassification",
"question-answering": "OVModelForQuestionAnswering",
"image-classification": "OVModelForImageClassification",
"image-to-image": "OVModelForImageToImage",
"image-text-to-text": "OVModelForVisualCausalLM",
"zero-shot-image-classification": "OVModelForZeroShotImageClassification",
"audio-classification": "OVModelForAudioClassification",
Expand Down
2 changes: 2 additions & 0 deletions optimum/intel/pipelines/accelerator_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
OVModelForCTC,
OVModelForFeatureExtraction,
OVModelForImageClassification,
OVModelForImageToImage,
OVModelForMaskedLM,
OVModelForQuestionAnswering,
OVModelForSeq2SeqLM,
Expand All @@ -91,6 +92,7 @@
"feature-extraction": (OVModelForFeatureExtraction,),
"fill-mask": (OVModelForMaskedLM,),
"image-classification": (OVModelForImageClassification,),
"image-to-image": (OVModelForImageToImage,),
"image-text-to-text": (OVModelForVisualCausalLM,),
"image-to-text": (OVModelForVision2Seq,),
"question-answering": (OVModelForQuestionAnswering,),
Expand Down
11 changes: 11 additions & 0 deletions optimum/intel/utils/dummy_openvino_objects.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,17 @@ def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["openvino"])


# Import-time placeholder used when the "openvino" extra is not installed:
# any attempt to construct or load the model raises an informative error via
# requires_backends. Mirrors the boilerplate of the sibling dummy classes.
class OVModelForImageToImage(metaclass=DummyObject):
    _backends = ["openvino"]

    def __init__(self, *args, **kwargs):
        requires_backends(self, ["openvino"])

    @classmethod
    def from_pretrained(cls, *args, **kwargs):
        requires_backends(cls, ["openvino"])


class OVModelForMaskedLM(metaclass=DummyObject):
_backends = ["openvino"]

Expand Down
11 changes: 11 additions & 0 deletions tests/openvino/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
OVModelForCustomTasks,
OVModelForFeatureExtraction,
OVModelForImageClassification,
OVModelForImageToImage,
OVModelForMaskedLM,
OVModelForPix2Struct,
OVModelForQuestionAnswering,
Expand All @@ -57,6 +58,7 @@
)
from optimum.intel.openvino.modeling_base import OVBaseModel
from optimum.intel.openvino.modeling_visual_language import MODEL_TYPE_TO_CLS_MAPPING
from optimum.intel.pipelines.accelerator_utils import get_openvino_model_class
from optimum.intel.openvino.utils import TemporaryDirectory
from optimum.intel.utils.import_utils import _transformers_version, is_transformers_version
from optimum.utils import logging
Expand Down Expand Up @@ -393,3 +395,12 @@ def test_export_custom_model(self):
ov_outputs = ov_model(**tokens)
self.assertTrue(torch.allclose(ov_outputs.token_embeddings, model_outputs.token_embeddings, atol=1e-4))
self.assertTrue(torch.allclose(ov_outputs.sentence_embedding, model_outputs.sentence_embedding, atol=1e-4))


class ImageToImageSupportTest(unittest.TestCase):
    """Smoke tests that the image-to-image task is wired into optimum-intel."""

    def test_image_to_image_model_class_uses_custom_tasks_base(self):
        # The model must build on the generic custom-task base and advertise
        # the expected export feature name.
        self.assertTrue(issubclass(OVModelForImageToImage, OVModelForCustomTasks))
        self.assertEqual("image-to-image", OVModelForImageToImage.export_feature)

    def test_pipeline_dispatch_maps_image_to_image_to_openvino_model(self):
        # pipeline() dispatch must resolve the task string to the OV class.
        resolved = get_openvino_model_class("image-to-image")
        self.assertIs(resolved, OVModelForImageToImage)
5 changes: 5 additions & 0 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@
OVModelForCausalLM,
OVModelForFeatureExtraction,
OVModelForImageClassification,
OVModelForImageToImage,
OVModelForMaskedLM,
OVModelForQuestionAnswering,
OVModelForSeq2SeqLM,
Expand Down Expand Up @@ -167,6 +168,10 @@ class OVCLIExportTestCase(unittest.TestCase):
]
)

def test_transformers_image_to_image_head_mapping(self):
    # The CLI head-to-automodel mapping and the model class itself must agree
    # on both the task string and the exported class name.
    self.assertEqual("OVModelForImageToImage", _HEAD_TO_AUTOMODELS["image-to-image"])
    self.assertEqual("image-to-image", OVModelForImageToImage.export_feature)

EXPECTED_NUMBER_OF_TOKENIZER_MODELS = {
"gpt2": 2,
"t5": 2,
Expand Down