Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion docs/source/openvino/models.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ Here is the list of the supported architectures :
- Falcon-Mamba
- FlauBERT
- GLM-4
- GLM-4.7-Flash
- GLM-Edge
- GPT-2
- GPT-BigCode
Expand Down Expand Up @@ -168,6 +169,7 @@ Here is the list of the supported architectures :
- Zamba2

## [Diffusers](https://huggingface.co/docs/diffusers/index)

- Stable Diffusion
- Stable Diffusion XL
- Latent Consistency
Expand All @@ -178,11 +180,14 @@ Here is the list of the supported architectures :
- LTX

## [Timm](https://huggingface.co/docs/timm/index)

- PiT
- ViT

## [Sentence Transformers](https://github.com/UKPLab/sentence-transformers)

- All Transformer and CLIP-based models.

## [OpenCLIP](https://github.com/mlfoundations/open_clip)
- All CLIP-based models

- All CLIP-based models
18 changes: 18 additions & 0 deletions optimum/exporters/openvino/model_configs.py
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,7 @@
BlenderbotSmallModelPatcher,
BloomModelPatcher,
ChatGLMModelPatcher,
Glm4MoeLitePatcher,
CodeGenModelPatcher,
CommonImageEmbeddingsModelPatcher,
DBRXModelPatcher,
Expand Down Expand Up @@ -3976,6 +3977,23 @@ class GLM4OpenVINOConfig(LlamaOpenVINOConfig):
MIN_TRANSFORMERS_VERSION = "4.51.3"


@register_in_tasks_manager(
    "glm4_moe_lite",
    "text-generation",
    "text-generation-with-past",
    library_name="transformers",
)
class Glm4MoeLiteOpenVINOConfig(TextDecoderWithPositionIdsOnnxConfig):
    """OpenVINO export configuration for GLM-4 MoE Lite decoder models.

    Registers the ``glm4_moe_lite`` architecture for text generation (with and
    without KV cache) in the tasks manager. Requires transformers >= 5.0.0,
    which is where this architecture was introduced.
    """

    MIN_TRANSFORMERS_VERSION = "5.0.0"
    DEFAULT_ONNX_OPSET = 14
    NORMALIZED_CONFIG_CLASS = NormalizedTextConfig
    # Dummy-input generation: text inputs plus the MiniCPM3-style past-KV
    # generator — presumably because this model's KV-cache layout matches it;
    # TODO(review) confirm against the transformers cache implementation.
    DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, OVMiniCPM3DummyPastKeyValuesGenerator)
    DUMMY_PKV_GENERATOR_CLASS = OVMiniCPM3DummyPastKeyValuesGenerator
    # Patcher that switches the MoE experts to a trace-safe implementation.
    _MODEL_PATCHER = Glm4MoeLitePatcher


@register_in_tasks_manager(
"granite",
*[
Expand Down
12 changes: 12 additions & 0 deletions optimum/exporters/openvino/model_patcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7744,6 +7744,18 @@ def __exit__(self, exc_type, exc_value, traceback):
del afmoe_moe.down_projs, afmoe_moe.gate_projs, afmoe_moe.up_projs


# Patch MoE implementation for glm4_moe_lite to enable correct Torch tracing.
# The default "eager" experts implementation in Glm4MoeLiteNaiveMoe.forward() uses
# torch.no_grad(), nonzero(), and a data-dependent loop over experts, producing
# inconsistent graphs for different inputs during torch.jit.trace.
# The transformers built-in "batched_mm" implementation uses index-based operations
# and batched matrix multiplications that are fully trace-safe.
class Glm4MoeLitePatcher(OVDecoderModelPatcher):
    """Decoder patcher that switches GLM-4 MoE Lite MoE experts to the
    trace-safe ``batched_mm`` implementation before export (see the module
    comment above for why the default ``eager`` path cannot be traced).
    """

    def __enter__(self):
        super().__enter__()
        # NOTE(review): the experts implementation is switched here but never
        # restored — there is no __exit__ override undoing this call, so the
        # in-memory model keeps "batched_mm" after the export context closes.
        # Other patchers in this file (e.g. the AFMoE one) revert their
        # changes on exit; confirm leaving this permanent is intentional.
        self._model.set_experts_implementation("batched_mm")


# adopted from https://github.com/huggingface/transformers/blob/v4.57.6/src/transformers/models/llama/modeling_llama.py#L197
class LlamaEagle3Attention(LlamaAttention):
"""
Expand Down
4 changes: 4 additions & 0 deletions tests/openvino/test_decoder.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,9 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
if is_transformers_version(">=", "4.57.0"):
SUPPORTED_ARCHITECTURES += ("hunyuan_v1_dense",)

if is_transformers_version(">=", "5.0.0"):
SUPPORTED_ARCHITECTURES += ("glm4_moe_lite",)

if is_transformers_version("<", "4.56.0"):
SUPPORTED_ARCHITECTURES += ("qwen", "chatglm", "chatglm4")

Expand Down Expand Up @@ -262,6 +265,7 @@ class OVModelForCausalLMIntegrationTest(unittest.TestCase):
"zamba2": 1,
"bitnet": 6,
"hunyuan_v1_dense": 2,
"glm4_moe_lite": 2,
"qwen3_next": 1,
}
TASK = "text-generation"
Expand Down
3 changes: 3 additions & 0 deletions tests/openvino/test_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,9 @@ class ExportModelTest(unittest.TestCase):
if is_transformers_version(">=", "4.57.0"):
SUPPORTED_ARCHITECTURES.update({"hunyuan_v1_dense": OVModelForCausalLM})

if is_transformers_version(">=", "5.0.0"):
SUPPORTED_ARCHITECTURES.update({"glm4_moe_lite": OVModelForCausalLM})

if is_transformers_version(">=", "4.57.0") and is_transformers_version("<", "5"):
SUPPORTED_ARCHITECTURES.update({"qwen3_next": OVModelForCausalLM})

Expand Down
7 changes: 7 additions & 0 deletions tests/openvino/test_exporters_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,13 @@ class OVCLIExportTestCase(unittest.TestCase):
]
)

if is_transformers_version(">=", "5.0.0"):
SUPPORTED_ARCHITECTURES.extend(
[
("text-generation-with-past", "glm4_moe_lite"),
]
)

if is_transformers_version(">=", "4.57.0") and is_transformers_version("<", "5"):
SUPPORTED_ARCHITECTURES.extend(
[
Expand Down
3 changes: 3 additions & 0 deletions tests/openvino/test_quantization.py
Original file line number Diff line number Diff line change
Expand Up @@ -1081,6 +1081,9 @@ class OVWeightCompressionTest(unittest.TestCase):
SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION.append((OVModelForVisualCausalLM, "qwen3_vl", False))
SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION.append((OVModelForCausalLM, "hunyuan_v1_dense", False))

if is_transformers_version(">=", "5.0.0"):
SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION.append((OVModelForCausalLM, "glm4_moe_lite", False))

if is_transformers_version("<", "5"):
SUPPORTED_ARCHITECTURES_WITH_AUTO_COMPRESSION.extend(
[
Expand Down
2 changes: 2 additions & 0 deletions tests/openvino/utils_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@
"xglm": "optimum-intel-internal-testing/tiny-random-XGLMForCausalLM",
"xverse": "optimum-intel-internal-testing/tiny-random-xverse",
"glm4": "optimum-intel-internal-testing/tiny-random-glm4",
"glm4_moe_lite": "zai-org/GLM-4.7-Flash",
"glm": "optimum-intel-internal-testing/tiny-random-glm-edge",
"open-clip": "optimum-intel-internal-testing/tiny-open-clip-model",
"open-clip-ov": "optimum-intel-internal-testing/tiny-open-clip-model",
Expand Down Expand Up @@ -372,6 +373,7 @@
"hunyuan_v1_dense": {"model": 32},
"qwen3_eagle3": {"model": 20},
"qwen3_next": {"model": 100},
"glm4_moe_lite": {"model": 16},
}

TEST_IMAGE_URL = "http://images.cocodataset.org/val2017/000000039769.jpg"
Expand Down
Loading