diff --git a/optimum/exporters/openvino/model_configs.py b/optimum/exporters/openvino/model_configs.py index 7ffe158396..270db0b350 100644 --- a/optimum/exporters/openvino/model_configs.py +++ b/optimum/exporters/openvino/model_configs.py @@ -194,6 +194,7 @@ Qwen2MoEPatcher, Qwen2VLLanguageModelPatcher, Qwen2VLVisionEmbMergerPatcher, + Qwen3_5Patcher, Qwen3MoeModelPatcher, Qwen3VLLanguageModelPatcher, Qwen3VLVisionEmbMergerPatcher, @@ -4961,6 +4962,195 @@ def inputs(self) -> Dict[str, Dict[int, str]]: return common_inputs +class Qwen3_5DummyPastKeyValuesGenerator(DummyPastKeyValuesGenerator): + """ + Generates dummy cache_params inputs for Qwen3.5 hybrid GatedDeltaNet + Attention architectures. + Linear attention layers produce conv_states and recurrent_states (fixed size). + Full attention layers produce standard KV cache (variable size). + """ + + SUPPORTED_INPUT_NAMES = ("cache_params",) + + def __init__( + self, + task: str, + normalized_config, + batch_size: int = DEFAULT_DUMMY_SHAPES["batch_size"], + sequence_length: int = DEFAULT_DUMMY_SHAPES["sequence_length"], + **kwargs, + ): + super().__init__( + task=task, + normalized_config=normalized_config, + batch_size=batch_size, + sequence_length=sequence_length, + **kwargs, + ) + + config = normalized_config.config + self._model_config = config + # Derive attention layer indices from layer_types list + layer_types = config.layer_types + self.attention_layers_indices = set( + i for i, lt in enumerate(layer_types) if lt == "full_attention" + ) + self.num_hidden_layers = config.num_hidden_layers + self.num_linear_layers = self.num_hidden_layers - len(self.attention_layers_indices) + self.num_attention_layers = len(self.attention_layers_indices) + + # Linear attention (GatedDeltaNet) state dimensions + self.linear_num_key_heads = config.linear_num_key_heads + self.linear_key_head_dim = config.linear_key_head_dim + self.linear_value_head_dim = config.linear_value_head_dim + self.linear_num_value_heads = 
config.linear_num_value_heads + self.linear_conv_kernel_dim = config.linear_conv_kernel_dim + # conv_dim = key_dim * 2 + value_dim + self.conv_dim = ( + self.linear_num_key_heads * self.linear_key_head_dim * 2 + + self.linear_num_value_heads * self.linear_value_head_dim + ) + + # Full attention KV cache dimensions + self.num_key_value_heads = config.num_key_value_heads + self.head_dim = getattr(config, "head_dim", config.hidden_size // config.num_attention_heads) + self.sequence_length = 0 + + def generate(self, input_name: str, framework: str = "pt", int_dtype: str = "int64", float_dtype: str = "fp32"): + past_key_values = [] + + # Linear attention layers: conv_states + recurrent_states + for i in range(self.num_linear_layers): + conv_state_shape = (self.batch_size, self.conv_dim, self.linear_conv_kernel_dim) + conv_state = self.random_float_tensor(conv_state_shape, framework=framework, dtype=float_dtype) + past_key_values.append(conv_state) + + recurrent_state_shape = ( + self.batch_size, + self.linear_num_key_heads, + self.linear_key_head_dim, + self.linear_value_head_dim, + ) + recurrent_state = self.random_float_tensor(recurrent_state_shape, framework=framework, dtype=float_dtype) + past_key_values.append(recurrent_state) + + # Full attention layers: key + value cache + for i in range(self.num_attention_layers): + kv_shape = (self.batch_size, self.num_key_value_heads, self.sequence_length, self.head_dim) + k = self.random_float_tensor(kv_shape, framework=framework, dtype=float_dtype) + v = self.random_float_tensor(kv_shape, framework=framework, dtype=float_dtype) + past_key_values.append(k) + past_key_values.append(v) + + return past_key_values + + +@register_in_tasks_manager( + "qwen3_5", + *[ + "text-generation", + "text-generation-with-past", + ], + library_name="transformers", +) +@register_in_tasks_manager( + "qwen3_5_text", + *[ + "text-generation", + "text-generation-with-past", + ], + library_name="transformers", +) +class 
Qwen3_5OpenVINOConfig(MambaOpenVINOConfig): + PAD_ATTENTION_MASK_TO_PAST = False + DUMMY_INPUT_GENERATOR_CLASSES = (DummyTextInputGenerator, Qwen3_5DummyPastKeyValuesGenerator) + DUMMY_PKV_GENERATOR_CLASS = Qwen3_5DummyPastKeyValuesGenerator + NORMALIZED_CONFIG_CLASS = NormalizedTextConfig + # No attention_mask input: patched_forward passes None to avoid hardcoded reshapes. + NO_ATTENTION_MASK = True + MIN_TRANSFORMERS_VERSION = "5.3.0" + _MODEL_PATCHER = Qwen3_5Patcher + + def patch_model_for_export(self, model: PreTrainedModel, model_kwargs: Optional[Dict[str, Any]] = None): + model_kwargs = model_kwargs or {} + return Qwen3_5Patcher(self, model, model_kwargs=model_kwargs) + + def add_past_key_values(self, inputs_or_outputs: Dict[str, Dict[int, str]], direction: str): + if direction not in ["inputs", "outputs"]: + raise ValueError(f'direction must either be "inputs" or "outputs", but {direction} was given') + + if direction == "inputs": + decoder_sequence_name = "past_sequence_length" + cache_name_prefix = "cache_params.past" + else: + decoder_sequence_name = "past_sequence_length + sequence_length" + cache_name_prefix = "cache_params.present" + + config = self._normalized_config.config + layer_types = config.layer_types + num_hidden_layers = config.num_hidden_layers + + # Grouped order: all linear attention layers first, then all full attention layers. + # This must match the order in Qwen3_5DummyPastKeyValuesGenerator.generate() + # and Qwen3_5Patcher.patched_forward() cache unpacking/repacking. 
+ linear_layer_idx = 0 + for i in range(num_hidden_layers): + if layer_types[i] == "linear_attention": + inputs_or_outputs[f"{cache_name_prefix}.conv.{linear_layer_idx}"] = {0: "batch_size"} + inputs_or_outputs[f"{cache_name_prefix}.recurrent.{linear_layer_idx}"] = {0: "batch_size"} + linear_layer_idx += 1 + + attention_layer_idx = 0 + for i in range(num_hidden_layers): + if layer_types[i] == "full_attention": + inputs_or_outputs[f"{cache_name_prefix}.key.{attention_layer_idx}"] = { + 0: "batch_size", + 2: decoder_sequence_name, + } + inputs_or_outputs[f"{cache_name_prefix}.value.{attention_layer_idx}"] = { + 0: "batch_size", + 2: decoder_sequence_name, + } + attention_layer_idx += 1 + + def overwrite_shape_and_generate_input(self, dummy_input_gen, input_name, framework, input_shapes): + # Qwen3.5's GatedDeltaNet has separate prefill (seq_len > 1) and decode (seq_len == 1) paths. + # The stateful model must trace the decode path so that conv/recurrent cache inputs are consumed. + # Force seq_len=1 for input_ids when past states are present. + if self.use_past and self.use_past_in_inputs and input_name in ("input_ids", "position_ids"): + saved = dummy_input_gen.sequence_length + dummy_input_gen.sequence_length = 1 + result = dummy_input_gen.generate( + input_name, framework=framework, int_dtype=self.int_dtype, float_dtype=self.float_dtype + ) + dummy_input_gen.sequence_length = saved + return result + if self.use_past and self.use_past_in_inputs and input_name == "attention_mask": + # attention_mask must be LONGER than input_ids (length > 1) during tracing so that + # torch.jit.trace captures the padding_mask slicing branch in sdpa_mask(): + # if padding_mask.shape[-1] > kv_length: padding_mask = padding_mask[:, -kv_length:] + # This makes the graph correctly adapt to growing attention_mask at runtime. + # apply_mask_to_padding_states is patched to no-op by Qwen3_5Patcher to avoid + # the broadcast issue with hidden_states * attention_mask[:, :, None]. 
+ import torch + return torch.ones(dummy_input_gen.batch_size, 2, dtype=torch.int64) + return super().overwrite_shape_and_generate_input(dummy_input_gen, input_name, framework, input_shapes) + + @property + def inputs(self) -> Dict[str, Dict[int, str]]: + # attention_mask is NOT included: patched_forward passes attention_mask=None + # to avoid hardcoded reshapes from torch.jit.trace's causal mask computation. + # The model creates a pure causal mask depending only on KV cache shape (dynamic). + # position_ids IS included: needed for correct RoPE in full_attention layers. + # Without it, cache_position (baked to [0]) would give every token position 0. + common_inputs = { + "input_ids": {0: "batch_size", 1: "sequence_length"}, + "position_ids": {0: "batch_size", 1: "sequence_length"}, + } + if self.use_past_in_inputs: + self.add_past_key_values(common_inputs, direction="inputs") + return common_inputs + + @register_in_tasks_manager("audio-spectrogram-transformer", *["feature-extraction", "audio-classification"]) class ASTOpenVINOConfig(ASTOnnxConfig): pass diff --git a/optimum/exporters/openvino/model_patcher.py b/optimum/exporters/openvino/model_patcher.py index 72605106a1..c4d840be60 100644 --- a/optimum/exporters/openvino/model_patcher.py +++ b/optimum/exporters/openvino/model_patcher.py @@ -6919,10 +6919,211 @@ def segment_sum(input_tensor): # This patcher class serves the following purposes: -# 1. Packs the KV-cache, conv_state, and ssm_state tensors into a Zamba2HybridDynamicCache structure +# 1. Packs the conv_state, recurrent_state, and KV-cache tensors into a Qwen3_5DynamicCache structure # for subsequent invocation of the model's `forward` method. -# 2. Patches the Zamba2MambaMixer so that the traced `forward` function works correctly -# during both the prefill and decoding steps. +# 2. Ensures GatedDeltaNet layers use torch fallback paths (not CUDA-only fast kernels) +# during export tracing. 
+# +# Qwen3.5 is a hybrid model: +# - Linear attention layers (GatedDeltaNet) with conv_states and recurrent_states (fixed-size) +# - Full attention layers with standard KV cache (variable-size) +# +# The flat cache_params list is ordered as (matching the dummy generator): +# [conv_0, rec_0, conv_1, rec_1, ..., conv_{L-1}, rec_{L-1}, +# key_0, val_0, key_1, val_1, ..., key_{A-1}, val_{A-1}] +# where L = number of linear attention layers, A = number of full attention layers. +class Qwen3_5Patcher(ModelPatcher): + def __init__( + self, + config: "OnnxConfig", + model: "PreTrainedModel", + model_kwargs: Optional[Dict[str, Any]] = None, + ): + from transformers.models.qwen3_5.modeling_qwen3_5 import Qwen3_5DynamicCache + + super().__init__(config, model, model_kwargs) + orig_model = model + + model_config = self.real_config._config + layer_types = model_config.layer_types + num_hidden_layers = model_config.num_hidden_layers + + # Compute absolute indices for each layer type + linear_layer_indices = [i for i in range(num_hidden_layers) if layer_types[i] == "linear_attention"] + attention_layer_indices = [i for i in range(num_hidden_layers) if layer_types[i] == "full_attention"] + num_linear = len(linear_layer_indices) + num_attention = len(attention_layer_indices) + + # Cache wrapper that reconstructs Qwen3_5DynamicCache from compact per-type lists. + # The model accesses cache fields by absolute layer index (0..num_hidden_layers-1), + # so we expand compact lists into full-length lists with None at unused positions. 
+ class Qwen3_5HybridCacheWrap(Qwen3_5DynamicCache): + def __init__(self, cfg, conv_states_compact, recurrent_states_compact, key_cache_compact, value_cache_compact): + # Initialize full-length lists of None + super().__init__(cfg) + # Place conv/recurrent tensors at linear layer positions + for compact_idx, abs_idx in enumerate(linear_layer_indices): + self.conv_states[abs_idx] = conv_states_compact[compact_idx] + self.recurrent_states[abs_idx] = recurrent_states_compact[compact_idx] + # Place key/value tensors at attention layer positions + for compact_idx, abs_idx in enumerate(attention_layer_indices): + self.key_cache[abs_idx] = key_cache_compact[compact_idx] + self.value_cache[abs_idx] = value_cache_compact[compact_idx] + + def update( + self, + key_states: torch.Tensor, + value_states: torch.Tensor, + layer_idx: int, + cache_kwargs: Optional[dict[str, Any]] = None, + ) -> tuple[torch.Tensor, torch.Tensor]: + # key_cache at attention positions is an actual tensor (possibly empty with + # seq_len=0 from the dummy generator), never None. Concatenation with an + # empty tensor (dim=2 size 0) produces the correct result. + self.key_cache[layer_idx] = torch.cat([self.key_cache[layer_idx], key_states], dim=2) + self.value_cache[layer_idx] = torch.cat([self.value_cache[layer_idx], value_states], dim=2) + return self.key_cache[layer_idx], self.value_cache[layer_idx] + + def get_seq_length(self, layer_idx: Optional[int] = 0) -> int: + layer_idx = self.transformer_layers[0] if layer_idx not in self.transformer_layers else layer_idx + if self.key_cache[layer_idx] is None: + return 0 + return self.key_cache[layer_idx].shape[-2] + + # Patched forward that converts between flat tensor list and cache object. 
+ def patched_forward( + input_ids, + position_ids=None, + cache_params=None, + ): + use_cache = False + wrapped_cache_params = None + if cache_params is not None: + use_cache = True + + # Unpack flat list (grouped order: all linear pairs, then all attention pairs) + conv_states = [] + recurrent_states = [] + for idx in range(num_linear): + conv_states.append(cache_params[2 * idx]) + recurrent_states.append(cache_params[2 * idx + 1]) + + key_cache = [] + value_cache = [] + offset = 2 * num_linear + for idx in range(num_attention): + key_cache.append(cache_params[offset + 2 * idx]) + value_cache.append(cache_params[offset + 2 * idx + 1]) + + # Remember input dtype for output matching (stateful transform needs same dtypes) + input_dtype = conv_states[0].dtype if conv_states else key_cache[0].dtype + + # Cast cache tensors to model dtype for computation + model_dtype = next(orig_model.parameters()).dtype + if model_dtype != input_dtype: + conv_states = [s.to(model_dtype) for s in conv_states] + recurrent_states = [s.to(model_dtype) for s in recurrent_states] + key_cache = [s.to(model_dtype) for s in key_cache] + value_cache = [s.to(model_dtype) for s in value_cache] + + wrapped_cache_params = Qwen3_5HybridCacheWrap( + model_config, conv_states, recurrent_states, key_cache, value_cache + ) + + # Pass attention_mask=None to avoid tracing the mask-dependent indexing + # in create_causal_mask/sdpa_mask, which produces hardcoded reshapes + # from torch.jit.trace. Without attention_mask, the model creates a + # pure causal mask that depends only on KV cache shape (dynamic). + # Pass position_ids explicitly so the model uses correct RoPE positions + # instead of computing from cache state (which would be baked by trace). 
+ causal_lm_output = self.model_orig_forward( + input_ids=input_ids, + attention_mask=None, + position_ids=position_ids, + past_key_values=wrapped_cache_params, + use_cache=use_cache, + ) + outputs = { + "logits": causal_lm_output.logits, + } + + if use_cache: + past_key_values = causal_lm_output.past_key_values + # Repack into flat list (same grouped order), casting back to input dtype + present_key_values = [] + for abs_idx in linear_layer_indices: + present_key_values.append(past_key_values.conv_states[abs_idx].to(input_dtype)) + present_key_values.append(past_key_values.recurrent_states[abs_idx].to(input_dtype)) + + for abs_idx in attention_layer_indices: + present_key_values.append(past_key_values.key_cache[abs_idx].to(input_dtype)) + present_key_values.append(past_key_values.value_cache[abs_idx].to(input_dtype)) + + outputs["present_key_values"] = present_key_values + + return outputs + + self.patched_forward = patched_forward + self.model_orig_forward = self.orig_forward + self.orig_forward = patched_forward + + def __enter__(self): + import transformers.models.qwen3_5.modeling_qwen3_5 as _qwen3_5_module + from transformers.models.qwen3_5.modeling_qwen3_5 import ( + Qwen3_5GatedDeltaNet, + torch_causal_conv1d_update, + torch_chunk_gated_delta_rule, + torch_recurrent_gated_delta_rule, + ) + + super().__enter__() + setattr(self._model, self.orig_forward_name, self.patched_forward) + + # Patch apply_mask_to_padding_states to no-op during export. + # This function broadcasts hidden_states * attention_mask[:,:,None], which would + # expand hidden_states from [B,1,D] to [B,mask_len,D] if attention_mask is longer + # than input_ids. It's only needed for padding during training, not inference. 
+ self._orig_apply_mask = _qwen3_5_module.apply_mask_to_padding_states + _qwen3_5_module.apply_mask_to_padding_states = lambda hidden_states, attention_mask: hidden_states + + # Patch each GatedDeltaNet layer to use torch fallback paths instead of + # CUDA-only fast kernels (causal-conv1d, flash-linear-attention). + for layer in self._model.model.layers: + if not (hasattr(layer, "linear_attn") and isinstance(layer.linear_attn, Qwen3_5GatedDeltaNet)): + continue + gdn = layer.linear_attn + # Save originals for restoration + gdn._orig_causal_conv1d_fn = gdn.causal_conv1d_fn + gdn._orig_causal_conv1d_update = gdn.causal_conv1d_update + gdn._orig_chunk_gated_delta_rule = gdn.chunk_gated_delta_rule + gdn._orig_recurrent_gated_delta_rule = gdn.recurrent_gated_delta_rule + # Force torch fallback paths + gdn.causal_conv1d_fn = None # triggers native conv1d path in prefill + gdn.causal_conv1d_update = torch_causal_conv1d_update + gdn.chunk_gated_delta_rule = torch_chunk_gated_delta_rule + gdn.recurrent_gated_delta_rule = torch_recurrent_gated_delta_rule + + def __exit__(self, exc_type, exc_value, traceback): + import transformers.models.qwen3_5.modeling_qwen3_5 as _qwen3_5_module + from transformers.models.qwen3_5.modeling_qwen3_5 import Qwen3_5GatedDeltaNet + + super().__exit__(exc_type, exc_value, traceback) + setattr(self._model, self.orig_forward_name, self.model_orig_forward) + + # Restore apply_mask_to_padding_states + _qwen3_5_module.apply_mask_to_padding_states = self._orig_apply_mask + + # Restore original GatedDeltaNet methods + for layer in self._model.model.layers: + if not (hasattr(layer, "linear_attn") and isinstance(layer.linear_attn, Qwen3_5GatedDeltaNet)): + continue + gdn = layer.linear_attn + gdn.causal_conv1d_fn = gdn._orig_causal_conv1d_fn + gdn.causal_conv1d_update = gdn._orig_causal_conv1d_update + gdn.chunk_gated_delta_rule = gdn._orig_chunk_gated_delta_rule + gdn.recurrent_gated_delta_rule = gdn._orig_recurrent_gated_delta_rule + + class 
Zamba2ModelPatcher(ModelPatcher): def __init__( self, diff --git a/optimum/exporters/openvino/stateful.py b/optimum/exporters/openvino/stateful.py index 3b8642d65a..f7f6e42438 100644 --- a/optimum/exporters/openvino/stateful.py +++ b/optimum/exporters/openvino/stateful.py @@ -284,14 +284,14 @@ def get_kv_ssm_tensor_names(ssm_prefix_names: list, kv_prefix_names: list, ov_te other_tensors.append(ov_tensor) return kv_names, ssm_names, other_tensors - ssm_prefix_input_names = ["cache_params.past.ssm", "cache_params.past.conv"] + ssm_prefix_input_names = ["cache_params.past.ssm", "cache_params.past.recurrent", "cache_params.past.conv"] kv_prefix_input_names = ["cache_params.past.key", "cache_params.past.value"] kv_input_names, ssm_input_names, not_cache_inputs = get_kv_ssm_tensor_names( ssm_prefix_input_names, kv_prefix_input_names, ov_model.inputs ) cache_inputs = kv_input_names + ssm_input_names - ssm_prefix_output_names = ["cache_params.present.ssm", "cache_params.present.conv"] + ssm_prefix_output_names = ["cache_params.present.ssm", "cache_params.present.recurrent", "cache_params.present.conv"] kv_prefix_output_names = ["cache_params.present.key", "cache_params.present.value"] kv_output_names, ssm_output_names, _ = get_kv_ssm_tensor_names( ssm_prefix_output_names, kv_prefix_output_names, ov_model.outputs @@ -307,10 +307,11 @@ def get_kv_ssm_tensor_names(ssm_prefix_names: list, kv_prefix_names: list, ov_te def patch_stateful(config: PretrainedConfig, ov_model: ov.Model): if config.is_encoder_decoder and model_has_input_output_name(ov_model, "encoder_hidden_states"): - return patch_stateful_encoder_decoder(config, ov_model) - if config.model_type in SSM_MODELS: - return patch_stateful_hybrid_ssm(ov_model) - return patch_stateful_decoder(config, ov_model) + patch_stateful_encoder_decoder(config, ov_model) + elif config.model_type in SSM_MODELS: + patch_stateful_hybrid_ssm(ov_model) + else: + patch_stateful_decoder(config, ov_model) def patch_stateful_decoder(config: 
PretrainedConfig, ov_model: ov.Model): diff --git a/optimum/exporters/openvino/utils.py b/optimum/exporters/openvino/utils.py index 3d9a854e39..a8f14fc79d 100644 --- a/optimum/exporters/openvino/utils.py +++ b/optimum/exporters/openvino/utils.py @@ -103,7 +103,13 @@ def _get_input_info( for i in range(len(ordered_input_names)): name = ordered_input_names[i] example = flatten_inputs[i] - type = get_element_type(example.cpu().numpy().dtype) + # NumPy doesn't support bfloat16; convert to float32 for dtype detection, then fix up + import torch + if example.dtype == torch.bfloat16: + from openvino import Type as OVType + type = OVType.bf16 + else: + type = get_element_type(example.cpu().numpy().dtype) shape = PartialShape(example.shape) if name in inputs: named_dims = inputs[name] @@ -305,7 +311,7 @@ def get_submodels(model): "minicpmo", ] -SSM_MODELS = ["mamba", "falcon_mamba", "zamba2", "lfm2", "granitemoehybrid"] +SSM_MODELS = ["mamba", "falcon_mamba", "zamba2", "lfm2", "granitemoehybrid", "qwen3_5", "qwen3_5_text"] # All transformers, diffusers, timm and sentence transformers models that are supported via optimum-onnx OnnxConfigs but that have currently no test # TODO: add tests for all models that are compatible and remove support for all others @@ -504,6 +510,8 @@ def set_simplified_chat_template(ov_tokenizer_model, processor_chat_template=Non "esm", "levit", "llama4", + "qwen3_5", + "qwen3_5_text", ) diff --git a/optimum/intel/openvino/configuration.py b/optimum/intel/openvino/configuration.py index 2002e268ac..5a28b82c57 100644 --- a/optimum/intel/openvino/configuration.py +++ b/optimum/intel/openvino/configuration.py @@ -172,6 +172,22 @@ class OVQuantizationMethod(str, Enum): "ratio": 1.0, "quant_method": OVQuantizationMethod.AWQ, }, + "Qwen/Qwen3.5-3B": { + "bits": 4, + "sym": True, + "group_size": 128, + "ratio": 1.0, + "quant_method": OVQuantizationMethod.AWQ, + }, + "Qwen/Qwen3.5-8B": { + "bits": 4, + "sym": False, + "group_size": 128, + "ratio": 1.0, + 
"dataset": "wikitext2", + "quant_method": OVQuantizationMethod.AWQ, + "scale_estimation": True, + }, "openlm-research/open_llama_3b": {"bits": 4, "sym": False, "group_size": 64, "all_layers": True}, "openlm-research/open_llama_3b_v2": { "bits": 4, diff --git a/optimum/intel/openvino/modeling_base.py b/optimum/intel/openvino/modeling_base.py index a97416cea1..108c13a947 100644 --- a/optimum/intel/openvino/modeling_base.py +++ b/optimum/intel/openvino/modeling_base.py @@ -25,7 +25,10 @@ from transformers import GenerationConfig, PretrainedConfig from transformers.file_utils import add_start_docstrings from transformers.generation import GenerationMixin -from transformers.utils import is_offline_mode +try: + from transformers.utils import is_offline_mode +except ImportError: + from transformers.utils.hub import is_offline_mode from transformers.utils.hub import cached_file from optimum.exporters.base import ExportConfig @@ -264,7 +267,7 @@ def __init__( # some model configs may have issues with loading without parameters initialization try: misplaced_generation_parameters = self.config._get_non_default_generation_parameters() - except (KeyError, TypeError): + except (KeyError, TypeError, AttributeError): misplaced_generation_parameters = {} if len(misplaced_generation_parameters) > 0: logger.warning( diff --git a/optimum/intel/openvino/modeling_decoder.py b/optimum/intel/openvino/modeling_decoder.py index 3b95b5f276..f09b479422 100644 --- a/optimum/intel/openvino/modeling_decoder.py +++ b/optimum/intel/openvino/modeling_decoder.py @@ -1082,6 +1082,7 @@ def __init__( max_batch_size: Optional[int] = None, conv_states: Optional[List[torch.Tensor]] = None, ssm_states: Optional[List[torch.Tensor]] = None, + recurrent_states: Optional[List[torch.Tensor]] = None, key_cache: Optional[List[torch.Tensor]] = None, value_cache: Optional[List[torch.Tensor]] = None, ): @@ -1093,9 +1094,27 @@ def __init__( self.mamba_d_conv = getattr(config, "mamba_d_conv", None) self.mamba_expand = 
getattr(config, "mamba_expand", None) self.mamba_d_state = getattr(config, "mamba_d_state", None) - self.intermediate_size = config.intermediate_size + self.intermediate_size = getattr(config, "intermediate_size", None) self.conv_kernel_size = getattr(config, "conv_kernel", getattr(config, "mamba_d_conv", None)) - if config.model_type == "granitemoehybrid": + if config.model_type in ("qwen3_5", "qwen3_5_text"): + text_cfg = getattr(config, "text_config", config) + layer_types = text_cfg.layer_types + self.num_key_value_heads = text_cfg.num_key_value_heads + self.head_dim = text_cfg.head_dim + self.num_mamba_layers = layer_types.count("linear_attention") + self.num_attn_layers = layer_types.count("full_attention") + # Store linear attention parameters for state initialization + self._linear_num_key_heads = text_cfg.linear_num_key_heads + self._linear_key_head_dim = text_cfg.linear_key_head_dim + self._linear_num_value_heads = text_cfg.linear_num_value_heads + self._linear_value_head_dim = text_cfg.linear_value_head_dim + self._linear_conv_kernel_dim = text_cfg.linear_conv_kernel_dim + # Not applicable to qwen3_5 + self.mamba_ngroups = None + self.n_mamba_heads = None + self.mamba_headdim = None + self.ssm_state_size = None + elif config.model_type == "granitemoehybrid": layer_types = getattr(config, "layer_types", None) self.num_key_value_heads = getattr(config, "num_key_value_heads", None) self.head_dim = int(self.hidden_size / self.num_attention_heads) @@ -1123,7 +1142,14 @@ def __init__( if self.conv_states is None: self.conv_states = [] for _ in range(self.num_mamba_layers): - if ( + if hasattr(self, "_linear_conv_kernel_dim"): + # Qwen3.5 linear attention conv state: + # d_inner = key_dim * 2 + value_dim + key_dim = self._linear_key_head_dim * self._linear_num_key_heads + value_dim = self._linear_value_head_dim * self._linear_num_value_heads + d_inner = key_dim * 2 + value_dim + conv_state_shape = (self.max_batch_size, d_inner, self._linear_conv_kernel_dim) + 
elif ( self.mamba_ngroups and self.mamba_d_state and self.mamba_d_conv @@ -1146,25 +1172,46 @@ def __init__( self.ssm_states = ssm_states if self.ssm_states is None: self.ssm_states: List[torch.Tensor] = [] - for _ in range(self.num_mamba_layers): - if self.n_mamba_heads and self.mamba_headdim: - # Mamba2 block - ssm_state_shape = ( + if not hasattr(self, "_linear_conv_kernel_dim"): + # SSM states only apply to Mamba-based models, not Qwen3.5 + for _ in range(self.num_mamba_layers): + if self.n_mamba_heads and self.mamba_headdim: + # Mamba2 block + ssm_state_shape = ( + self.max_batch_size, + self.n_mamba_heads, + self.mamba_headdim, + self.ssm_state_size, + ) + else: + # Mamba block + ssm_state_shape = (self.max_batch_size, self.intermediate_size, self.ssm_state_size) + + ssm_state: torch.Tensor = torch.zeros( + ssm_state_shape, + device=self.device, + dtype=dtype, + ) + self.ssm_states.append(ssm_state) + + # Recurrent states for Qwen3.5 linear attention layers (gated delta rule K^T V accumulation) + self.recurrent_states = recurrent_states + if self.recurrent_states is None: + self.recurrent_states: List[torch.Tensor] = [] + if hasattr(self, "_linear_conv_kernel_dim"): + for _ in range(self.num_mamba_layers): + recurrent_state_shape = ( self.max_batch_size, - self.n_mamba_heads, - self.mamba_headdim, - self.ssm_state_size, + self._linear_num_key_heads, + self._linear_key_head_dim, + self._linear_value_head_dim, ) - else: - # Mamba block - ssm_state_shape = (self.max_batch_size, self.intermediate_size, self.ssm_state_size) - - ssm_state: torch.Tensor = torch.zeros( - ssm_state_shape, - device=self.device, - dtype=dtype, - ) - self.ssm_states.append(ssm_state) + recurrent_state: torch.Tensor = torch.zeros( + recurrent_state_shape, + device=self.device, + dtype=dtype, + ) + self.recurrent_states.append(recurrent_state) self.key_cache = key_cache if self.key_cache is None: @@ -1245,6 +1292,7 @@ def __init__( self.key_cache_names = [] self.value_cache_names = [] 
self.ssm_cache_names = [] + self.recurrent_cache_names = [] self.conv_cache_names = [] super().__init__( @@ -1261,6 +1309,9 @@ def __init__( self.key_cache_input_names = sorted([key for key in self.input_names if "cache_params.past.key" in key]) self.value_cache_input_names = sorted([key for key in self.input_names if "cache_params.past.value" in key]) self.ssm_cache_input_names = sorted([key for key in self.input_names if "cache_params.past.ssm" in key]) + self.recurrent_cache_input_names = sorted( + [key for key in self.input_names if "cache_params.past.recurrent" in key] + ) self.conv_cache_input_names = sorted([key for key in self.input_names if "cache_params.past.conv" in key]) self.key_cache_output_names = sorted([key for key in self.output_names if "cache_params.present.key" in key]) @@ -1268,6 +1319,9 @@ def __init__( [key for key in self.output_names if "cache_params.present.value" in key] ) self.ssm_cache_output_names = sorted([key for key in self.output_names if "cache_params.present.ssm" in key]) + self.recurrent_cache_output_names = sorted( + [key for key in self.output_names if "cache_params.present.recurrent" in key] + ) self.conv_cache_output_names = sorted([key for key in self.output_names if "cache_params.present.conv" in key]) if hasattr(config, "conv_kernel") and config.conv_kernel is not None: @@ -1280,17 +1334,23 @@ def compile(self): super().compile() if is_first_time_compile and self.stateful: for state in self.request.query_state(): - if "cache_params.present.key" in state.name: - self.key_cache_names.append(state.name) - elif "cache_params.present.value" in state.name: - self.value_cache_names.append(state.name) - elif "cache_params.present.ssm" in state.name: - self.ssm_cache_names.append(state.name) - elif "cache_params.present.conv" in state.name: - self.conv_cache_names.append(state.name) + name = state.name + # Match both "past" and "present" prefixes since the variable_id + # may use either convention depending on the stateful 
conversion method + if ".key" in name and ("cache_params.present.key" in name or "cache_params.past.key" in name): + self.key_cache_names.append(name) + elif ".value" in name and ("cache_params.present.value" in name or "cache_params.past.value" in name): + self.value_cache_names.append(name) + elif "cache_params.present.ssm" in name or "cache_params.past.ssm" in name: + self.ssm_cache_names.append(name) + elif "cache_params.present.recurrent" in name or "cache_params.past.recurrent" in name: + self.recurrent_cache_names.append(name) + elif "cache_params.present.conv" in name or "cache_params.past.conv" in name: + self.conv_cache_names.append(name) self.key_cache_names = sorted(self.key_cache_names) self.value_cache_names = sorted(self.value_cache_names) self.ssm_cache_names = sorted(self.ssm_cache_names) + self.recurrent_cache_names = sorted(self.recurrent_cache_names) self.conv_cache_names = sorted(self.conv_cache_names) @staticmethod @@ -1310,20 +1370,21 @@ def prepare_inputs( ) -> Dict: if kwargs.get("past_key_values") is not None: raise ValueError("`past_key_values` input is not supported for `OVModelWithMambaForCausalLM`") - if kwargs.get("position_ids") is not None: - raise ValueError("`position_ids` input is not supported for `OVModelWithMambaForCausalLM`") inputs = {"input_ids": input_ids} if "cache_position" in self.input_names: if cache_position is None: # initialize it as for prefill stage - cache_position = torch.arange(0, self.config.conv_kernel) + cache_position = torch.arange(0, self.conv_kernel) inputs["cache_position"] = cache_position if "attention_mask" in self.input_names: if attention_mask is None: # during decoding stage it must be a tensor of ones attention_mask = torch.ones_like(input_ids, dtype=torch.int64) inputs["attention_mask"] = attention_mask + position_ids = kwargs.get("position_ids", None) + if "position_ids" in self.input_names and position_ids is not None: + inputs["position_ids"] = position_ids if self.stateful and cache_params 
is None: # this is prefill step, reset all states @@ -1335,6 +1396,7 @@ def prepare_inputs( cache_params = OVCacheWithMambaStates(self.config, input_ids.shape[0]) ssm_cache = cache_params.ssm_states + recurrent_cache = cache_params.recurrent_states conv_cache = cache_params.conv_states key_cache = cache_params.key_cache value_cache = cache_params.value_cache @@ -1342,6 +1404,7 @@ def prepare_inputs( inputs.update(zip(self.key_cache_input_names, key_cache)) inputs.update(zip(self.value_cache_input_names, value_cache)) inputs.update(zip(self.ssm_cache_input_names, ssm_cache)) + inputs.update(zip(self.recurrent_cache_input_names, recurrent_cache)) inputs.update(zip(self.conv_cache_input_names, conv_cache)) # prepare beam_idx input that is required for hybrid models with both KV cache and Mamba states @@ -1361,19 +1424,50 @@ def forward( **kwargs, ): self.compile() - inputs = self.prepare_inputs(input_ids, attention_mask, cache_params, use_cache, cache_position, **kwargs) - self.request.start_async(inputs, share_inputs=True) - self.request.wait() - logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device) + seq_len = input_ids.shape[1] if input_ids is not None else 1 + + # For models exported in decode mode (seq_len=1), process prefill + # tokens one at a time. This is needed for hybrid models like Qwen3.5 + # where the model graph contains hardcoded shapes for seq_len=1. + if seq_len > 1 and self.stateful: + # Reset state once at the start of prefill (NOT per-token). + # We avoid calling prepare_inputs in the loop since it would + # reset the OV state on every token (cache_params=None triggers reset). 
+ if cache_params is None: + if self.request is not None: + self.request.reset_state() + self._past_length = 0 + batch_size = input_ids.shape[0] + for t in range(seq_len): + token = input_ids[:, t : t + 1] + inputs = {"input_ids": token} + if "position_ids" in self.input_names: + inputs["position_ids"] = torch.tensor([[t]], dtype=torch.int64) + if "beam_idx" in self.input_names: + inputs["beam_idx"] = np.arange(batch_size, dtype=int) + self.request.start_async(inputs, share_inputs=True) + self.request.wait() + logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device) + else: + position_ids = torch.tensor([[self._past_length]], dtype=torch.int64) if hasattr(self, "_past_length") else None + inputs = self.prepare_inputs( + input_ids, attention_mask, cache_params, use_cache, cache_position, + position_ids=position_ids, **kwargs, + ) + self.request.start_async(inputs, share_inputs=True) + self.request.wait() + logits = torch.from_numpy(self.request.get_tensor("logits").data).to(self.device) ssm_states = None + recurrent_states = None conv_states = None key_cache = None value_cache = None if self.stateful: self._past_length += input_ids.shape[1] ssm_states = [None] * len(self.ssm_cache_names) + recurrent_states = [None] * len(self.recurrent_cache_names) conv_states = [None] * len(self.conv_cache_names) key_cache = [None] * len(self.key_cache_names) value_cache = [None] * len(self.value_cache_names) @@ -1381,6 +1475,9 @@ def forward( if "cache_params.past.ssm" in state.name: idx = int(state.name.rsplit(".", 1)[-1]) ssm_states[idx] = state.state.data + elif "cache_params.past.recurrent" in state.name: + idx = int(state.name.rsplit(".", 1)[-1]) + recurrent_states[idx] = state.state.data elif "cache_params.past.conv" in state.name: idx = int(state.name.rsplit(".", 1)[-1]) conv_states[idx] = state.state.data @@ -1392,6 +1489,9 @@ def forward( value_cache[idx] = state.state.data elif not self.stateful and use_cache: ssm_states = 
[self.request.get_tensor(key).data for key in self.ssm_cache_output_names] + recurrent_states = [ + self.request.get_tensor(key).data for key in self.recurrent_cache_output_names + ] conv_states = [self.request.get_tensor(key).data for key in self.conv_cache_output_names] key_cache = [self.request.get_tensor(key).data for key in self.key_cache_output_names] value_cache = [self.request.get_tensor(key).data for key in self.value_cache_output_names] @@ -1401,6 +1501,7 @@ def forward( batch_size=input_ids.shape[0], conv_states=conv_states, ssm_states=ssm_states, + recurrent_states=recurrent_states, key_cache=key_cache, value_cache=value_cache, ) @@ -1418,6 +1519,8 @@ def _update_model_kwargs_for_generation( model_kwargs["cache_position"] = model_kwargs["cache_position"][-1:] + num_new_tokens if "attention_mask" in model_kwargs: + config = getattr(self, "config", None) + model_type = getattr(config, "model_type", "") attention_mask = model_kwargs["attention_mask"] model_kwargs["attention_mask"] = torch.cat( [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1 @@ -1449,8 +1552,8 @@ def prepare_inputs_for_generation( # decoding stage so it takes the last token input_ids = input_ids[:, -1].unsqueeze(-1) - if self.config.model_type not in ["lfm2", "granitemoehybrid"]: - # LFM2 and GraniteMoeHybrid (Granite-4.0) require the attention mask + if self.config.model_type not in ["lfm2", "granitemoehybrid", "qwen3_5", "qwen3_5_text"]: + # LFM2, GraniteMoeHybrid (Granite-4.0), and Qwen3.5 require the attention mask # to be the length of the full context, so default mask from OVModelForCausalLM needs to be used. # Other models like Mamba typically do not require an attention_mask # for the decoding step after the first token so use attention mask of ones. 
diff --git a/optimum/intel/openvino/modeling_open_clip.py b/optimum/intel/openvino/modeling_open_clip.py index 2e2ee2d63c..1918c95f0f 100644 --- a/optimum/intel/openvino/modeling_open_clip.py +++ b/optimum/intel/openvino/modeling_open_clip.py @@ -31,7 +31,10 @@ from transformers.file_utils import add_start_docstrings from transformers.modeling_outputs import ModelOutput from transformers.models.clip.modeling_clip import CLIPOutput -from transformers.utils import is_offline_mode +try: + from transformers.utils import is_offline_mode +except ImportError: + from transformers.utils.hub import is_offline_mode from optimum.exporters.tasks import TasksManager diff --git a/optimum/intel/openvino/modeling_seq2seq.py b/optimum/intel/openvino/modeling_seq2seq.py index e6e99ffd56..2afd2f6c95 100644 --- a/optimum/intel/openvino/modeling_seq2seq.py +++ b/optimum/intel/openvino/modeling_seq2seq.py @@ -27,12 +27,15 @@ AutoConfig, AutoModelForSeq2SeqLM, AutoModelForSpeechSeq2Seq, - AutoModelForVision2Seq, GenerationConfig, Pix2StructForConditionalGeneration, PretrainedConfig, WhisperForConditionalGeneration, ) +try: + from transformers import AutoModelForVision2Seq +except ImportError: + AutoModelForVision2Seq = None from transformers.file_utils import add_start_docstrings, add_start_docstrings_to_model_forward from transformers.generation import GenerationMixin from transformers.modeling_outputs import BaseModelOutput, Seq2SeqLMOutput diff --git a/optimum/intel/openvino/utils.py b/optimum/intel/openvino/utils.py index 818eb41726..b22c5adc96 100644 --- a/optimum/intel/openvino/utils.py +++ b/optimum/intel/openvino/utils.py @@ -32,7 +32,19 @@ from openvino import Type as OVType from packaging.version import Version from transformers import AutoTokenizer, CLIPTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast -from transformers.onnx.utils import ParameterFormat, compute_serialized_parameters_size +try: + from transformers.onnx.utils import ParameterFormat, 
compute_serialized_parameters_size +except (ImportError, ModuleNotFoundError): + # transformers 5.x removed transformers.onnx; inline the logic + import enum + + class ParameterFormat(enum.Enum): + Float = 4 + Float16 = 2 + Int8 = 1 + + def compute_serialized_parameters_size(num_parameters: int, dtype: ParameterFormat) -> int: + return num_parameters * dtype.value from optimum.intel.utils.import_utils import is_torch_version diff --git a/optimum/intel/utils/import_utils.py b/optimum/intel/utils/import_utils.py index 3ad9877a82..bdc622ea8a 100644 --- a/optimum/intel/utils/import_utils.py +++ b/optimum/intel/utils/import_utils.py @@ -84,11 +84,9 @@ from openvino import get_version version = get_version() - # avoid invalid format + # avoid invalid format: strip dev/commit suffixes (e.g. "2026.0.0-17740-abc" -> "2026.0.0") if "-" in version: - ov_major_version, dev_info = version.split("-", 1) - commit_id = dev_info.split("-")[0] - version = f"{ov_major_version}-{commit_id}" + version = version.split("-")[0] _openvino_version = version except ImportError: _openvino_available = False @@ -425,7 +423,8 @@ def is_openvino_tokenizers_version(operation: str, version: str): except importlib_metadata.PackageNotFoundError: pass - tokenizers_version = tokenizers_version[: len("2025.0.0.0")] + # Take only the first 4 version components (e.g. 
"2026.10.0.0.dev0" -> "2026.10.0.0") +    tokenizers_version = ".".join(tokenizers_version.split(".")[:4]) return compare_versions(parse(tokenizers_version), operation, version) diff --git a/optimum/intel/utils/modeling_utils.py b/optimum/intel/utils/modeling_utils.py index cab9e5efa3..561b747cb7 100644 --- a/optimum/intel/utils/modeling_utils.py +++ b/optimum/intel/utils/modeling_utils.py @@ -23,7 +23,15 @@ from typing import Dict, List, Optional, Type, Union import torch -from huggingface_hub import HfApi, HfFolder, hf_hub_download +from huggingface_hub import HfApi, hf_hub_download +try: +    from huggingface_hub import HfFolder +except ImportError: +    from huggingface_hub import get_token as _get_token + class HfFolder: +    @staticmethod +    def get_token(): +        return _get_token() from huggingface_hub.constants import HUGGINGFACE_HUB_CACHE from huggingface_hub.hf_api import file_exists from transformers import CLIPConfig, PretrainedConfig, PreTrainedModel diff --git a/setup.py b/setup.py index b86c176463..5561d3569b 100644 --- a/setup.py +++ b/setup.py @@ -28,8 +28,8 @@ INSTALL_REQUIRE = [ "torch>=2.1", - "optimum-onnx@git+https://github.com/huggingface/optimum-onnx.git@main", - "transformers>=4.45,<4.58", + "optimum-onnx", + "transformers>=4.45", "setuptools", "nncf>=2.19.0", "openvino>=2025.4.0",