diff --git a/src/common/transformations/src/transformations/common_optimizations/fuse_moe_experts.cpp b/src/common/transformations/src/transformations/common_optimizations/fuse_moe_experts.cpp index a7bc778bc23b6d..3da0c0a5af749c 100644 --- a/src/common/transformations/src/transformations/common_optimizations/fuse_moe_experts.cpp +++ b/src/common/transformations/src/transformations/common_optimizations/fuse_moe_experts.cpp @@ -39,6 +39,7 @@ #include "openvino/op/topk.hpp" #include "openvino/op/transpose.hpp" #include "openvino/op/unsqueeze.hpp" +#include "openvino/op/util/op_types.hpp" #include "openvino/op/util/shape_of_base.hpp" #include "openvino/pass/manager.hpp" #include "openvino/pass/pattern/matcher.hpp" @@ -47,6 +48,7 @@ #include "openvino/pass/pattern/op/wrap_type.hpp" #include "transformations/rt_info/decompression.hpp" #include "transformations/utils/utils.hpp" + namespace ov { namespace pass { @@ -396,7 +398,14 @@ ov::pass::FuseMOEExperts::FuseMOEExperts() : MultiMatcher("FuseMOEExperts") { } auto fused = std::make_shared(inputs, 0); - fused->get_rt_info()["postponed_constant"] = true; + if (std::all_of(inputs.begin(), inputs.end(), [](const auto& input) { + return ov::op::util::is_constant(input.get_node()); + })) { + // postponed_constant attribute is needed to perform constant folding on serialization step + fused->get_rt_info()["postponed_constant"] = true; + // disable constant folding here to postpone it to serialization step + ov::pass::disable_constant_folding(fused); + } if (needs_decompress) { auto convert = std::make_shared(fused, target_type); ov::mark_as_decompression(convert);