Skip to content

Following the example, the model was exported successfully, but it cannot run in a .NET environment #2103

@nnbw-liu

Description

@nnbw-liu

Describe the bug
Following the example, the model was exported successfully, but it cannot be loaded or run in a .NET environment.
Error prompt:

Load model from C:\Models\SLM\context_ctx.onnx failed:E:\_work\1\s\onnxruntime\core\graph\model.cc:182 onnxruntime::Model::Model Unsupported model IR version: 11, max supported IR version: 10

To Reproduce
Convert the model by following the example; after the conversion completes successfully, use OnnxRuntimeGenAI.QNN to load the model.

Expected behavior
Successfully loaded the model and ran it

Olive config

{
"input_model": { "type": "HfModel", "model_path": "microsoft/Phi-4-mini-instruct" },
"systems": {
"qnn_system": {
"type": "PythonEnvironment",
"python_environment_path": "C:\\Users\\SSD.TT\\Downloads\\Olive-main\\Olive-main\\.venv\\Scripts",
"accelerators": [ { "execution_providers": [ "QNNExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "wikitext2_train_joined",
"type": "HuggingfaceContainer",
"load_dataset_config": { "data_name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "train" },
"pre_process_data_config": {
"strategy": "join",
"add_special_tokens": false,
"max_seq_len": 4096,
"max_samples": 128
}
},
{
"name": "wikitext2_train_act",
"type": "HuggingfaceContainer",
"load_dataset_config": { "data_name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "train" },
"pre_process_data_config": {
"strategy": "line-by-line",
"add_special_tokens": true,
"max_samples": 256,
"max_seq_len": 1024
}
}
],
"passes": {
"q": { "type": "QuaRot" },
"g": {
"type": "GptqModel",
"bits": 4,
"sym": true,
"group_size": -1,
"lm_head": false,
"device": "cuda",
"data_config": "wikitext2_train_joined"
},
"cs": { "type": "CaptureSplitInfo", "num_splits": 4, "unique_embeds_lm_head_splits": true },
"mb": {
"type": "ModelBuilder",
"precision": "int4",
"int4_block_size": 32,
"int4_accuracy_level": 4,
"int4_op_types_to_quantize": [ "MatMul", "Gather" ]
},
"mq": {
"type": "MatMulNBitsToQDQ",
"use_int4": true,
"add_zero_point": true,
"nodes_to_exclude": [ "/lm_head/MatMul_Q4" ],
"save_as_external_data": true
},
"gs": {
"type": "GraphSurgeries",
"surgeries": [
{ "surgeon": "RemoveRopeMultiCache" },
{ "surgeon": "AttentionMaskToSequenceLengths" },
{ "surgeon": "SimplifiedLayerNormToL2Norm" }
],
"save_as_external_data": true
},
"sq": {
"type": "OnnxStaticQuantization",
"data_config": "wikitext2_train_act",
"activation_type": "uint16",
"precision": "uint8",
"calibration_providers": [ "CUDAExecutionProvider" ],
"quant_preprocess": true,
"op_types_to_exclude": [ "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
"save_as_external_data": true
},
"sp": { "type": "SplitModel" },
"st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
"cb": {
"type": "EPContextBinaryGenerator",
"provider_options": {
"htp_performance_mode": "burst",
"htp_graph_finalization_optimization_mode": "3",
"soc_model": "60"
},
"weight_sharing": true
},
"cp": { "type": "ComposeOnnxModels" }
},
"target": "qnn_system",
"log_severity_level": 1,
"output_dir": "models/phi4_mini_instruct",
"cache_dir": "cache",
"no_artifacts": true
}

Other information
OS: Windows 11 Pro 26100.4946
Olive version: [0.10.0.dev0]
ONNXRuntime package and version: [e.g. onnxruntime-gpu: 1.21.1]
Transformers package version: [e.g. transformers 4.53.0]

.NET 8
OnnxRuntimeGenAI.QNN:0.9.0
OnnxRuntime.QNN:1.22.2

Additional context
(qdq) PS C:\Users\SSD.TT\Downloads\Olive-main\Olive-main>
pip list
Package Version


accelerate 1.10.0
aiohappyeyeballs 2.6.1
aiohttp 3.12.15
aiosignal 1.4.0
alembic 1.16.4
annotated-types 0.7.0
attrs 25.3.0
auto_gptq 0.8.0.dev0
autopep8 2.3.2
certifi 2025.8.3
cffi 1.17.1
charset-normalizer 3.4.3
colorama 0.4.6
coloredlogs 15.0.1
colorlog 6.9.0
datasets 4.0.0
device-smi 0.4.1
dill 0.3.8
filelock 3.18.0
flatbuffers 25.2.10
frozenlist 1.7.0
fsspec 2025.3.0
gekko 1.3.0
gptqmodel 4.0.0.dev0
greenlet 3.2.4
hf_transfer 0.1.9
huggingface-hub 0.34.4
humanfriendly 10.0
idna 3.10
iniconfig 2.1.0
Jinja2 3.1.6
lightning-utilities 0.15.2
logbar 0.0.4
Mako 1.3.10
MarkupSafe 3.0.2
ml_dtypes 0.5.3
mpmath 1.3.0
multidict 6.6.4
multiprocess 0.70.16
networkx 3.5
numpy 2.3.2
olive-ai 0.10.0.dev0
onnx 1.18.0
onnx-ir 0.1.6
onnxruntime-genai-cuda 0.7.1
onnxruntime-gpu 1.21.1
onnxscript 0.3.2
optimum 1.27.0
optuna 4.4.0
packaging 25.0
pandas 2.3.1
peft 0.17.0
pillow 11.3.0
pip 25.2
pluggy 1.6.0
propcache 0.3.2
protobuf 6.31.1
psutil 7.0.0
pyarrow 21.0.0
pycodestyle 2.14.0
pycparser 2.22
pydantic 2.11.7
pydantic_core 2.33.2
Pygments 2.19.2
pyreadline3 3.5.4
pytest 8.4.1
python-dateutil 2.9.0.post0
pytorch-lightning 2.5.3
pytz 2025.2
PyYAML 6.0.2
random_word 1.0.13
regex 2025.7.34
requests 2.32.4
rouge 1.0.1
safetensors 0.6.2
sentencepiece 0.2.1
setuptools 80.9.0
six 1.17.0
soundfile 0.13.1
SQLAlchemy 2.0.43
sympy 1.14.0
tabulate 0.9.0
threadpoolctl 3.6.0
tokenicer 0.0.4
tokenizers 0.21.4
torch 2.7.1+cu128
torchaudio 2.7.1+cu128
torchmetrics 1.8.1
torchvision 0.22.1+cu128
tqdm 4.67.1
transformers 4.53.0
typing_extensions 4.14.1
typing-inspection 0.4.1
tzdata 2025.2
urllib3 2.5.0
xxhash 3.5.0
yarl 1.20.1

qnn env
(.venv) PS C:\Users\SSD.TT\Downloads\Olive-main\Olive-main> pip list
Package Version


aiohappyeyeballs 2.6.1
aiohttp 3.12.15
aiosignal 1.4.0
alembic 1.16.4
annotated-types 0.7.0
attrs 25.3.0
certifi 2025.8.3
charset-normalizer 3.4.3
colorama 0.4.6
coloredlogs 15.0.1
colorlog 6.9.0
datasets 4.0.0
dill 0.3.8
filelock 3.19.1
flatbuffers 25.2.10
frozenlist 1.7.0
fsspec 2025.3.0
greenlet 3.2.4
huggingface-hub 0.34.4
humanfriendly 10.0
idna 3.10
Jinja2 3.1.6
lightning-utilities 0.15.2
Mako 1.3.10
MarkupSafe 3.0.2
ml_dtypes 0.5.3
mpmath 1.3.0
multidict 6.6.4
multiprocess 0.70.16
networkx 3.5
numpy 2.3.2
olive-ai 0.10.0.dev0
onnx 1.18.0
onnx-ir 0.1.6
onnxruntime-qnn 1.23.0.dev20250815003
onnxscript 0.3.2
optimum 1.27.0
optuna 4.4.0
packaging 25.0
pandas 2.3.1
pip 25.2
propcache 0.3.2
protobuf 6.32.0
pyarrow 21.0.0
pydantic 2.11.7
pydantic_core 2.33.2
pyreadline3 3.5.4
python-dateutil 2.9.0.post0
pytz 2025.2
PyYAML 6.0.2
regex 2025.7.34
requests 2.32.4
safetensors 0.6.2
setuptools 80.9.0
six 1.17.0
SQLAlchemy 2.0.43
sympy 1.14.0
tokenizers 0.21.4
torch 2.8.0
torchmetrics 1.8.1
tqdm 4.67.1
transformers 4.53.0
typing_extensions 4.14.1
typing-inspection 0.4.1
tzdata 2025.2
urllib3 2.5.0
xxhash 3.5.0
yarl 1.20.1

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions