-
Notifications
You must be signed in to change notification settings - Fork 260
Description
Describe the bug
Following the example, the model was exported successfully, but it cannot be loaded in a .NET environment.
Error message:
Load model from C:\Models\SLM\context_ctx.onnx failed:E:\_work\1\s\onnxruntime\core\graph\model.cc:182 onnxruntime::Model::Model Unsupported model IR version: 11, max supported IR version: 10
To Reproduce
Convert the model by following the example; after the conversion completes successfully, load the resulting model with OnnxRuntimeGenAI.QNN.
Expected behavior
The model loads and runs successfully.
Olive config
{
"input_model": { "type": "HfModel", "model_path": "microsoft/Phi-4-mini-instruct" },
"systems": {
"qnn_system": {
"type": "PythonEnvironment",
"python_environment_path": "C:\\Users\\SSD.TT\\Downloads\\Olive-main\\Olive-main\\.venv\\Scripts",
"accelerators": [ { "execution_providers": [ "QNNExecutionProvider" ] } ]
}
},
"data_configs": [
{
"name": "wikitext2_train_joined",
"type": "HuggingfaceContainer",
"load_dataset_config": { "data_name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "train" },
"pre_process_data_config": {
"strategy": "join",
"add_special_tokens": false,
"max_seq_len": 4096,
"max_samples": 128
}
},
{
"name": "wikitext2_train_act",
"type": "HuggingfaceContainer",
"load_dataset_config": { "data_name": "wikitext", "subset": "wikitext-2-raw-v1", "split": "train" },
"pre_process_data_config": {
"strategy": "line-by-line",
"add_special_tokens": true,
"max_samples": 256,
"max_seq_len": 1024
}
}
],
"passes": {
"q": { "type": "QuaRot" },
"g": {
"type": "GptqModel",
"bits": 4,
"sym": true,
"group_size": -1,
"lm_head": false,
"device": "cuda",
"data_config": "wikitext2_train_joined"
},
"cs": { "type": "CaptureSplitInfo", "num_splits": 4, "unique_embeds_lm_head_splits": true },
"mb": {
"type": "ModelBuilder",
"precision": "int4",
"int4_block_size": 32,
"int4_accuracy_level": 4,
"int4_op_types_to_quantize": [ "MatMul", "Gather" ]
},
"mq": {
"type": "MatMulNBitsToQDQ",
"use_int4": true,
"add_zero_point": true,
"nodes_to_exclude": [ "/lm_head/MatMul_Q4" ],
"save_as_external_data": true
},
"gs": {
"type": "GraphSurgeries",
"surgeries": [
{ "surgeon": "RemoveRopeMultiCache" },
{ "surgeon": "AttentionMaskToSequenceLengths" },
{ "surgeon": "SimplifiedLayerNormToL2Norm" }
],
"save_as_external_data": true
},
"sq": {
"type": "OnnxStaticQuantization",
"data_config": "wikitext2_train_act",
"activation_type": "uint16",
"precision": "uint8",
"calibration_providers": [ "CUDAExecutionProvider" ],
"quant_preprocess": true,
"op_types_to_exclude": [ "GatherBlockQuantized", "GroupQueryAttention", "MatMulNBits" ],
"save_as_external_data": true
},
"sp": { "type": "SplitModel" },
"st": { "type": "StaticLLM", "batch_size": 1, "context_length": 64 },
"cb": {
"type": "EPContextBinaryGenerator",
"provider_options": {
"htp_performance_mode": "burst",
"htp_graph_finalization_optimization_mode": "3",
"soc_model": "60"
},
"weight_sharing": true
},
"cp": { "type": "ComposeOnnxModels" }
},
"target": "qnn_system",
"log_severity_level": 1,
"output_dir": "models/phi4_mini_instruct",
"cache_dir": "cache",
"no_artifacts": true
}
Other information
OS: Windows 11 Pro 26100.4946
Olive version: 0.10.0.dev0
ONNXRuntime package and version: onnxruntime-gpu 1.21.1 (qdq environment); onnxruntime-qnn 1.23.0.dev20250815003 (QNN environment)
Transformers package version: transformers 4.53.0
.NET 8
OnnxRuntimeGenAI.QNN:0.9.0
OnnxRuntime.QNN:1.22.2
Additional context
(qdq) PS C:\Users\SSD.TT\Downloads\Olive-main\Olive-main>
pip list
Package Version
accelerate 1.10.0
aiohappyeyeballs 2.6.1
aiohttp 3.12.15
aiosignal 1.4.0
alembic 1.16.4
annotated-types 0.7.0
attrs 25.3.0
auto_gptq 0.8.0.dev0
autopep8 2.3.2
certifi 2025.8.3
cffi 1.17.1
charset-normalizer 3.4.3
colorama 0.4.6
coloredlogs 15.0.1
colorlog 6.9.0
datasets 4.0.0
device-smi 0.4.1
dill 0.3.8
filelock 3.18.0
flatbuffers 25.2.10
frozenlist 1.7.0
fsspec 2025.3.0
gekko 1.3.0
gptqmodel 4.0.0.dev0
greenlet 3.2.4
hf_transfer 0.1.9
huggingface-hub 0.34.4
humanfriendly 10.0
idna 3.10
iniconfig 2.1.0
Jinja2 3.1.6
lightning-utilities 0.15.2
logbar 0.0.4
Mako 1.3.10
MarkupSafe 3.0.2
ml_dtypes 0.5.3
mpmath 1.3.0
multidict 6.6.4
multiprocess 0.70.16
networkx 3.5
numpy 2.3.2
olive-ai 0.10.0.dev0
onnx 1.18.0
onnx-ir 0.1.6
onnxruntime-genai-cuda 0.7.1
onnxruntime-gpu 1.21.1
onnxscript 0.3.2
optimum 1.27.0
optuna 4.4.0
packaging 25.0
pandas 2.3.1
peft 0.17.0
pillow 11.3.0
pip 25.2
pluggy 1.6.0
propcache 0.3.2
protobuf 6.31.1
psutil 7.0.0
pyarrow 21.0.0
pycodestyle 2.14.0
pycparser 2.22
pydantic 2.11.7
pydantic_core 2.33.2
Pygments 2.19.2
pyreadline3 3.5.4
pytest 8.4.1
python-dateutil 2.9.0.post0
pytorch-lightning 2.5.3
pytz 2025.2
PyYAML 6.0.2
random_word 1.0.13
regex 2025.7.34
requests 2.32.4
rouge 1.0.1
safetensors 0.6.2
sentencepiece 0.2.1
setuptools 80.9.0
six 1.17.0
soundfile 0.13.1
SQLAlchemy 2.0.43
sympy 1.14.0
tabulate 0.9.0
threadpoolctl 3.6.0
tokenicer 0.0.4
tokenizers 0.21.4
torch 2.7.1+cu128
torchaudio 2.7.1+cu128
torchmetrics 1.8.1
torchvision 0.22.1+cu128
tqdm 4.67.1
transformers 4.53.0
typing_extensions 4.14.1
typing-inspection 0.4.1
tzdata 2025.2
urllib3 2.5.0
xxhash 3.5.0
yarl 1.20.1
QNN environment (.venv):
(.venv) PS C:\Users\SSD.TT\Downloads\Olive-main\Olive-main> pip list
Package Version
aiohappyeyeballs 2.6.1
aiohttp 3.12.15
aiosignal 1.4.0
alembic 1.16.4
annotated-types 0.7.0
attrs 25.3.0
certifi 2025.8.3
charset-normalizer 3.4.3
colorama 0.4.6
coloredlogs 15.0.1
colorlog 6.9.0
datasets 4.0.0
dill 0.3.8
filelock 3.19.1
flatbuffers 25.2.10
frozenlist 1.7.0
fsspec 2025.3.0
greenlet 3.2.4
huggingface-hub 0.34.4
humanfriendly 10.0
idna 3.10
Jinja2 3.1.6
lightning-utilities 0.15.2
Mako 1.3.10
MarkupSafe 3.0.2
ml_dtypes 0.5.3
mpmath 1.3.0
multidict 6.6.4
multiprocess 0.70.16
networkx 3.5
numpy 2.3.2
olive-ai 0.10.0.dev0
onnx 1.18.0
onnx-ir 0.1.6
onnxruntime-qnn 1.23.0.dev20250815003
onnxscript 0.3.2
optimum 1.27.0
optuna 4.4.0
packaging 25.0
pandas 2.3.1
pip 25.2
propcache 0.3.2
protobuf 6.32.0
pyarrow 21.0.0
pydantic 2.11.7
pydantic_core 2.33.2
pyreadline3 3.5.4
python-dateutil 2.9.0.post0
pytz 2025.2
PyYAML 6.0.2
regex 2025.7.34
requests 2.32.4
safetensors 0.6.2
setuptools 80.9.0
six 1.17.0
SQLAlchemy 2.0.43
sympy 1.14.0
tokenizers 0.21.4
torch 2.8.0
torchmetrics 1.8.1
tqdm 4.67.1
transformers 4.53.0
typing_extensions 4.14.1
typing-inspection 0.4.1
tzdata 2025.2
urllib3 2.5.0
xxhash 3.5.0
yarl 1.20.1