Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
169 changes: 132 additions & 37 deletions xinference/model/image/model_spec.json
Original file line number Diff line number Diff line change
Expand Up @@ -782,15 +782,6 @@
"default_model_config": {
"variant": "fp16"
},
"virtualenv": {
"packages": [
"#diffusers_dependencies# ; #engine# == \"diffusers\"",
"transformers>=4.51.0",
"#system_torch#",
"#system_numpy#"
],
"no_build_isolation": true
},
"model_src": {
"huggingface": {
"model_id": "Kwai-Kolors/Kolors-diffusers",
Expand All @@ -802,7 +793,16 @@
}
},
"updated_at": 1769418366,
"featured": false
"featured": false,
"virtualenv": {
"packages": [
"#diffusers_dependencies# ; #engine# == \"diffusers\"",
"transformers>=4.51.0",
"#system_torch#",
"#system_numpy#"
],
"no_build_isolation": true
}
},
{
"version": 2,
Expand All @@ -814,15 +814,6 @@
"default_model_config": {
"torch_dtype": "bfloat16"
},
"virtualenv": {
"packages": [
"#diffusers_dependencies# ; #engine# == \"diffusers\"",
"transformers>=4.51.0",
"#system_torch#",
"#system_numpy#"
],
"no_build_isolation": true
},
"model_src": {
"huggingface": {
"model_id": "THUDM/CogView4-6B",
Expand All @@ -834,7 +825,16 @@
}
},
"updated_at": 1769418366,
"featured": true
"featured": true,
"virtualenv": {
"packages": [
"#diffusers_dependencies# ; #engine# == \"diffusers\"",
"transformers>=4.51.0",
"#system_torch#",
"#system_numpy#"
],
"no_build_isolation": true
}
},
{
"version": 2,
Expand Down Expand Up @@ -1361,6 +1361,69 @@
"updated_at": 1766387632,
"featured": true
},
{
"version": 2,
"model_name": "dots.ocr",
"model_family": "ocr",
"model_ability": [
"ocr"
],
"virtualenv": {
"packages": [
"transformers==4.51.3",
"flash-attn==2.8.0.post2",
"#system_torch#",
"#system_numpy#"
]
},
"model_src": {
"huggingface": {
"model_id": "rednote-hilab/dots.ocr",
"model_revision": "ba670c5dcf03ff4e02015558c95b4042f5dce069"
},
"modelscope": {
"model_id": "rednote-hilab/dots.ocr",
"model_revision": "master"
}
},
"featured": false,
"is_enterprise": true,
"updated_at": 1766547661
},
{
"version": 2,
"model_name": "MinerU2.5",
"model_family": "docanalyze",
"model_ability": [
"docanalyze"
],
"virtualenv": {
"packages": [
"vllm>=0.10.1",
"pypdfium2>=4.30.0",
"boto3>=1.28.43",
"pypdf>=5.6.0",
"reportlab",
"onnxruntime>1.17.0",
"beautifulsoup4>=4.13.5,<5",
"#system_torch#",
"#system_numpy#"
]
},
"model_src": {
"huggingface": {
"model_id": "opendatalab/MinerU2.5-2509-1.2B",
"model_revision": "879e58bdd9566632b27a8a81f0e2961873311f67"
},
"modelscope": {
"model_id": "OpenDataLab/MinerU2.5-2509-1.2B",
"model_revision": "master"
}
},
"featured": false,
"is_enterprise": true,
"updated_at": 1766547734
},
{
"version": 2,
"model_name": "Qwen-Image-Layered",
Expand Down Expand Up @@ -1580,32 +1643,64 @@
},
{
"version": 2,
"context_length": 32768,
"model_name": "MinerU2.5-2509-1.2B",
"model_family": "ocr",
"model_lang": [
"en",
"zh"
],
"model_ability": [
"ocr"
"chat",
"vision"
],
"model_description": "MinerU2.5-2509-1.2B is a vision language model for document understanding.",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "1_2",
"model_src": {
"huggingface": {
"quantizations": [
"none"
],
"model_id": "opendatalab/MinerU2.5-2509-1.2B",
"model_revision": "main"
},
"modelscope": {
"quantizations": [
"none"
],
"model_id": "opendatalab/MinerU2.5-2509-1.2B",
"model_revision": "master"
}
}
}
],
"stop_token_ids": [
151645,
151643
],
"stop": [
"<|im_end|>",
"<|endoftext|>"
],
"virtualenv": {
"packages": [
"transformers>=4.45.0 ; #engine# == \"transformers\"",
"mineru-vl-utils[transformers] ; #engine# == \"transformers\"",
"transformers>=4.45.0 ; #engine# == \"Transformers\"",
"mineru-vl-utils[transformers] ; #engine# == \"Transformers\"",
"#vllm_dependencies# ; #engine# == \"vllm\"",
"qwen-vl-utils",
"#system_torch#",
"#system_numpy#"
"#system_numpy#",
"qwen_omni_utils"
]
},
"model_src": {
"huggingface": {
"model_id": "opendatalab/MinerU2.5-2509-1.2B",
"model_revision": "main"
},
"modelscope": {
"model_id": "opendatalab/MinerU2.5-2509-1.2B",
"model_revision": "master"
}
},
"updated_at": 1769768716,
"featured": false,
"updated_at": 1769418382
"architectures": [
"Qwen2VLForConditionalGeneration"
],
"chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
},
{
"version": 2,
Expand Down Expand Up @@ -1645,4 +1740,4 @@
"featured": true,
"updated_at": 1769594289
}
]
]
Loading