diff --git a/tests/models/multimodal/processing/test_common.py b/tests/models/multimodal/processing/test_common.py index 0fdc182b9ee9..8aa0dc7e8e34 100644 --- a/tests/models/multimodal/processing/test_common.py +++ b/tests/models/multimodal/processing/test_common.py @@ -275,16 +275,17 @@ def _test_processing_correctness_one( "google/gemma-3n-E2B-it", "zai-org/glm-4v-9b", "zai-org/GLM-4.1V-9B-Thinking", + "zai-org/GLM-4.5V", "ibm-granite/granite-speech-3.3-2b", "h2oai/h2ovl-mississippi-800m", + "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B", + "HuggingFaceM4/Idefics3-8B-Llama3", "internlm/Intern-S1", "OpenGVLab/InternVL2-1B", "OpenGVLab/InternVL3-1B", - "HuggingFaceM4/Idefics3-8B-Llama3", - "HuggingFaceTB/SmolVLM2-2.2B-Instruct", + "Kwai-Keye/Keye-VL-8B-Preview", "moonshotai/Kimi-VL-A3B-Instruct", "meta-llama/Llama-4-Scout-17B-16E-Instruct", - "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B", "llava-hf/llava-1.5-7b-hf", "llava-hf/llava-v1.6-mistral-7b-hf", "llava-hf/LLaVA-NeXT-Video-7B-hf", @@ -315,10 +316,13 @@ def _test_processing_correctness_one( "Qwen/Qwen2-Audio-7B-Instruct", "Qwen/Qwen2.5-Omni-3B", "Skywork/Skywork-R1V-38B", + "HuggingFaceTB/SmolVLM2-2.2B-Instruct", + "stepfun-ai/step3", "fixie-ai/ultravox-v0_5-llama-3_2-1b", "openai/whisper-large-v3", "omni-research/Tarsier-7b", "omni-research/Tarsier2-Recap-7b", + "mistralai/Voxtral-Mini-3B-2507", ]) @pytest.mark.parametrize("hit_rate", [0.3, 0.5, 1.0]) @pytest.mark.parametrize("num_batches", [32]) diff --git a/tests/models/registry.py b/tests/models/registry.py index cbdc9edbbc9d..28fe9063169e 100644 --- a/tests/models/registry.py +++ b/tests/models/registry.py @@ -215,9 +215,6 @@ def check_available_online( "HunYuanDenseV1ForCausalLM":_HfExamplesInfo("tencent/Hunyuan-7B-Instruct-0124", trust_remote_code=True, is_available_online=False), - "HCXVisionForCausalLM": _HfExamplesInfo( - "naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B", - trust_remote_code=True), "InternLMForCausalLM": _HfExamplesInfo("internlm/internlm-chat-7b", trust_remote_code=True), "InternLM2ForCausalLM": _HfExamplesInfo("internlm/internlm2-chat-7b", @@ -298,8 +295,7 @@ def check_available_online( "StableLmForCausalLM": _HfExamplesInfo("stabilityai/stablelm-3b-4e1t"), "Starcoder2ForCausalLM": _HfExamplesInfo("bigcode/starcoder2-3b"), "Step3TextForCausalLM": _HfExamplesInfo("stepfun-ai/step3", - trust_remote_code=True, - is_available_online=False), + trust_remote_code=True), "SolarForCausalLM": _HfExamplesInfo("upstage/solar-pro-preview-instruct", trust_remote_code=True), "TeleChat2ForCausalLM": _HfExamplesInfo("Tele-AI/TeleChat2-3B", @@ -405,22 +401,24 @@ def check_available_online( hf_overrides={"architectures": ["GLM4VForCausalLM"]}), # noqa: E501 "Glm4vForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.1V-9B-Thinking"), # noqa: E501 "Glm4vMoeForConditionalGeneration": _HfExamplesInfo("zai-org/GLM-4.5V", - is_available_online=False), # noqa: E501 + min_transformers_version="4.56"), # noqa: E501 "H2OVLChatModel": _HfExamplesInfo("h2oai/h2ovl-mississippi-800m", trust_remote_code=True, extras={"2b": "h2oai/h2ovl-mississippi-2b"}, # noqa: E501 max_transformers_version="4.48", # noqa: E501 transformers_version_reason="HF model is not compatible."), # noqa: E501 + "HCXVisionForCausalLM": _HfExamplesInfo("naver-hyperclovax/HyperCLOVAX-SEED-Vision-Instruct-3B", # noqa: E501 + trust_remote_code=True), "Idefics3ForConditionalGeneration": _HfExamplesInfo("HuggingFaceM4/Idefics3-8B-Llama3", # noqa: E501 {"tiny": "HuggingFaceTB/SmolVLM-256M-Instruct"}, # noqa: E501 min_transformers_version="4.55.1", transformers_version_reason="HF model broken in 4.55.0"), # noqa: E501 + "InternS1ForConditionalGeneration": _HfExamplesInfo("internlm/Intern-S1", + trust_remote_code=True), # noqa: E501 "InternVLChatModel": _HfExamplesInfo("OpenGVLab/InternVL2-1B", extras={"2B": "OpenGVLab/InternVL2-2B", "3.0": "OpenGVLab/InternVL3-1B"}, # noqa: E501 trust_remote_code=True), - "InternS1ForConditionalGeneration": _HfExamplesInfo("internlm/Intern-S1", - trust_remote_code=True), "KeyeForConditionalGeneration": _HfExamplesInfo("Kwai-Keye/Keye-VL-8B-Preview", # noqa: E501 trust_remote_code=True), "KimiVLForConditionalGeneration": _HfExamplesInfo("moonshotai/Kimi-VL-A3B-Instruct", # noqa: E501 @@ -464,9 +462,10 @@ def check_available_online( transformers_version_reason="HF model is not compatible", # noqa: E501 extras={"1.6-llama": "AIDC-AI/Ovis1.6-Llama3.2-3B", "1.6-gemma": "AIDC-AI/Ovis1.6-Gemma2-9B"}), # noqa: E501 - "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B", trust_remote_code=True, - max_transformers_version="4.53", - transformers_version_reason="HF model is not compatible"), # noqa: E501 + "Ovis2_5": _HfExamplesInfo("AIDC-AI/Ovis2.5-2B", + trust_remote_code=True, + max_transformers_version="4.53", + transformers_version_reason="HF model is not compatible"), # noqa: E501 "PaliGemmaForConditionalGeneration": _HfExamplesInfo("google/paligemma-3b-mix-224", # noqa: E501 extras={"v2": "google/paligemma2-3b-ft-docci-448"}), # noqa: E501 "Phi3VForCausalLM": _HfExamplesInfo("microsoft/Phi-3-vision-128k-instruct", @@ -496,8 +495,7 @@ def check_available_online( min_transformers_version="4.55.1", transformers_version_reason="HF model broken in 4.55.0"), # noqa: E501 "Step3VLForConditionalGeneration": _HfExamplesInfo("stepfun-ai/step3", - trust_remote_code=True, - is_available_online=False), + trust_remote_code=True), "UltravoxModel": _HfExamplesInfo("fixie-ai/ultravox-v0_5-llama-3_2-1b", # noqa: E501 trust_remote_code=True), "TarsierForConditionalGeneration": _HfExamplesInfo("omni-research/Tarsier-7b"), # noqa: E501