Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 21 additions & 22 deletions test/srt/test_vision_openai_server_b.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,29 +172,28 @@ def setUpClass(cls):
cls.base_url += "/v1"


# commented out before https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/27 get fixed
# class TestKimiVLServer(TestOpenAIVisionServer):
# @classmethod
# def setUpClass(cls):
# cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
# cls.base_url = DEFAULT_URL_FOR_TEST
# cls.api_key = "sk-123456"
# cls.process = popen_launch_server(
# cls.model,
# cls.base_url,
# timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
# other_args=[
# "--trust-remote-code",
# "--context-length",
# "4096",
# "--dtype",
# "bfloat16",
# ],
# )
# cls.base_url += "/v1"
class TestKimiVLServer(TestOpenAIVisionServer):
@classmethod
def setUpClass(cls):
cls.model = "moonshotai/Kimi-VL-A3B-Instruct"
cls.base_url = DEFAULT_URL_FOR_TEST
cls.api_key = "sk-123456"
cls.process = popen_launch_server(
cls.model,
cls.base_url,
timeout=DEFAULT_TIMEOUT_FOR_SERVER_LAUNCH,
other_args=[
"--trust-remote-code",
"--context-length",
"4096",
"--dtype",
"bfloat16",
],
)
cls.base_url += "/v1"

# def test_video_images_chat_completion(self):
# pass
def test_video_images_chat_completion(self):
pass


class TestPhi4MMServer(TestOpenAIVisionServer):
Expand Down
45 changes: 22 additions & 23 deletions test/srt/test_vlm_input_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,32 +189,31 @@ def _pixel_values_image_data(self, processor_output):
)


# commented out before https://huggingface.co/moonshotai/Kimi-VL-A3B-Instruct/discussions/27 get fixed
# class TestKimiVLImageUnderstandsImage(
# VLMInputTestBase, unittest.IsolatedAsyncioTestCase
# ):
# model_path = "moonshotai/Kimi-VL-A3B-Instruct"
# chat_template = "kimi-vl"
class TestKimiVLImageUnderstandsImage(
VLMInputTestBase, unittest.IsolatedAsyncioTestCase
):
model_path = "moonshotai/Kimi-VL-A3B-Instruct"
chat_template = "kimi-vl"

# @classmethod
# def _init_visual(cls):
# model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
# cls.vision_tower = model.vision_tower.eval().to(cls.device)
# cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)
@classmethod
def _init_visual(cls):
model = AutoModel.from_pretrained(cls.model_path, trust_remote_code=True)
cls.vision_tower = model.vision_tower.eval().to(cls.device)
cls.mm_projector = model.multi_modal_projector.eval().to(cls.device)

# cls.visual = lambda tokenizer_output: cls.mm_projector(
# cls.vision_tower(
# pixel_values=tokenizer_output["pixel_values"],
# grid_hws=tokenizer_output["image_grid_hws"],
# )
# )
cls.visual = lambda tokenizer_output: cls.mm_projector(
cls.vision_tower(
pixel_values=tokenizer_output["pixel_values"],
grid_hws=tokenizer_output["image_grid_hws"],
)
)

# def _pixel_values_image_data(self, processor_output):
# return dict(
# modality="IMAGE",
# pixel_values=processor_output["pixel_values"],
# image_grid_hws=processor_output["image_grid_hws"],
# )
def _pixel_values_image_data(self, processor_output):
return dict(
modality="IMAGE",
pixel_values=processor_output["pixel_values"],
image_grid_hws=processor_output["image_grid_hws"],
)


# not for CI: too large
Expand Down
Loading