Commit 032fd36 ("Update docs", parent 717c56a)

2 files changed: +25, -22 lines


docs/lmms-eval-0.4.md (13 additions, 2 deletions)

```diff
@@ -25,13 +25,11 @@ LMMS-Eval v0.4 represents a significant evolution in multimodal model evaluation
 - [Distributed Evaluation API](#distributed-evaluation-api)
 - [Judge API Integration](#judge-api-integration)
 - [Batch Processing and Efficiency](#batch-processing-and-efficiency)
-- [API Benefits](#api-benefits)
 - [New Benchmarks](#new-benchmarks)
 - [Vision Understanding](#vision-understanding)
 - [Reasoning-Oriented Benchmarks](#reasoning-oriented-benchmarks)
 - [Mathematical Reasoning](#mathematical-reasoning)
 - [Olympic-Level Challenges](#olympic-level-challenges)
-- [Upcoming Benchmarks](#upcoming-benchmarks)
 - [Technical Details](#technical-details)
 - [Multi-Node Evaluation Architecture](#multi-node-evaluation-architecture)
 - [Async OpenAI API Integration](#async-openai-api-integration)
@@ -133,6 +131,19 @@ hf_messages = chat_messages.to_hf_messages()
 
 You can then use these messages with a chat template or the chat completion API. If you wish to implement your own message processing logic, please refer to the protocol definition in `lmms_eval/protocol.py` for more details.
 
+**Replacing the Simple Model with a Chat Model**
+
+To use the `doc_to_messages` function, you must implement a chat model capable of processing the message format it produces. Examples of such models can be found in the `lmms_eval/models/chat` directory.
+
+If you prefer to fall back to the previous simple model implementation, you can add the `--force_simple` flag to the launch command.
+
+To implement a new chat model, follow these steps:
+
+1. Create the chat model (e.g., `lmms_eval/models/vllm.py`).
+2. Register the model in `lmms_eval/models/__init__.py`.
+
+
+
 ### 2. Multi-Node Distributed Evaluation
 
 ![Pix-Pin-2025-07-29-23-25-16](https://i.postimg.cc/z88RsDb5/Pix-Pin-2025-07-29-23-25-16.png)
```
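The `doc_to_messages` flow added above can be sketched as follows. Note that `ChatMessages` here is a hypothetical stand-in for the container that `doc_to_messages` returns; the real definition lives in `lmms_eval/protocol.py` and its field names may differ.

```python
from dataclasses import dataclass, field
from typing import Any, Dict, List


@dataclass
class ChatMessages:
    """Stand-in for the message container produced by doc_to_messages."""
    messages: List[Dict[str, Any]] = field(default_factory=list)

    def to_hf_messages(self) -> List[Dict[str, Any]]:
        # Re-emit messages in the role/content layout that Hugging Face
        # chat templates (processor.apply_chat_template) expect.
        return [{"role": m["role"], "content": m["content"]} for m in self.messages]


chat_messages = ChatMessages(messages=[
    {"role": "user", "content": [
        {"type": "image"},
        {"type": "text", "text": "Describe this image."},
    ]},
])

hf_messages = chat_messages.to_hf_messages()
print(hf_messages[0]["role"])
print(hf_messages[0]["content"][1]["text"])
```

A chat model registered in `lmms_eval/models/__init__.py` would then pass `hf_messages` to its processor's chat template or to a chat completion endpoint.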

docs/model_guide.md (12 additions, 20 deletions)

```diff
@@ -126,13 +126,14 @@ class MyImageModel(lmms):
         messages = doc_to_messages(doc)
 
         # Process images and text from messages
-        images = []
+        images, videos, audios = messages.extract_media()
         text_prompt = ""
         for message in messages:
-            if message["type"] == "image":
-                images.append(message["content"])
-            elif message["type"] == "text":
-                text_prompt += message["content"]
+            if message.type == "text":
+                text_prompt += message["text"]
+
+        # If your model supports applying a chat template:
+        # text_prompt = self.processor.apply_chat_template(messages.to_hf_messages())
 
         # Prepare inputs for your model
         inputs = self.processor(
@@ -188,15 +189,11 @@ class MyVideoModel(lmms):
         messages = doc_to_messages(doc)
 
         # Extract video frames
-        video_frames = []
+        images, videos, audios = messages.extract_media()
         text_prompt = ""
         for message in messages:
-            if message["type"] == "video":
-                # Process video into frames
-                frames = self.extract_frames(message["content"], self.max_frames)
-                video_frames.extend(frames)
-            elif message["type"] == "text":
-                text_prompt += message["content"]
+            if message.type == "text":
+                text_prompt += message["text"]
 
         # Process video frames and generate response
         # ...
@@ -229,16 +226,11 @@ class MyAudioModel(lmms):
         doc = self.task_dict[task][split][doc_id]
         messages = doc_to_messages(doc)
 
-        # Process audio data
-        audio_data = []
+        images, videos, audios = messages.extract_media()
         text_prompt = ""
         for message in messages:
-            if message["type"] == "audio":
-                # Load and process audio
-                audio = self.load_audio(message["content"], self.sample_rate)
-                audio_data.append(audio)
-            elif message["type"] == "text":
-                text_prompt += message["content"]
+            if message.type == "text":
+                text_prompt += message["text"]
 
         # Process audio and generate response
         # ...
```
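The three modality-specific loops above now share one pattern: pull media out with `extract_media()` and accumulate text separately. A self-contained sketch of that pattern is below; the `Message` and `ChatMessages` classes are stand-ins written for illustration, since the real protocol types are defined in `lmms_eval/protocol.py`.

```python
from dataclasses import dataclass, field
from typing import Any, Dict, Iterator, List, Tuple


@dataclass
class Message:
    """Stand-in for one protocol message: a type tag plus payload fields."""
    type: str
    data: Dict[str, Any] = field(default_factory=dict)

    def __getitem__(self, key: str) -> Any:
        return self.data[key]


@dataclass
class ChatMessages:
    """Stand-in for the container returned by doc_to_messages."""
    messages: List[Message] = field(default_factory=list)

    def __iter__(self) -> Iterator[Message]:
        return iter(self.messages)

    def extract_media(self) -> Tuple[List[Any], List[Any], List[Any]]:
        # Split media payloads out of the message stream by modality.
        images = [m["content"] for m in self.messages if m.type == "image"]
        videos = [m["content"] for m in self.messages if m.type == "video"]
        audios = [m["content"] for m in self.messages if m.type == "audio"]
        return images, videos, audios


messages = ChatMessages(messages=[
    Message(type="image", data={"content": "img.png"}),
    Message(type="text", data={"text": "What is shown?"}),
])

# The pattern used by the image, video, and audio snippets above:
images, videos, audios = messages.extract_media()
text_prompt = ""
for message in messages:
    if message.type == "text":
        text_prompt += message["text"]

print(images, videos, audios, text_prompt)
```

Centralizing media extraction this way means each model class only decides how to consume the returned `images`, `videos`, and `audios`, rather than re-implementing the type dispatch per modality.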
