
Commit 87234d2

Fix error in the batch method of the MLXLM model
Parent commit: ca0cf18

File tree: 3 files changed (+42 −5 lines)

docs/features/models/mlxlm.md

Lines changed: 1 addition & 1 deletion
````diff
@@ -100,7 +100,7 @@ for chunk in model.stream("Write a short story about a cat.", max_tokens=100):
 
 #### Batch Generation
 
-The `MLXLM` model supports generating text in batches. To do so, use the `batch` method and provide a list of strings as a model input. For instance:
+The `MLXLM` model supports generating text in batches. To do so, use the `batch` method and provide a list of strings as a model input. However, constrained generation is not supported with batching, so you cannot provide an `output_type`. For instance:
 
 ```python
 import outlines
````
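For context, here is a minimal sketch of the batch usage the updated docs describe. The checkpoint name, prompts, and `max_tokens` value are illustrative assumptions, and the sketch presumes the v1-style `outlines.from_mlxlm` constructor paired with `mlx_lm.load`:

```python
import outlines
from mlx_lm import load

# Hypothetical MLX checkpoint; any mlx-lm compatible model should behave the same.
model = outlines.from_mlxlm(*load("mlx-community/SmolLM-135M-Instruct-4bit"))

# `batch` takes a list of prompts and returns a list of generated strings.
# Note: per this commit, passing an `output_type` here raises NotImplementedError.
results = model.batch(
    ["Write a haiku about cats.", "Write a haiku about dogs."],
    max_tokens=50,
)
print(results[0])
print(results[1])
```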

outlines/models/mlxlm.py

Lines changed: 29 additions & 4 deletions
```diff
@@ -173,16 +173,41 @@ def generate_batch(
             The list of text generated by the model.
 
         """
-        from mlx_lm import generate_batch
+        from mlx_lm import batch_generate
 
-        return generate_batch(
+        if output_type:
+            raise NotImplementedError(
+                "mlx-lm does not support constrained generation with batching."
+                + "You cannot provide an `output_type` with this method."
+            )
+
+        model_input = [self.type_adapter.format_input(item) for item in model_input]
+
+        # Contrarily to the other generate methods, batch_generate requires
+        # tokenized prompts
+        add_special_tokens = [
+            (
+                self.mlx_tokenizer.bos_token is None
+                or not prompt.startswith(self.mlx_tokenizer.bos_token)
+            )
+            for prompt in model_input
+        ]
+        tokenized_model_input = [
+            self.mlx_tokenizer.encode(
+                model_input[i], add_special_tokens=add_special_tokens[i]
+            )
+            for i in range(len(model_input))
+        ]
+
+        response = batch_generate(
             self.model,
             self.mlx_tokenizer,
-            [self.type_adapter.format_input(item) for item in model_input],
-            logits_processors=self.type_adapter.format_output_type(output_type),
+            tokenized_model_input,
             **kwargs,
         )
 
+        return response.texts
+
     def generate_stream(
         self,
         model_input: str,
```
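The BOS handling in the new code avoids prepending a second BOS token when a prompt (for example, one produced by a chat template) already starts with one. A standalone sketch of that check, using a hypothetical Hugging Face-style tokenizer exposing `bos_token` and `encode`:

```python
def encode_prompts(tokenizer, prompts):
    """Tokenize prompts without duplicating the BOS token (sketch of the logic above)."""
    encoded = []
    for prompt in prompts:
        # Let the tokenizer add special tokens only when the prompt does not
        # already begin with the BOS token string.
        add_special = (
            tokenizer.bos_token is None
            or not prompt.startswith(tokenizer.bos_token)
        )
        encoded.append(tokenizer.encode(prompt, add_special_tokens=add_special))
    return encoded
```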

tests/models/test_mlxlm.py

Lines changed: 12 additions & 0 deletions
```diff
@@ -128,3 +128,15 @@ def test_mlxlm_batch(model):
     assert len(result) == 2
     assert isinstance(result[0], str)
     assert isinstance(result[1], str)
+
+
+@pytest.mark.skipif(not HAS_MLX, reason="MLX tests require Apple Silicon")
+def test_mlxlm_batch_output_type(model):
+    with pytest.raises(
+        NotImplementedError,
+        match="mlx-lm does not support constrained generation with batching."
+    ):
+        model.batch(
+            ["Respond with one word.", "Respond with one word."],
+            Regex(r"[0-9]")
+        )
```
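The new test exercises the error path end to end. For illustration, roughly how a caller would hit it, assuming `Regex` is imported from `outlines.types` as in the rest of the test suite:

```python
from outlines.types import Regex

try:
    model.batch(["Respond with one word."], Regex(r"[0-9]"))
except NotImplementedError as err:
    # "mlx-lm does not support constrained generation with batching. ..."
    print(err)
```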
