Add macOS-only mlx-lm node to node-hub (#882) #985
Open

Clement795 wants to merge 5 commits into dora-rs:main from Clement795:add-mlx-lm-v2
Changes from 3 commits (of 5 total).

Commits:
- 10d9e4b Add macOS-only mlx-lm node to node-hub (#882) (Clement795)
- 148b2fd Add dora-mlx-lm to skip_test_folders to fix CI (Clement795)
- f665b13 Add dora-mlx-lm to ignored_folders to fix CI (Clement795)
- 9bd6dfb Rename input event id from 'prompt' to 'text' for consistency (Clement795)
- 8d37291 Merge branch 'main' into add-mlx-lm-v2 (phil-opp)
**node-hub/dora-mlx-lm/README.md** (new file, +86 lines)
# Dora MLX-LM Node

## Overview

The `dora-mlx-lm` node integrates the [`mlx-lm`](https://github.com/ml-explore/mlx-lm) library to run large language models (LLMs) optimized for Apple Silicon (M1, M2, M3, and later) on macOS. It takes text prompts as input and generates text responses using a model such as `mlx-community/SmolLM-135M-Instruct-4bit`. The node is designed for use within a [Dora](https://github.com/dora-rs/dora) pipeline and supports activation words, conversation history, and performance metadata.
## Installation

To use the `dora-mlx-lm` node, install the required dependencies:

```bash
pip install dora-rs-cli mlx-lm
```
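Because the node targets Apple Silicon only (see the platform note below), a quick optional sanity check, not part of the PR, is to confirm the interpreter is ARM-native before installing:

```bash
# Optional sanity check (not part of the PR): MLX requires an ARM-native
# Python build on Apple Silicon.
python -c "import platform; print(platform.machine())"  # expect: arm64
pip show mlx-lm  # confirm the package resolved after installation
```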
## Usage

1. **Add the node to your Dora pipeline**:

   Include the `dora-mlx-lm` node in your pipeline YAML file. Below is an example configuration:

   ```yaml
   nodes:
     - id: mlx_lm
       build: pip install mlx-lm
       path: dora-mlx-lm/main.py
       inputs:
         prompt: dora/input
       outputs:
         - text
       env:
         MODEL_PATH: mlx-community/SmolLM-135M-Instruct-4bit
         SYSTEM_PROMPT: "You are a helpful assistant optimized for Apple M-series chips."
         MAX_TOKENS: "100"
         TEMPERATURE: "0.7"
         CONTEXT_SIZE: "2048"
         ACTIVATION_WORDS: "hey assistant"
   ```
### Environment Variables

- `MODEL_PATH`: Path or Hugging Face ID of the model (default: `mlx-community/SmolLM-135M-Instruct-4bit`).
- `SYSTEM_PROMPT`: Optional system prompt to define the model's behavior (default: empty).
- `MAX_TOKENS`: Maximum number of tokens to generate (default: 100).
- `TEMPERATURE`: Sampling temperature for generation (default: 0.7).
- `CONTEXT_SIZE`: Maximum context length for conversation history (default: 2048).
- `ACTIVATION_WORDS`: Space-separated list of words that trigger the node (default: empty, which processes all inputs).
2. **Run the pipeline**:

   Build and execute your pipeline using the Dora CLI:

   ```bash
   dora build your_pipeline.yml --uv
   dora run your_pipeline.yml --uv
   ```
## Inputs

- **prompt**: A text string to be processed by the LLM (e.g., "Write a short story about a robot"). The node validates that the input is a non-empty `pyarrow.Array` containing a string; a minimal sender sketch follows.
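To illustrate the expected payload, here is a hypothetical upstream node, not part of the PR; the output name `input` is an assumption chosen to match the `dora/input` mapping in the example YAML above:

```python
# Hypothetical upstream node (not part of the PR): publishes one prompt as a
# single-element pyarrow string array, the format dora-mlx-lm validates.
import pyarrow as pa
from dora import Node

node = Node()  # assumes this script runs as a node inside a Dora dataflow
node.send_output(
    output_id="input",  # assumed name; wire it to mlx_lm's `prompt` input
    data=pa.array(["hey assistant, write a short story about a robot"]),
)
```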
## Outputs

- **text**: The text response generated by the LLM, sent as a `pyarrow.Array`. The output includes metadata such as:
  - `processing_time`: Time taken to generate the response (in seconds).
  - `model`: The model used (e.g., `mlx-community/SmolLM-135M-Instruct-4bit`).
  - `optimized_for`: Indicates optimization for Apple's M-series chips.
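A downstream node can read both the response and its metadata; the following is a sketch under the same assumptions (dataflow wiring not shown):

```python
# Hypothetical downstream node (not part of the PR): prints each response
# together with the processing_time metadata attached by dora-mlx-lm.
from dora import Node

node = Node()
for event in node:
    if event["type"] == "INPUT" and event["id"] == "text":
        response = event["value"][0].as_py()
        elapsed = event["metadata"].get("processing_time", "n/a")
        print(f"[{elapsed}s] {response}")
```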
## Features

- **Apple Silicon Optimization**: Leverages the MLX framework for efficient inference on M1, M2, M3, and later chips, with automatic GPU and Neural Engine acceleration.
- **Conversation History**: Maintains a conversation history with a configurable system prompt, truncated based on `CONTEXT_SIZE` (see the sketch below).
- **Activation Words**: Optionally processes inputs only when they contain specified activation words.
- **Robust Error Handling**: Validates inputs and logs errors for reliable pipeline integration.
- **Metadata**: Provides performance metrics and configuration details in output metadata.
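The truncation logic itself is not visible in this three-commit view, so the following is only an illustrative sketch of one plausible approach, using token counts from the node's tokenizer; the helper name is hypothetical:

```python
# Illustrative sketch only (not from the PR): drop the oldest non-system turns
# until the chat-formatted history fits within CONTEXT_SIZE tokens.
def truncate_history(history, tokenizer, context_size):
    def total_tokens(msgs):
        # apply_chat_template returns token ids by default, so len() counts tokens.
        return len(tokenizer.apply_chat_template(msgs, add_generation_prompt=True))

    while len(history) > 1 and total_tokens(history) > context_size:
        del history[1]  # keep the system prompt at index 0
    return history
```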
### Using mlx-lm in Dora Node Hub

- **Platform**: macOS 13.5+ (ARM-native Python required)
- Note: This node is only supported on macOS and skips execution on Linux/Windows.
## Notes

- The node uses `mlx-lm`, which is optimized for Apple Silicon. Parameters like `N_GPU_LAYERS` or `N_THREADS` (common in other frameworks such as `llama_cpp`) are not applicable, as MLX manages resource allocation internally.
- For large models, use quantized versions (e.g., 4-bit) to reduce memory usage and improve performance.
- The conversation history is truncated to respect the `CONTEXT_SIZE` limit, ensuring compatibility with the model's context length.
## License

This node is licensed under the [MIT License](https://opensource.org/licenses/MIT), consistent with the `mlx-lm` library.
**node-hub/dora-mlx-lm/dora_mlx_lm/__init__.py** (new file, +13 lines)
| """TODO: Add docstring.""" | ||
|
|
||
| import os | ||
|
|
||
| # Define the path to the README file relative to the package directory | ||
| readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md") | ||
|
|
||
| # Read the content of the README file | ||
| try: | ||
| with open(readme_path, encoding="utf-8") as f: | ||
| __doc__ = f.read() | ||
| except FileNotFoundError: | ||
| __doc__ = "README file not found." |
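This pattern surfaces the README as the package docstring. A quick hypothetical check, not part of the PR (the module name `dora_mlx_lm` is taken from the test file's import below):

```bash
# Hypothetical check: the package docstring should now be the README's content.
python -c "import dora_mlx_lm; print(dora_mlx_lm.__doc__.splitlines()[0])"
```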
**node-hub/dora-mlx-lm/dora_mlx_lm/__main__.py** (new file, +4 lines)
```python
from .main import main

if __name__ == "__main__":
    main()
```
**node-hub/dora-mlx-lm/dora_mlx_lm/main.py** (new file, +115 lines)
| """Dora node for generating text responses using a pre-trained language model, optimized for Apple M1, M2, M3 chips. | ||
|
|
||
| This node listens for input prompts on the 'prompt' channel, generates text using | ||
| a pre-trained model (default: SmolLM-135M-Instruct-4bit) optimized for Apple's M-series | ||
| chips via MLX, and sends responses to the 'text' output channel. The node can be configured | ||
| via environment variables and supports activation words to filter inputs. | ||
|
|
||
| Note: This node is only supported on macOS. It skips execution on Linux and Windows. | ||
| """ | ||
|
|
||
| import logging | ||
| import os | ||
| import platform | ||
| import sys | ||
| import time | ||
| from pathlib import Path | ||
|
|
||
| # Vérifier si la plateforme est macOS | ||
| if platform.system() != "Darwin": | ||
| logging.basicConfig(level=logging.INFO) | ||
| logging.info("mlx-lm is only supported on macOS. Skipping execution on %s.", platform.system()) | ||
| sys.exit(0) # Sortir sans erreur pour éviter un échec CI | ||
|
|
||
| import pyarrow as pa | ||
| from dora import Node | ||
| from mlx_lm import load, generate | ||
|
|
||
| # Configure logging | ||
| logging.basicConfig(level=logging.INFO) | ||
|
|
||
| # Environment variables for model configuration | ||
| MODEL_PATH = os.getenv("MODEL_PATH", "mlx-community/SmolLM-135M-Instruct-4bit") | ||
| SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "") | ||
| MAX_TOKENS = int(os.getenv("MAX_TOKENS", "100")) | ||
| TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7")) | ||
| CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "2048")) # Context length for the model | ||
| ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "").split() | ||
|
|
||
| def get_model(): | ||
| """Load a pre-trained language model and tokenizer optimized for Apple M1/M2/M3 chips.""" | ||
| try: | ||
| logging.info(f"Loading model from {MODEL_PATH} for Apple M-series optimization") | ||
| model, tokenizer = load( | ||
| MODEL_PATH, tokenizer_config={"eos_token": "<|im_end|>"} | ||
| ) | ||
| logging.info("Model loaded successfully with MLX for M1/M2/M3 performance") | ||
| return model, tokenizer | ||
| except Exception as e: | ||
| logging.exception(f"Error loading model: {e}") | ||
| raise | ||
|
|
||
| def main(): | ||
| """Process input events and generate text responses using the loaded model. | ||
|
|
||
| Optimized for Apple M1, M2, M3 chips using the MLX framework for efficient inference. | ||
| Generates responses independently for each input, using only the system prompt as context. | ||
| """ | ||
| # Initialize model and tokenizer | ||
| model, tokenizer = get_model() | ||
| node = Node() | ||
| history = [{"role": "system", "content": SYSTEM_PROMPT}] if SYSTEM_PROMPT else [] | ||
|
|
||
| for event in node: | ||
| if event["type"] == "INPUT" and event["id"] == "prompt": | ||
| # Validate input | ||
| if not isinstance(event["value"], pa.Array) or len(event["value"]) == 0: | ||
| logging.error("Invalid input: expected a non-empty pyarrow.Array") | ||
| continue | ||
| text = event["value"][0].as_py() | ||
| if not isinstance(text, str): | ||
| logging.error("Invalid input: expected a string") | ||
| continue | ||
|
|
||
| words = text.lower().split() | ||
| if len(ACTIVATION_WORDS) == 0 or any( | ||
| word in ACTIVATION_WORDS for word in words | ||
| ): | ||
| try: | ||
| start_time = time.time() | ||
| messages = history + [{"role": "user", "content": text}] | ||
| formatted_prompt = tokenizer.apply_chat_template( | ||
| messages, add_generation_prompt=True | ||
| ) | ||
|
|
||
| response = generate( | ||
| model, | ||
| tokenizer, | ||
| prompt=formatted_prompt, | ||
| max_tokens=MAX_TOKENS, | ||
| temp=TEMPERATURE, | ||
| verbose=False, | ||
| ) | ||
|
|
||
| processing_time = time.time() - start_time | ||
| node.send_output( | ||
| output_id="text", | ||
| data=pa.array([response]), | ||
| metadata={ | ||
| "processing_time": processing_time, | ||
| "model": MODEL_PATH, | ||
| "optimized_for": "Apple M1/M2/M3", | ||
| }, | ||
| ) | ||
|
|
||
| except Exception as e: | ||
| logging.exception(f"Error generating response: {e}") | ||
|
|
||
| elif event["type"] == "STOP": | ||
| logging.info("Received STOP event, cleaning up...") | ||
| model = None | ||
| tokenizer = None | ||
| break | ||
|
|
||
| if __name__ == "__main__": | ||
| main() | ||
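For context, the `load`/`generate` path the node wraps can be exercised standalone. This sketch mirrors the PR's calls but omits the `temp` keyword, since recent mlx-lm releases pass temperature through a sampler instead and the exact signature depends on the installed version:

```python
# Standalone sketch (not part of the PR) of the mlx-lm calls the node wraps.
from mlx_lm import load, generate

model, tokenizer = load("mlx-community/SmolLM-135M-Instruct-4bit")
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Write a short story about a robot."}],
    add_generation_prompt=True,
)
# max_tokens mirrors the node's MAX_TOKENS default of 100.
print(generate(model, tokenizer, prompt=prompt, max_tokens=100))
```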
**node-hub/dora-mlx-lm/pyproject.toml** (new file, +27 lines)
```toml
[project]
name = "dora-mlx-lm"
version = "0.1.0"
authors = [{ name = "Clément Leprêtre", email = "[email protected]" }]
description = "DORA node for running MLX-LM large language models"
license = { text = "MIT" }
readme = "README.md"
requires-python = ">=3.7"
dependencies = [
    "mlx-lm>=0.23.2",
    "dora-rs>=0.3.11"
]

[project.urls]
Repository = "https://github.com/dora-rs/dora"

[tool.ruff.lint]
extend-select = [
    "D",    # pydocstyle
    "UP",   # pyupgrade
    "PERF", # Perflint
    "RET",  # flake8-return
    "RSE",  # flake8-raise
    "NPY",  # NumPy-specific rules
    "N",    # pep8-naming
    "I",    # isort
]
```
**node-hub/dora-mlx-lm/tests/…** (new pytest file, +15 lines)
```python
import pytest


def test_mlx_lm_node():
    """Test the import and execution of the dora-mlx-lm node entry point.

    This test verifies that `main` can be imported from the dora_mlx_lm module
    and checks that calling it outside a DORA dataflow raises a RuntimeError,
    as expected.
    """
    from dora_mlx_lm.main import main

    # Calling the node function outside a DORA dataflow should raise a RuntimeError.
    with pytest.raises(RuntimeError):
        main()
```
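CI skips this folder (see the skip_test_folders commit above), so the test is meant for local runs; a hypothetical invocation on an Apple Silicon Mac, with the folder path assumed:

```bash
# Hypothetical local run (not part of the PR); the node's folder path is an assumption.
pytest node-hub/dora-mlx-lm -v
```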