diff --git a/.github/workflows/node_hub_test.sh b/.github/workflows/node_hub_test.sh
index 791021992..aa2bf7795 100755
--- a/.github/workflows/node_hub_test.sh
+++ b/.github/workflows/node_hub_test.sh
@@ -5,7 +5,7 @@ set -euo
 CI=${GITHUB_ACTIONS:-false}
 
 # List of ignored modules
-ignored_folders=("dora-parler" "dora-opus" "dora-internvl" "dora-magma")
+ignored_folders=("dora-parler" "dora-opus" "dora-internvl" "dora-magma" "dora-mlx-lm")
 
 # Skip test
 skip_test_folders=("dora-internvl" "dora-parler" "dora-keyboard" "dora-microphone" "terminal-input" "dora-magma")
diff --git a/node-hub/dora-mlx-lm/README.md b/node-hub/dora-mlx-lm/README.md
new file mode 100755
index 000000000..a4d52b609
--- /dev/null
+++ b/node-hub/dora-mlx-lm/README.md
@@ -0,0 +1,86 @@
+# Dora MLX-LM Node
+
+## Overview
+
+The `dora-mlx-lm` node integrates the [`mlx-lm`](https://github.com/ml-explore/mlx-lm) library to run large language models (LLMs) optimized for Apple Silicon (M1, M2, M3, and later) on macOS. It processes text prompts as input and generates text responses using a model such as `mlx-community/SmolLM-135M-Instruct-4bit`. The node is designed for use within a [Dora](https://github.com/dora-rs/dora) pipeline, supporting features like activation words, a configurable system prompt, and performance metadata.
+
+## Installation
+
+To use the `dora-mlx-lm` node, install the required dependencies:
+
+```bash
+pip install dora-rs-cli mlx-lm
+```
+
+## Usage
+
+1. **Add the node to your Dora pipeline**:
+
+   Include the `dora-mlx-lm` node in your pipeline YAML file. Below is an example configuration:
+
+   ```yaml
+   nodes:
+     - id: mlx_lm
+       build: pip install mlx-lm
+       path: dora-mlx-lm/main.py
+       inputs:
+         text: dora/input
+       outputs:
+         - text
+       env:
+         MODEL_PATH: mlx-community/SmolLM-135M-Instruct-4bit
+         SYSTEM_PROMPT: "You are a helpful assistant optimized for Apple M-series chips."
+         MAX_TOKENS: "100"
+         TEMPERATURE: "0.7"
+         CONTEXT_SIZE: "2048"
+         ACTIVATION_WORDS: "hey assistant"
+   ```
+
+   ### Environment Variables
+
+   - `MODEL_PATH`: Path or Hugging Face ID of the model (default: `mlx-community/SmolLM-135M-Instruct-4bit`).
+   - `SYSTEM_PROMPT`: Optional system prompt to define the model's behavior (default: empty).
+   - `MAX_TOKENS`: Maximum number of tokens to generate (default: 100).
+   - `TEMPERATURE`: Sampling temperature for generation (default: 0.7).
+   - `CONTEXT_SIZE`: Maximum context length (default: 2048). Currently reserved: each prompt is processed independently, so no history truncation is applied yet.
+   - `ACTIVATION_WORDS`: Space-separated list of words, any one of which triggers the node (default: empty, which processes all inputs).
+
+2. **Run the pipeline**:
+
+   Build and execute your pipeline using the Dora CLI:
+
+   ```bash
+   dora build your_pipeline.yml --uv
+   dora run your_pipeline.yml --uv
+   ```
+
+## Inputs
+
+- **text**: A text string to be processed by the LLM (e.g., "Write a short story about a robot"). The node validates that the input is a non-empty `pyarrow.Array` containing a string.
+
+## Outputs
+
+- **text**: The text response generated by the LLM, sent as a `pyarrow.Array`. The output includes metadata such as:
+  - `processing_time`: Time taken to generate the response (in seconds).
+  - `model`: The model used (e.g., `mlx-community/SmolLM-135M-Instruct-4bit`).
+  - `optimized_for`: Indicates optimization for Apple's M-series chips.
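+
+### Example: Sending a Prompt from Another Node
+
+A minimal sketch of an upstream Python node that feeds a prompt to `mlx_lm` and prints the reply. It assumes a dataflow in which this node's `prompt` output is wired to `mlx_lm`'s `text` input, and `mlx_lm/text` is wired back to this node's `reply` input (the node and channel names here are illustrative):
+
+```python
+import pyarrow as pa
+from dora import Node
+
+node = Node()
+
+# Send a prompt; dora-mlx-lm expects a non-empty string array.
+node.send_output("prompt", pa.array(["Write a short story about a robot"]))
+
+# Wait for the generated reply and its performance metadata.
+for event in node:
+    if event["type"] == "INPUT" and event["id"] == "reply":
+        print(event["value"][0].as_py())
+        print(event["metadata"])  # processing_time, model, optimized_for
+        break
+```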
+
+## Features
+
+- **Apple Silicon Optimization**: Leverages the MLX framework for efficient inference on M1, M2, M3, and later chips, with automatic GPU and Neural Engine acceleration.
+- **System Prompt Context**: Prepends a configurable system prompt to every request; each prompt is otherwise processed independently.
+- **Activation Words**: Optionally processes inputs only when they contain specified activation words.
+- **Robust Error Handling**: Validates inputs and logs errors for reliable pipeline integration.
+- **Metadata**: Provides performance metrics and configuration details in output metadata.
+
+### Platform Requirements
+
+- **Platform**: macOS 13.5+ (ARM-native Python required).
+- This node is only supported on macOS and skips execution on Linux/Windows.
+
+## Notes
+
+- The node uses `mlx-lm`, which is optimized for Apple Silicon. Parameters like `N_GPU_LAYERS` or `N_THREADS` (common in other frameworks like `llama_cpp`) are not applicable, as MLX manages resource allocation internally.
+- For large models, use quantized versions (e.g., 4-bit) to optimize memory usage and performance.
+- Each prompt is currently processed with only the system prompt as context; `CONTEXT_SIZE` is reserved for future conversation-history truncation.
+
+## License
+
+This node is licensed under the [MIT License](https://opensource.org/licenses/MIT), consistent with the `mlx-lm` library.
\ No newline at end of file
diff --git a/node-hub/dora-mlx-lm/dora_mlx_lm/__init__.py b/node-hub/dora-mlx-lm/dora_mlx_lm/__init__.py
new file mode 100755
index 000000000..79cbf3701
--- /dev/null
+++ b/node-hub/dora-mlx-lm/dora_mlx_lm/__init__.py
@@ -0,0 +1,13 @@
+"""dora-mlx-lm: Dora node for running MLX-LM language models on Apple Silicon."""
+
+import os
+
+# Define the path to the README file relative to the package directory
+readme_path = os.path.join(os.path.dirname(os.path.dirname(__file__)), "README.md")
+
+# Read the content of the README file
+try:
+    with open(readme_path, encoding="utf-8") as f:
+        __doc__ = f.read()
+except FileNotFoundError:
+    __doc__ = "README file not found."
diff --git a/node-hub/dora-mlx-lm/dora_mlx_lm/__main__.py b/node-hub/dora-mlx-lm/dora_mlx_lm/__main__.py
new file mode 100755
index 000000000..40e2b013f
--- /dev/null
+++ b/node-hub/dora-mlx-lm/dora_mlx_lm/__main__.py
@@ -0,0 +1,4 @@
+"""Entry point for running the dora-mlx-lm node as a module."""
+
+from .main import main
+
+if __name__ == "__main__":
+    main()
diff --git a/node-hub/dora-mlx-lm/dora_mlx_lm/main.py b/node-hub/dora-mlx-lm/dora_mlx_lm/main.py
new file mode 100755
index 000000000..cad352bb6
--- /dev/null
+++ b/node-hub/dora-mlx-lm/dora_mlx_lm/main.py
@@ -0,0 +1,115 @@
+"""Dora node for generating text responses using a pre-trained language model, optimized for Apple M1, M2, M3 chips.
+
+This node listens for input prompts on the 'text' channel, generates text using
+a pre-trained model (default: SmolLM-135M-Instruct-4bit) optimized for Apple's M-series
+chips via MLX, and sends responses to the 'text' output channel. The node can be configured
+via environment variables and supports activation words to filter inputs.
+
+Note: This node is only supported on macOS. It skips execution on Linux and Windows.
+"""
+
+import logging
+import os
+import platform
+import sys
+import time
+
+# Check whether the platform is macOS
+if platform.system() != "Darwin":
+    logging.basicConfig(level=logging.INFO)
+    logging.info("mlx-lm is only supported on macOS. Skipping execution on %s.", platform.system())
+    sys.exit(0)  # Exit without error to avoid a CI failure
+
+import pyarrow as pa
+from dora import Node
+from mlx_lm import generate, load
+
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+
+# Environment variables for model configuration
+MODEL_PATH = os.getenv("MODEL_PATH", "mlx-community/SmolLM-135M-Instruct-4bit")
+SYSTEM_PROMPT = os.getenv("SYSTEM_PROMPT", "")
+MAX_TOKENS = int(os.getenv("MAX_TOKENS", "100"))
+TEMPERATURE = float(os.getenv("TEMPERATURE", "0.7"))
+CONTEXT_SIZE = int(os.getenv("CONTEXT_SIZE", "2048"))  # Context length; reserved for future history truncation
+ACTIVATION_WORDS = os.getenv("ACTIVATION_WORDS", "").split()
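+# Example: ACTIVATION_WORDS="hey assistant" makes the node respond only to prompts
+# containing the word "hey" or the word "assistant"; when unset, every prompt is processed.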
+
+def get_model():
+    """Load a pre-trained language model and tokenizer optimized for Apple M1/M2/M3 chips."""
+    try:
+        logging.info("Loading model from %s for Apple M-series optimization", MODEL_PATH)
+        model, tokenizer = load(
+            MODEL_PATH, tokenizer_config={"eos_token": "<|im_end|>"}
+        )
+        logging.info("Model loaded successfully with MLX for M1/M2/M3 performance")
+        return model, tokenizer
+    except Exception:
+        logging.exception("Error loading model")
+        raise
+
+def main():
+    """Process input events and generate text responses using the loaded model.
+
+    Optimized for Apple M1, M2, M3 chips using the MLX framework for efficient inference.
+    Generates responses independently for each input, using only the system prompt as context.
+    """
+    # Initialize model and tokenizer
+    model, tokenizer = get_model()
+    node = Node()
+    history = [{"role": "system", "content": SYSTEM_PROMPT}] if SYSTEM_PROMPT else []
+
+    for event in node:
+        if event["type"] == "INPUT" and event["id"] == "text":
+            # Validate input
+            if not isinstance(event["value"], pa.Array) or len(event["value"]) == 0:
+                logging.error("Invalid input: expected a non-empty pyarrow.Array")
+                continue
+            text = event["value"][0].as_py()
+            if not isinstance(text, str):
+                logging.error("Invalid input: expected a string")
+                continue
+
+            words = text.lower().split()
+            if len(ACTIVATION_WORDS) == 0 or any(
+                word in ACTIVATION_WORDS for word in words
+            ):
+                try:
+                    start_time = time.time()
+                    messages = history + [{"role": "user", "content": text}]
+                    formatted_prompt = tokenizer.apply_chat_template(
+                        messages, add_generation_prompt=True
+                    )
+
+                    response = generate(
+                        model,
+                        tokenizer,
+                        prompt=formatted_prompt,
+                        max_tokens=MAX_TOKENS,
+                        temp=TEMPERATURE,
+                        verbose=False,
+                    )
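+                    # Note: recent mlx-lm releases replace the `temp` keyword with a
+                    # sampler object (from mlx_lm.sample_utils import make_sampler;
+                    # generate(..., sampler=make_sampler(temp=TEMPERATURE))). If
+                    # generation fails with an unexpected-keyword error, use that form.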
"https://github.com/dora-rs/dora" + +[tool.ruff.lint] +extend-select = [ + "D", # pydocstyle + "UP", # Ruff's UP rule + "PERF", # Ruff's PERF rule + "RET", # Ruff's RET rule + "RSE", # Ruff's RSE rule + "NPY", # Ruff's NPY rule + "N", # Ruff's N rule + "I", # Ruff's I rule +] \ No newline at end of file diff --git a/node-hub/dora-mlx-lm/tests/test_dora_mlx_lm.py b/node-hub/dora-mlx-lm/tests/test_dora_mlx_lm.py new file mode 100755 index 000000000..27b8d7a14 --- /dev/null +++ b/node-hub/dora-mlx-lm/tests/test_dora_mlx_lm.py @@ -0,0 +1,15 @@ +import pytest + + +def test_mlx_lm_node(): + """ + Test the import and execution of the mlx_lm_node function. + + This test verifies that the mlx_lm_node function can be imported from the dora_mlx_lm module + and checks that calling it outside a DORA dataflow raises a RuntimeError, as expected. + """ + from dora_mlx_lm.main import main + + # Check that calling the node function raises a RuntimeError, as it requires a DORA dataflow environment. + with pytest.raises(RuntimeError): + main() \ No newline at end of file