Commit d7cd5fe

Docs
1 parent b8f7c4d commit d7cd5fe

File tree

- README.md
- examples/python/smollm3.py
- mistralrs-pyo3/API.md
- mistralrs-pyo3/src/which.rs
- mistralrs/examples/smollm3/main.rs

5 files changed: +91, -0 lines changed

README.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -469,6 +469,7 @@ If you do not specify the architecture, an attempt will be made to use the model
 - `deepseekv3`
 - `qwen3`
 - `qwen3moe`
+- `smollm3`

 </details>

@@ -552,6 +553,7 @@ Please submit more benchmarks via raising an issue!
 |Mistral 3| | |✅|
 |Llama 4| | |✅|
 |Qwen 3|✅| |✅|
+|SmolLM3| | |✅|
 |Dia 1.6b| | |✅|
 </details>

@@ -598,6 +600,7 @@ Please submit more benchmarks via raising an issue!
 |Mistral 3| | | |
 |Llama 4| | | |
 |Qwen 3| | | |
+|SmolLM3|✅| | |
 </details>

 <details>
@@ -631,6 +634,7 @@ Please submit more benchmarks via raising an issue!
 |Mistral 3|✅|
 |Llama 4| |
 |Qwen 3| |
+|SmolLM3|✅|
 </details>

 ### Using derivative and adapter models
```
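
The hunk heading above notes that when no architecture is specified, mistral.rs attempts to infer it from the model. A minimal Python sketch of relying on that fallback for SmolLM3, assuming `arch` is an optional field of `Which.Plain` (as suggested by the example file below):

```python
from mistralrs import Runner, Which

# A minimal sketch: omit `arch` and let mistral.rs attempt to infer the
# architecture from the model itself, per the README note above.
# Assumption: `arch` is optional on `Which.Plain`.
runner = Runner(
    which=Which.Plain(model_id="HuggingFaceTB/SmolLM3-3B"),
)
```

Passing `arch=Architecture.SmolLm3` explicitly, as the new example file does, simply skips that detection step.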

examples/python/smollm3.py

Lines changed: 28 additions & 0 deletions
```python
#!/usr/bin/env python
"""
Example of using a SmolLM3 model with mistral.rs
"""

from mistralrs import Runner, Which, ChatCompletionRequest, Architecture

# Create a SmolLM3 model runner
runner = Runner(
    which=Which.Plain(
        model_id="HuggingFaceTB/SmolLM3-3B",  # You can use any SmolLM3 model from HuggingFace
        arch=Architecture.SmolLm3,
    ),
)

# Send a chat completion request
res = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="smollm3",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        max_tokens=256,
        temperature=0.7,
    )
)

# Print the response and token usage
print(res.choices[0].message.content)
print(f"\nUsage: {res.usage}")
```
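
For interactive use, the same request can be streamed instead of collected. This is a sketch only: it assumes `ChatCompletionRequest` accepts `stream=True` and that the runner then yields OpenAI-style chunks exposing `choices[0].delta.content`; verify both against the installed mistralrs version.

```python
# Streaming sketch (assumptions noted above): reuse the `runner` from the
# example and print tokens as they arrive.
stream = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="smollm3",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        max_tokens=256,
        temperature=0.7,
        stream=True,
    )
)
for chunk in stream:
    print(chunk.choices[0].delta.content, end="", flush=True)
print()
```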

mistralrs-pyo3/API.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -28,6 +28,9 @@ If you do not specify the architecture, an attempt will be made to use the model
 - `Phi3_5MoE`
 - `DeepseekV2`
 - `DeepseekV3`
+- `Qwen3`
+- `Qwen3Moe`
+- `SmolLm3`

 ### ISQ Organization
 - `Default`
```
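
The `ISQ Organization` values pair with in-situ quantization (ISQ) at load time. A minimal sketch of requesting ISQ for SmolLM3 from Python, assuming the `in_situ_quant` keyword argument on `Runner` and a `"Q4K"` quantization name; both should be checked against the installed build:

```python
from mistralrs import Runner, Which, Architecture

# A minimal sketch: load SmolLM3 with in-situ quantization.
# Assumptions: `in_situ_quant` is a `Runner` kwarg and "Q4K" is an
# accepted ISQ type name in the installed build.
runner = Runner(
    which=Which.Plain(
        model_id="HuggingFaceTB/SmolLM3-3B",
        arch=Architecture.SmolLm3,
    ),
    in_situ_quant="Q4K",
)
```

This mirrors the `.with_isq(IsqType::Q8_0)` call in the Rust example further down.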

mistralrs-pyo3/src/which.rs

Lines changed: 2 additions & 0 deletions
```diff
@@ -24,6 +24,7 @@ pub enum Architecture {
     Qwen3,
     GLM4,
     Qwen3Moe,
+    SmolLm3,
 }

 impl From<Architecture> for NormalLoaderType {
@@ -44,6 +45,7 @@ impl From<Architecture> for NormalLoaderType {
             Architecture::Qwen3 => Self::Qwen3,
             Architecture::GLM4 => Self::GLM4,
             Architecture::Qwen3Moe => Self::Qwen3Moe,
+            Architecture::SmolLm3 => Self::SmolLm3,
         }
     }
 }
```

mistralrs/examples/smollm3/main.rs

Lines changed: 54 additions & 0 deletions
```rust
use anyhow::Result;
use mistralrs::{
    IsqType, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextMessages,
    TextModelBuilder,
};

#[tokio::main]
async fn main() -> Result<()> {
    // Build a SmolLM3 model with Q8_0 in-situ quantization and paged attention.
    let model = TextModelBuilder::new("HuggingFaceTB/SmolLM3-3B")
        .with_isq(IsqType::Q8_0)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write a generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    // Next example: return some logprobs with the `RequestBuilder`, which enables higher configurability.
    let request = RequestBuilder::new().return_logprobs(true).add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    // Print the logprobs of the first three generated tokens
    // (assumes at least three tokens were generated).
    println!(
        "Logprobs: {:?}",
        &response.choices[0]
            .logprobs
            .as_ref()
            .unwrap()
            .content
            .as_ref()
            .unwrap()[0..3]
    );

    Ok(())
}
```
