Commit d7cd5fe

Docs
1 parent b8f7c4d commit d7cd5fe

File tree

- README.md
- examples/python/smollm3.py
- mistralrs-pyo3/API.md
- mistralrs-pyo3/src/which.rs
- mistralrs/examples/smollm3/main.rs

5 files changed: +91, -0 lines changed

README.md

Lines changed: 4 additions & 0 deletions
```diff
@@ -469,6 +469,7 @@ If you do not specify the architecture, an attempt will be made to use the model
 - `deepseekv3`
 - `qwen3`
 - `qwen3moe`
+- `smollm3`

 </details>

@@ -552,6 +553,7 @@ Please submit more benchmarks via raising an issue!
 |Mistral 3| | |✅|
 |Llama 4| | |✅|
 |Qwen 3|✅| |✅|
+|SmolLM3| | |✅|
 |Dia 1.6b| | |✅|
 </details>

@@ -598,6 +600,7 @@ Please submit more benchmarks via raising an issue!
 |Mistral 3| | | |
 |Llama 4| | | |
 |Qwen 3| | | |
+|SmolLM3|✅| | |
 </details>

 <details>
@@ -631,6 +634,7 @@ Please submit more benchmarks via raising an issue!
 |Mistral 3|✅|
 |Llama 4| |
 |Qwen 3| |
+|SmolLM3|✅|
 </details>

 ### Using derivative and adapter models
```
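
The hunk heading above notes that when no architecture is specified, mistral.rs attempts to infer it from the model. A minimal Python sketch of relying on that fallback for SmolLM3, assuming `arch` is an optional field of `Which.Plain` (as suggested by the example file below):

```python
from mistralrs import Runner, Which

# A minimal sketch: omit `arch` and let mistral.rs attempt to infer the
# architecture from the model itself, per the README note above.
# Assumption: `arch` is optional on `Which.Plain`.
runner = Runner(
    which=Which.Plain(model_id="HuggingFaceTB/SmolLM3-3B"),
)
```

Passing `arch=Architecture.SmolLm3` explicitly, as the new example file does, simply skips that detection step.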

examples/python/smollm3.py

Lines changed: 28 additions & 0 deletions
```python
#!/usr/bin/env python
"""
Example of using a SmolLM3 model with mistral.rs
"""

from mistralrs import Runner, Which, ChatCompletionRequest, Architecture

# Create a SmolLM3 model runner
runner = Runner(
    which=Which.Plain(
        model_id="HuggingFaceTB/SmolLM3-3B",  # You can use any SmolLM3 model from HuggingFace
        arch=Architecture.SmolLm3,
    ),
)

# Send a chat completion request
res = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="smollm3",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        max_tokens=256,
        temperature=0.7,
    )
)

# Print the response and token usage
print(res.choices[0].message.content)
print(f"\nUsage: {res.usage}")
```
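
For interactive use, the same request can be streamed instead of collected. This is a sketch only: it assumes `ChatCompletionRequest` accepts `stream=True` and that the runner then yields OpenAI-style chunks exposing `choices[0].delta.content`; verify both against the installed mistralrs version.

```python
# Streaming sketch (assumptions noted above): reuse the `runner` from the
# example and print tokens as they arrive.
stream = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="smollm3",
        messages=[{"role": "user", "content": "What is the capital of France?"}],
        max_tokens=256,
        temperature=0.7,
        stream=True,
    )
)
for chunk in stream:
    print(chunk.choices[0].delta.content, end="", flush=True)
print()
```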

mistralrs-pyo3/API.md

Lines changed: 3 additions & 0 deletions
```diff
@@ -28,6 +28,9 @@ If you do not specify the architecture, an attempt will be made to use the model
 - `Phi3_5MoE`
 - `DeepseekV2`
 - `DeepseekV3`
+- `Qwen3`
+- `Qwen3Moe`
+- `SmolLm3`

 ### ISQ Organization
 - `Default`
```
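
The `ISQ Organization` values pair with in-situ quantization (ISQ) at load time. A minimal sketch of requesting ISQ for SmolLM3 from Python, assuming the `in_situ_quant` keyword argument on `Runner` and a `"Q4K"` quantization name; both should be checked against the installed build:

```python
from mistralrs import Runner, Which, Architecture

# A minimal sketch: load SmolLM3 with in-situ quantization.
# Assumptions: `in_situ_quant` is a `Runner` kwarg and "Q4K" is an
# accepted ISQ type name in the installed build.
runner = Runner(
    which=Which.Plain(
        model_id="HuggingFaceTB/SmolLM3-3B",
        arch=Architecture.SmolLm3,
    ),
    in_situ_quant="Q4K",
)
```

This mirrors the `.with_isq(IsqType::Q8_0)` call in the Rust example further down.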

mistralrs-pyo3/src/which.rs

Lines changed: 2 additions & 0 deletions
```diff
@@ -24,6 +24,7 @@ pub enum Architecture {
     Qwen3,
     GLM4,
     Qwen3Moe,
+    SmolLm3,
 }

 impl From<Architecture> for NormalLoaderType {
@@ -44,6 +45,7 @@ impl From<Architecture> for NormalLoaderType {
             Architecture::Qwen3 => Self::Qwen3,
             Architecture::GLM4 => Self::GLM4,
             Architecture::Qwen3Moe => Self::Qwen3Moe,
+            Architecture::SmolLm3 => Self::SmolLm3,
         }
     }
 }
```

mistralrs/examples/smollm3/main.rs

Lines changed: 54 additions & 0 deletions
```rust
use anyhow::Result;
use mistralrs::{
    IsqType, PagedAttentionMetaBuilder, RequestBuilder, TextMessageRole, TextMessages,
    TextModelBuilder,
};

#[tokio::main]
async fn main() -> Result<()> {
    // Build a SmolLM3 model with Q8_0 in-situ quantization and paged attention.
    let model = TextModelBuilder::new("HuggingFaceTB/SmolLM3-3B")
        .with_isq(IsqType::Q8_0)
        .with_logging()
        .with_paged_attn(|| PagedAttentionMetaBuilder::default().build())?
        .build()
        .await?;

    let messages = TextMessages::new()
        .add_message(
            TextMessageRole::System,
            "You are an AI agent with a specialty in programming.",
        )
        .add_message(
            TextMessageRole::User,
            "Hello! How are you? Please write a generic binary search function in Rust.",
        );

    let response = model.send_chat_request(messages).await?;

    println!("{}", response.choices[0].message.content.as_ref().unwrap());
    dbg!(
        response.usage.avg_prompt_tok_per_sec,
        response.usage.avg_compl_tok_per_sec
    );

    // Next example: return some logprobs with the `RequestBuilder`, which enables higher configurability.
    let request = RequestBuilder::new().return_logprobs(true).add_message(
        TextMessageRole::User,
        "Please write a mathematical equation where a few numbers are added.",
    );

    let response = model.send_chat_request(request).await?;

    // Print the logprobs of the first three generated tokens
    // (assumes at least three tokens were generated).
    println!(
        "Logprobs: {:?}",
        &response.choices[0]
            .logprobs
            .as_ref()
            .unwrap()
            .content
            .as_ref()
            .unwrap()[0..3]
    );

    Ok(())
}
```
