1 change: 1 addition & 0 deletions Cargo.lock

(Generated file; diff not rendered.)

1 change: 1 addition & 0 deletions Cargo.toml
@@ -78,6 +78,7 @@ half = "2.4.0"
rayon = "1.1.0"
url = "2.5.2"
utoipa = "4.2"
walkdir = "2.5.0"
data-url = "0.3.1"
float8 = "0.2.1"
regex = "1.10.6"
42 changes: 42 additions & 0 deletions docs/WEB_SEARCH.md
@@ -23,6 +23,35 @@ Internally, we use a BERT model (Snowflake/snowflake-arctic-embed-l-v2.0)[https:
- Python: `search_bert_model` in the Runner
- Server: `search-bert-model` before the model type selector (`plain`/`vision-plain`)

## Specifying a custom search callback

By default, mistral.rs uses a DuckDuckGo-based search callback. To override this, you can provide your own search function:

- Rust: use `.with_search_callback(...)` on the model builder with an `Arc<dyn Fn(&SearchFunctionParameters) -> anyhow::Result<Vec<SearchResult>> + Send + Sync>` (see the Rust sketch after the Python example below).
- Python: pass the `search_callback` keyword argument to `Runner`, which should be a function `def search_callback(query: str) -> List[Dict[str, str]]` returning a list of result dicts, each with the keys `"title"`, `"description"`, `"url"`, and `"content"`.

Example in Python:
```py
from mistralrs import Runner, Which, Architecture


def search_callback(query: str) -> list[dict[str, str]]:
    # Implement your custom search logic here, returning a list of result dicts
    return [
        {
            "title": "Example Result",
            "description": "An example description",
            "url": "https://example.com",
            "content": "Full text content of the page",
        },
        # more results...
    ]


runner = Runner(
    which=Which.Plain(model_id="YourModel/ID", arch=Architecture.Mistral),
    enable_search=True,
    search_callback=search_callback,
)
```
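
In Rust, the equivalent looks roughly like the sketch below. Treat it as illustrative: the `SearchResult` field names are inferred from the Python dict keys above, and the types are assumed to be re-exported at the crate root as they are from `mistralrs-core`; check the crate docs for the exact definitions.

```rust
use std::sync::Arc;

use mistralrs::{SearchCallback, SearchFunctionParameters, SearchResult};

// Hypothetical callback that returns one fixed result for any query.
// Returned results must be sorted in decreasing order of relevance.
let callback: Arc<SearchCallback> = Arc::new(|_params: &SearchFunctionParameters| {
    Ok(vec![SearchResult {
        title: "Example Result".to_string(),
        description: "An example description".to_string(),
        url: "https://example.com".to_string(),
        content: "Full text content of the page".to_string(),
    }])
});

// Then, when constructing the engine (builder arguments per the signature in this PR):
// MistralRsBuilder::new(pipeline, scheduler_config, throughput_logging, search_embedding_model)
//     .with_search_callback(callback)
//     .build();
```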

## HTTP server
**Be sure to add `--enable-search`!**

@@ -80,12 +109,25 @@ from mistralrs import (
    WebSearchOptions,
)

# Define a custom search callback if desired
def my_search_callback(query: str) -> list[dict[str, str]]:
    # Fetch or compute search results here
    return [
        {
            "title": "Mistral.rs GitHub",
            "description": "Official mistral.rs repository",
            "url": "https://github.com/EricLBuehler/mistral.rs",
            "content": "mistral.rs is a fast LLM inference engine written in Rust...",
        },
    ]


runner = Runner(
    which=Which.Plain(
        model_id="NousResearch/Hermes-3-Llama-3.1-8B",
        arch=Architecture.Llama,
    ),
    enable_search=True,
    search_callback=my_search_callback,
)

res = runner.send_chat_completion_request(
52 changes: 52 additions & 0 deletions examples/python/local_search.py
@@ -0,0 +1,52 @@
from mistralrs import (
    Runner,
    Which,
    ChatCompletionRequest,
    Architecture,
    WebSearchOptions,
)
import os


def local_search(query: str):
    results = []
    for root, _, files in os.walk("."):
        for f in files:
            if query in f:
                path = os.path.join(root, f)
                try:
                    content = open(path).read()
                except Exception:
                    content = ""
Comment on lines +18 to +20

🛠️ Refactor suggestion

Improve file handling with proper resource management.

The current file opening approach lacks proper resource management and error handling specificity.

Apply this diff to use a context manager and improve error handling (binding the handle to `fh` so it does not shadow the loop variable `f`, which is used for the title below):

-                try:
-                    content = open(path).read()
-                except Exception:
-                    content = ""
+                try:
+                    with open(path, 'r', encoding='utf-8') as fh:
+                        content = fh.read()
+                except (OSError, UnicodeDecodeError):
+                    content = ""
                results.append(
                    {
                        "title": f,
                        "description": path,
                        "url": path,
                        "content": content,
                    }
                )
    results.sort(key=lambda r: r["title"], reverse=True)
    return results


runner = Runner(
    which=Which.Plain(
        model_id="NousResearch/Hermes-3-Llama-3.1-8B",
        arch=Architecture.Llama,
    ),
    enable_search=True,
    search_callback=local_search,
)

res = runner.send_chat_completion_request(
    ChatCompletionRequest(
        model="mistral",
        messages=[{"role": "user", "content": "Where is Cargo.toml in this repo?"}],
        max_tokens=64,
        web_search_options=WebSearchOptions(
            search_description="Local filesystem search"
        ),
    )
)
print(res.choices[0].message.content)
4 changes: 4 additions & 0 deletions mistralrs-core/src/engine/mod.rs
@@ -9,6 +9,7 @@ use crate::{
    prefix_cacher::PrefixCacheManagerV2,
    response::CompletionChoice,
    scheduler::{Scheduler, SchedulerOutput},
    search,
    sequence::{SeqStepType, StopReason},
    CompletionResponse, SchedulerConfig, DEBUG,
};
@@ -72,6 +73,7 @@ pub struct Engine {
    rx: Arc<Mutex<Receiver<Request>>>,
    pipeline: Arc<Mutex<dyn Pipeline>>,
    bert_pipeline: Arc<Mutex<Option<BertPipeline>>>,
    search_callback: Option<Arc<search::SearchCallback>>,
    scheduler: Arc<Mutex<dyn Scheduler>>,
    id: Arc<Mutex<usize>>,
    truncate_sequence: bool,
@@ -105,6 +107,7 @@ impl Engine {
        disable_eos_stop: bool,
        throughput_logging_enabled: bool,
        search_embedding_model: Option<BertEmbeddingModel>,
        search_callback: Option<Arc<search::SearchCallback>>,
    ) -> anyhow::Result<Self> {
        no_kv_cache |= get_mut_arcmutex!(pipeline).get_metadata().no_kv_cache;

@@ -127,6 +130,7 @@
            rx: Arc::new(Mutex::new(rx)),
            pipeline,
            bert_pipeline: Arc::new(Mutex::new(bert_pipeline)),
            search_callback,
            scheduler: scheduler.clone(),
            id: Arc::new(Mutex::new(0)),
            truncate_sequence,
8 changes: 6 additions & 2 deletions mistralrs-core/src/engine/search_request.rs
@@ -72,8 +72,12 @@ async fn do_search(
    };
    let mut results = tokio::task::block_in_place(|| {
        tracing::dispatcher::with_default(&dispatch, || {
-           search::run_search_tool(&tool_call_params)
-               .unwrap()
+           let base_results = if let Some(cb) = &this.search_callback {
+               cb(&tool_call_params).unwrap()
+           } else {
+               search::run_search_tool(&tool_call_params).unwrap()
+           };
+           base_results
                .into_iter()
                .map(|mut result| {
                    result = result
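Note that both branches `unwrap()` the gathered results, so a callback returning `Err` would panic the engine thread. A defensive wrapper (a sketch, not part of this PR; it assumes the re-exported `SearchCallback` types) can map failures to an empty result list instead:

```rust
use std::sync::Arc;

use mistralrs::{SearchCallback, SearchFunctionParameters, SearchResult};

// Hypothetical helper: wrap a fallible search function so the engine's
// `.unwrap()` never panics; errors are reported as "no results".
fn defensive<F>(inner: F) -> Arc<SearchCallback>
where
    F: Fn(&SearchFunctionParameters) -> anyhow::Result<Vec<SearchResult>> + Send + Sync + 'static,
{
    Arc::new(move |params: &SearchFunctionParameters| Ok(inner(params).unwrap_or_default()))
}
```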
18 changes: 18 additions & 0 deletions mistralrs-core/src/lib.rs
@@ -104,6 +104,7 @@ pub use sampler::{
    CustomLogitsProcessor, DrySamplingParams, SamplingParams, StopTokens, TopLogprob,
};
pub use scheduler::{DefaultSchedulerMethod, SchedulerConfig};
pub use search::{SearchCallback, SearchFunctionParameters, SearchResult};
use serde::Serialize;
pub use speech_models::{utils as speech_utils, SpeechGenerationConfig, SpeechLoaderType};
use tokio::runtime::Runtime;
@@ -159,6 +160,7 @@ struct RebootState {
    disable_eos_stop: bool,
    throughput_logging_enabled: bool,
    search_embedding_model: Option<BertEmbeddingModel>,
    search_callback: Option<Arc<search::SearchCallback>>,
}

#[derive(Debug)]
@@ -196,9 +198,13 @@ pub struct MistralRsBuilder {
    disable_eos_stop: Option<bool>,
    throughput_logging_enabled: bool,
    search_embedding_model: Option<BertEmbeddingModel>,
    search_callback: Option<Arc<SearchCallback>>,
}

impl MistralRsBuilder {
    /// Creates a new builder with the given pipeline, scheduler method, logging flag,
    /// and optional embedding model for web search. To override the search callback,
    /// use `.with_search_callback(...)` on the builder.
    pub fn new(
        pipeline: Arc<tokio::sync::Mutex<dyn Pipeline>>,
        method: SchedulerConfig,
@@ -216,6 +222,7 @@ impl MistralRsBuilder {
            disable_eos_stop: None,
            throughput_logging_enabled: throughput_logging,
            search_embedding_model,
            search_callback: None,
        }
    }
    pub fn with_log(mut self, log: String) -> Self {
@@ -247,6 +254,12 @@
        self
    }

    /// Use a custom callback to gather search results.
    pub fn with_search_callback(mut self, search_callback: Arc<SearchCallback>) -> Self {
        self.search_callback = Some(search_callback);
        self
    }

    pub fn build(self) -> Arc<MistralRs> {
        MistralRs::new(self)
    }
@@ -274,6 +287,7 @@ impl MistralRs {
            disable_eos_stop,
            throughput_logging_enabled,
            search_embedding_model,
            search_callback,
        } = config;

        let category = pipeline.try_lock().unwrap().category();
@@ -297,6 +311,7 @@
            disable_eos_stop,
            throughput_logging_enabled,
            search_embedding_model: search_embedding_model.clone(),
            search_callback: search_callback.clone(),
        };

        let (tx, rx) = channel(10_000);
@@ -328,6 +343,7 @@
                    disable_eos_stop,
                    throughput_logging_enabled,
                    search_embedding_model,
                    search_callback.clone(),
                )
                .expect("Engine creation failed.");
                Arc::new(engine).run().await;
@@ -349,6 +365,7 @@
                    disable_eos_stop,
                    throughput_logging_enabled,
                    search_embedding_model,
                    search_callback.clone(),
                )
                .expect("Engine creation failed.");
                Arc::new(engine).run().await;
@@ -473,6 +490,7 @@ impl MistralRs {
                    reboot_state.disable_eos_stop,
                    reboot_state.throughput_logging_enabled,
                    reboot_state.search_embedding_model,
                    reboot_state.search_callback.clone(),
                )
                .expect("Engine creation failed");
                Arc::new(engine).run().await;
4 changes: 4 additions & 0 deletions mistralrs-core/src/request.rs
@@ -110,6 +110,10 @@ pub enum WebSearchUserLocation {
pub struct WebSearchOptions {
    pub search_context_size: Option<SearchContextSize>,
    pub user_location: Option<WebSearchUserLocation>,
    /// Override the description for the search tool.
    pub search_description: Option<String>,
    /// Override the description for the extraction tool.
    pub extract_description: Option<String>,
}

#[derive(Clone, Serialize, Deserialize)]
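For illustration, the new overrides can be set from Rust like this (a sketch; it assumes `WebSearchOptions` is re-exported at the crate root and that the remaining fields are simply left as `None`):

```rust
use mistralrs::WebSearchOptions;

// Override only the search tool's description; the extraction tool
// keeps its default description.
let opts = WebSearchOptions {
    search_context_size: None,
    user_location: None,
    search_description: Some("Local filesystem search".to_string()),
    extract_description: None,
};
```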
18 changes: 15 additions & 3 deletions mistralrs-core/src/search/mod.rs
@@ -14,6 +14,11 @@ use tokenizers::Tokenizer;

use crate::{Function, Tool, ToolType, WebSearchOptions, WebSearchUserLocation};

/// Callback used to override how search results are gathered. The returned
/// vector must be sorted in decreasing order of relevance.
pub type SearchCallback =
    dyn Fn(&SearchFunctionParameters) -> Result<Vec<SearchResult>> + Send + Sync;

pub(crate) fn search_tool_called(name: &str) -> bool {
    name == SEARCH_TOOL_NAME || name == EXTRACT_TOOL_NAME
}
@@ -140,11 +145,14 @@ pub fn get_search_tools(web_search_options: &WebSearchOptions) -> Result<Vec<Too
        }
        None => "".to_string(),
    };

+   let description = web_search_options
+       .search_description
+       .as_deref()
+       .unwrap_or(SEARCH_DESCRIPTION);
    Tool {
        tp: ToolType::Function,
        function: Function {
-           description: Some(format!("{SEARCH_DESCRIPTION}{location_details}")),
+           description: Some(format!("{}{}", description, location_details)),
            name: SEARCH_TOOL_NAME.to_string(),
            parameters: Some(parameters),
        },
@@ -163,10 +171,14 @@
"required": ["url"],
}))?;

let description = web_search_options
.extract_description
.as_deref()
.unwrap_or(EXTRACT_DESCRIPTION);
Tool {
tp: ToolType::Function,
function: Function {
description: Some(EXTRACT_DESCRIPTION.to_string()),
description: Some(description.to_string()),
name: EXTRACT_TOOL_NAME.to_string(),
parameters: Some(parameters),
},
5 changes: 4 additions & 1 deletion mistralrs-pyo3/mistralrs.pyi
@@ -1,6 +1,6 @@
from dataclasses import dataclass
from enum import Enum
-from typing import Iterator, Literal, Optional
+from typing import Iterator, Literal, Optional, Callable

class SearchContextSize(Enum):
Low = "low"
@@ -345,7 +345,9 @@ class Runner:
        paged_attn: bool = False,
        prompt_batchsize: int | None = None,
        seed: int | None = None,
        enable_search: bool = False,
        search_bert_model: str | None = None,
        search_callback: Callable[[str], list[dict[str, str]]] | None = None,
        no_bert_model: bool = False,
    ) -> None:
        """
@@ -389,6 +391,7 @@
        - `seed`, used to ensure reproducible random number generation.
        - `enable_search`: Enable searching compatible with the OpenAI `web_search_options` setting. This uses the BERT model specified below or the default.
        - `search_bert_model`: specify a Hugging Face model ID for a BERT model to assist web searching. Defaults to Snowflake Arctic Embed L.
        - `search_callback`: Custom Python callable to perform web searches. Should accept a query string and return a list of dicts with keys "title", "description", "url", and "content".
        """

        ...