Skip to content

Commit 4f47b80

Browse files
committed
feat: implement retry mechanism for MCP server activation with exponential backoff
1 parent 919b667 commit 4f47b80

File tree

1 file changed

+96
-16
lines changed

1 file changed

+96
-16
lines changed

src-tauri/src/core/mcp.rs

Lines changed: 96 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@ use serde_json::{Map, Value};
44
use std::fs;
55
use std::{collections::HashMap, env, sync::Arc, time::Duration};
66
use tauri::{AppHandle, Emitter, Runtime, State};
7-
use tokio::{process::Command, sync::Mutex, time::timeout};
7+
use tokio::{process::Command, sync::Mutex, time::{timeout, sleep}};
88

99
use super::{cmd::get_jan_data_folder_path, state::AppState};
1010

@@ -13,6 +13,12 @@ const DEFAULT_MCP_CONFIG: &str = r#"{"mcpServers":{"browsermcp":{"command":"npx"
1313
// Timeout for MCP tool calls (30 seconds)
1414
const MCP_TOOL_CALL_TIMEOUT: Duration = Duration::from_secs(30);
1515

16+
// Maximum retry attempts for MCP server loading
17+
const MAX_MCP_RETRY_ATTEMPTS: u32 = 3;
18+
19+
// Base backoff duration in milliseconds (exponential backoff will multiply this)
20+
const BASE_BACKOFF_MS: u64 = 1000;
21+
1622
/// Runs MCP commands by reading configuration from a JSON file and initializing servers
1723
///
1824
/// # Arguments
@@ -46,30 +52,103 @@ pub async fn run_mcp_commands<R: Runtime>(
4652
log::trace!("Server {name} is not active, skipping.");
4753
continue;
4854
}
49-
match start_mcp_server(
55+
56+
// Try to start the server with retry mechanism
57+
start_mcp_server(
5058
app.clone(),
5159
servers_state.clone(),
5260
name.clone(),
5361
config.clone(),
5462
)
55-
.await
56-
{
57-
Ok(_) => {
58-
log::info!("Server {name} activated successfully.");
59-
}
60-
Err(e) => {
63+
.await;
64+
}
65+
}
66+
67+
Ok(())
68+
}
69+
70+
/// Starts an MCP server with retry mechanism using exponential backoff
71+
async fn start_mcp_server<R: Runtime>(
72+
app: AppHandle<R>,
73+
servers_state: Arc<Mutex<HashMap<String, RunningService<RoleClient, ()>>>>,
74+
name: String,
75+
config: Value,
76+
) {
77+
// Initialize retry state locally for this server start attempt
78+
let mut retry_count = 0;
79+
let mut backoff_ms = BASE_BACKOFF_MS;
80+
81+
loop {
82+
retry_count += 1;
83+
84+
if retry_count > 1 {
85+
log::info!(
86+
"Starting MCP server {name} - retry attempt {} of {} (waiting {:.1}s)",
87+
retry_count,
88+
MAX_MCP_RETRY_ATTEMPTS,
89+
backoff_ms as f64 / 1000.0
90+
);
91+
92+
let _ = app.emit(
93+
"mcp-retry-attempt",
94+
serde_json::json!({
95+
"server": name,
96+
"attempt": retry_count,
97+
"max_attempts": MAX_MCP_RETRY_ATTEMPTS
98+
})
99+
);
100+
101+
sleep(Duration::from_millis(backoff_ms)).await;
102+
} else {
103+
log::info!("Starting MCP server {name} - initial attempt");
104+
}
105+
106+
// Attempt to start the server
107+
match start_mcp_server_internal(app.clone(), servers_state.clone(), name.clone(), config.clone()).await {
108+
Ok(_) => {
109+
log::info!("Server {name} activated successfully.");
110+
111+
let _ = app.emit(
112+
"mcp-server-started",
113+
serde_json::json!({
114+
"server": name,
115+
"status": "success",
116+
"attempts": retry_count
117+
})
118+
);
119+
return;
120+
}
121+
Err(e) => {
122+
log::error!("Failed to activate server {name} (attempt {retry_count}): {e}");
123+
124+
if retry_count >= MAX_MCP_RETRY_ATTEMPTS {
125+
log::error!(
126+
"Server {name} has exceeded maximum retry attempts ({}). Giving up.",
127+
MAX_MCP_RETRY_ATTEMPTS
128+
);
61129
let _ = app.emit(
62-
"mcp-error",
63-
format!("Failed to activate MCP server {name}: {e}"),
130+
"mcp-max-retries-exceeded",
131+
format!("MCP server {name} failed after {MAX_MCP_RETRY_ATTEMPTS} attempts: {e}"),
64132
);
65-
log::error!("Failed to activate server {name}: {e}");
66-
continue; // Skip to the next server
133+
return;
134+
} else {
135+
let _ = app.emit(
136+
"mcp-retry-scheduled",
137+
serde_json::json!({
138+
"server": name,
139+
"attempt": retry_count,
140+
"max_attempts": MAX_MCP_RETRY_ATTEMPTS,
141+
"error": e,
142+
"next_retry_in_ms": backoff_ms
143+
})
144+
);
145+
146+
// Update backoff duration for next attempt (exponential backoff)
147+
backoff_ms = (backoff_ms * 2).min(30000); // Cap at 30 seconds
67148
}
68149
}
69150
}
70151
}
71-
72-
Ok(())
73152
}
74153

75154
#[tauri::command]
@@ -81,10 +160,11 @@ pub async fn activate_mcp_server<R: Runtime>(
81160
) -> Result<(), String> {
82161
let servers: Arc<Mutex<HashMap<String, RunningService<RoleClient, ()>>>> =
83162
state.mcp_servers.clone();
84-
start_mcp_server(app, servers, name, config).await
163+
start_mcp_server(app, servers, name, config).await;
164+
Ok(())
85165
}
86166

87-
async fn start_mcp_server<R: Runtime>(
167+
async fn start_mcp_server_internal<R: Runtime>(
88168
app: tauri::AppHandle<R>,
89169
servers: Arc<Mutex<HashMap<String, RunningService<RoleClient, ()>>>>,
90170
name: String,

0 commit comments

Comments (0)