Commit 1d0bb53
feat: add support for querying available backend devices (#5877)
* feat: add support for querying available backend devices

  This change introduces a new `get_devices` method to the `llamacpp_extension` engine that allows the frontend to query and display a list of available devices (e.g., Vulkan, CUDA, SYCL) from the compiled `llama-server` binary.

  * Added `DeviceList` interface to represent GPU/device metadata.
  * Implemented `getDevices(): Promise<DeviceList[]>` method.
  * Splits `version/backend`, ensures the backend is ready.
  * Invokes the new Tauri command `get_devices`.
  * Introduced a new `get_devices` Tauri command.
  * Parses `llama-server --list-devices` output to extract available devices with memory info.
  * Introduced `DeviceInfo` struct (`id`, `name`, `mem`, `free`) and exposed it via serialization.
  * Robust parsing logic using string processing (non-regex) to locate memory stats.
  * Registered the new command in the `tauri::Builder` in `lib.rs`.
  * Fixed logic to correctly parse multiple devices from the llama-server output.
  * Handles common failure modes: binary not found, malformed memory info, etc.

  This sets the foundation for device selection, memory-aware model loading, and improved diagnostics in Jan AI engine setup flows.

* Update extensions/llamacpp-extension/src/index.ts

  Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>

---------

Co-authored-by: ellipsis-dev[bot] <65095814+ellipsis-dev[bot]@users.noreply.github.com>
1 parent d6ad797 commit 1d0bb53
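For context, the new Tauri command shells out to `llama-server --list-devices` and scans stdout for an `Available devices:` section. A representative sample of the output it expects (device lines taken from the format examples documented in the parser below; exact wording varies by llama.cpp build and backend):

```
Available devices:
  Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)
  CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)
  SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)
```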

File tree

  • extensions/llamacpp-extension/src
  • src-tauri/src

3 files changed: +288 −0 lines

extensions/llamacpp-extension/src/index.ts

Lines changed: 33 additions & 0 deletions
```diff
@@ -92,6 +92,13 @@ interface EmbeddingData {
   index: number
   object: string
 }
+
+interface DeviceList {
+  id: string
+  name: string
+  mem: number
+  free: number
+}
 /**
  * Override the default app.log function to use Jan's logging system.
  * @param args
@@ -1223,6 +1230,32 @@ export default class llamacpp_extension extends AIEngine {
     return lmodels
   }

+  async getDevices(): Promise<DeviceList[]> {
+    const cfg = this.config
+    const [version, backend] = cfg.version_backend.split('/')
+    if (!version || !backend) {
+      throw new Error(
+        `Invalid version/backend format: ${cfg.version_backend}. Expected format: <version>/<backend>`
+      )
+    }
+
+    // Ensure the backend is downloaded and ready before proceeding
+    await this.ensureBackendReady(backend, version)
+    logger.info('Calling Tauri command get_devices with arg --list-devices')
+    const backendPath = await getBackendExePath(backend, version)
+    const libraryPath = await joinPath([await this.getProviderPath(), 'lib'])
+    try {
+      const dList = await invoke<DeviceList[]>('get_devices', {
+        backendPath,
+        libraryPath,
+      })
+      return dList
+    } catch (error) {
+      logger.error('Failed to query devices:\n', error)
+      throw new Error(`Failed to query devices via llama-server: ${error}`)
+    }
+  }
+
   async embed(text: string[]): Promise<EmbeddingResponse> {
     let sInfo = this.findSessionByModel('sentence-transformer-mini')
     if (!sInfo) {
```
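For illustration, a frontend caller could build the memory-aware device selection that the commit message anticipates on top of this method. A minimal sketch, assuming `engine` is an instance of `llamacpp_extension` (the selection policy is hypothetical, not part of this commit):

```typescript
// Hypothetical usage sketch: prefer the device with the most free VRAM.
const devices = await engine.getDevices()
if (devices.length === 0) {
  console.warn('llama-server reported no devices; consider falling back to CPU')
} else {
  const best = devices.reduce((a, b) => (b.free > a.free ? b : a))
  console.log(
    `Selected ${best.id} (${best.name}): ${best.free} MiB free of ${best.mem} MiB`
  )
}
```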

src-tauri/src/core/utils/extensions/inference_llamacpp_extension/server.rs

Lines changed: 254 additions & 0 deletions
```diff
@@ -28,6 +28,8 @@ pub enum ServerError {
     Io(#[from] std::io::Error),
     #[error("Jan API error: {0}")]
     Tauri(#[from] tauri::Error),
+    #[error("Parse error: {0}")]
+    ParseError(String),
 }

 // impl serialization for tauri
@@ -57,6 +59,14 @@ pub struct UnloadResult {
     error: Option<String>,
 }

+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DeviceInfo {
+    pub id: String,
+    pub name: String,
+    pub mem: i32,
+    pub free: i32,
+}
+
 // --- Load Command ---
 #[tauri::command]
 pub async fn load_llama_model(
@@ -362,6 +372,250 @@ pub async fn unload_llama_model(
     }
 }

+#[tauri::command]
+pub async fn get_devices(
+    backend_path: &str,
+    library_path: Option<&str>,
+) -> ServerResult<Vec<DeviceInfo>> {
+    log::info!("Getting devices from server at path: {:?}", backend_path);
+
+    let server_path_buf = PathBuf::from(backend_path);
+    if !server_path_buf.exists() {
+        log::error!(
+            "Server binary not found at expected path: {:?}",
+            backend_path
+        );
+        return Err(ServerError::BinaryNotFound(format!(
+            "Binary not found at {:?}",
+            backend_path
+        )));
+    }
+
+    // Configure the command to run the server with --list-devices
+    let mut command = Command::new(backend_path);
+    command.arg("--list-devices");
+
+    // Set up the library path similar to the load function
+    if let Some(lib_path) = library_path {
+        if cfg!(target_os = "linux") {
+            let new_lib_path = match std::env::var("LD_LIBRARY_PATH") {
+                Ok(path) => format!("{}:{}", path, lib_path),
+                Err(_) => lib_path.to_string(),
+            };
+            command.env("LD_LIBRARY_PATH", new_lib_path);
+        } else if cfg!(target_os = "windows") {
+            let new_path = match std::env::var("PATH") {
+                Ok(path) => format!("{};{}", path, lib_path),
+                Err(_) => lib_path.to_string(),
+            };
+            command.env("PATH", new_path);
+
+            // Normalize the path by removing the UNC prefix if present
+            let normalized_path = lib_path.trim_start_matches(r"\\?\").to_string();
+            log::info!("Library path:\n{}", &normalized_path);
+
+            // Only set current_dir if the normalized path exists and is a directory
+            let path = std::path::Path::new(&normalized_path);
+            if path.exists() && path.is_dir() {
+                command.current_dir(&normalized_path);
+            } else {
+                log::warn!(
+                    "Library path '{}' does not exist or is not a directory",
+                    normalized_path
+                );
+            }
+        } else {
+            log::warn!("Library path setting is not supported on this OS");
+        }
+    }
+
+    command.stdout(Stdio::piped());
+    command.stderr(Stdio::piped());
+
+    #[cfg(all(windows, target_arch = "x86_64"))]
+    {
+        use std::os::windows::process::CommandExt;
+        const CREATE_NO_WINDOW: u32 = 0x0800_0000;
+        const CREATE_NEW_PROCESS_GROUP: u32 = 0x0000_0200;
+        command.creation_flags(CREATE_NO_WINDOW | CREATE_NEW_PROCESS_GROUP);
+    }
+
+    // Execute the command and wait for completion
+    let output = timeout(Duration::from_secs(30), command.output())
+        .await
+        .map_err(|_| ServerError::LlamacppError("Timeout waiting for device list".to_string()))?
+        .map_err(ServerError::Io)?;
+
+    // Check whether the command executed successfully
+    if !output.status.success() {
+        let stderr = String::from_utf8_lossy(&output.stderr);
+        log::error!("llama-server --list-devices failed: {}", stderr);
+        return Err(ServerError::LlamacppError(format!(
+            "Command failed with exit code {:?}: {}",
+            output.status.code(),
+            stderr
+        )));
+    }
+
+    // Parse the output
+    let stdout = String::from_utf8_lossy(&output.stdout);
+    log::info!("Device list output:\n{}", stdout);
+
+    parse_device_output(&stdout)
+}
+
+fn parse_device_output(output: &str) -> ServerResult<Vec<DeviceInfo>> {
+    let mut devices = Vec::new();
+    let mut found_devices_section = false;
+
+    for raw in output.lines() {
+        // Detect the header (ignoring whitespace)
+        if raw.trim() == "Available devices:" {
+            found_devices_section = true;
+            continue;
+        }
+
+        if !found_devices_section {
+            continue;
+        }
+
+        // Skip blank lines
+        if raw.trim().is_empty() {
+            continue;
+        }
+
+        // Parse any non-blank line after the header
+        let line = raw.trim();
+        if let Some(device) = parse_device_line(line)? {
+            devices.push(device);
+        }
+    }
+
+    if devices.is_empty() && found_devices_section {
+        log::warn!("No devices found in output");
+    } else if !found_devices_section {
+        return Err(ServerError::ParseError(
+            "Could not find 'Available devices:' section in output".to_string(),
+        ));
+    }
+
+    Ok(devices)
+}
+
+fn parse_device_line(line: &str) -> ServerResult<Option<DeviceInfo>> {
+    let line = line.trim();
+
+    log::info!("Parsing device line: '{}'", line);
+
+    // Expected formats:
+    //   "Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)"
+    //   "CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)"
+    //   "SYCL0: Intel(R) Arc(TM) A750 Graphics (8000 MiB, 7721 MiB free)"
+
+    // Split on the first colon to get the ID and the rest
+    let parts: Vec<&str> = line.splitn(2, ':').collect();
+    if parts.len() != 2 {
+        log::warn!("Skipping malformed device line: {}", line);
+        return Ok(None);
+    }
+
+    let id = parts[0].trim().to_string();
+    let rest = parts[1].trim();
+
+    // Find the memory pattern "(<total> MiB, <free> MiB free)" at the end;
+    // everything before it is the device name
+    if let Some((memory_start, memory_content)) = find_memory_pattern(rest) {
+        let name = rest[..memory_start].trim().to_string();
+
+        // Parse memory info: "8128 MiB, 8128 MiB free"
+        let memory_parts: Vec<&str> = memory_content.split(',').collect();
+        if memory_parts.len() >= 2 {
+            if let (Ok(total_mem), Ok(free_mem)) = (
+                parse_memory_value(memory_parts[0].trim()),
+                parse_memory_value(memory_parts[1].trim()),
+            ) {
+                log::info!(
+                    "Parsed device - ID: '{}', Name: '{}', Mem: {}, Free: {}",
+                    id, name, total_mem, free_mem
+                );
+
+                return Ok(Some(DeviceInfo {
+                    id,
+                    name,
+                    mem: total_mem,
+                    free: free_mem,
+                }));
+            }
+        }
+    }
+
+    log::warn!("Could not parse device line: {}", line);
+    Ok(None)
+}
+
+fn find_memory_pattern(text: &str) -> Option<(usize, &str)> {
+    // Find the last parenthesized group that contains the memory pattern
+    let mut last_match = None;
+
+    for (start_idx, ch) in text.char_indices() {
+        if ch == '(' {
+            // Find the closing parenthesis
+            let remaining = &text[start_idx + 1..];
+            if let Some(close_pos) = remaining.find(')') {
+                let content = &remaining[..close_pos];
+
+                // Check whether this looks like memory info
+                if is_memory_pattern(content) {
+                    last_match = Some((start_idx, content));
+                }
+            }
+        }
+    }
+
+    last_match
+}
+
+fn is_memory_pattern(content: &str) -> bool {
+    // Check whether content matches a pattern like "8128 MiB, 8128 MiB free":
+    // it must contain numbers, "MiB", a comma, and "free"
+    if !(content.contains("MiB") && content.contains("free") && content.contains(',')) {
+        return false;
+    }
+
+    let parts: Vec<&str> = content.split(',').collect();
+    if parts.len() != 2 {
+        return false;
+    }
+
+    parts.iter().all(|part| {
+        let part = part.trim();
+        // Each part should start with a number and contain "MiB"
+        part.split_whitespace()
+            .next()
+            .map_or(false, |first_word| first_word.parse::<i32>().is_ok())
+            && part.contains("MiB")
+    })
+}
+
+fn parse_memory_value(mem_str: &str) -> ServerResult<i32> {
+    // Handle formats like "8000 MiB" or "7721 MiB free"
+    let parts: Vec<&str> = mem_str.split_whitespace().collect();
+    if parts.is_empty() {
+        return Err(ServerError::ParseError(format!(
+            "Empty memory value: '{}'",
+            mem_str
+        )));
+    }
+
+    // The first part should be the number
+    let number_str = parts[0];
+    number_str.parse::<i32>().map_err(|_| {
+        ServerError::ParseError(format!(
+            "Could not parse memory value: '{}'",
+            number_str
+        ))
+    })
+}
+
 // crypto
 #[tauri::command]
 pub fn generate_api_key(model_id: String, api_secret: String) -> Result<String, String> {
```
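To pin down the parser's behavior, a unit test along these lines could live in the same module (a sketch, not part of this commit; it assumes the private `parse_*` helpers are in scope, and relies on `thiserror` errors implementing `Debug` so `.expect` works):

```rust
#[cfg(test)]
mod device_parsing_tests {
    use super::*;

    #[test]
    fn parses_devices_after_header() {
        // Lines before the "Available devices:" header are ignored;
        // the device lines mirror the formats documented in parse_device_line.
        let output = "\
some banner line before the device section
Available devices:
  Vulkan0: Intel(R) Arc(tm) A750 Graphics (DG2) (8128 MiB, 8128 MiB free)
  CUDA0: NVIDIA GeForce RTX 4090 (24576 MiB, 24000 MiB free)
";
        let devices = parse_device_output(output).expect("output should parse");
        assert_eq!(devices.len(), 2);
        assert_eq!(devices[0].id, "Vulkan0");
        assert_eq!(devices[0].name, "Intel(R) Arc(tm) A750 Graphics (DG2)");
        assert_eq!(devices[0].mem, 8128);
        assert_eq!(devices[1].free, 24000);
    }

    #[test]
    fn missing_header_is_a_parse_error() {
        // Without the "Available devices:" marker the parser returns an error.
        assert!(parse_device_output("no marker here").is_err());
    }

    #[test]
    fn malformed_lines_are_skipped_not_fatal() {
        // A line with no colon is skipped with a warning, not treated as fatal.
        let output = "Available devices:\n  not a device line\n";
        let devices = parse_device_output(output).expect("should still parse");
        assert!(devices.is_empty());
    }
}
```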

src-tauri/src/lib.rs

Lines changed: 1 addition & 0 deletions
```diff
@@ -94,6 +94,7 @@ pub fn run() {
             // llama-cpp extension
             core::utils::extensions::inference_llamacpp_extension::server::load_llama_model,
             core::utils::extensions::inference_llamacpp_extension::server::unload_llama_model,
+            core::utils::extensions::inference_llamacpp_extension::server::get_devices,
             core::utils::extensions::inference_llamacpp_extension::server::generate_api_key,
             core::utils::extensions::inference_llamacpp_extension::server::is_process_running,
         ])
```
