Commit b9f658f

fix: correct memory suitability checks in llamacpp extension (#6504)
The previous implementation mixed model-size and VRAM checks, leading to inaccurate status reporting (e.g., false RED results).

- Simplified the import statement for `readGgufMetadata`.
- Fixed the RAM/VRAM comparison by removing unnecessary parentheses.
- Replaced the ambiguous `modelSize > usableTotalMemory` check with a clear `totalRequired > usableTotalMemory` hard-limit condition.
- Refactored the status logic to explicitly handle the CPU-GPU hybrid scenario, returning **YELLOW** when the total requirement fits combined memory but exceeds VRAM.
- Updated comments for better readability and maintenance.
1 parent: 6304632 · commit: b9f658f
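The corrected decision flow can be summarized in a minimal standalone sketch. The function name and signature here are hypothetical, not the repo's actual API; `totalRequired`, `usableVRAM`, and `usableTotalMemory` follow the variable names in the diff below, and `totalRequired` is assumed to cover model weights plus context overhead:

```typescript
// Hedged sketch of the corrected status checks; not the repo's actual API.
type ModelStatus = 'GREEN' | 'YELLOW' | 'RED'

function checkModelSuitability(
  totalRequired: number, // bytes for model weights + context (assumption)
  usableVRAM: number, // e.g. totalVRAM * USABLE_MEMORY_PERCENTAGE
  usableTotalMemory: number // usable system RAM + usable VRAM combined
): ModelStatus {
  // Hard limit: if the total requirement exceeds combined memory,
  // the model truly cannot run.
  if (totalRequired > usableTotalMemory) {
    return 'RED'
  }
  // Ideal case: everything fits in VRAM.
  if (totalRequired <= usableVRAM) {
    return 'GREEN'
  }
  // Remaining case: fits in combined memory but not entirely in VRAM,
  // i.e. the CPU-GPU hybrid scenario.
  return 'YELLOW'
}
```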

File tree

  • extensions/llamacpp-extension/src/index.ts

1 file changed: +10 −15 lines

extensions/llamacpp-extension/src/index.ts

Lines changed: 10 additions & 15 deletions
@@ -36,9 +36,7 @@ import {
 import { invoke } from '@tauri-apps/api/core'
 import { getProxyConfig } from './util'
 import { basename } from '@tauri-apps/api/path'
-import {
-  readGgufMetadata,
-} from '@janhq/tauri-plugin-llamacpp-api'
+import { readGgufMetadata } from '@janhq/tauri-plugin-llamacpp-api'
 import { getSystemUsage, getSystemInfo } from '@janhq/tauri-plugin-hardware-api'
 
 // Error message constant - matches web-app/src/utils/error.ts

@@ -2162,7 +2160,7 @@ export default class llamacpp_extension extends AIEngine {
     ).size
 
     const ramForModel = modelSize + (offloadMmproj ? 0 : mmprojSize)
-    if (ramForModel + vramForMinContext > (usableSystemMemory + usableVRAM)) {
+    if (ramForModel + vramForMinContext > usableSystemMemory + usableVRAM) {
       logger.error(
         `Model unsupported. Not enough resources for model and min context.`
       )

@@ -2425,24 +2423,21 @@ export default class llamacpp_extension extends AIEngine {
         memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
       const usableVRAM = memoryInfo.totalVRAM * USABLE_MEMORY_PERCENTAGE
 
-      // Check if model fits in total memory at all
-      if (modelSize > usableTotalMemory) {
-        return 'RED'
+      // Check if model fits in total memory at all (this is the hard limit)
+      if (totalRequired > usableTotalMemory) {
+        return 'RED' // Truly impossible to run
       }
 
       // Check if everything fits in VRAM (ideal case)
       if (totalRequired <= usableVRAM) {
         return 'GREEN'
       }
 
-      // Check if model fits in VRAM but total requirement exceeds VRAM
-      // OR if total requirement fits in total memory but not in VRAM
-      if (modelSize <= usableVRAM || totalRequired <= usableTotalMemory) {
-        return 'YELLOW'
-      }
-
-      // If we get here, nothing fits properly
-      return 'RED'
+      // If we get here, it means:
+      // - Total requirement fits in combined memory
+      // - But doesn't fit entirely in VRAM
+      // This is the CPU-GPU hybrid scenario
+      return 'YELLOW'
     } catch (e) {
       throw new Error(String(e))
     }
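As a rough usage illustration, reusing the hypothetical `checkModelSuitability` sketch above with made-up hardware numbers (an 8 GiB GPU, 16 GiB of system RAM, and an assumed 80% usable-memory fraction, which is not necessarily the repo's `USABLE_MEMORY_PERCENTAGE`):

```typescript
// Illustrative numbers only; all values here are assumptions.
const USABLE = 0.8 // assumed usable-memory fraction
const GiB = 1024 ** 3

const usableVRAM = 8 * GiB * USABLE // ~6.4 GiB
const usableTotalMemory = (8 + 16) * GiB * USABLE // ~19.2 GiB

checkModelSuitability(5 * GiB, usableVRAM, usableTotalMemory) // 'GREEN': fits in VRAM
checkModelSuitability(12 * GiB, usableVRAM, usableTotalMemory) // 'YELLOW': CPU-GPU hybrid
checkModelSuitability(30 * GiB, usableVRAM, usableTotalMemory) // 'RED': exceeds combined memory
```

Reading the diff, the behavioral change is that the old branching could return YELLOW for a model whose weights fit in VRAM even when `totalRequired` exceeded combined memory; the new hard-limit check reports RED in that case.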
