Skip to content

Commit 88decc9

Browse files
Merge pull request #6333 from menloresearch/release/v0.6.9
2 parents f14d23d + 5fae954 commit 88decc9

File tree

1 file changed

+13
-8
lines changed
  • extensions/llamacpp-extension/src

1 file changed

+13
-8
lines changed

extensions/llamacpp-extension/src/index.ts

Lines changed: 13 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1954,22 +1954,27 @@ export default class llamacpp_extension extends AIEngine {
1954 1954
logger.info(
1955 1955
`isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
1956 1956
)
1957-
let availableMemBytes: number
1957+
let totalMemBytes: number
1958 1958
const devices = await this.getDevices()
1959 1959
if (devices.length > 0) {
1960-
// Sum free memory across all GPUs
1961-
availableMemBytes = devices
1962-
.map((d) => d.free * 1024 * 1024)
1960+
// Sum total memory across all GPUs
1961+
totalMemBytes = devices
1962+
.map((d) => d.mem * 1024 * 1024)
1963 1963
.reduce((a, b) => a + b, 0)
1964 1964
} else {
1965 1965
// CPU fallback
1966 1966
const sys = await getSystemUsage()
1967-
availableMemBytes = (sys.total_memory - sys.used_memory) * 1024 * 1024
1967+
totalMemBytes = sys.total_memory * 1024 * 1024
1968 1968
}
1969-
// check model size wrt system memory
1970-
if (modelSize > availableMemBytes) {
1969+
1970+
// Use 80% of total memory as the usable limit
1971+
const USABLE_MEMORY_PERCENTAGE = 0.8
1972+
const usableMemBytes = totalMemBytes * USABLE_MEMORY_PERCENTAGE
1973+
1974+
// check model size wrt 80% of system memory
1975+
if (modelSize > usableMemBytes) {
1971 1976
return 'RED'
1972-
} else if (modelSize + kvCacheSize > availableMemBytes) {
1977+
} else if (modelSize + kvCacheSize > usableMemBytes) {
1973 1978
return 'YELLOW'
1974 1979
} else {
1975 1980
return 'GREEN'

0 commit comments

Comments
 (0)