@@ -1954,22 +1954,27 @@ export default class llamacpp_extension extends AIEngine {
     logger.info(
       `isModelSupported: Total memory requirement: ${totalRequired} for ${path}`
     )
-    let availableMemBytes: number
+    let totalMemBytes: number
     const devices = await this.getDevices()
     if (devices.length > 0) {
-      // Sum free memory across all GPUs
-      availableMemBytes = devices
-        .map((d) => d.free * 1024 * 1024)
+      // Sum total memory across all GPUs
+      totalMemBytes = devices
+        .map((d) => d.mem * 1024 * 1024)
         .reduce((a, b) => a + b, 0)
     } else {
       // CPU fallback
       const sys = await getSystemUsage()
-      availableMemBytes = (sys.total_memory - sys.used_memory) * 1024 * 1024
+      totalMemBytes = sys.total_memory * 1024 * 1024
     }
-    // check model size wrt system memory
-    if (modelSize > availableMemBytes) {
+
+    // Use 80% of total memory as the usable limit
+    const USABLE_MEMORY_PERCENTAGE = 0.8
+    const usableMemBytes = totalMemBytes * USABLE_MEMORY_PERCENTAGE
+
+    // check model size wrt 80% of system memory
+    if (modelSize > usableMemBytes) {
       return 'RED'
-    } else if (modelSize + kvCacheSize > availableMemBytes) {
+    } else if (modelSize + kvCacheSize > usableMemBytes) {
       return 'YELLOW'
     } else {
       return 'GREEN'
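
For context, here is a minimal standalone sketch of the check this diff produces. The checkModelFit helper, the Device shape, and the parameter names are hypothetical stand-ins inferred from the diff, not the extension's real API; device and system memory are assumed to be reported in MiB, matching the `* 1024 * 1024` bytes conversion above.

    // Sketch only: names and shapes below are assumptions, not the real extension API.
    type SupportStatus = 'RED' | 'YELLOW' | 'GREEN'

    interface Device {
      mem: number // total device memory in MiB (hypothetical field, per d.mem above)
    }

    function checkModelFit(
      modelSizeBytes: number,
      kvCacheSizeBytes: number,
      devices: Device[],
      totalSystemMemMiB: number
    ): SupportStatus {
      // Prefer summed GPU memory; fall back to total system RAM on CPU-only hosts.
      const totalMemBytes =
        devices.length > 0
          ? devices.map((d) => d.mem * 1024 * 1024).reduce((a, b) => a + b, 0)
          : totalSystemMemMiB * 1024 * 1024

      // Treat only 80% of total memory as usable, leaving headroom for the OS
      // and other processes.
      const usableMemBytes = totalMemBytes * 0.8

      if (modelSizeBytes > usableMemBytes) return 'RED' // weights alone do not fit
      if (modelSizeBytes + kvCacheSizeBytes > usableMemBytes) return 'YELLOW' // fits, but not with the full KV cache
      return 'GREEN' // weights plus KV cache fit within the limit
    }

    // Example: a 7 GiB model with a 2 GiB KV cache on a 10 GiB (10240 MiB) machine
    // yields usable = 8 GiB, so the weights fit (7 < 8) but 7 + 2 = 9 > 8 -> 'YELLOW'.

Presumably the motivation for switching from free to total memory is stability: free memory fluctuates with whatever else is running, so the verdict could change between checks, whereas a fixed 80% margin on total memory absorbs that variability.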