@@ -43,6 +43,7 @@ export enum Settings {
4343 flash_attn = 'flash_attn' ,
4444 cache_type = 'cache_type' ,
4545 use_mmap = 'use_mmap' ,
46+ cpu_threads = 'cpu_threads' ,
4647}
4748
4849/**
@@ -66,6 +67,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
6667 flash_attn : boolean = true
6768 use_mmap : boolean = true
6869 cache_type : string = 'f16'
70+ cpu_threads ?: number
6971
7072 /**
7173 * The URL for making inference requests.
@@ -105,6 +107,10 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
105107 this . flash_attn = await this . getSetting < boolean > ( Settings . flash_attn , true )
106108 this . use_mmap = await this . getSetting < boolean > ( Settings . use_mmap , true )
107109 this . cache_type = await this . getSetting < string > ( Settings . cache_type , 'f16' )
110+ const threads_number = Number (
111+ await this . getSetting < string > ( Settings . cpu_threads , '' )
112+ )
113+ if ( ! Number . isNaN ( threads_number ) ) this . cpu_threads = threads_number
108114
109115 this . queue . add ( ( ) => this . clean ( ) )
110116
@@ -150,6 +156,9 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
150156 this . cache_type = value as string
151157 } else if ( key === Settings . use_mmap && typeof value === 'boolean' ) {
152158 this . use_mmap = value as boolean
159+ } else if ( key === Settings . cpu_threads && typeof value === 'string' ) {
160+ const threads_number = Number ( value )
161+ if ( ! Number . isNaN ( threads_number ) ) this . cpu_threads = threads_number
153162 }
154163 }
155164
@@ -207,6 +216,7 @@ export default class JanInferenceCortexExtension extends LocalOAIEngine {
207216 flash_attn : this . flash_attn ,
208217 cache_type : this . cache_type ,
209218 use_mmap : this . use_mmap ,
219+ ...( this . cpu_threads ? { cpu_threads : this . cpu_threads } : { } ) ,
210220 } ,
211221 timeout : false ,
212222 signal,
0 commit comments