Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 11 additions & 2 deletions extensions/llamacpp-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ type LlamacppConfig = {
auto_unload: boolean
chat_template: string
n_gpu_layers: number
override_tensor_buffer_t: string
ctx_size: number
threads: number
threads_batch: number
Expand Down Expand Up @@ -1262,6 +1263,14 @@ export default class llamacpp_extension extends AIEngine {
args.push('--jinja')
args.push('--reasoning-format', 'none')
args.push('-m', modelPath)
// Override the tensor buffer type. Useful for massive MoE models, which
// can be made faster by keeping attention on the GPU and offloading the
// expert FFNs to the CPU.
// This is an expert-level setting and should only be used by people
// who know what they are doing.
// Takes a regex matching tensor names, followed by the target buffer
// type, e.g. `layers\.\d+\.ffn_.*=CPU`.
if (cfg.override_tensor_buffer_t)
args.push('--override-tensor', cfg.override_tensor_buffer_t)
args.push('-a', modelId)
args.push('--port', String(port))
if (modelConfig.mmproj_path) {
Expand Down Expand Up @@ -1340,8 +1349,8 @@ export default class llamacpp_extension extends AIEngine {

return sInfo
} catch (error) {
logger.error('Error loading llama-server:\n', error)
throw new Error(`Failed to load llama-server: ${error}`)
logger.error('Error in load command:\n', error)
throw new Error(`Failed to load model:\n${error}`)
}
}

Expand Down
6 changes: 4 additions & 2 deletions web-app/src/containers/ModelSetting.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,60 +22,60 @@
}

export function ModelSetting({
model,
provider,
smallIcon,
}: ModelSettingProps) {
const { updateProvider } = useModelProvider()
const { t } = useTranslation()

Check warning on line 30 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

25-30 lines are not covered with tests

// Create a debounced version of stopModel that waits 500ms after the last call
const debouncedStopModel = debounce((modelId: string) => {
stopModel(modelId)
}, 500)

Check warning on line 35 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

33-35 lines are not covered with tests

const handleSettingChange = (
key: string,
value: string | boolean | number
) => {
if (!provider) return

Check warning on line 41 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

37-41 lines are not covered with tests

// Create a copy of the model with updated settings
const updatedModel = {
...model,
settings: {
...model.settings,
[key]: {
...(model.settings?.[key] != null ? model.settings?.[key] : {}),
controller_props: {
...(model.settings?.[key]?.controller_props ?? {}),
value: value,
},
},
},
}

Check warning on line 56 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

44-56 lines are not covered with tests

// Find the model index in the provider's models array
const modelIndex = provider.models.findIndex((m) => m.id === model.id)

Check warning on line 59 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

59 line is not covered with tests

if (modelIndex !== -1) {

Check warning on line 61 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

61 line is not covered with tests
// Create a copy of the provider's models array
const updatedModels = [...provider.models]

Check warning on line 63 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

63 line is not covered with tests

// Update the specific model in the array
updatedModels[modelIndex] = updatedModel as Model

Check warning on line 66 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

66 line is not covered with tests

// Update the provider with the new models array
updateProvider(provider.provider, {
models: updatedModels,
})

Check warning on line 71 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

69-71 lines are not covered with tests

// Call debounced stopModel only when updating ctx_len, ngl, or chat_template
if (key === 'ctx_len' || key === 'ngl' || key === 'chat_template') {
debouncedStopModel(model.id)
}
}
}

Check warning on line 78 in web-app/src/containers/ModelSetting.tsx

View workflow job for this annotation

GitHub Actions / coverage-check

74-78 lines are not covered with tests

return (
<Sheet>
Expand Down Expand Up @@ -106,8 +106,10 @@
<div key={key} className="space-y-2">
<div
className={cn(
'flex items-start justify-between gap-8',
key === 'chat_template' && 'flex-col gap-1'
'flex items-start justify-between gap-8 last:mb-2',
(key === 'chat_template' ||
key === 'override_tensor_buffer_t') &&
'flex-col gap-1 w-full'
)}
>
<div className="space-y-1 mb-2">
Expand Down
27 changes: 26 additions & 1 deletion web-app/src/hooks/useModelProvider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -276,9 +276,34 @@ export const useModelProvider = create<ModelProviderState>()(
})
}

// Migration for override_tensor_buffer_t key (version 2 -> 3)
if (version === 2 && state?.providers) {
state.providers.forEach((provider) => {
if (provider.models) {
provider.models.forEach((model) => {
// Initialize settings if it doesn't exist
if (!model.settings) {
model.settings = {}
}

// Add missing override_tensor_buffer_t setting if it doesn't exist
if (!model.settings.override_tensor_buffer_t) {
model.settings.override_tensor_buffer_t = {
...modelSettings.override_tensor_buffer_t,
controller_props: {
...modelSettings.override_tensor_buffer_t
.controller_props,
},
}
}
})
}
})
}

return state
},
version: 2,
version: 3,
}
)
)
11 changes: 11 additions & 0 deletions web-app/src/lib/predefined.ts
Original file line number Diff line number Diff line change
Expand Up @@ -133,4 +133,15 @@ export const modelSettings = {
textAlign: 'right',
},
},
override_tensor_buffer_t: {
key: 'override_tensor_buffer_t',
title: 'Override Tensor Buffer Type',
description: 'Override the tensor buffer type for the model',
controller_type: 'input',
controller_props: {
value: '',
placeholder: 'e.g., layers\\.\\d+\\.ffn_.*=CPU',
type: 'text',
},
},
}
Loading