Commit e1847b4

chore: add UI auto optimize model setting
1 parent 076b5fc commit e1847b4

5 files changed: +290 -47 lines changed

extensions/llamacpp-extension/src/index.ts

Lines changed: 12 additions & 9 deletions
@@ -2167,39 +2167,42 @@ export default class llamacpp_extension extends AIEngine {
   async validateGgufFile(filePath: string): Promise<{
     isValid: boolean
     error?: string
-    metadata?: GgufMetadata
+    metadata?: any
   }> {
     try {
       logger.info(`Validating GGUF file: ${filePath}`)
       const metadata = await readGgufMetadata(filePath)
-
+
       // Log full metadata for debugging
       logger.info('Full GGUF metadata:', JSON.stringify(metadata, null, 2))
-
+
       // Check if architecture is 'clip' which is not supported for text generation
       const architecture = metadata.metadata?.['general.architecture']
       logger.info(`Model architecture: ${architecture}`)
-
+
       if (architecture === 'clip') {
-        const errorMessage = 'This model has CLIP architecture and cannot be imported as a text generation model. CLIP models are designed for vision tasks and require different handling.'
+        const errorMessage =
+          'This model has CLIP architecture and cannot be imported as a text generation model. CLIP models are designed for vision tasks and require different handling.'
         logger.error('CLIP architecture detected:', architecture)
         return {
           isValid: false,
           error: errorMessage,
-          metadata
+          metadata,
         }
       }
-
+
       logger.info('Model validation passed. Architecture:', architecture)
       return {
         isValid: true,
-        metadata
+        metadata,
       }
     } catch (error) {
       logger.error('Failed to validate GGUF file:', error)
       return {
         isValid: false,
-        error: `Failed to read model metadata: ${error instanceof Error ? error.message : 'Unknown error'}`
+        error: `Failed to read model metadata: ${
+          error instanceof Error ? error.message : 'Unknown error'
+        }`,
       }
     }
   }
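
For context, a minimal caller sketch in TypeScript, not part of this commit: it assumes a hypothetical import flow (importGgufModel and showImportError are illustrative names) and shows how the isValid / error / metadata result of validateGgufFile would typically be consumed, including the new CLIP-architecture message.

  // Structural type matching validateGgufFile's return shape in this diff.
  type GgufValidation = { isValid: boolean; error?: string; metadata?: any }

  // Hypothetical caller, for illustration only.
  async function importGgufModel(
    engine: { validateGgufFile(filePath: string): Promise<GgufValidation> },
    filePath: string,
    showImportError: (message: string) => void
  ): Promise<any> {
    const { isValid, error, metadata } = await engine.validateGgufFile(filePath)
    if (!isValid) {
      // Surfaces, for example, the CLIP-architecture error message added above
      showImportError(error ?? 'Unknown validation error')
      return undefined
    }
    // Proceed with import; metadata is loosely typed (`any`) after this change
    return metadata
  }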

web-app/src/containers/ModelSetting.tsx

Lines changed: 171 additions & 3 deletions
@@ -1,5 +1,6 @@
-import { IconSettings } from '@tabler/icons-react'
+import { IconSettings, IconLoader } from '@tabler/icons-react'
 import debounce from 'lodash.debounce'
+import { useState } from 'react'
 
 import {
   Sheet,
@@ -9,6 +10,7 @@ import {
   SheetTitle,
   SheetTrigger,
 } from '@/components/ui/sheet'
+import { Button } from '@/components/ui/button'
 import { DynamicControllerSetting } from '@/containers/dynamicControllerSetting'
 import { useModelProvider } from '@/hooks/useModelProvider'
 import { useServiceHub } from '@/hooks/useServiceHub'
@@ -30,11 +32,134 @@ export function ModelSetting({
   const { t } = useTranslation()
   const serviceHub = useServiceHub()
 
+  const [isPlanning, setIsPlanning] = useState(false)
+
   // Create a debounced version of stopModel that waits 500ms after the last call
   const debouncedStopModel = debounce((modelId: string) => {
     serviceHub.models().stopModel(modelId)
   }, 500)
 
+  const handlePlanModelLoad = async () => {
+    if (provider.provider !== 'llamacpp') {
+      console.warn('planModelLoad is only available for llamacpp provider')
+      return
+    }
+    setIsPlanning(true)
+    try {
+      // Read the model config to get the actual model path
+      const modelConfig = await serviceHub.app().readYaml<{
+        model_path: string
+      }>(`llamacpp/models/${model.id}/model.yml`)
+
+      if (modelConfig && modelConfig.model_path) {
+        const result = await serviceHub
+          .models()
+          .planModelLoad(modelConfig.model_path)
+
+        // Apply the recommended settings to the model sequentially to avoid race conditions
+        const settingsToUpdate: Array<{
+          key: string
+          value: number | boolean
+        }> = []
+
+        if (model.settings?.ngl && result.gpuLayers !== undefined) {
+          settingsToUpdate.push({ key: 'ngl', value: result.gpuLayers })
+        }
+
+        if (model.settings?.ctx_len && result.maxContextLength !== undefined) {
+          settingsToUpdate.push({
+            key: 'ctx_len',
+            value: result.maxContextLength,
+          })
+        }
+
+        if (
+          model.settings?.no_kv_offload &&
+          result.noOffloadKVCache !== undefined
+        ) {
+          settingsToUpdate.push({
+            key: 'no_kv_offload',
+            value: result.noOffloadKVCache,
+          })
+        }
+
+        // Apply all settings in a single update to avoid race conditions
+        if (settingsToUpdate.length > 0) {
+          handleMultipleSettingsChange(settingsToUpdate)
+        }
+      } else {
+        console.warn('No model_path found in config for', model.id)
+      }
+    } catch (error) {
+      console.error('Error calling planModelLoad:', error)
+    } finally {
+      setIsPlanning(false)
+    }
+  }
+
+  const handleMultipleSettingsChange = (
+    settingsToUpdate: Array<{ key: string; value: number | boolean }>
+  ) => {
+    if (!provider) return
+
+    // Create a copy of the model with ALL updated settings at once
+    let updatedModel = { ...model }
+
+    settingsToUpdate.forEach(({ key, value }) => {
+      const existingSetting = updatedModel.settings?.[key] as ProviderSetting
+      updatedModel = {
+        ...updatedModel,
+        settings: {
+          ...updatedModel.settings,
+          [key]: {
+            ...existingSetting,
+            controller_props: {
+              ...existingSetting?.controller_props,
+              value: value,
+            },
+          } as ProviderSetting,
+        },
+      }
+    })

+    // Find the model index in the provider's models array
+    const modelIndex = provider.models.findIndex((m) => m.id === model.id)
+
+    if (modelIndex !== -1) {
+      // Create a copy of the provider's models array
+      const updatedModels = [...provider.models]
+
+      // Update the specific model in the array
+      updatedModels[modelIndex] = updatedModel as Model
+
+      // Update the provider with the new models array
+      updateProvider(provider.provider, {
+        models: updatedModels,
+      })
+
+      // Check if any of the updated settings require a model restart
+      const requiresRestart = settingsToUpdate.some(
+        ({ key }) =>
+          key === 'ctx_len' ||
+          key === 'ngl' ||
+          key === 'chat_template' ||
+          key === 'offload_mmproj'
+      )
+
+      if (requiresRestart) {
+        // Check if model is running before stopping it
+        serviceHub
+          .models()
+          .getActiveModels()
+          .then((activeModels) => {
+            if (activeModels.includes(model.id)) {
+              debouncedStopModel(model.id)
+            }
+          })
+      }
+    }
+  }
+
   const handleSettingChange = (
     key: string,
     value: string | boolean | number
@@ -72,8 +197,22 @@ export function ModelSetting({
       })
 
       // Call debounced stopModel only when updating ctx_len, ngl, chat_template, or offload_mmproj
-      if (key === 'ctx_len' || key === 'ngl' || key === 'chat_template' || key === 'offload_mmproj') {
-        debouncedStopModel(model.id)
+      // and only if the model is currently running
+      if (
+        key === 'ctx_len' ||
+        key === 'ngl' ||
+        key === 'chat_template' ||
+        key === 'offload_mmproj'
+      ) {
+        // Check if model is running before stopping it
+        serviceHub
+          .models()
+          .getActiveModels()
+          .then((activeModels) => {
+            if (activeModels.includes(model.id)) {
+              debouncedStopModel(model.id)
+            }
+          })
       }
     }
   }
@@ -98,7 +237,36 @@ export function ModelSetting({
           <SheetDescription>
             {t('common:modelSettings.description')}
           </SheetDescription>
+
+          {/* Model Load Planning Section - Only show for llamacpp provider */}
+          {provider.provider === 'llamacpp' && (
+            <div className="pb-4 border-b border-main-view-fg/10 my-4">
+              <div>
+                <h3 className="font-medium mb-1">Optimize Settings</h3>
+                <p className="text-main-view-fg/70 text-xs mb-3">
+                  Analyze your system and model, then apply optimal loading
+                  settings automatically
+                </p>
+                <Button
+                  onClick={handlePlanModelLoad}
+                  disabled={isPlanning}
+                  variant="default"
+                  className="w-full"
+                >
+                  {isPlanning ? (
+                    <>
+                      <IconLoader size={16} className="mr-2 animate-spin" />
+                      Optimizing...
+                    </>
+                  ) : (
+                    <>Auto-Optimize Settings</>
+                  )}
+                </Button>
+              </div>
+            </div>
+          )}
         </SheetHeader>
+
         <div className="px-4 space-y-6">
           {Object.entries(model.settings || {}).map(([key, value]) => {
             const config = value as ProviderSetting
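
The return type of serviceHub.models().planModelLoad() is not shown in this diff; the TypeScript sketch below only infers the fields that handlePlanModelLoad reads and mirrors its mapping onto setting keys. PlannedLoadSettings and planToSettingUpdates are hypothetical names, not the actual service types.

  // Inferred shape: only the fields the handler above reads.
  interface PlannedLoadSettings {
    gpuLayers?: number         // applied to the 'ngl' setting
    maxContextLength?: number  // applied to the 'ctx_len' setting
    noOffloadKVCache?: boolean // applied to the 'no_kv_offload' setting
  }

  // Mirrors the mapping in handlePlanModelLoad: a planned value is applied only
  // when the model already exposes the corresponding setting.
  function planToSettingUpdates(
    plan: PlannedLoadSettings,
    settings: Record<string, unknown>
  ): Array<{ key: string; value: number | boolean }> {
    const updates: Array<{ key: string; value: number | boolean }> = []
    if (settings.ngl && plan.gpuLayers !== undefined) {
      updates.push({ key: 'ngl', value: plan.gpuLayers })
    }
    if (settings.ctx_len && plan.maxContextLength !== undefined) {
      updates.push({ key: 'ctx_len', value: plan.maxContextLength })
    }
    if (settings.no_kv_offload && plan.noOffloadKVCache !== undefined) {
      updates.push({ key: 'no_kv_offload', value: plan.noOffloadKVCache })
    }
    return updates
  }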

web-app/src/routes/settings/providers/$providerName.tsx

Lines changed: 38 additions & 30 deletions
@@ -93,16 +93,19 @@ function ProviderDetail() {
   const handleModelImportSuccess = async (importedModelName?: string) => {
     // Refresh the provider to update the models list
     await serviceHub.providers().getProviders().then(setProviders)
-
+
     // If a model was imported and it might have vision capabilities, check and update
     if (importedModelName && providerName === 'llamacpp') {
       try {
-        const mmprojExists = await serviceHub.models().checkMmprojExists(importedModelName)
+        const mmprojExists = await serviceHub
+          .models()
+          .checkMmprojExists(importedModelName)
         if (mmprojExists) {
           // Get the updated provider after refresh
-          const { getProviderByName, updateProvider: updateProviderState } = useModelProvider.getState()
+          const { getProviderByName, updateProvider: updateProviderState } =
+            useModelProvider.getState()
           const llamacppProvider = getProviderByName('llamacpp')
-
+
           if (llamacppProvider) {
             const modelIndex = llamacppProvider.models.findIndex(
               (m: Model) => m.id === importedModelName
@@ -120,7 +123,9 @@ function ProviderDetail() {
             }
 
             updateProviderState('llamacpp', { models: updatedModels })
-            console.log(`Vision capability added to model after provider refresh: ${importedModelName}`)
+            console.log(
+              `Vision capability added to model after provider refresh: ${importedModelName}`
+            )
           }
         }
       }
@@ -245,33 +250,36 @@ function ProviderDetail() {
     }
   }
 
-  const handleStartModel = (modelId: string) => {
+  const handleStartModel = async (modelId: string) => {
     // Add model to loading state
     setLoadingModels((prev) => [...prev, modelId])
-    if (provider)
-      // Original: startModel(provider, modelId).then(() => { setActiveModels((prevModels) => [...prevModels, modelId]) })
-      serviceHub
-        .models()
-        .startModel(provider, modelId)
-        .then(() => {
-          // Refresh active models after starting
-          serviceHub
-            .models()
-            .getActiveModels()
-            .then((models) => setActiveModels(models || []))
-        })
-        .catch((error) => {
-          console.error('Error starting model:', error)
-          if (error && typeof error === 'object' && 'message' in error) {
-            setModelLoadError(error)
-          } else {
-            setModelLoadError(`${error}`)
-          }
-        })
-        .finally(() => {
-          // Remove model from loading state
-          setLoadingModels((prev) => prev.filter((id) => id !== modelId))
-        })
+    if (provider) {
+      try {
+        // Start the model with plan result
+        await serviceHub.models().startModel(provider, modelId)
+
+        // Refresh active models after starting
+        serviceHub
+          .models()
+          .getActiveModels()
+          .then((models) => setActiveModels(models || []))
+      } catch (error) {
+        console.error('Error starting model:', error)
+        if (
+          error &&
+          typeof error === 'object' &&
+          'message' in error &&
+          typeof error.message === 'string'
+        ) {
+          setModelLoadError({ message: error.message })
+        } else {
+          setModelLoadError(typeof error === 'string' ? error : `${error}`)
+        }
+      } finally {
+        // Remove model from loading state
+        setLoadingModels((prev) => prev.filter((id) => id !== modelId))
+      }
+    }
   }
 
   const handleStopModel = (modelId: string) => {
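
The new catch branch in handleStartModel narrows an unknown error by hand before reading error.message. The same check written as a reusable type guard, shown as a sketch only (hasStringMessage and toModelLoadError are hypothetical helpers, not part of this commit):

  // Type guard equivalent to the inline checks in the catch block above.
  function hasStringMessage(error: unknown): error is { message: string } {
    return (
      typeof error === 'object' &&
      error !== null &&
      'message' in error &&
      typeof (error as { message: unknown }).message === 'string'
    )
  }

  // Usage mirroring the catch branch: produce either { message } or a plain string.
  function toModelLoadError(error: unknown): { message: string } | string {
    if (hasStringMessage(error)) {
      return { message: error.message }
    }
    return typeof error === 'string' ? error : `${error}`
  }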
