Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 20 additions & 7 deletions extensions/llamacpp-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -332,12 +332,14 @@ export default class llamacpp_extension extends AIEngine {
)
// Clear the invalid stored preference
this.clearStoredBackendType()
bestAvailableBackendString =
await this.determineBestBackend(version_backends)
bestAvailableBackendString = await this.determineBestBackend(
version_backends
)
}
} else {
bestAvailableBackendString =
await this.determineBestBackend(version_backends)
bestAvailableBackendString = await this.determineBestBackend(
version_backends
)
}

let settings = structuredClone(SETTINGS)
Expand Down Expand Up @@ -2151,7 +2153,12 @@ export default class llamacpp_extension extends AIEngine {
if (mmprojPath && !this.isAbsolutePath(mmprojPath))
mmprojPath = await joinPath([await getJanDataFolderPath(), path])
try {
const result = await planModelLoadInternal(path, this.memoryMode, mmprojPath, requestedCtx)
const result = await planModelLoadInternal(
path,
this.memoryMode,
mmprojPath,
requestedCtx
)
return result
} catch (e) {
throw new Error(String(e))
Expand Down Expand Up @@ -2279,12 +2286,18 @@ export default class llamacpp_extension extends AIEngine {
}

// Calculate text tokens
const messages = JSON.stringify({ messages: opts.messages })
// Use chat_template_kwargs from opts if provided, otherwise default to disable enable_thinking
const tokenizeRequest = {
messages: opts.messages,
chat_template_kwargs: opts.chat_template_kwargs || {
enable_thinking: false,
},
}

let parseResponse = await fetch(`${baseUrl}/apply-template`, {
method: 'POST',
headers: headers,
body: messages,
body: JSON.stringify(tokenizeRequest),
})

if (!parseResponse.ok) {
Expand Down
6 changes: 6 additions & 0 deletions web-app/src/services/models/default.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@
}

/**
 * Looks up a single model by its identifier.
 *
 * @param modelId - Identifier forwarded unchanged to the engine's `get`.
 * @returns Whatever the engine's `get(modelId)` yields, or `undefined`
 *   when `getEngine()` returns a nullish value.
 */
async getModel(modelId: string): Promise<modelInfo | undefined> {
  const engine = this.getEngine()
  // Optional chaining keeps the lookup a no-op when no engine is available.
  return engine?.get(modelId)
}

Check warning on line 35 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

34-35 lines are not covered with tests

async fetchModels(): Promise<modelInfo[]> {
return this.getEngine()?.list() ?? []
Expand Down Expand Up @@ -78,9 +78,9 @@
`https://huggingface.co/api/models/${cleanRepoId}?blobs=true&files_metadata=true`,
{
headers: hfToken
? {
Authorization: `Bearer ${hfToken}`,
}

Check warning on line 83 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

81-83 lines are not covered with tests
: {},
}
)
Expand Down Expand Up @@ -139,13 +139,13 @@

// Convert mmproj files to mmproj_models format
const mmprojModels = mmprojFiles.map((file) => {
const modelId = file.rfilename.replace(/\.gguf$/i, '')

Check warning on line 142 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

142 line is not covered with tests

return {
model_id: sanitizeModelId(modelId),
path: `https://huggingface.co/${repo.modelId}/resolve/main/${file.rfilename}`,
file_size: formatFileSize(file.size),
}

Check warning on line 148 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

144-148 lines are not covered with tests
})

return {
Expand Down Expand Up @@ -193,30 +193,30 @@
}

async pullModelWithMetadata(
id: string,
modelPath: string,
mmprojPath?: string,
hfToken?: string
): Promise<void> {
let modelSha256: string | undefined
let modelSize: number | undefined
let mmprojSha256: string | undefined
let mmprojSize: number | undefined

Check warning on line 204 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

196-204 lines are not covered with tests

// Extract repo ID from model URL
// URL format: https://huggingface.co/{repo}/resolve/main/{filename}
const modelUrlMatch = modelPath.match(
/https:\/\/huggingface\.co\/([^/]+\/[^/]+)\/resolve\/main\/(.+)/
)

Check warning on line 210 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

208-210 lines are not covered with tests

if (modelUrlMatch) {
const [, repoId, modelFilename] = modelUrlMatch

Check warning on line 213 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

212-213 lines are not covered with tests

try {

Check warning on line 215 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

215 line is not covered with tests
// Fetch real-time metadata from HuggingFace
const repoInfo = await this.fetchHuggingFaceRepo(repoId, hfToken)

Check warning on line 217 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

217 line is not covered with tests

if (repoInfo?.siblings) {

Check warning on line 219 in web-app/src/services/models/default.ts

View workflow job for this annotation

GitHub Actions / coverage-check

219 line is not covered with tests
// Find the specific model file
const modelFile = repoInfo.siblings.find(
(file) => file.rfilename === modelFilename
Expand Down Expand Up @@ -578,6 +578,9 @@
}
}>
}>
chat_template_kwargs?: {
enable_thinking: boolean
}
}) => Promise<number>
}

Expand Down Expand Up @@ -654,6 +657,9 @@
return await engine.getTokensCount({
model: modelId,
messages: transformedMessages,
chat_template_kwargs: {
enable_thinking: false,
},
})
}

Expand Down
Loading