Skip to content
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions core/src/browser/extension.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ export enum ExtensionTypeEnum {
HuggingFace = 'huggingFace',
Engine = 'engine',
Hardware = 'hardware',
RAG = 'rag',
VectorDB = 'vectorDB',
}

export interface ExtensionType {
Expand Down
1 change: 1 addition & 0 deletions core/src/browser/extensions/engines/AIEngine.ts
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,7 @@ export interface SessionInfo {
port: number // llama-server output port (corrected from portid)
model_id: string //name of the model
model_path: string // path of the loaded model
is_embedding: boolean
api_key: string
mmproj_path?: string
}
Expand Down
5 changes: 5 additions & 0 deletions core/src/browser/extensions/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,8 @@ export { MCPExtension } from './mcp'
* Base AI Engines.
*/
export * from './engines'

export { RAGExtension, RAG_INTERNAL_SERVER } from './rag'
export type { AttachmentInput, IngestAttachmentsResult } from './rag'
export { VectorDBExtension } from './vector-db'
export type { SearchMode, VectorDBStatus, VectorChunkInput, VectorSearchResult, AttachmentFileInfo, VectorDBFileInput, VectorDBIngestOptions } from './vector-db'
36 changes: 36 additions & 0 deletions core/src/browser/extensions/rag.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'
import type { MCPTool, MCPToolCallResult } from '../../types'
import type { AttachmentFileInfo } from './vector-db'

/**
 * Describes one file attachment to be ingested into a thread's RAG store.
 * Only `path` is required; the remaining fields carry optional file metadata.
 */
export interface AttachmentInput {
  /** Path to the file on disk. */
  path: string
  /** Display name of the file, if different from the path. */
  name?: string
  /** MIME type or file-type hint, if known. */
  type?: string
  /** File size in bytes, if known. */
  size?: number
}

/**
 * Summary returned after ingesting a batch of attachments
 * (see RAGExtension.ingestAttachments).
 */
export interface IngestAttachmentsResult {
  /** Number of files that were processed. */
  filesProcessed: number
  /** Total number of text chunks written to the vector store. */
  chunksInserted: number
  /** Per-file metadata for each ingested attachment. */
  files: AttachmentFileInfo[]
}

export const RAG_INTERNAL_SERVER = 'rag-internal'

/**
 * RAG extension base: exposes RAG tools and orchestration API.
 * Concrete implementations supply MCP-style tools and handle
 * attachment ingestion scoped to a chat thread.
 */
export abstract class RAGExtension extends BaseExtension {
  /** Identifies this extension as the RAG extension type. */
  type(): ExtensionTypeEnum | undefined {
    return ExtensionTypeEnum.RAG
  }

  /** Full tool definitions (name, description, schema) offered by this extension. */
  abstract getTools(): Promise<MCPTool[]>
  /**
   * Lightweight list of tool names for quick routing/lookup.
   */
  abstract getToolNames(): Promise<string[]>
  /** Invokes one of this extension's tools by name with the given arguments. */
  abstract callTool(toolName: string, args: Record<string, unknown>): Promise<MCPToolCallResult>

  /** Ingests the given files into the vector store scoped to `threadId`. */
  abstract ingestAttachments(threadId: string, files: AttachmentInput[]): Promise<IngestAttachmentsResult>
}
82 changes: 82 additions & 0 deletions core/src/browser/extensions/vector-db.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { BaseExtension, ExtensionTypeEnum } from '../extension'

export type SearchMode = 'auto' | 'ann' | 'linear'

/** Runtime capability report of the vector DB backend. */
export interface VectorDBStatus {
  // snake_case presumably mirrors the native backend's payload — keep in sync.
  /** Whether approximate-nearest-neighbor search is available. */
  ann_available: boolean
}

/** One text chunk paired with its precomputed embedding vector. */
export interface VectorChunkInput {
  /** Raw chunk text. */
  text: string
  /** Embedding vector for the text; length must match the collection dimension. */
  embedding: number[]
}

/**
 * A chunk returned from a vector search or a range read (getChunks).
 * snake_case field names presumably mirror the backend's row shape.
 */
export interface VectorSearchResult {
  /** Unique chunk id. */
  id: string
  /** Chunk text content. */
  text: string
  /** Similarity score; absent for non-search reads such as getChunks — TODO confirm. */
  score?: number
  /** Id of the source file this chunk was extracted from. */
  file_id: string
  /** Ordinal position of the chunk within its source file. */
  chunk_file_order: number
}

/** Metadata describing an ingested attachment file and its chunk footprint. */
export interface AttachmentFileInfo {
  /** Unique id assigned to the file by the vector store. */
  id: string
  /** Original display name, if known. */
  name?: string
  /** Original on-disk path, if known. */
  path?: string
  /** MIME type or file-type hint, if known. */
  type?: string
  /** File size in bytes, if known. */
  size?: number
  /** Number of chunks stored for this file. */
  chunk_count: number
}

// High-level input types for file ingestion
/**
 * A file handed to VectorDBExtension.ingestFile. Only `path` is required;
 * the remaining fields carry optional metadata about the source file.
 */
export interface VectorDBFileInput {
  /** Path to the file on disk. */
  path: string
  /** Display name, if different from the path. */
  name?: string
  /** MIME type or file-type hint, if known. */
  type?: string
  /** File size in bytes, if known. */
  size?: number
}

/** Chunking parameters applied when splitting a file's text before storage. */
export interface VectorDBIngestOptions {
  /** Target chunk length (units — characters or tokens — TODO confirm with implementation). */
  chunkSize: number
  /** Overlap between consecutive chunks, in the same units as chunkSize. */
  chunkOverlap: number
}

/**
 * Vector DB extension base: abstraction over local vector storage and search.
 * Collections are scoped per chat thread (`threadId`); chunks belong to files.
 */
export abstract class VectorDBExtension extends BaseExtension {
  /** Identifies this extension as the vector DB extension type. */
  type(): ExtensionTypeEnum | undefined {
    return ExtensionTypeEnum.VectorDB
  }

  /** Reports backend capabilities (e.g. whether ANN search is available). */
  abstract getStatus(): Promise<VectorDBStatus>
  /** Creates the collection for a thread with the given embedding dimension. */
  abstract createCollection(threadId: string, dimension: number): Promise<void>
  /** Inserts precomputed chunks for a file into the thread's collection. */
  abstract insertChunks(
    threadId: string,
    fileId: string,
    chunks: VectorChunkInput[]
  ): Promise<void>
  /** Chunks, embeds, and stores a file; returns the resulting file metadata. */
  abstract ingestFile(
    threadId: string,
    file: VectorDBFileInput,
    opts: VectorDBIngestOptions
  ): Promise<AttachmentFileInfo>
  /**
   * Searches the thread's collection for chunks similar to the query embedding.
   *
   * @param queryEmbedding - Query vector; length must match the collection dimension.
   * @param limit - Maximum number of results to return.
   * @param threshold - Minimum similarity score cutoff.
   * @param mode - Optional search strategy; see SearchMode.
   * @param fileIds - Optional filter restricting results to these files.
   */
  // NOTE: renamed `query_embedding` -> `queryEmbedding` for consistency with the
  // other camelCase parameters; TS parameter names are positional, so this is
  // fully backward-compatible for callers and implementers alike.
  abstract searchCollection(
    threadId: string,
    queryEmbedding: number[],
    limit: number,
    threshold: number,
    mode?: SearchMode,
    fileIds?: string[]
  ): Promise<VectorSearchResult[]>
  /** Deletes specific chunks by id from the thread's collection. */
  abstract deleteChunks(threadId: string, ids: string[]): Promise<void>
  /** Deletes a file and all of its chunks from the thread's collection. */
  abstract deleteFile(threadId: string, fileId: string): Promise<void>
  /** Drops the thread's entire collection. */
  abstract deleteCollection(threadId: string): Promise<void>
  /** Lists ingested files for a thread, optionally capped at `limit`. */
  abstract listAttachments(threadId: string, limit?: number): Promise<AttachmentFileInfo[]>
  /** Reads a contiguous range of a file's chunks by in-file order (inclusive bounds — TODO confirm). */
  abstract getChunks(
    threadId: string,
    fileId: string,
    startOrder: number,
    endOrder: number
  ): Promise<VectorSearchResult[]>
}
2 changes: 2 additions & 0 deletions core/src/types/setting/settingComponent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ export type SettingComponentProps = {
extensionName?: string
requireModelReload?: boolean
configType?: ConfigType
titleKey?: string
descriptionKey?: string
}

export type ConfigType = 'runtime' | 'setting'
Expand Down
32 changes: 25 additions & 7 deletions extensions-web/src/jan-provider-web/provider.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export default class JanProviderWeb extends AIEngine {
// Verify Jan models capabilities in localStorage
private validateJanModelsLocalStorage() {
try {
console.log("Validating Jan models in localStorage...")
console.log('Validating Jan models in localStorage...')
const storageKey = 'model-provider'
const data = localStorage.getItem(storageKey)
if (!data) return
Expand All @@ -60,9 +60,14 @@ export default class JanProviderWeb extends AIEngine {
if (provider.provider === 'jan' && provider.models) {
for (const model of provider.models) {
console.log(`Checking Jan model: ${model.id}`, model.capabilities)
if (JSON.stringify(model.capabilities) !== JSON.stringify(JAN_MODEL_CAPABILITIES)) {
if (
JSON.stringify(model.capabilities) !==
JSON.stringify(JAN_MODEL_CAPABILITIES)
) {
hasInvalidModel = true
console.log(`Found invalid Jan model: ${model.id}, clearing localStorage`)
console.log(
`Found invalid Jan model: ${model.id}, clearing localStorage`
)
break
}
}
Expand All @@ -79,9 +84,17 @@ export default class JanProviderWeb extends AIEngine {
// If still present, try setting to empty state
if (afterRemoval) {
// Try alternative clearing method
localStorage.setItem(storageKey, JSON.stringify({ state: { providers: [] }, version: parsed.version || 3 }))
localStorage.setItem(
storageKey,
JSON.stringify({
state: { providers: [] },
version: parsed.version || 3,
})
)
}
console.log('Cleared model-provider from localStorage due to invalid Jan capabilities')
console.log(
'Cleared model-provider from localStorage due to invalid Jan capabilities'
)
// Force a page reload to ensure clean state
window.location.reload()
}
Expand Down Expand Up @@ -159,6 +172,7 @@ export default class JanProviderWeb extends AIEngine {
port: 443, // HTTPS port
model_id: modelId,
model_path: `remote:${modelId}`, // Indicate this is a remote model
is_embedding: false, // assume false here, TODO: might need further implementation
api_key: '', // API key handled by auth service
}

Expand Down Expand Up @@ -193,8 +207,12 @@ export default class JanProviderWeb extends AIEngine {
console.error(`Failed to unload Jan session ${sessionId}:`, error)
return {
success: false,
error: error instanceof ApiError ? error.message :
error instanceof Error ? error.message : 'Unknown error',
error:
error instanceof ApiError
? error.message
: error instanceof Error
? error.message
: 'Unknown error',
}
}
}
Expand Down
93 changes: 66 additions & 27 deletions extensions/llamacpp-extension/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -333,14 +333,12 @@ export default class llamacpp_extension extends AIEngine {
)
// Clear the invalid stored preference
this.clearStoredBackendType()
bestAvailableBackendString = await this.determineBestBackend(
version_backends
)
bestAvailableBackendString =
await this.determineBestBackend(version_backends)
}
} else {
bestAvailableBackendString = await this.determineBestBackend(
version_backends
)
bestAvailableBackendString =
await this.determineBestBackend(version_backends)
}

let settings = structuredClone(SETTINGS)
Expand Down Expand Up @@ -1530,17 +1528,41 @@ export default class llamacpp_extension extends AIEngine {

if (
this.autoUnload &&
!isEmbedding &&
(loadedModels.length > 0 || otherLoadingPromises.length > 0)
) {
// Wait for OTHER loading models to finish, then unload everything
if (otherLoadingPromises.length > 0) {
await Promise.all(otherLoadingPromises)
}

// Now unload all loaded models
// Now unload all loaded Text models excluding embedding models
const allLoadedModels = await this.getLoadedModels()
if (allLoadedModels.length > 0) {
await Promise.all(allLoadedModels.map((model) => this.unload(model)))
const sessionInfos: (SessionInfo | null)[] = await Promise.all(
allLoadedModels.map(async (modelId) => {
try {
return await this.findSessionByModel(modelId)
} catch (e) {
logger.warn(`Unable to find session for model "${modelId}": ${e}`)
return null // treat as “not‑eligible for unload”
}
})
)

logger.info(JSON.stringify(sessionInfos))

const nonEmbeddingModels: string[] = sessionInfos
.filter(
(s): s is SessionInfo => s !== null && s.is_embedding === false
)
.map((s) => s.model_id)

if (nonEmbeddingModels.length > 0) {
await Promise.all(
nonEmbeddingModels.map((modelId) => this.unload(modelId))
)
}
}
}
const args: string[] = []
Expand Down Expand Up @@ -1638,7 +1660,7 @@ export default class llamacpp_extension extends AIEngine {
if (cfg.no_kv_offload) args.push('--no-kv-offload')
if (isEmbedding) {
args.push('--embedding')
args.push('--pooling mean')
args.push('--pooling', 'mean')
} else {
if (cfg.ctx_size > 0) args.push('--ctx-size', String(cfg.ctx_size))
if (cfg.n_predict > 0) args.push('--n-predict', String(cfg.n_predict))
Expand Down Expand Up @@ -1677,6 +1699,7 @@ export default class llamacpp_extension extends AIEngine {
libraryPath,
args,
envs,
isEmbedding,
}
)
return sInfo
Expand Down Expand Up @@ -2083,6 +2106,7 @@ export default class llamacpp_extension extends AIEngine {
}

async embed(text: string[]): Promise<EmbeddingResponse> {
// Ensure the sentence-transformer model is present
let sInfo = await this.findSessionByModel('sentence-transformer-mini')
if (!sInfo) {
const downloadedModelList = await this.list()
Expand All @@ -2096,30 +2120,45 @@ export default class llamacpp_extension extends AIEngine {
'https://huggingface.co/second-state/All-MiniLM-L6-v2-Embedding-GGUF/resolve/main/all-MiniLM-L6-v2-ggml-model-f16.gguf?download=true',
})
}
sInfo = await this.load('sentence-transformer-mini')
// Load specifically in embedding mode
sInfo = await this.load('sentence-transformer-mini', undefined, true)
}
const baseUrl = `http://localhost:${sInfo.port}/v1/embeddings`
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${sInfo.api_key}`,

const attemptRequest = async (session: SessionInfo) => {
const baseUrl = `http://localhost:${session.port}/v1/embeddings`
const headers = {
'Content-Type': 'application/json',
'Authorization': `Bearer ${session.api_key}`,
}
const body = JSON.stringify({
input: text,
model: session.model_id,
encoding_format: 'float',
})
const response = await fetch(baseUrl, {
method: 'POST',
headers,
body,
})
return response
}

// First try with the existing session (may have been started without --embedding previously)
let response = await attemptRequest(sInfo)

// If embeddings endpoint is not available (501), reload with embedding mode and retry once
if (response.status === 501) {
try {
await this.unload('sentence-transformer-mini')
} catch {}
sInfo = await this.load('sentence-transformer-mini', undefined, true)
response = await attemptRequest(sInfo)
}
const body = JSON.stringify({
input: text,
model: sInfo.model_id,
encoding_format: 'float',
})
const response = await fetch(baseUrl, {
method: 'POST',
headers,
body,
})

if (!response.ok) {
const errorData = await response.json().catch(() => null)
throw new Error(
`API request failed with status ${response.status}: ${JSON.stringify(
errorData
)}`
`API request failed with status ${response.status}: ${JSON.stringify(errorData)}`
)
}
const responseData = await response.json()
Expand Down
Loading
Loading