diff --git a/.gitignore b/.gitignore index 4bfb0576fe..d400a3b813 100644 --- a/.gitignore +++ b/.gitignore @@ -17,7 +17,7 @@ package-lock.json core/lib/** # Nitro binary files -extensions/inference-extension/nitro/*/nitro -extensions/inference-extension/nitro/*/*.exe -extensions/inference-extension/nitro/*/*.dll -extensions/inference-extension/nitro/*/*.metal \ No newline at end of file +extensions/inference-nitro-extension/bin/*/nitro +extensions/inference-nitro-extension/bin/*/*.exe +extensions/inference-nitro-extension/bin/*/*.dll +extensions/inference-nitro-extension/bin/*/*.metal \ No newline at end of file diff --git a/core/src/events.ts b/core/src/events.ts index f588daad78..1acbef918f 100644 --- a/core/src/events.ts +++ b/core/src/events.ts @@ -8,6 +8,18 @@ export enum EventName { OnMessageResponse = "OnMessageResponse", /** The `OnMessageUpdate` event is emitted when a message is updated. */ OnMessageUpdate = "OnMessageUpdate", + /** The `OnModelInit` event is emitted when a model initializes. */ + OnModelInit = "OnModelInit", + /** The `OnModelReady` event is emitted when a model is ready. */ + OnModelReady = "OnModelReady", + /** The `OnModelFail` event is emitted when a model fails to load. */ + OnModelFail = "OnModelFail", + /** The `OnModelStop` event is emitted when a model starts to stop. */ + OnModelStop = "OnModelStop", + /** The `OnModelStopped` event is emitted when a model has stopped successfully. */ + OnModelStopped = "OnModelStopped", + /** The `OnInferenceStopped` event is emitted when an inference is stopped. */ + OnInferenceStopped = "OnInferenceStopped", } /** diff --git a/core/src/extensions/inference.ts b/core/src/extensions/inference.ts index 483ba1339c..9453a06d58 100644 --- a/core/src/extensions/inference.ts +++ b/core/src/extensions/inference.ts @@ -5,26 +5,10 @@ import { BaseExtension } from "../extension"; * Inference extension. Start, stop and inference models. */ export abstract class InferenceExtension extends BaseExtension { - /** - * Initializes the model for the extension. - * @param modelId - The ID of the model to initialize. - */ - abstract initModel(modelId: string, settings?: ModelSettingParams): Promise; - - /** - * Stops the model for the extension. - */ - abstract stopModel(): Promise; - - /** - * Stops the streaming inference. - */ - abstract stopInference(): Promise; - /** * Processes an inference request. * @param data - The data for the inference request. * @returns The result of the inference request. */ - abstract inferenceRequest(data: MessageRequest): Promise; + abstract inference(data: MessageRequest): Promise; } diff --git a/core/src/fs.ts b/core/src/fs.ts index 4013479dd5..d15bf62304 100644 --- a/core/src/fs.ts +++ b/core/src/fs.ts @@ -5,52 +5,52 @@ * @returns {Promise} A Promise that resolves when the file is written successfully. */ const writeFile: (path: string, data: string) => Promise = (path, data) => - global.core.api?.writeFile(path, data); + global.core.api?.writeFile(path, data) /** * Checks whether the path is a directory. * @param path - The path to check. * @returns {boolean} A boolean indicating whether the path is a directory. */ -const isDirectory = (path: string): Promise => - global.core.api?.isDirectory(path); +const isDirectory = (path: string): Promise => global.core.api?.isDirectory(path) /** * Reads the contents of a file at the specified path. * @param {string} path - The path of the file to read. * @returns {Promise} A Promise that resolves with the contents of the file.
*/ -const readFile: (path: string) => Promise = (path) => - global.core.api?.readFile(path); +const readFile: (path: string) => Promise = (path) => global.core.api?.readFile(path) +/** + * Checks whether a file exists at the specified path + * @param {string} path - The path to check + * @returns {boolean} A boolean indicating whether the path exists. + */ +const exists = (path: string): Promise => global.core.api?.exists(path) /** * List the directory files * @param {string} path - The path of the directory to list files. * @returns {Promise} A Promise that resolves with the contents of the directory. */ -const listFiles: (path: string) => Promise = (path) => - global.core.api?.listFiles(path); +const listFiles: (path: string) => Promise = (path) => global.core.api?.listFiles(path) /** * Creates a directory at the specified path. * @param {string} path - The path of the directory to create. * @returns {Promise} A Promise that resolves when the directory is created successfully. */ -const mkdir: (path: string) => Promise = (path) => - global.core.api?.mkdir(path); +const mkdir: (path: string) => Promise = (path) => global.core.api?.mkdir(path) /** * Removes a directory at the specified path. * @param {string} path - The path of the directory to remove. * @returns {Promise} A Promise that resolves when the directory is removed successfully. */ -const rmdir: (path: string) => Promise = (path) => - global.core.api?.rmdir(path); +const rmdir: (path: string) => Promise = (path) => global.core.api?.rmdir(path) /** * Deletes a file from the local file system. * @param {string} path - The path of the file to delete. * @returns {Promise} A Promise that resolves when the file is deleted. */ -const deleteFile: (path: string) => Promise = (path) => - global.core.api?.deleteFile(path); +const deleteFile: (path: string) => Promise = (path) => global.core.api?.deleteFile(path) /** * Appends data to a file at the specified path. @@ -58,10 +58,10 @@ const deleteFile: (path: string) => Promise = (path) => * @param data data to append */ const appendFile: (path: string, data: string) => Promise = (path, data) => - global.core.api?.appendFile(path, data); + global.core.api?.appendFile(path, data) const copyFile: (src: string, dest: string) => Promise = (src, dest) => - global.core.api?.copyFile(src, dest); + global.core.api?.copyFile(src, dest) /** * Reads a file line by line. @@ -69,12 +69,13 @@ const copyFile: (src: string, dest: string) => Promise = (src, dest) => * @returns {Promise} A promise that resolves to the lines of the file.
*/ const readLineByLine: (path: string) => Promise = (path) => - global.core.api?.readLineByLine(path); + global.core.api?.readLineByLine(path) export const fs = { isDirectory, writeFile, readFile, + exists, listFiles, mkdir, rmdir, @@ -82,4 +83,4 @@ export const fs = { appendFile, readLineByLine, copyFile, -}; +} diff --git a/core/src/types/index.ts b/core/src/types/index.ts index 87343aa653..5b91fcc8a5 100644 --- a/core/src/types/index.ts +++ b/core/src/types/index.ts @@ -41,8 +41,8 @@ export type MessageRequest = { /** Messages for constructing a chat completion request **/ messages?: ChatCompletionMessage[]; - /** Runtime parameters for constructing a chat completion request **/ - parameters?: ModelRuntimeParam; + /** The model info used to construct a chat completion request **/ + model?: ModelInfo; }; /** @@ -153,7 +153,8 @@ export type ThreadAssistantInfo = { export type ModelInfo = { id: string; settings: ModelSettingParams; - parameters: ModelRuntimeParam; + parameters: ModelRuntimeParams; + engine?: InferenceEngine; }; /** @@ -166,6 +167,17 @@ export type ThreadState = { error?: Error; lastMessage?: string; }; +/** + * Represents the inference engine. + * @stored + */ + +enum InferenceEngine { + nitro = "nitro", + openai = "openai", + nvidia_triton = "nvidia_triton", + hf_endpoint = "hf_endpoint", +} /** * Model type defines the shape of a model object. @@ -228,12 +240,16 @@ export interface Model { /** * The model runtime parameters. */ - parameters: ModelRuntimeParam; + parameters: ModelRuntimeParams; /** * Metadata of the model. */ metadata: ModelMetadata; + /** + * The model engine. + */ + engine: InferenceEngine; } export type ModelMetadata = { @@ -268,7 +284,7 @@ export type ModelSettingParams = { /** * The available model runtime parameters. */ -export type ModelRuntimeParam = { +export type ModelRuntimeParams = { temperature?: number; token_limit?: number; top_k?: number; diff --git a/docs/openapi/specs/models.yaml b/docs/openapi/specs/models.yaml index 1bd7e65d74..1322b90ee6 100644 --- a/docs/openapi/specs/models.yaml +++ b/docs/openapi/specs/models.yaml @@ -289,7 +289,7 @@ components: engine: type: string description: "The engine used by the model." - example: "llamacpp" + enum: [nitro, openai, hf_inference] quantization: type: string description: "Quantization parameter of the model." diff --git a/electron/handlers/fs.ts b/electron/handlers/fs.ts index 16cef6eb68..acc0ed2dab 100644 --- a/electron/handlers/fs.ts +++ b/electron/handlers/fs.ts @@ -50,6 +50,19 @@ export function handleFsIPCs() { }) }) + /** + * Checks whether a file exists in the user data directory. + * @param event - The event object. + * @param path - The path of the file to check. + * @returns A promise that resolves with a boolean indicating whether the file exists. + */ + ipcMain.handle('exists', async (_event, path: string) => { + return new Promise((resolve, reject) => { + const fullPath = join(userSpacePath, path) + fs.existsSync(fullPath) ? resolve(true) : resolve(false) + }) + }) + /** * Writes data to a file in the user data directory. * @param event - The event object. diff --git a/electron/invokers/fs.ts b/electron/invokers/fs.ts index 309562ad6e..e1aa67cca3 100644 --- a/electron/invokers/fs.ts +++ b/electron/invokers/fs.ts @@ -27,6 +27,12 @@ export function fsInvokers() { */ readFile: (path: string) => ipcRenderer.invoke('readFile', path), + /** + * Checks whether a file exists at the specified path. + * @param {string} path - The path of the file to check.
+ */ + exists: (path: string) => ipcRenderer.invoke('exists', path), + /** * Writes data to a file at the specified path. * @param {string} path - The path of the file to write to. diff --git a/extensions/inference-extension/download.bat b/extensions/inference-extension/download.bat deleted file mode 100644 index 7232689191..0000000000 --- a/extensions/inference-extension/download.bat +++ /dev/null @@ -1,3 +0,0 @@ -@echo off -set /p NITRO_VERSION=<./nitro/version.txt -.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./nitro/win-cpu diff --git a/extensions/inference-extension/nitro/linux-cpu/.gitkeep b/extensions/inference-extension/nitro/linux-cpu/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/extensions/inference-extension/nitro/linux-cuda/.gitkeep b/extensions/inference-extension/nitro/linux-cuda/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/extensions/inference-extension/nitro/mac-arm64/.gitkeep b/extensions/inference-extension/nitro/mac-arm64/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/extensions/inference-extension/nitro/mac-x64/.gitkeep b/extensions/inference-extension/nitro/mac-x64/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/extensions/inference-extension/nitro/win-cpu/.gitkeep b/extensions/inference-extension/nitro/win-cpu/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/extensions/inference-extension/nitro/win-cuda/.gitkeep b/extensions/inference-extension/nitro/win-cuda/.gitkeep deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/extensions/inference-extension/package.json b/extensions/inference-extension/package.json deleted file mode 100644 index 798d2e46d8..0000000000 --- a/extensions/inference-extension/package.json +++ /dev/null @@ -1,57 +0,0 @@ -{ - "name": "@janhq/inference-extension", - "version": "1.0.0", - "description": "Inference Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", - "main": "dist/index.js", - "module": "dist/module.js", - "author": "Jan ", - "license": "AGPL-3.0", - "scripts": { - "build": "tsc -b . 
&& webpack --config webpack.config.js", - "downloadnitro:linux": "NITRO_VERSION=$(cat ./nitro/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./nitro/linux-cpu && chmod +x ./nitro/linux-cpu/nitro && chmod +x ./nitro/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./nitro/linux-cuda && chmod +x ./nitro/linux-cuda/nitro && chmod +x ./nitro/linux-start.sh", - "downloadnitro:darwin": "NITRO_VERSION=$(cat ./nitro/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./nitro/mac-arm64 && chmod +x ./nitro/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./nitro/mac-x64 && chmod +x ./nitro/mac-x64/nitro", - "downloadnitro:win32": "download.bat", - "downloadnitro": "run-script-os", - "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"nitro/**\" \"dist/nitro\" && npm pack && cpx *.tgz ../../electron/pre-install", - "build:publish": "run-script-os" - }, - "exports": { - ".": "./dist/index.js", - "./main": "./dist/module.js" - }, - "devDependencies": { - "cpx": "^1.5.0", - "rimraf": "^3.0.2", - "run-script-os": "^1.1.6", - "webpack": "^5.88.2", - "webpack-cli": "^5.1.4" - }, - "dependencies": { - "@janhq/core": "file:../../core", - "download-cli": "^1.1.1", - "electron-log": "^5.0.1", - "fetch-retry": "^5.0.6", - "kill-port": "^2.0.1", - "path-browserify": "^1.0.1", - "rxjs": "^7.8.1", - "tcp-port-used": "^1.0.2", - "ts-loader": "^9.5.0", - "ulid": "^2.3.0" - }, - "engines": { - "node": ">=18.0.0" - }, - "files": [ - "dist/*", - "package.json", - "README.md" - ], - "bundleDependencies": [ - "tcp-port-used", - "kill-port", - "fetch-retry", - "electron-log" - ] -} diff --git a/extensions/inference-extension/src/@types/global.d.ts b/extensions/inference-extension/src/@types/global.d.ts deleted file mode 100644 index 7267f09400..0000000000 --- a/extensions/inference-extension/src/@types/global.d.ts +++ /dev/null @@ -1,2 +0,0 @@ -declare const MODULE: string; -declare const INFERENCE_URL: string; diff --git a/extensions/inference-extension/README.md b/extensions/inference-nitro-extension/README.md similarity index 100% rename from extensions/inference-extension/README.md rename to extensions/inference-nitro-extension/README.md diff --git a/extensions/inference-extension/nitro/linux-start.sh b/extensions/inference-nitro-extension/bin/linux-start.sh similarity index 100% rename from extensions/inference-extension/nitro/linux-start.sh rename to extensions/inference-nitro-extension/bin/linux-start.sh diff --git a/extensions/inference-extension/nitro/version.txt b/extensions/inference-nitro-extension/bin/version.txt similarity index 100% rename from extensions/inference-extension/nitro/version.txt rename to 
extensions/inference-nitro-extension/bin/version.txt diff --git a/extensions/inference-extension/nitro/win-start.bat b/extensions/inference-nitro-extension/bin/win-start.bat similarity index 100% rename from extensions/inference-extension/nitro/win-start.bat rename to extensions/inference-nitro-extension/bin/win-start.bat diff --git a/extensions/inference-nitro-extension/download.bat b/extensions/inference-nitro-extension/download.bat new file mode 100644 index 0000000000..1776b7dfec --- /dev/null +++ b/extensions/inference-nitro-extension/download.bat @@ -0,0 +1,3 @@ +@echo off +set /p NITRO_VERSION=<./bin/version.txt +.\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64-cuda.tar.gz -e --strip 1 -o ./bin/win-cuda && .\node_modules\.bin\download https://github.com/janhq/nitro/releases/download/v%NITRO_VERSION%/nitro-%NITRO_VERSION%-win-amd64.tar.gz -e --strip 1 -o ./bin/win-cpu diff --git a/extensions/inference-nitro-extension/package.json b/extensions/inference-nitro-extension/package.json new file mode 100644 index 0000000000..ef74fff080 --- /dev/null +++ b/extensions/inference-nitro-extension/package.json @@ -0,0 +1,57 @@ +{ + "name": "@janhq/inference-nitro-extension", + "version": "1.0.0", + "description": "Inference Engine for Nitro Extension, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "AGPL-3.0", + "scripts": { + "build": "tsc -b . && webpack --config webpack.config.js", + "downloadnitro:linux": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64.tar.gz -e --strip 1 -o ./bin/linux-cpu && chmod +x ./bin/linux-cpu/nitro && chmod +x ./bin/linux-start.sh && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-linux-amd64-cuda.tar.gz -e --strip 1 -o ./bin/linux-cuda && chmod +x ./bin/linux-cuda/nitro && chmod +x ./bin/linux-start.sh", + "downloadnitro:darwin": "NITRO_VERSION=$(cat ./bin/version.txt) && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-arm64.tar.gz -e --strip 1 -o ./bin/mac-arm64 && chmod +x ./bin/mac-arm64/nitro && download https://github.com/janhq/nitro/releases/download/v${NITRO_VERSION}/nitro-${NITRO_VERSION}-mac-amd64.tar.gz -e --strip 1 -o ./bin/mac-x64 && chmod +x ./bin/mac-x64/nitro", + "downloadnitro:win32": "download.bat", + "downloadnitro": "run-script-os", + "build:publish:darwin": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && ../../.github/scripts/auto-sign.sh && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:win32": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish:linux": "rimraf *.tgz --glob && npm run build && npm run downloadnitro && cpx \"bin/**\" \"dist/bin\" && npm pack && cpx *.tgz ../../electron/pre-install", + "build:publish": "run-script-os" + }, + "exports": { + ".": "./dist/index.js", + "./main": "./dist/module.js" + }, + "devDependencies": { + "cpx": "^1.5.0", + "rimraf": "^3.0.2", + "run-script-os": "^1.1.6", + "webpack": "^5.88.2", + "webpack-cli": "^5.1.4" 
+ }, + "dependencies": { + "@janhq/core": "file:../../core", + "download-cli": "^1.1.1", + "electron-log": "^5.0.1", + "fetch-retry": "^5.0.6", + "kill-port": "^2.0.1", + "path-browserify": "^1.0.1", + "rxjs": "^7.8.1", + "tcp-port-used": "^1.0.2", + "ts-loader": "^9.5.0", + "ulid": "^2.3.0" + }, + "engines": { + "node": ">=18.0.0" + }, + "files": [ + "dist/*", + "package.json", + "README.md" + ], + "bundleDependencies": [ + "tcp-port-used", + "kill-port", + "fetch-retry", + "electron-log" + ] +} diff --git a/extensions/inference-nitro-extension/src/@types/global.d.ts b/extensions/inference-nitro-extension/src/@types/global.d.ts new file mode 100644 index 0000000000..642f109090 --- /dev/null +++ b/extensions/inference-nitro-extension/src/@types/global.d.ts @@ -0,0 +1,26 @@ +declare const MODULE: string; +declare const INFERENCE_URL: string; + +/** + * The parameters for the initModel function. + * @property settings - The settings for the machine learning model. + * @property settings.ctx_len - The context length. + * @property settings.ngl - The number of generated tokens. + * @property settings.cont_batching - Whether to use continuous batching. + * @property settings.embedding - Whether to use embedding. + */ +interface EngineSettings { + ctx_len: number; + ngl: number; + cont_batching: boolean; + embedding: boolean; +} + +/** + * The response from the initModel function. + * @property error - An error message if the model fails to load. + */ +interface ModelOperationResponse { + error?: any; + modelFile?: string; +} diff --git a/extensions/inference-extension/src/helpers/sse.ts b/extensions/inference-nitro-extension/src/helpers/sse.ts similarity index 93% rename from extensions/inference-extension/src/helpers/sse.ts rename to extensions/inference-nitro-extension/src/helpers/sse.ts index f427e443c9..d9d8712ddb 100644 --- a/extensions/inference-extension/src/helpers/sse.ts +++ b/extensions/inference-nitro-extension/src/helpers/sse.ts @@ -1,3 +1,4 @@ +import { Model } from "@janhq/core"; import { Observable } from "rxjs"; /** * Sends a request to the inference server to generate a response based on the recent messages. @@ -6,21 +7,23 @@ import { Observable } from "rxjs"; */ export function requestInference( recentMessages: any[], + engine: EngineSettings, + model: Model, controller?: AbortController ): Observable { return new Observable((subscriber) => { const requestBody = JSON.stringify({ messages: recentMessages, + model: model.id, stream: true, - model: "gpt-3.5-turbo", - max_tokens: 2048, + // ...model.parameters, }); fetch(INFERENCE_URL, { method: "POST", headers: { "Content-Type": "application/json", - Accept: "text/event-stream", "Access-Control-Allow-Origin": "*", + Accept: "text/event-stream", }, body: requestBody, signal: controller?.signal, diff --git a/extensions/inference-extension/src/index.ts b/extensions/inference-nitro-extension/src/index.ts similarity index 57% rename from extensions/inference-extension/src/index.ts rename to extensions/inference-nitro-extension/src/index.ts index e8e7758bb0..975d94100a 100644 --- a/extensions/inference-extension/src/index.ts +++ b/extensions/inference-nitro-extension/src/index.ts @@ -19,6 +19,8 @@ import { events, executeOnMain, getUserSpace, + fs, + Model, } from "@janhq/core"; import { InferenceExtension } from "@janhq/core"; import { requestInference } from "./helpers/sse"; @@ -30,7 +32,19 @@ import { join } from "path"; * The class provides methods for initializing and stopping a model, and for making inference requests. 
* It also subscribes to events emitted by the @janhq/core package and handles new message requests. */ -export default class JanInferenceExtension implements InferenceExtension { +export default class JanInferenceNitroExtension implements InferenceExtension { + private static readonly _homeDir = "engines"; + private static readonly _engineMetadataFileName = "nitro.json"; + + private static _currentModel: Model; + + private static _engineSettings: EngineSettings = { + ctx_len: 2048, + ngl: 100, + cont_batching: false, + embedding: false, + }; + controller = new AbortController(); isCancelled = false; /** @@ -45,51 +59,88 @@ export default class JanInferenceExtension implements InferenceExtension { * Subscribes to events emitted by the @janhq/core package. */ onLoad(): void { + fs.mkdir(JanInferenceNitroExtension._homeDir); + this.writeDefaultEngineSettings(); + + // Events subscription events.on(EventName.OnMessageSent, (data) => - JanInferenceExtension.handleMessageRequest(data, this) + JanInferenceNitroExtension.handleMessageRequest(data, this) ); + + events.on(EventName.OnModelInit, (model: Model) => { + JanInferenceNitroExtension.handleModelInit(model); + }); + + events.on(EventName.OnModelStop, (model: Model) => { + JanInferenceNitroExtension.handleModelStop(model); + }); + + events.on(EventName.OnInferenceStopped, () => { + JanInferenceNitroExtension.handleInferenceStopped(this); + }); } /** * Stops the model inference. */ - onUnload(): void { - this.stopModel(); + onUnload(): void {} + + + private async writeDefaultEngineSettings() { + try { + const engineFile = join( + JanInferenceNitroExtension._homeDir, + JanInferenceNitroExtension._engineMetadataFileName + ); + if (await fs.exists(engineFile)) { + JanInferenceNitroExtension._engineSettings = JSON.parse( + await fs.readFile(engineFile) + ); + } else { + await fs.writeFile( + engineFile, + JSON.stringify(JanInferenceNitroExtension._engineSettings, null, 2) + ); + } + } catch (err) { + console.error(err); + } } - /** - * Initializes the model with the specified file name. - * @param {string} modelId - The ID of the model to initialize. - * @returns {Promise} A promise that resolves when the model is initialized. - */ - async initModel( - modelId: string, - settings?: ModelSettingParams - ): Promise { + private static async handleModelInit(model: Model) { + if (model.engine !== "nitro") { + return; + } const userSpacePath = await getUserSpace(); - const modelFullPath = join(userSpacePath, "models", modelId, modelId); + const modelFullPath = join(userSpacePath, "models", model.id, model.id); - return executeOnMain(MODULE, "initModel", { - modelFullPath, - settings, + const nitroInitResult = await executeOnMain(MODULE, "initModel", { + modelFullPath: modelFullPath, + model: model, }); + + if (nitroInitResult.error === null) { + events.emit(EventName.OnModelFail, model); + } else { + JanInferenceNitroExtension._currentModel = model; + events.emit(EventName.OnModelReady, model); + } } - /** - * Stops the model. - * @returns {Promise} A promise that resolves when the model is stopped. - */ - async stopModel(): Promise { - return executeOnMain(MODULE, "killSubprocess"); + private static async handleModelStop(model: Model) { + if (model.engine !== "nitro") { + return; + } else { + await executeOnMain(MODULE, "stopModel"); + events.emit(EventName.OnModelStopped, model); + } } - /** - * Stops streaming inference. - * @returns {Promise} A promise that resolves when the streaming is stopped. 
- */ - async stopInference(): Promise { - this.isCancelled = true; - this.controller?.abort(); + private static async handleInferenceStopped( + instance: JanInferenceNitroExtension + ) { + instance.isCancelled = true; + instance.controller?.abort(); } /** @@ -97,7 +148,7 @@ export default class JanInferenceExtension implements InferenceExtension { * @param {MessageRequest} data - The data for the inference request. * @returns {Promise} A promise that resolves with the inference response. */ - async inferenceRequest(data: MessageRequest): Promise { + async inference(data: MessageRequest): Promise { const timestamp = Date.now(); const message: ThreadMessage = { thread_id: data.threadId, @@ -111,7 +162,11 @@ export default class JanInferenceExtension implements InferenceExtension { }; return new Promise(async (resolve, reject) => { - requestInference(data.messages ?? []).subscribe({ + requestInference( + data.messages ?? [], + JanInferenceNitroExtension._engineSettings, + JanInferenceNitroExtension._currentModel + ).subscribe({ next: (_content) => {}, complete: async () => { resolve(message); @@ -131,8 +186,11 @@ export default class JanInferenceExtension implements InferenceExtension { */ private static async handleMessageRequest( data: MessageRequest, - instance: JanInferenceExtension + instance: JanInferenceNitroExtension ) { + if (data.model.engine !== "nitro") { + return; + } const timestamp = Date.now(); const message: ThreadMessage = { id: ulid(), @@ -150,7 +208,12 @@ export default class JanInferenceExtension implements InferenceExtension { instance.isCancelled = false; instance.controller = new AbortController(); - requestInference(data.messages, instance.controller).subscribe({ + requestInference( + data.messages ?? [], + JanInferenceNitroExtension._engineSettings, + JanInferenceNitroExtension._currentModel, + instance.controller + ).subscribe({ next: (content) => { const messageContent: ThreadContent = { type: ContentType.Text, diff --git a/extensions/inference-extension/src/module.ts b/extensions/inference-nitro-extension/src/module.ts similarity index 80% rename from extensions/inference-extension/src/module.ts rename to extensions/inference-nitro-extension/src/module.ts index 72e418d6c0..d36553f409 100644 --- a/extensions/inference-extension/src/module.ts +++ b/extensions/inference-nitro-extension/src/module.ts @@ -20,51 +20,51 @@ let subprocess = null; let currentModelFile = null; /** - * The response from the initModel function. - * @property error - An error message if the model fails to load. + * Stops a Nitro subprocess. + * @param wrapper - The model wrapper. + * @returns A Promise that resolves when the subprocess is terminated successfully, or rejects with an error message if the subprocess fails to terminate. */ -interface InitModelResponse { - error?: any; - modelFile?: string; +function stopModel(): Promise { + return new Promise((resolve, reject) => { + checkAndUnloadNitro(); + resolve({ error: undefined }); + }); } /** * Initializes a Nitro subprocess to load a machine learning model. - * @param modelFile - The name of the machine learning model file. + * @param wrapper - The model wrapper. * @returns A Promise that resolves when the model is loaded successfully, or rejects with an error message if the model is not found or fails to load. * TODO: Should pass absolute of the model file instead of just the name - So we can modurize the module.ts to npm package * TODO: Should it be startModel instead? */ -function initModel(wrapper: any): Promise { - // 1. 
Check if the model file exists +function initModel(wrapper: any): Promise { currentModelFile = wrapper.modelFullPath; - log.info("Started to load model " + wrapper.modelFullPath); - - const settings = { - llama_model_path: currentModelFile, - ctx_len: 2048, - ngl: 100, - cont_batching: false, - embedding: false, // Always enable embedding mode on - ...wrapper.settings, - }; - log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); - - return ( - // 1. Check if the port is used, if used, attempt to unload model / kill nitro process - validateModelVersion() - .then(checkAndUnloadNitro) - // 2. Spawn the Nitro subprocess - .then(spawnNitroProcess) - // 4. Load the model into the Nitro subprocess (HTTP POST request) - .then(() => loadLLMModel(settings)) - // 5. Check if the model is loaded successfully - .then(validateModelStatus) - .catch((err) => { - log.error("error: " + JSON.stringify(err)); - return { error: err, currentModelFile }; - }) - ); + if (wrapper.model.engine !== "nitro") { + return Promise.resolve({ error: "Not a nitro model" }); + } else { + log.info("Started to load model " + wrapper.model.modelFullPath); + const settings = { + llama_model_path: currentModelFile, + ...wrapper.model.settings, + }; + log.info(`Load model settings: ${JSON.stringify(settings, null, 2)}`); + return ( + // 1. Check if the port is used, if used, attempt to unload model / kill nitro process + validateModelVersion() + .then(checkAndUnloadNitro) + // 2. Spawn the Nitro subprocess + .then(spawnNitroProcess) + // 4. Load the model into the Nitro subprocess (HTTP POST request) + .then(() => loadLLMModel(settings)) + // 5. Check if the model is loaded successfully + .then(validateModelStatus) + .catch((err) => { + log.error("error: " + JSON.stringify(err)); + return { error: err, currentModelFile }; + }) + ); + } } /** @@ -91,11 +91,11 @@ function loadLLMModel(settings): Promise { /** * Validates the status of a model. - * @returns {Promise} A promise that resolves to an object. + * @returns {Promise} A promise that resolves to an object. * If the model is loaded successfully, the object is empty. * If the model is not loaded successfully, the object contains an error message. */ -async function validateModelStatus(): Promise { +async function validateModelStatus(): Promise { // Send a GET request to the validation URL. // Retry the request up to 3 times if it fails, with a delay of 500 milliseconds between retries. 
return fetchRetry(NITRO_HTTP_VALIDATE_MODEL_URL, { @@ -142,8 +142,8 @@ function killSubprocess(): Promise { * Check port is used or not, if used, attempt to unload model * If unload failed, kill the port */ -function checkAndUnloadNitro() { - return tcpPortUsed.check(PORT, LOCAL_HOST).then((inUse) => { +async function checkAndUnloadNitro() { + return tcpPortUsed.check(PORT, LOCAL_HOST).then(async (inUse) => { // If inUse - try unload or kill process, otherwise do nothing if (inUse) { // Attempt to unload model @@ -168,7 +168,7 @@ function checkAndUnloadNitro() { */ async function spawnNitroProcess(): Promise { return new Promise((resolve, reject) => { - let binaryFolder = path.join(__dirname, "nitro"); // Current directory by default + let binaryFolder = path.join(__dirname, "bin"); // Current directory by default let binaryName; if (process.platform === "win32") { diff --git a/extensions/inference-extension/tsconfig.json b/extensions/inference-nitro-extension/tsconfig.json similarity index 100% rename from extensions/inference-extension/tsconfig.json rename to extensions/inference-nitro-extension/tsconfig.json diff --git a/extensions/inference-extension/webpack.config.js b/extensions/inference-nitro-extension/webpack.config.js similarity index 100% rename from extensions/inference-extension/webpack.config.js rename to extensions/inference-nitro-extension/webpack.config.js diff --git a/extensions/inference-openai-extension/README.md b/extensions/inference-openai-extension/README.md new file mode 100644 index 0000000000..455783efb1 --- /dev/null +++ b/extensions/inference-openai-extension/README.md @@ -0,0 +1,78 @@ +# Jan inference plugin + +Created using Jan app example + +# Create a Jan Plugin using Typescript + +Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀 + +## Create Your Own Plugin + +To create your own plugin, you can use this repository as a template! Just follow the below instructions: + +1. Click the Use this template button at the top of the repository +2. Select Create a new repository +3. Select an owner and name for your new repository +4. Click Create repository +5. Clone your new repository + +## Initial Setup + +After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin. + +> [!NOTE] +> +> You'll need to have a reasonably modern version of +> [Node.js](https://nodejs.org) handy. If you are using a version manager like +> [`nodenv`](https://github.com/nodenv/nodenv) or +> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the +> root of your repository to install the version specified in +> [`package.json`](./package.json). Otherwise, 20.x or later should work! + +1. :hammer_and_wrench: Install the dependencies + + ```bash + npm install + ``` + +1. :building_construction: Package the TypeScript for distribution + + ```bash + npm run bundle + ``` + +1. :white_check_mark: Check your artifact + + There will be a tgz file in your plugin directory now + +## Update the Plugin Metadata + +The [`package.json`](package.json) file defines metadata about your plugin, such as +plugin name, main entry, description and version. + +When you copy this repository, update `package.json` with the name, description for your plugin. + +## Update the Plugin Code + +The [`src/`](./src/) directory is the heart of your plugin! This contains the +source code that will be run when your plugin extension functions are invoked. 
You can replace the +contents of this directory with your own code. + +There are a few things to keep in mind when writing your plugin code: + +- Most Jan Plugin Extension functions are processed asynchronously. + In `index.ts`, you will see that the extension function will return a `Promise`. + + ```typescript + import { core } from "@janhq/core"; + + function onStart(): Promise { + return core.invokePluginFunc(MODULE_PATH, "run", 0); + } + ``` + + For more information about the Jan Plugin Core module, see the + [documentation](https://github.com/janhq/jan/blob/main/core/README.md). + +So, what are you waiting for? Go ahead and start customizing your plugin! + diff --git a/extensions/inference-openai-extension/package.json b/extensions/inference-openai-extension/package.json new file mode 100644 index 0000000000..c32027ca8e --- /dev/null +++ b/extensions/inference-openai-extension/package.json @@ -0,0 +1,41 @@ +{ + "name": "@janhq/inference-openai-extension", + "version": "1.0.0", + "description": "Inference Engine for OpenAI Extension that can be used with any OpenAI compatible API", + "main": "dist/index.js", + "module": "dist/module.js", + "author": "Jan ", + "license": "AGPL-3.0", + "scripts": { + "build": "tsc -b . && webpack --config webpack.config.js", + "build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install" + }, + "exports": { + ".": "./dist/index.js", + "./main": "./dist/module.js" + }, + "devDependencies": { + "cpx": "^1.5.0", + "rimraf": "^3.0.2", + "webpack": "^5.88.2", + "webpack-cli": "^5.1.4" + }, + "dependencies": { + "@janhq/core": "file:../../core", + "fetch-retry": "^5.0.6", + "path-browserify": "^1.0.1", + "ts-loader": "^9.5.0", + "ulid": "^2.3.0" + }, + "engines": { + "node": ">=18.0.0" + }, + "files": [ + "dist/*", + "package.json", + "README.md" + ], + "bundleDependencies": [ + "fetch-retry" + ] +} diff --git a/extensions/inference-openai-extension/src/@types/global.d.ts b/extensions/inference-openai-extension/src/@types/global.d.ts new file mode 100644 index 0000000000..bb0c6e9bfa --- /dev/null +++ b/extensions/inference-openai-extension/src/@types/global.d.ts @@ -0,0 +1,27 @@ +import { Model } from "@janhq/core"; + +declare const MODULE: string; + +declare interface EngineSettings { + full_url?: string; + api_key?: string; +} + +enum OpenAIChatCompletionModelName { + "gpt-3.5-turbo-instruct" = "gpt-3.5-turbo-instruct", + "gpt-3.5-turbo-instruct-0914" = "gpt-3.5-turbo-instruct-0914", + "gpt-4-1106-preview" = "gpt-4-1106-preview", + "gpt-3.5-turbo-0613" = "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-0301" = "gpt-3.5-turbo-0301", + "gpt-3.5-turbo" = "gpt-3.5-turbo", + "gpt-3.5-turbo-16k-0613" = "gpt-3.5-turbo-16k-0613", + "gpt-3.5-turbo-1106" = "gpt-3.5-turbo-1106", + "gpt-4-vision-preview" = "gpt-4-vision-preview", + "gpt-4" = "gpt-4", + "gpt-4-0314" = "gpt-4-0314", + "gpt-4-0613" = "gpt-4-0613", +} + +declare type OpenAIModel = Omit & { + id: OpenAIChatCompletionModelName; +}; diff --git a/extensions/inference-openai-extension/src/helpers/sse.ts b/extensions/inference-openai-extension/src/helpers/sse.ts new file mode 100644 index 0000000000..c8ddefca64 --- /dev/null +++ b/extensions/inference-openai-extension/src/helpers/sse.ts @@ -0,0 +1,68 @@ +import { Observable } from "rxjs"; +import { EngineSettings, OpenAIModel } from "../@types/global"; + +/** + * Sends a request to the inference server to generate a response based on the recent messages. 
+ * @param recentMessages - An array of recent messages to use as context for the inference. + * @param engine - The engine settings to use for the inference. + * @param model - The model to use for the inference. + * @returns An Observable that emits the generated response as a string. + */ +export function requestInference( + recentMessages: any[], + engine: EngineSettings, + model: OpenAIModel, + controller?: AbortController +): Observable { + return new Observable((subscriber) => { + let model_id: string = model.id + if (engine.full_url.includes("openai.azure.com")){ + model_id = engine.full_url.split("/")[5] + } + const requestBody = JSON.stringify({ + messages: recentMessages, + stream: true, + model: model_id + // ...model.parameters, + }); + fetch(`${engine.full_url}`, { + method: "POST", + headers: { + "Content-Type": "application/json", + Accept: "text/event-stream", + "Access-Control-Allow-Origin": "*", + Authorization: `Bearer ${engine.api_key}`, + "api-key": `${engine.api_key}`, + }, + body: requestBody, + signal: controller?.signal, + }) + .then(async (response) => { + const stream = response.body; + const decoder = new TextDecoder("utf-8"); + const reader = stream?.getReader(); + let content = ""; + + while (true && reader) { + const { done, value } = await reader.read(); + if (done) { + break; + } + const text = decoder.decode(value); + const lines = text.trim().split("\n"); + for (const line of lines) { + if (line.startsWith("data: ") && !line.includes("data: [DONE]")) { + const data = JSON.parse(line.replace("data: ", "")); + content += data.choices[0]?.delta?.content ?? ""; + if (content.startsWith("assistant: ")) { + content = content.replace("assistant: ", ""); + } + subscriber.next(content); + } + } + } + subscriber.complete(); + }) + .catch((err) => subscriber.error(err)); + }); +} diff --git a/extensions/inference-openai-extension/src/index.ts b/extensions/inference-openai-extension/src/index.ts new file mode 100644 index 0000000000..7e3e6e71e8 --- /dev/null +++ b/extensions/inference-openai-extension/src/index.ts @@ -0,0 +1,231 @@ +/** + * @file This file exports a class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. + * @version 1.0.0 + * @module inference-openai-extension/src/index + */ + +import { + ChatCompletionRole, + ContentType, + EventName, + MessageRequest, + MessageStatus, + ModelSettingParams, + ExtensionType, + ThreadContent, + ThreadMessage, + events, + fs, +} from "@janhq/core"; +import { InferenceExtension } from "@janhq/core"; +import { requestInference } from "./helpers/sse"; +import { ulid } from "ulid"; +import { join } from "path"; +import { EngineSettings, OpenAIModel } from "./@types/global"; + +/** + * A class that implements the InferenceExtension interface from the @janhq/core package. + * The class provides methods for initializing and stopping a model, and for making inference requests. + * It also subscribes to events emitted by the @janhq/core package and handles new message requests. 
+ */ +export default class JanInferenceOpenAIExtension implements InferenceExtension { + private static readonly _homeDir = "engines"; + private static readonly _engineMetadataFileName = "openai.json"; + + private static _currentModel: OpenAIModel; + + private static _engineSettings: EngineSettings = { + full_url: "https://api.openai.com/v1/chat/completions", + api_key: "sk-", + }; + + controller = new AbortController(); + isCancelled = false; + + /** + * Returns the type of the extension. + * @returns {ExtensionType} The type of the extension. + */ + // TODO: To fix + type(): ExtensionType { + return undefined; + } + /** + * Subscribes to events emitted by the @janhq/core package. + */ + onLoad(): void { + fs.mkdir(JanInferenceOpenAIExtension._homeDir); + JanInferenceOpenAIExtension.writeDefaultEngineSettings(); + + // Events subscription + events.on(EventName.OnMessageSent, (data) => + JanInferenceOpenAIExtension.handleMessageRequest(data, this) + ); + + events.on(EventName.OnModelInit, (model: OpenAIModel) => { + JanInferenceOpenAIExtension.handleModelInit(model); + }); + + events.on(EventName.OnModelStop, (model: OpenAIModel) => { + JanInferenceOpenAIExtension.handleModelStop(model); + }); + events.on(EventName.OnInferenceStopped, () => { + JanInferenceOpenAIExtension.handleInferenceStopped(this); + }); + } + + /** + * Stops the model inference. + */ + onUnload(): void {} + + static async writeDefaultEngineSettings() { + try { + const engineFile = join( + JanInferenceOpenAIExtension._homeDir, + JanInferenceOpenAIExtension._engineMetadataFileName + ); + if (await fs.exists(engineFile)) { + JanInferenceOpenAIExtension._engineSettings = JSON.parse( + await fs.readFile(engineFile) + ); + } else { + await fs.writeFile( + engineFile, + JSON.stringify(JanInferenceOpenAIExtension._engineSettings, null, 2) + ); + } + } catch (err) { + console.error(err); + } + } + + /** + * Makes a single response inference request. + * @param {MessageRequest} data - The data for the inference request. + * @returns {Promise} A promise that resolves with the inference response. + */ + async inference(data: MessageRequest): Promise { + const timestamp = Date.now(); + const message: ThreadMessage = { + thread_id: data.threadId, + created: timestamp, + updated: timestamp, + status: MessageStatus.Ready, + id: "", + role: ChatCompletionRole.Assistant, + object: "thread.message", + content: [], + }; + + return new Promise(async (resolve, reject) => { + requestInference( + data.messages ?? [], + JanInferenceOpenAIExtension._engineSettings, + JanInferenceOpenAIExtension._currentModel + ).subscribe({ + next: (_content) => {}, + complete: async () => { + resolve(message); + }, + error: async (err) => { + reject(err); + }, + }); + }); + } + + private static async handleModelInit(model: OpenAIModel) { + if (model.engine !== "openai") { + return; + } else { + JanInferenceOpenAIExtension._currentModel = model; + JanInferenceOpenAIExtension.writeDefaultEngineSettings(); + // Todo: Check model list with API key + events.emit(EventName.OnModelReady, model); + } + } + + private static async handleModelStop(model: OpenAIModel) { + if (model.engine !== "openai") { + return; + } + events.emit(EventName.OnModelStopped, model); + } + + private static async handleInferenceStopped( + instance: JanInferenceOpenAIExtension + ) { + instance.isCancelled = true; + instance.controller?.abort(); + } + + /** + * Handles a new message request by making an inference request and emitting events. 
+ * Function registered in event manager, should be static to avoid binding issues. + * Pass instance as a reference. + * @param {MessageRequest} data - The data for the new message request. + */ + private static async handleMessageRequest( + data: MessageRequest, + instance: JanInferenceOpenAIExtension + ) { + if (data.model.engine !== "openai") { + return; + } + + const timestamp = Date.now(); + const message: ThreadMessage = { + id: ulid(), + thread_id: data.threadId, + assistant_id: data.assistantId, + role: ChatCompletionRole.Assistant, + content: [], + status: MessageStatus.Pending, + created: timestamp, + updated: timestamp, + object: "thread.message", + }; + events.emit(EventName.OnMessageResponse, message); + + instance.isCancelled = false; + instance.controller = new AbortController(); + + requestInference( + data?.messages ?? [], + this._engineSettings, + JanInferenceOpenAIExtension._currentModel, + instance.controller + ).subscribe({ + next: (content) => { + const messageContent: ThreadContent = { + type: ContentType.Text, + text: { + value: content.trim(), + annotations: [], + }, + }; + message.content = [messageContent]; + events.emit(EventName.OnMessageUpdate, message); + }, + complete: async () => { + message.status = MessageStatus.Ready; + events.emit(EventName.OnMessageUpdate, message); + }, + error: async (err) => { + const messageContent: ThreadContent = { + type: ContentType.Text, + text: { + value: "Error occurred: " + err.message, + annotations: [], + }, + }; + message.content = [messageContent]; + message.status = MessageStatus.Ready; + events.emit(EventName.OnMessageUpdate, message); + }, + }); + } +} diff --git a/extensions/inference-openai-extension/tsconfig.json b/extensions/inference-openai-extension/tsconfig.json new file mode 100644 index 0000000000..b48175a169 --- /dev/null +++ b/extensions/inference-openai-extension/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "es2016", + "module": "ES6", + "moduleResolution": "node", + + "outDir": "./dist", + "esModuleInterop": true, + "forceConsistentCasingInFileNames": true, + "strict": false, + "skipLibCheck": true, + "rootDir": "./src" + }, + "include": ["./src"] +} diff --git a/extensions/inference-openai-extension/webpack.config.js b/extensions/inference-openai-extension/webpack.config.js new file mode 100644 index 0000000000..45be622710 --- /dev/null +++ b/extensions/inference-openai-extension/webpack.config.js @@ -0,0 +1,42 @@ +const path = require("path"); +const webpack = require("webpack"); +const packageJson = require("./package.json"); + +module.exports = { + experiments: { outputModule: true }, + entry: "./src/index.ts", // Adjust the entry point to match your project's main file + mode: "production", + module: { + rules: [ + { + test: /\.tsx?$/, + use: "ts-loader", + exclude: /node_modules/, + }, + ], + }, + plugins: [ + new webpack.DefinePlugin({ + MODULE: JSON.stringify(`${packageJson.name}/${packageJson.module}`), + INFERENCE_URL: JSON.stringify( + process.env.INFERENCE_URL || + "http://127.0.0.1:3928/inferences/llamacpp/chat_completion" + ), + }), + ], + output: { + filename: "index.js", // Adjust the output file name as needed + path: path.resolve(__dirname, "dist"), + library: { type: "module" }, // Specify ESM output format + }, + resolve: { + extensions: [".ts", ".js"], + fallback: { + path: require.resolve("path-browserify"), + }, + }, + optimization: { + minimize: false, + }, + // Add loaders and other configuration as needed for your project +}; diff --git 
a/models/capybara-34b/model.json b/models/capybara-34b/model.json index 4ac9899f1f..e4263f9572 100644 --- a/models/capybara-34b/model.json +++ b/models/capybara-34b/model.json @@ -19,6 +19,7 @@ "author": "NousResearch, The Bloke", "tags": ["34B", "Finetuned"], "size": 24320000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/deepseek-coder-1.3b/model.json b/models/deepseek-coder-1.3b/model.json index 4bab24ae26..70f86ec896 100644 --- a/models/deepseek-coder-1.3b/model.json +++ b/models/deepseek-coder-1.3b/model.json @@ -1,3 +1,4 @@ + { "source_url": "https://huggingface.co/TheBloke/deepseek-coder-1.3b-instruct-GGUF/resolve/main/deepseek-coder-1.3b-instruct.Q8_0.gguf", "id": "deepseek-coder-1.3b", @@ -19,5 +20,6 @@ "author": "Deepseek, The Bloke", "tags": ["Tiny", "Foundational Model"], "size": 1430000000 - } + }, + "engine": "nitro" } diff --git a/models/deepseek-coder-34b/model.json b/models/deepseek-coder-34b/model.json index eb8e8e8c1f..4024193575 100644 --- a/models/deepseek-coder-34b/model.json +++ b/models/deepseek-coder-34b/model.json @@ -19,6 +19,7 @@ "author": "Deepseek, The Bloke", "tags": ["34B", "Foundational Model"], "size": 26040000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/gpt-3.5-turbo-16k-0613/model.json b/models/gpt-3.5-turbo-16k-0613/model.json new file mode 100644 index 0000000000..c7e2f0d1ee --- /dev/null +++ b/models/gpt-3.5-turbo-16k-0613/model.json @@ -0,0 +1,20 @@ +{ + "source_url": "https://openai.com", + "id": "gpt-3.5-turbo-16k-0613", + "object": "model", + "name": "OpenAI GPT 3.5 Turbo 16k 0613", + "version": 1.0, + "description": "OpenAI GPT 3.5 Turbo 16k 0613 model is extremely good", + "format": "api", + "settings": {}, + "parameters": { + "max_tokens": 4096 + }, + "metadata": { + "author": "OpenAI", + "tags": ["General", "Big Context Length"] + }, + "engine": "openai", + "state": "ready" +} + \ No newline at end of file diff --git a/models/gpt-3.5-turbo/model.json b/models/gpt-3.5-turbo/model.json new file mode 100644 index 0000000000..91e4ca8f40 --- /dev/null +++ b/models/gpt-3.5-turbo/model.json @@ -0,0 +1,18 @@ +{ + "source_url": "https://openai.com", + "id": "gpt-3.5-turbo", + "object": "model", + "name": "OpenAI GPT 3.5 Turbo", + "version": 1.0, + "description": "OpenAI GPT 3.5 Turbo model is extremely good", + "format": "api", + "settings": {}, + "parameters": {}, + "metadata": { + "author": "OpenAI", + "tags": ["General", "Big Context Length"] + }, + "engine": "openai", + "state": "ready" +} + \ No newline at end of file diff --git a/models/gpt-4/model.json b/models/gpt-4/model.json new file mode 100644 index 0000000000..8883fd8ef5 --- /dev/null +++ b/models/gpt-4/model.json @@ -0,0 +1,20 @@ +{ + "source_url": "https://openai.com", + "id": "gpt-4", + "object": "model", + "name": "OpenAI GPT 3.5", + "version": 1.0, + "description": "OpenAI GPT 3.5 model is extremely good", + "format": "api", + "settings": {}, + "parameters": { + "max_tokens": 4096 + }, + "metadata": { + "author": "OpenAI", + "tags": ["General", "Big Context Length"] + }, + "engine": "openai", + "state": "ready" +} + \ No newline at end of file diff --git a/models/llama2-chat-70b-q4/model.json b/models/llama2-chat-70b-q4/model.json index 00349d5784..07886aed59 100644 --- a/models/llama2-chat-70b-q4/model.json +++ b/models/llama2-chat-70b-q4/model.json @@ -19,6 +19,7 @@ "author": "MetaAI, The Bloke", "tags": ["70B", "Foundational Model"], "size": 43920000000 - } + }, + "engine": "nitro" } \ No newline at end of 
file diff --git a/models/llama2-chat-7b-q4/model.json b/models/llama2-chat-7b-q4/model.json index b28d4e082c..2d1a67236e 100644 --- a/models/llama2-chat-7b-q4/model.json +++ b/models/llama2-chat-7b-q4/model.json @@ -19,6 +19,7 @@ "author": "MetaAI, The Bloke", "tags": ["7B", "Foundational Model"], "size": 4080000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/llama2-chat-7b-q5/model.json b/models/llama2-chat-7b-q5/model.json index b43e134eb2..96c652ab2f 100644 --- a/models/llama2-chat-7b-q5/model.json +++ b/models/llama2-chat-7b-q5/model.json @@ -19,6 +19,7 @@ "author": "MetaAI, The Bloke", "tags": ["7B", "Foundational Model"], "size": 4780000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/lzlv-70b/model.json b/models/lzlv-70b/model.json index 621d980abf..ca6af617ec 100644 --- a/models/lzlv-70b/model.json +++ b/models/lzlv-70b/model.json @@ -19,6 +19,7 @@ "author": "Lizpreciatior, The Bloke", "tags": ["70B", "Finetuned"], "size": 48750000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/mistral-ins-7b-q4/model.json b/models/mistral-ins-7b-q4/model.json index a5f87c9800..9fc86f1bea 100644 --- a/models/mistral-ins-7b-q4/model.json +++ b/models/mistral-ins-7b-q4/model.json @@ -20,6 +20,7 @@ "tags": ["Featured", "7B", "Foundational Model"], "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/mistral-ins-7b-q4/cover.png" - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/mistral-ins-7b-q5/model.json b/models/mistral-ins-7b-q5/model.json index b8669161ee..291f2ad0ef 100644 --- a/models/mistral-ins-7b-q5/model.json +++ b/models/mistral-ins-7b-q5/model.json @@ -19,6 +19,7 @@ "author": "MistralAI, The Bloke", "tags": ["7B", "Foundational Model"], "size": 5130000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/neural-chat-7b/model.json b/models/neural-chat-7b/model.json index dfccf073dc..1d62714693 100644 --- a/models/neural-chat-7b/model.json +++ b/models/neural-chat-7b/model.json @@ -19,6 +19,7 @@ "author": "Intel, The Bloke", "tags": ["Recommended", "7B", "Finetuned"], "size": 4370000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/noromaid-20b/model.json b/models/noromaid-20b/model.json index 698687d8bd..5c937a8313 100644 --- a/models/noromaid-20b/model.json +++ b/models/noromaid-20b/model.json @@ -19,6 +19,7 @@ "author": "NeverSleep, The Bloke", "tags": ["34B", "Finetuned"], "size": 12040000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/openhermes-neural-7b/model.json b/models/openhermes-neural-7b/model.json index 707967e5ab..8dcb51ad7f 100644 --- a/models/openhermes-neural-7b/model.json +++ b/models/openhermes-neural-7b/model.json @@ -20,5 +20,6 @@ "tags": ["Featured", "7B", "Merged"], "size": 4370000000, "cover": "https://raw.githubusercontent.com/janhq/jan/main/models/openhermes-neural-7b/cover.png" - } - } \ No newline at end of file + }, + "engine": "nitro" + } diff --git a/models/openorca-13b/model.json b/models/openorca-13b/model.json index fc47733715..42c8bd96e7 100644 --- a/models/openorca-13b/model.json +++ b/models/openorca-13b/model.json @@ -19,6 +19,7 @@ "author": "Microsoft, The Bloke", "tags": ["13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/phind-34b/model.json b/models/phind-34b/model.json index 40d53b77a5..eb3c1a18af 100644 --- 
a/models/phind-34b/model.json +++ b/models/phind-34b/model.json @@ -19,6 +19,7 @@ "author": "Phind, The Bloke", "tags": ["34B", "Finetuned"], "size": 24320000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/rocket-3b/model.json b/models/rocket-3b/model.json index c40ee62584..8d4fc76bf7 100644 --- a/models/rocket-3b/model.json +++ b/models/rocket-3b/model.json @@ -19,5 +19,6 @@ "author": "Pansophic, The Bloke", "tags": ["Tiny", "Finetuned"], "size": 1710000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/starling-7b/model.json b/models/starling-7b/model.json index de76936736..52b03f8b1b 100644 --- a/models/starling-7b/model.json +++ b/models/starling-7b/model.json @@ -19,6 +19,7 @@ "author": "Berkeley-nest, The Bloke", "tags": ["Recommended", "7B","Finetuned"], "size": 4370000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/tiefighter-13b/model.json b/models/tiefighter-13b/model.json index c7f209b77f..20075777c1 100644 --- a/models/tiefighter-13b/model.json +++ b/models/tiefighter-13b/model.json @@ -19,6 +19,7 @@ "author": "KoboldAI, The Bloke", "tags": ["13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/tinyllama-1.1b/model.json b/models/tinyllama-1.1b/model.json index ace0ca6a0b..bb6aeaf5c6 100644 --- a/models/tinyllama-1.1b/model.json +++ b/models/tinyllama-1.1b/model.json @@ -19,5 +19,6 @@ "author": "TinyLlama", "tags": ["Tiny", "Foundation Model"], "size": 637000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/wizardcoder-13b/model.json b/models/wizardcoder-13b/model.json index 63cbd174b5..40f2750372 100644 --- a/models/wizardcoder-13b/model.json +++ b/models/wizardcoder-13b/model.json @@ -19,6 +19,7 @@ "author": "WizardLM, The Bloke", "tags": ["Recommended", "13B", "Finetuned"], "size": 9230000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/yi-34b/model.json b/models/yi-34b/model.json index 7c6da2f03e..ab111591cf 100644 --- a/models/yi-34b/model.json +++ b/models/yi-34b/model.json @@ -19,6 +19,7 @@ "author": "01-ai, The Bloke", "tags": ["34B", "Foundational Model"], "size": 24320000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/models/zephyr-beta-7b/model.json b/models/zephyr-beta-7b/model.json index f1ef4eb20f..4993366fd5 100644 --- a/models/zephyr-beta-7b/model.json +++ b/models/zephyr-beta-7b/model.json @@ -19,6 +19,7 @@ "author": "HuggingFaceH4, The Bloke", "tags": ["7B", "Finetuned"], "size": 4370000000 - } + }, + "engine": "nitro" } \ No newline at end of file diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx index 46f4b19d49..6d0f5ff26f 100644 --- a/web/containers/Providers/EventHandler.tsx +++ b/web/containers/Providers/EventHandler.tsx @@ -7,10 +7,16 @@ import { ThreadMessage, ExtensionType, MessageStatus, + Model, } from '@janhq/core' import { ConversationalExtension } from '@janhq/core' import { useAtomValue, useSetAtom } from 'jotai' +import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel' +import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels' + +import { toaster } from '../Toast' + import { extensionManager } from '@/extension' import { addNewMessageAtom, @@ -24,19 +30,61 @@ import { export default function EventHandler({ children }: { children: ReactNode }) { const addNewMessage = useSetAtom(addNewMessageAtom) const 
diff --git a/web/containers/Providers/EventHandler.tsx b/web/containers/Providers/EventHandler.tsx
index 46f4b19d49..6d0f5ff26f 100644
--- a/web/containers/Providers/EventHandler.tsx
+++ b/web/containers/Providers/EventHandler.tsx
@@ -7,10 +7,16 @@ import {
   ThreadMessage,
   ExtensionType,
   MessageStatus,
+  Model,
 } from '@janhq/core'
 import { ConversationalExtension } from '@janhq/core'
 import { useAtomValue, useSetAtom } from 'jotai'

+import { activeModelAtom, stateModelAtom } from '@/hooks/useActiveModel'
+import { useGetDownloadedModels } from '@/hooks/useGetDownloadedModels'
+
+import { toaster } from '../Toast'
+
 import { extensionManager } from '@/extension'
 import {
   addNewMessageAtom,
@@ -24,19 +30,61 @@ export default function EventHandler({ children }: { children: ReactNode }) {
   const addNewMessage = useSetAtom(addNewMessageAtom)
   const updateMessage = useSetAtom(updateMessageAtom)
+  const { downloadedModels } = useGetDownloadedModels()
+  const setActiveModel = useSetAtom(activeModelAtom)
+  const setStateModel = useSetAtom(stateModelAtom)
   const updateThreadWaiting = useSetAtom(updateThreadWaitingForResponseAtom)
   const threads = useAtomValue(threadsAtom)
+  const modelsRef = useRef(downloadedModels)
   const threadsRef = useRef(threads)

   useEffect(() => {
     threadsRef.current = threads
   }, [threads])

+  useEffect(() => {
+    modelsRef.current = downloadedModels
+  }, [downloadedModels])
+
   async function handleNewMessageResponse(message: ThreadMessage) {
     addNewMessage(message)
   }

+  async function handleModelReady(model: Model) {
+    setActiveModel(model)
+    toaster({
+      title: 'Success!',
+      description: `Model ${model.id} has been started.`,
+    })
+    setStateModel(() => ({
+      state: 'stop',
+      loading: false,
+      model: model.id,
+    }))
+  }
+
+  async function handleModelStopped(model: Model) {
+    setTimeout(async () => {
+      setActiveModel(undefined)
+      setStateModel({ state: 'start', loading: false, model: '' })
+      toaster({
+        title: 'Success!',
+        description: `Model ${model.id} has been stopped.`,
+      })
+    }, 500)
+  }
+
+  async function handleModelFail(res: any) {
+    const errorMessage = `${res.error}`
+    alert(errorMessage)
+    setStateModel(() => ({
+      state: 'start',
+      loading: false,
+      model: res.modelId,
+    }))
+  }
+
   async function handleMessageResponseUpdate(message: ThreadMessage) {
     updateMessage(
       message.id,
@@ -73,6 +121,9 @@ export default function EventHandler({ children }: { children: ReactNode }) {
     if (window.core.events) {
       events.on(EventName.OnMessageResponse, handleNewMessageResponse)
       events.on(EventName.OnMessageUpdate, handleMessageResponseUpdate)
+      events.on(EventName.OnModelReady, handleModelReady)
+      events.on(EventName.OnModelFail, handleModelFail)
+      events.on(EventName.OnModelStopped, handleModelStopped)
     }
     // eslint-disable-next-line react-hooks/exhaustive-deps
   }, [])
diff --git a/web/hooks/useActiveModel.ts b/web/hooks/useActiveModel.ts
index 60be0f2c4b..699b162790 100644
--- a/web/hooks/useActiveModel.ts
+++ b/web/hooks/useActiveModel.ts
@@ -1,5 +1,8 @@
 /* eslint-disable @typescript-eslint/no-explicit-any */
-import { ExtensionType, InferenceExtension } from '@janhq/core'
+import {
+  EventName,
+  events,
+} from '@janhq/core'
 import { Model, ModelSettingParams } from '@janhq/core'

 import { atom, useAtom } from 'jotai'
@@ -9,9 +12,13 @@ import { useGetDownloadedModels } from './useGetDownloadedModels'

 import { extensionManager } from '@/extension'

-const activeModelAtom = atom(undefined)
+export const activeModelAtom = atom(undefined)

-const stateModelAtom = atom({ state: 'start', loading: false, model: '' })
+export const stateModelAtom = atom({
+  state: 'start',
+  loading: false,
+  model: '',
+})

 export function useActiveModel() {
   const [activeModel, setActiveModel] = useAtom(activeModelAtom)
@@ -47,59 +54,14 @@ export function useActiveModel() {
       return
     }

-    const currentTime = Date.now()
-    const res = await initModel(modelId, model?.settings)
-    if (res && res.error) {
-      const errorMessage = `${res.error}`
-      alert(errorMessage)
-      setStateModel(() => ({
-        state: 'start',
-        loading: false,
-        model: modelId,
-      }))
-    } else {
-      console.debug(
-        `Model ${modelId} successfully initialized! Took ${
-          Date.now() - currentTime
-        }ms`
-      )
-      setActiveModel(model)
-      toaster({
-        title: 'Success!',
-        description: `Model ${modelId} has been started.`,
-      })
-      setStateModel(() => ({
-        state: 'stop',
-        loading: false,
-        model: modelId,
-      }))
-    }
+    events.emit(EventName.OnModelInit, model)
   }

   const stopModel = async (modelId: string) => {
+    const model = downloadedModels.find((e) => e.id === modelId)
     setStateModel({ state: 'stop', loading: true, model: modelId })
-    setTimeout(async () => {
-      extensionManager
-        .get(ExtensionType.Inference)
-        ?.stopModel()
-
-      setActiveModel(undefined)
-      setStateModel({ state: 'start', loading: false, model: '' })
-      toaster({
-        title: 'Success!',
-        description: `Model ${modelId} has been stopped.`,
-      })
-    }, 500)
+    events.emit(EventName.OnModelStop, model)
  }

   return { activeModel, startModel, stopModel, stateModel }
 }
-
-const initModel = async (
-  modelId: string,
-  settings?: ModelSettingParams
-): Promise => {
-  return extensionManager
-    .get(ExtensionType.Inference)
-    ?.initModel(modelId, settings)
-}
diff --git a/web/hooks/useCreateNewThread.ts b/web/hooks/useCreateNewThread.ts
index 7526feb495..2ba9adb3fb 100644
--- a/web/hooks/useCreateNewThread.ts
+++ b/web/hooks/useCreateNewThread.ts
@@ -67,6 +67,7 @@ export const useCreateNewThread = () => {
           top_p: 0,
           stream: false,
         },
+        engine: undefined
       },
       instructions: assistant.instructions,
     }
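Note: the hunks above replace direct calls into the inference extension with an event round trip. The UI now only emits OnModelInit and OnModelStop, while EventHandler.tsx subscribes to OnModelReady, OnModelFail and OnModelStopped to update activeModelAtom, stateModelAtom and the toasts. The extension side is not shown in this diff; the sketch below is a rough, assumed counterpart that mirrors those handlers, and registerModelLifecycle plus loadModel/unloadModel are hypothetical names, not part of the patch.

import { events, EventName, Model } from '@janhq/core'

// Hypothetical extension-side wiring (assumption, not shown in this diff):
// answer the UI's OnModelInit / OnModelStop with the events EventHandler listens for.
export function registerModelLifecycle(
  loadModel: (model: Model) => Promise<void>,
  unloadModel: (model: Model) => Promise<void>
) {
  events.on(EventName.OnModelInit, async (model: Model) => {
    try {
      await loadModel(model) // e.g. start the nitro-backed runtime for this model
      events.emit(EventName.OnModelReady, model) // EventHandler sets activeModel + success toast
    } catch (error) {
      // handleModelFail reads res.error and res.modelId
      events.emit(EventName.OnModelFail, { error, modelId: model.id })
    }
  })

  events.on(EventName.OnModelStop, async (model: Model) => {
    await unloadModel(model)
    events.emit(EventName.OnModelStopped, model) // EventHandler clears activeModel after a short delay
  })
}
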
diff --git a/web/hooks/useSendChatMessage.ts b/web/hooks/useSendChatMessage.ts
index 1a06a8e886..970aedbecd 100644
--- a/web/hooks/useSendChatMessage.ts
+++ b/web/hooks/useSendChatMessage.ts
@@ -50,7 +50,6 @@ export default function useSendChatMessage() {
   const [queuedMessage, setQueuedMessage] = useState(false)

   const modelRef = useRef()
-
   useEffect(() => {
     modelRef.current = activeModel
   }, [activeModel])
@@ -91,18 +90,35 @@ export default function useSendChatMessage() {
       id: ulid(),
       messages: messages,
       threadId: activeThread.id,
+      model: activeThread.assistants[0].model ?? selectedModel,
     }

     const modelId = selectedModel?.id ?? activeThread.assistants[0].model.id
     if (activeModel?.id !== modelId) {
       setQueuedMessage(true)
-      await startModel(modelId)
+      startModel(modelId)
+      await WaitForModelStarting(modelId)
       setQueuedMessage(false)
     }
     events.emit(EventName.OnMessageSent, messageRequest)
   }

+  // TODO: Refactor @louis
+  const WaitForModelStarting = async (modelId: string) => {
+    return new Promise((resolve) => {
+      setTimeout(async () => {
+        if (modelRef.current?.id !== modelId) {
+          console.log('waiting for model to start')
+          await WaitForModelStarting(modelId)
+          resolve()
+        } else {
+          resolve()
+        }
+      }, 200)
+    })
+  }
+
   const sendChatMessage = async () => {
     if (!currentPrompt || currentPrompt.trim().length === 0) {
       return
@@ -132,6 +148,7 @@
             id: selectedModel.id,
             settings: selectedModel.settings,
             parameters: selectedModel.parameters,
+            engine: selectedModel.engine,
           },
         },
       ],
@@ -178,7 +195,7 @@
       id: msgId,
       threadId: activeThread.id,
       messages,
-      parameters: activeThread.assistants[0].model.parameters,
+      model: selectedModel ?? activeThread.assistants[0].model,
     }

     const timestamp = Date.now()
     const threadMessage: ThreadMessage = {
@@ -210,7 +227,8 @@

     if (activeModel?.id !== modelId) {
       setQueuedMessage(true)
-      await startModel(modelId)
+      startModel(modelId)
+      await WaitForModelStarting(modelId)
       setQueuedMessage(false)
     }
     events.emit(EventName.OnMessageSent, messageRequest)
diff --git a/web/screens/Chat/MessageToolbar/index.tsx b/web/screens/Chat/MessageToolbar/index.tsx
index f877e1bdb2..fe7cac1f5f 100644
--- a/web/screens/Chat/MessageToolbar/index.tsx
+++ b/web/screens/Chat/MessageToolbar/index.tsx
@@ -30,9 +30,8 @@ const MessageToolbar = ({ message }: { message: ThreadMessage }) => {
   const { resendChatMessage } = useSendChatMessage()

   const onStopInferenceClick = async () => {
-    await extensionManager
-      .get(ExtensionType.Inference)
-      ?.stopInference()
+    events.emit(EventName.OnInferenceStopped, {})
+
     setTimeout(() => {
       events.emit(EventName.OnMessageUpdate, {
         ...message,
diff --git a/web/utils/converter.ts b/web/utils/converter.ts
index 630366ed09..ed8a61d653 100644
--- a/web/utils/converter.ts
+++ b/web/utils/converter.ts
@@ -1,4 +1,5 @@
 export const toGigabytes = (input: number) => {
+  if (!input) return ''
   if (input > 1024 ** 3) {
     return (input / 1000 ** 3).toFixed(2) + 'GB'
   } else if (input > 1024 ** 2) {