Skip to content

Commit f268877

Browse files
committed
feat: Add triton trtllm for engine for remote models
1 parent 44d4368 commit f268877

File tree

8 files changed

+478
-1
lines changed

8 files changed

+478
-1
lines changed

core/src/types/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -174,7 +174,7 @@ export type ThreadState = {
174174
// Identifiers for the backends that can serve inference requests.
// NOTE(review): the string values look like persisted identifiers (model/thread
// configs reference them by value) — confirm before renaming any member.
enum InferenceEngine {
  nitro = "nitro",
  openai = "openai",
  // Remote NVIDIA Triton server running a TensorRT-LLM backend.
  triton_trtllm = "triton_trtllm",
  hf_endpoint = "hf_endpoint",
}
180180

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Jan inference plugin
2+
3+
Created from the Jan app example template.
4+
5+
# Create a Jan Plugin using Typescript
6+
7+
Use this template to bootstrap the creation of a TypeScript Jan plugin. 🚀
8+
9+
## Create Your Own Plugin
10+
11+
To create your own plugin, you can use this repository as a template! Just follow the below instructions:
12+
13+
1. Click the **Use this template** button at the top of the repository
14+
2. Select Create a new repository
15+
3. Select an owner and name for your new repository
16+
4. Click Create repository
17+
5. Clone your new repository
18+
19+
## Initial Setup
20+
21+
After you've cloned the repository to your local machine or codespace, you'll need to perform some initial setup steps before you can develop your plugin.
22+
23+
> [!NOTE]
24+
>
25+
> You'll need to have a reasonably modern version of
26+
> [Node.js](https://nodejs.org) handy. If you are using a version manager like
27+
> [`nodenv`](https://github.com/nodenv/nodenv) or
28+
> [`nvm`](https://github.com/nvm-sh/nvm), you can run `nodenv install` in the
29+
> root of your repository to install the version specified in
30+
> [`package.json`](./package.json). Otherwise, 20.x or later should work!
31+
32+
1. :hammer_and_wrench: Install the dependencies
33+
34+
```bash
35+
npm install
36+
```
37+
38+
1. :building_construction: Package the TypeScript for distribution
39+
40+
```bash
41+
npm run bundle
42+
```
43+
44+
1. :white_check_mark: Check your artifact
45+
46+
There will be a tgz file in your plugin directory now
47+
48+
## Update the Plugin Metadata
49+
50+
The [`package.json`](package.json) file defines metadata about your plugin, such as
51+
plugin name, main entry, description and version.
52+
53+
When you copy this repository, update `package.json` with the name, description for your plugin.
54+
55+
## Update the Plugin Code
56+
57+
The [`src/`](./src/) directory is the heart of your plugin! This contains the
58+
source code that will be run when your plugin extension functions are invoked. You can replace the
59+
contents of this directory with your own code.
60+
61+
There are a few things to keep in mind when writing your plugin code:
62+
63+
- Most Jan Plugin Extension functions are processed asynchronously.
64+
In `index.ts`, you will see that the extension function will return a `Promise<any>`.
65+
66+
```typescript
67+
import { core } from "@janhq/core";
68+
69+
function onStart(): Promise<any> {
70+
return core.invokePluginFunc(MODULE_PATH, "run", 0);
71+
}
72+
```
73+
74+
For more information about the Jan Plugin Core module, see the
75+
[documentation](https://github.com/janhq/jan/blob/main/core/README.md).
76+
77+
So, what are you waiting for? Go ahead and start customizing your plugin!
78+
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{
2+
"name": "@janhq/inference-triton-trt-llm-extension",
3+
"version": "1.0.0",
4+
"description": "Inference Engine for NVIDIA Triton with TensorRT-LLM Extension integration on Jan extension framework",
5+
"main": "dist/index.js",
6+
"module": "dist/module.js",
7+
"author": "Jan <[email protected]>",
8+
"license": "AGPL-3.0",
9+
"scripts": {
10+
"build": "tsc -b . && webpack --config webpack.config.js",
11+
"build:publish": "rimraf *.tgz --glob && npm run build && npm pack && cpx *.tgz ../../electron/pre-install"
12+
},
13+
"exports": {
14+
".": "./dist/index.js",
15+
"./main": "./dist/module.js"
16+
},
17+
"devDependencies": {
18+
"cpx": "^1.5.0",
19+
"rimraf": "^3.0.2",
20+
"webpack": "^5.88.2",
21+
"webpack-cli": "^5.1.4"
22+
},
23+
"dependencies": {
24+
"@janhq/core": "file:../../core",
25+
"fetch-retry": "^5.0.6",
26+
"path-browserify": "^1.0.1",
27+
"ts-loader": "^9.5.0",
28+
"ulid": "^2.3.0"
29+
},
30+
"engines": {
31+
"node": ">=18.0.0"
32+
},
33+
"files": [
34+
"dist/*",
35+
"package.json",
36+
"README.md"
37+
],
38+
"bundleDependencies": [
39+
"fetch-retry"
40+
]
41+
}
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
import { Model } from "@janhq/core";

// Extension module identifier — presumably injected by the bundler at build
// time (no definition visible here); TODO confirm where it is defined.
declare const MODULE: string;

// Connection settings for the remote inference server.
declare interface EngineSettings {
  // Base URL of the server's HTTP endpoint (the API path such as
  // /v2/models/... is appended by the request helper).
  base_url?: string;
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
import { Observable } from "rxjs";
2+
import { EngineSettings } from "../@types/global";
3+
import { Model } from "@janhq/core";
4+
5+
/**
6+
* Sends a request to the inference server to generate a response based on the recent messages.
7+
* @param recentMessages - An array of recent messages to use as context for the inference.
8+
* @param engine - The engine settings to use for the inference.
9+
* @param model - The model to use for the inference.
10+
* @returns An Observable that emits the generated response as a string.
11+
*/
12+
export function requestInference(
13+
recentMessages: any[],
14+
engine: EngineSettings,
15+
model: Model,
16+
controller?: AbortController
17+
): Observable<string> {
18+
return new Observable((subscriber) => {
19+
const text_input = recentMessages.map((message) => message.text).join("\n");
20+
const requestBody = JSON.stringify({
21+
text_input: text_input,
22+
max_tokens: 4096,
23+
temperature: 0,
24+
bad_words: "",
25+
stop_words: "[DONE]",
26+
stream: true
27+
});
28+
fetch(`${engine.base_url}/v2/models/ensemble/generate_stream`, {
29+
method: "POST",
30+
headers: {
31+
"Content-Type": "application/json",
32+
Accept: "text/event-stream",
33+
"Access-Control-Allow-Origin": "*",
34+
},
35+
body: requestBody,
36+
signal: controller?.signal,
37+
})
38+
.then(async (response) => {
39+
const stream = response.body;
40+
const decoder = new TextDecoder("utf-8");
41+
const reader = stream?.getReader();
42+
let content = "";
43+
44+
while (true && reader) {
45+
const { done, value } = await reader.read();
46+
if (done) {
47+
break;
48+
}
49+
const text = decoder.decode(value);
50+
const lines = text.trim().split("\n");
51+
for (const line of lines) {
52+
if (line.startsWith("data: ") && !line.includes("data: [DONE]")) {
53+
const data = JSON.parse(line.replace("data: ", ""));
54+
content += data.choices[0]?.delta?.content ?? "";
55+
subscriber.next(content);
56+
}
57+
}
58+
}
59+
subscriber.complete();
60+
})
61+
.catch((err) => subscriber.error(err));
62+
});
63+
}

0 commit comments

Comments
 (0)