Skip to content

Commit 95b410a

Browse files
committed
refactor: revamp inference plugin as a class — deprecate function-based registration
1 parent 471018a commit 95b410a

File tree

15 files changed

+232
-216
lines changed

15 files changed

+232
-216
lines changed

core/src/index.ts

Lines changed: 1 addition & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,6 @@
55
export type CoreService =
66
| StoreService
77
| DataService
8-
| InferenceService
98
| ModelManagementService
109
| SystemMonitoringService
1110
| PreferenceService
@@ -93,27 +92,6 @@ export enum DataService {
9392
UpdateBot = "updateBot",
9493
}
9594

96-
/**
97-
* InferenceService exports.
98-
* @enum {string}
99-
*/
100-
export enum InferenceService {
101-
/**
102-
* Initializes a model for inference.
103-
*/
104-
InitModel = "initModel",
105-
106-
/**
107-
* Stops a running inference model.
108-
*/
109-
StopModel = "stopModel",
110-
111-
/**
112-
* Single inference response.
113-
*/
114-
InferenceRequest = "inferenceRequest",
115-
}
116-
11795
/**
11896
* ModelManagementService exports.
11997
* @enum {string}
@@ -261,4 +239,4 @@ export {
261239
export { preferences } from "./preferences";
262240
export { fs } from "./fs";
263241

264-
export { JanPlugin, PluginType } from "./plugin";
242+
export { JanPlugin, PluginType } from "./plugin";

core/src/plugins/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
11
export { ConversationalPlugin } from './conversational'
2+
export { InferencePlugin } from './inference'
3+
export { ModelPlugin } from './model'

core/src/plugins/inference.ts

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
import { NewMessageRequest } from "../events";
2+
import { JanPlugin } from "../plugin";
3+
4+
export abstract class InferencePlugin extends JanPlugin {
5+
abstract initModel(modelFileName: string): Promise<void>;
6+
abstract stopModel(): Promise<void>;
7+
abstract inferenceRequest(data: NewMessageRequest): Promise<any>
8+
}

plugins/inference-plugin/@types/global.d.ts

Lines changed: 0 additions & 3 deletions
This file was deleted.

plugins/inference-plugin/index.ts

Lines changed: 0 additions & 166 deletions
This file was deleted.

plugins/inference-plugin/package.json

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
"name": "@janhq/inference-plugin",
33
"version": "1.0.20",
44
"description": "Inference Plugin, powered by @janhq/nitro, brings high-performance Llama model inference in pure C++.",
5-
"icon": "https://raw.githubusercontent.com/tailwindlabs/heroicons/88e98b0c2b458553fbadccddc2d2f878edc0387b/src/20/solid/command-line.svg",
65
"main": "dist/index.js",
76
"module": "dist/module.js",
87
"author": "Jan <[email protected]>",
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
// Ambient build-time globals — presumably injected by the bundler's
// define/replace step; TODO confirm against the build configuration.
// MODULE: purpose not visible from this file.
declare const MODULE: string;
// INFERENCE_URL: endpoint the inference request code POSTs to.
declare const INFERENCE_URL: string;
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
import { Observable } from "rxjs";
2+
/**
3+
* Sends a request to the inference server to generate a response based on the recent messages.
4+
* @param recentMessages - An array of recent messages to use as context for the inference.
5+
* @returns An Observable that emits the generated response as a string.
6+
*/
7+
export function requestInference(recentMessages: any[]): Observable<string> {
8+
return new Observable((subscriber) => {
9+
const requestBody = JSON.stringify({
10+
messages: recentMessages,
11+
stream: true,
12+
model: "gpt-3.5-turbo",
13+
max_tokens: 2048,
14+
});
15+
fetch(INFERENCE_URL, {
16+
method: "POST",
17+
headers: {
18+
"Content-Type": "application/json",
19+
Accept: "text/event-stream",
20+
"Access-Control-Allow-Origin": "*",
21+
},
22+
body: requestBody,
23+
})
24+
.then(async (response) => {
25+
const stream = response.body;
26+
const decoder = new TextDecoder("utf-8");
27+
const reader = stream?.getReader();
28+
let content = "";
29+
30+
while (true && reader) {
31+
const { done, value } = await reader.read();
32+
if (done) {
33+
break;
34+
}
35+
const text = decoder.decode(value);
36+
const lines = text.trim().split("\n");
37+
for (const line of lines) {
38+
if (line.startsWith("data: ") && !line.includes("data: [DONE]")) {
39+
const data = JSON.parse(line.replace("data: ", ""));
40+
content += data.choices[0]?.delta?.content ?? "";
41+
if (content.startsWith("assistant: ")) {
42+
content = content.replace("assistant: ", "");
43+
}
44+
subscriber.next(content);
45+
}
46+
}
47+
}
48+
subscriber.complete();
49+
})
50+
.catch((err) => subscriber.error(err));
51+
});
52+
}

0 commit comments

Comments
 (0)