janhq · louis-jan · Nov 4, 2023 · Nov 4, 2023 · Nov 4, 2023 · louis-jan
diff --git a/core/src/index.ts b/core/src/index.ts
@@ -5,7 +5,6 @@
 export type CoreService =
   | StoreService
   | DataService
-  | InferenceService
   | ModelManagementService
   | SystemMonitoringService
   | PreferenceService
@@ -93,27 +92,6 @@ export enum DataService {
   UpdateBot = "updateBot",
 }
 
-/**
- * InferenceService exports.
- * @enum {string}
- */
-export enum InferenceService {
-  /**
-   * Initializes a model for inference.
-   */
-  InitModel = "initModel",
-
-  /**
-   * Stops a running inference model.
-   */
-  StopModel = "stopModel",
-
-  /**
-   * Single inference response.
-   */
-  InferenceRequest = "inferenceRequest",
-}
-
 /**
  * ModelManagementService exports.
  * @enum {string}
@@ -261,4 +239,4 @@ export {
 export { preferences } from "./preferences";
 export { fs } from "./fs";
 
-export { JanPlugin, PluginType } from "./plugin";
+export { JanPlugin, PluginType } from "./plugin";
diff --git a/core/src/plugins/index.ts b/core/src/plugins/index.ts
@@ -1 +1,3 @@
 export { ConversationalPlugin } from './conversational'
+export { InferencePlugin } from './inference'
+export { ModelPlugin } from './model'
diff --git a/core/src/plugins/inference.ts b/core/src/plugins/inference.ts
@@ -0,0 +1,25 @@
+import { NewMessageRequest } from "../events";
+import { JanPlugin } from "../plugin";
+
+/**
+ * Base class for plugins that provide model inference.
+ * Concrete plugins extend it and implement every method below.
+ */
+export abstract class InferencePlugin extends JanPlugin {
+  /**
+   * Initializes a model for inference.
+   * @param modelFileName - File name of the model to initialize.
+   */
+  abstract initModel(modelFileName: string): Promise<void>;
+
+  /**
+   * Stops a running inference model.
+   */
+  abstract stopModel(): Promise<void>;
+
+  /**
+   * Single inference response.
+   * @param data - The message request to run inference on.
+   */
+  abstract inferenceRequest(data: NewMessageRequest): Promise<any>;
+}
diff --git a/plugins/inference-plugin/@types/global.d.ts b/plugins/inference-plugin/@types/global.d.ts
diff --git a/plugins/inference-plugin/index.ts b/plugins/inference-plugin/index.ts
diff --git a/plugins/inference-plugin/package.json b/plugins/inference-plugin/package.json
@@ -2,7 +2,6 @@
   "name": "@janhq/inference-plugin",
   "version": "1.0.20",
   "description": "Inference Plugin, powered by @janhq/nitro, bring a high-performance Llama model inference in pure C++.",
-  "icon": "https://raw.githubusercontent.com/tailwindlabs/heroicons/88e98b0c2b458553fbadccddc2d2f878edc0387b/src/20/solid/command-line.svg",
   "main": "dist/index.js",
   "module": "dist/module.js",
   "author": "Jan <[email protected]>",

diff --git a/plugins/inference-plugin/src/@types/global.d.ts b/plugins/inference-plugin/src/@types/global.d.ts
@@ -0,0 +1,2 @@
+declare const MODULE: string;
+declare const INFERENCE_URL: string;
diff --git a/plugins/inference-plugin/src/helpers/sse.ts b/plugins/inference-plugin/src/helpers/sse.ts
@@ -0,0 +1,101 @@
+import { Observable } from "rxjs";
+/**
+ * Streams a chat completion from the inference server as it is generated.
+ *
+ * POSTs `recentMessages` to `INFERENCE_URL` and reads the `text/event-stream`
+ * response incrementally. Every `data: <json>` line appends
+ * `choices[0].delta.content` to a running buffer, and the WHOLE accumulated text
+ * (not just the newest delta) is emitted after each such line. A leading
+ * "assistant: " prefix is stripped from the accumulated text.
+ *
+ * @param recentMessages - Messages sent as the `messages` field of the request body.
+ * @returns A cold Observable: each subscription issues its own request, emits the
+ *   accumulated response text, completes when the stream ends, and errors on a
+ *   network failure, a non-2xx status or an unparsable `data:` line.
+ *   Unsubscribing aborts the in-flight request.
+ */
+export function requestInference(recentMessages: any[]): Observable<string> {
+  return new Observable((subscriber) => {
+    // Lets the teardown below cancel the request when the consumer unsubscribes.
+    const controller = new AbortController();
+    const requestBody = JSON.stringify({
+      messages: recentMessages,
+      stream: true,
+      model: "gpt-3.5-turbo",
+      max_tokens: 2048,
+    });
+    let content = "";
+
+    /** Handles one complete SSE line; anything but a `data:` payload is ignored. */
+    const handleLine = (rawLine: string): void => {
+      const line = rawLine.endsWith("\r") ? rawLine.slice(0, -1) : rawLine;
+      if (!line.startsWith("data: ") || line.includes("data: [DONE]")) {
+        return;
+      }
+      const data = JSON.parse(line.slice("data: ".length));
+      content += data.choices[0]?.delta?.content ?? "";
+      if (content.startsWith("assistant: ")) {
+        content = content.replace("assistant: ", "");
+      }
+      subscriber.next(content);
+    };
+
+    fetch(INFERENCE_URL, {
+      method: "POST",
+      headers: {
+        "Content-Type": "application/json",
+        Accept: "text/event-stream",
+        // NOTE(review): this is normally a response header; confirm the server
+        // actually needs it on the request before removing it.
+        "Access-Control-Allow-Origin": "*",
+      },
+      body: requestBody,
+      signal: controller.signal,
+    })
+      .then(async (response) => {
+        if (!response.ok) {
+          throw new Error(
+            `Inference request failed with HTTP status ${response.status}`
+          );
+        }
+        const reader = response.body?.getReader();
+        if (!reader) {
+          // No readable body: nothing to emit, same as the stream ending at once.
+          subscriber.complete();
+          return;
+        }
+        const decoder = new TextDecoder("utf-8");
+        // Holds the trailing partial line until the chunk that completes it arrives.
+        let pending = "";
+
+        while (true) {
+          const { done, value } = await reader.read();
+          if (done) {
+            break;
+          }
+          // `stream: true` keeps a multi-byte character that straddles two chunks
+          // intact instead of decoding each half to U+FFFD.
+          pending += decoder.decode(value, { stream: true });
+          const lines = pending.split("\n");
+          pending = lines.pop() ?? "";
+          for (const line of lines) {
+            handleLine(line);
+          }
+        }
+        // Flush the decoder and process a final line that had no trailing newline.
+        pending += decoder.decode();
+        if (pending) {
+          handleLine(pending);
+        }
+        subscriber.complete();
+      })
+      .catch((err) => {
+        // After an unsubscribe the abort rejection is expected and has no listener.
+        if (!subscriber.closed) {
+          subscriber.error(err);
+        }
+      });
+
+    return () => controller.abort();
+  });
+}
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		declare const MODULE: string;
		declare const INFERENCE_URL: string;