Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions server/.env.example
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,13 @@ SIG_SALT='salt' # Please generate random string at least 32 chars long.
# GENERIC_OPEN_AI_EMBEDDING_API_KEY='sk-123abc'
# GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS=500
# GENERIC_OPEN_AI_EMBEDDING_API_DELAY_MS=1000
# Optional prefixes for asymmetric embedding models (Qwen3-Embedding, BGE, E5-instruct, etc.).
# Wrap the value in double quotes so dotenv interprets `\n` as a real newline.
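# Example - for Qwen3-Embedding the model card recommends the query prefix below and no passage prefix (passages are sent raw):
#   EMBEDDING_QUERY_PREFIX="Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:"
#   EMBEDDING_PASSAGE_PREFIX=
# Both prefixes default to empty (no prefix applied) when left unset:
# EMBEDDING_QUERY_PREFIX=
# EMBEDDING_PASSAGE_PREFIX=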

# EMBEDDING_ENGINE='gemini'
# GEMINI_EMBEDDING_API_KEY=
Expand Down
135 changes: 135 additions & 0 deletions server/__tests__/utils/EmbeddingEngines/genericOpenAi/index.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
/* eslint-env jest, node */

/**
* Tests for GenericOpenAiEmbedder query/passage prefix support.
*
* Verifies that EMBEDDING_QUERY_PREFIX is applied only to inputs going through
* embedTextInput (the query path) and EMBEDDING_PASSAGE_PREFIX only to inputs
* going through embedChunks (the ingest path), with empty-string defaults that
* preserve pre-existing behavior.
*
* Related issue: https://github.com/Mintplex-Labs/anything-llm/issues/5403
*/

// Log of every `{ model, input }` payload handed to the stubbed
// `embeddings.create`, in call order. The suite empties it before each test.
const capturedRequests = [];

// Swap the `openai` package for a stub whose client exposes nothing but
// `embeddings.create`. The stub records what it was asked to embed and answers
// with one fixed vector per input, so no network traffic ever happens.
jest.mock(
  "openai",
  () => {
    // Declared inside the factory on purpose: everything the factory touches
    // (other than `capturedRequests`) is local to it.
    const stubVector = () => ({ embedding: [0.1, 0.2, 0.3] });

    class FakeOpenAI {
      constructor() {
        const create = jest.fn(async ({ model, input }) => {
          capturedRequests.push({ model, input });
          // Accept either a single value or a batch, as the real API does.
          const batch = Array.isArray(input) ? input : [input];
          return { data: batch.map(stubVector) };
        });
        this.embeddings = { create };
      }
    }

    return { OpenAI: FakeOpenAI };
  },
  { virtual: true }
);

describe("GenericOpenAiEmbedder query/passage prefix", () => {
  // Handle on the real environment object so it can be reinstated afterwards.
  const pristineEnv = process.env;

  // Variables that must be absent at the start of every test so each case
  // opts in to exactly the settings it wants to exercise.
  const CLEARED_VARS = [
    "EMBEDDING_QUERY_PREFIX",
    "EMBEDDING_PASSAGE_PREFIX",
    "GENERIC_OPEN_AI_EMBEDDING_API_DELAY_MS",
  ];

  // Shared fixtures.
  const SNOW_LOAD_QUERY = "what is the snow load on a barn roof";
  const SNOW_LOAD_PASSAGE =
    "Section 7.3.2 specifies a ground snow load of 30 psf.";
  const QWEN_INSTRUCTION =
    "Instruct: Given a web search query, retrieve relevant passages that answer the query\nQuery:";

  beforeEach(() => {
    // A fresh module registry means the embedder is re-required per test.
    jest.resetModules();
    capturedRequests.splice(0);

    // Work on a copy of the environment; the original object is never mutated.
    const scratchEnv = {
      ...pristineEnv,
      EMBEDDING_BASE_PATH: "http://127.0.0.1:9999/v1",
      EMBEDDING_MODEL_PREF: "test-embed-model",
    };
    for (const name of CLEARED_VARS) {
      delete scratchEnv[name];
    }
    process.env = scratchEnv;
  });

  afterAll(() => {
    process.env = pristineEnv;
  });

  // Requires the embedder lazily so it is loaded after the environment for the
  // current test has been prepared, then returns a new instance.
  const loadEmbedder = () => {
    const embedderModule = require("../../../../utils/EmbeddingEngines/genericOpenAi");
    return new embedderModule.GenericOpenAiEmbedder();
  };

  test("defaults: no prefix is applied to either path (backwards compatible)", async () => {
    const embedder = loadEmbedder();

    await embedder.embedTextInput(SNOW_LOAD_QUERY);
    await embedder.embedChunks([SNOW_LOAD_PASSAGE]);

    expect(capturedRequests).toHaveLength(2);
    const [queryCall, passageCall] = capturedRequests;
    expect(queryCall.input).toEqual([SNOW_LOAD_QUERY]);
    expect(passageCall.input).toEqual([SNOW_LOAD_PASSAGE]);
  });

  test("EMBEDDING_QUERY_PREFIX is applied only to the query path", async () => {
    process.env.EMBEDDING_QUERY_PREFIX = QWEN_INSTRUCTION;
    const embedder = loadEmbedder();

    await embedder.embedTextInput(SNOW_LOAD_QUERY);
    await embedder.embedChunks([SNOW_LOAD_PASSAGE]);

    const [queryCall, passageCall] = capturedRequests;
    expect(queryCall.input).toEqual([`${QWEN_INSTRUCTION}${SNOW_LOAD_QUERY}`]);
    // With only the query prefix configured, passages go out untouched.
    expect(passageCall.input).toEqual([SNOW_LOAD_PASSAGE]);
  });

  test("EMBEDDING_PASSAGE_PREFIX is applied only to the ingest path", async () => {
    process.env.EMBEDDING_PASSAGE_PREFIX = "passage: ";
    const embedder = loadEmbedder();

    await embedder.embedTextInput("what is the snow load");
    await embedder.embedChunks(["chunk one", "chunk two"]);

    const [queryCall, passageCall] = capturedRequests;
    // With only the passage prefix configured, queries go out untouched.
    expect(queryCall.input).toEqual(["what is the snow load"]);
    expect(passageCall.input).toEqual([
      "passage: chunk one",
      "passage: chunk two",
    ]);
  });

  test("query prefix does not leak into the passage path even when both are set", async () => {
    process.env.EMBEDDING_QUERY_PREFIX = "QPFX:";
    process.env.EMBEDDING_PASSAGE_PREFIX = "PPFX:";
    const embedder = loadEmbedder();

    await embedder.embedChunks(["doc-a", "doc-b"]);

    const [ingestCall] = capturedRequests;
    expect(ingestCall.input).toEqual(["PPFX:doc-a", "PPFX:doc-b"]);
    const queryPrefixLeaked = ingestCall.input.some((sent) =>
      sent.includes("QPFX:")
    );
    expect(queryPrefixLeaked).toBe(false);
  });

  test("embedTextInput accepts an array input and prefixes each element", async () => {
    process.env.EMBEDDING_QUERY_PREFIX = "Q:";
    const embedder = loadEmbedder();

    await embedder.embedTextInput(["alpha", "beta"]);

    const [queryCall] = capturedRequests;
    expect(queryCall.input).toEqual(["Q:alpha", "Q:beta"]);
  });
});
33 changes: 29 additions & 4 deletions server/utils/EmbeddingEngines/genericOpenAi/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,44 @@ class GenericOpenAiEmbedder {
return Number(process.env.GENERIC_OPEN_AI_EMBEDDING_MAX_CONCURRENT_CHUNKS);
}

/**
* Optional prefix prepended to each query before embedding. Empty by default
* for backwards compatibility. Required by asymmetric models like
* Qwen3-Embedding, which expect queries wrapped as
* `Instruct: <task>\nQuery:<query>` while passages are sent raw.
* @returns {string}
*/
get queryPrefix() {
return process.env.EMBEDDING_QUERY_PREFIX ?? "";
}

/**
* Optional prefix prepended to each passage before embedding. Empty by
* default. Most asymmetric models (Qwen3-Embedding included) leave passages
* unwrapped, but some BGE/E5 variants expect a `passage: ` prefix.
* @returns {string}
*/
get passagePrefix() {
return process.env.EMBEDDING_PASSAGE_PREFIX ?? "";
}

/**
 * Embeds query text. A single value is wrapped into a one-element array,
 * every element gets `queryPrefix` prepended, and the batch is forwarded to
 * `embedChunks` with `isPassage: false`.
 * @param {string|string[]} textInput - One query or a list of queries.
 * @returns {Promise<Array>} The first entry of the `embedChunks` result, or
 *   an empty array when that entry is missing or falsy. Presumably a numeric
 *   vector - confirm against the full `embedChunks` implementation.
 */
async embedTextInput(textInput) {
// NOTE(review): the next two lines look like the superseded `embedChunks`
// call from the previous revision (diff residue) - confirm they are absent
// from the actual file, since they leave the call unterminated here.
const result = await this.embedChunks(
Array.isArray(textInput) ? textInput : [textInput]
// Read the prefix once so every element of the batch gets the same value.
const prefix = this.queryPrefix;
const inputs = (Array.isArray(textInput) ? textInput : [textInput]).map(
(t) => prefix + t
);
// `isPassage: false` marks this batch as query input for `embedChunks`.
const result = await this.embedChunks(inputs, { isPassage: false });
// Only the first embedding is returned, even when several inputs were given.
return result?.[0] || [];
}

async embedChunks(textChunks = []) {
async embedChunks(textChunks = [], { isPassage = true } = {}) {
// Because there is a hard POST limit on how many chunks can be sent at once to OpenAI (~8mb)
// we sequentially execute each max batch of text chunks possible.
// Refer to constructor maxConcurrentChunks for more info.
const prefix = isPassage ? this.passagePrefix : "";
const inputs = prefix ? textChunks.map((t) => prefix + t) : textChunks;
const allResults = [];
for (const chunk of toChunks(textChunks, this.maxConcurrentChunks)) {
for (const chunk of toChunks(inputs, this.maxConcurrentChunks)) {
const { data = [], error = null } = await new Promise((resolve) => {
this.openai.embeddings
.create({
Expand Down