From b9b1db37dcf8b2d5c3838d9a80eabc385f42aa59 Mon Sep 17 00:00:00 2001 From: Pip Build Date: Sun, 1 Mar 2026 23:13:28 -0500 Subject: [PATCH 01/11] security: escape user content in triage prompt delimiters (#164) Add escapePromptDelimiters() to HTML-encode < and > in user-supplied message content before it is inserted between XML-style section tags in the LLM prompt. Without escaping, a crafted message containing the literal text of a closing section tag could break out of the user-content section and inject attacker-controlled instructions into the prompt structure. Changes: - Add escapePromptDelimiters(text) utility exported from triage-prompt.js - Apply escape to m.content and m.replyTo.content in buildConversationText() - Add 13 new tests covering the escape function and injection scenarios Closes #164 --- src/modules/triage-prompt.js | 25 +++++- tests/modules/triage-prompt.test.js | 113 ++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 2 deletions(-) diff --git a/src/modules/triage-prompt.js b/src/modules/triage-prompt.js index bd6d0a68e..ee140ad59 100644 --- a/src/modules/triage-prompt.js +++ b/src/modules/triage-prompt.js @@ -5,6 +5,24 @@ import { loadPrompt } from '../prompts/index.js'; +// ── Prompt injection defense ────────────────────────────────────────────────── + +/** + * Escape XML-style delimiter characters in user-supplied content to prevent + * prompt injection attacks. A crafted message containing a closing section tag + * could otherwise break out of its designated section and inject instructions. + * + * Replaces `<` with `&lt;` and `>` with `&gt;` so the LLM sees the literal + * characters without interpreting them as structural delimiters. 
+ * + * @param {string} text - Raw user-supplied message content + * @returns {string} Escaped text safe for insertion between XML-style tags + */ +export function escapePromptDelimiters(text) { + if (typeof text !== 'string') return text; + return text.replace(/</g, '&lt;').replace(/>/g, '&gt;'); +} + // ── Conversation text formatting ───────────────────────────────────────────── /** @@ -12,6 +30,9 @@ import { loadPrompt } from '../prompts/index.js'; * Splits output into (context) and (buffer). * Includes timestamps and reply context when available. * + * User-supplied content (message body and reply excerpts) is passed through + * {@link escapePromptDelimiters} to neutralise prompt-injection attempts. + * * @param {Array} context - Historical messages fetched from Discord API * @param {Array} buffer - Buffered messages to evaluate * @returns {string} Formatted conversation text with section markers @@ -21,9 +42,9 @@ export function buildConversationText(context, buffer) { const time = m.timestamp ? new Date(m.timestamp).toISOString().slice(11, 19) : ''; const timePrefix = time ? `[${time}] ` : ''; const replyPrefix = m.replyTo - ? `(replying to ${m.replyTo.author}: "${m.replyTo.content.slice(0, 100)}")\n ` + ? 
`(replying to ${m.replyTo.author}: "${escapePromptDelimiters(m.replyTo.content.slice(0, 100))}")\n ` : ''; - return `${timePrefix}[${m.messageId}] ${m.author} (<@${m.userId}>): ${replyPrefix}${m.content}`; + return `${timePrefix}[${m.messageId}] ${m.author} (<@${m.userId}>): ${replyPrefix}${escapePromptDelimiters(m.content)}`; }; let text = ''; diff --git a/tests/modules/triage-prompt.test.js b/tests/modules/triage-prompt.test.js index e667bdb0b..3f7573eb5 100644 --- a/tests/modules/triage-prompt.test.js +++ b/tests/modules/triage-prompt.test.js @@ -372,3 +372,116 @@ describe('triage-prompt', () => { }); }); }); + +// Re-import to get escapePromptDelimiters (same module, just destructuring) +import { escapePromptDelimiters } from '../../src/modules/triage-prompt.js'; + +describe('escapePromptDelimiters', () => { + it('should escape < and > characters', () => { + expect(escapePromptDelimiters('