Skip to content

Commit 240f3b8

Browse files
authored
Merge pull request #1879 from mfts/feat/index-improvements
feat: improve document processing
2 parents 02f303c + 15aa25a commit 240f3b8

3 files changed

Lines changed: 60 additions & 0 deletions

File tree

ee/features/security/lib/ratelimit.ts

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ export const rateLimiters = {
1111
redis,
1212
limiter: Ratelimit.slidingWindow(3, "20 m"),
1313
prefix: "rl:auth",
14+
enableProtection: true,
1415
analytics: true,
1516
}),
1617

@@ -19,6 +20,7 @@ export const rateLimiters = {
1920
redis,
2021
limiter: Ratelimit.slidingWindow(3, "30 m"),
2122
prefix: "rl:billing",
23+
enableProtection: true,
2224
analytics: true,
2325
}),
2426
};

lib/trigger/pdf-to-image-route.ts

Lines changed: 19 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -133,6 +133,25 @@ export const convertPdfToImageRoute = task({
133133
);
134134

135135
if (!response.ok) {
136+
const errorData = await response.json().catch(() => ({}));
137+
138+
// If document was blocked, stop processing entirely
139+
if (response.status === 400 && errorData.error?.includes("blocked")) {
140+
logger.error("Document blocked", {
141+
pageNumber: currentPage,
142+
matchedUrl: errorData.matchedUrl,
143+
matchedKeyword: errorData.matchedKeyword,
144+
payload,
145+
});
146+
147+
updateStatus({
148+
progress: 0,
149+
text: `Document couldn't be processed`,
150+
});
151+
152+
throw new Error("Document processing blocked");
153+
}
154+
136155
throw new Error("Failed to convert page");
137156
}
138157

pages/api/mupdf/convert-page.ts

Lines changed: 39 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,8 @@
11
import { NextApiRequest, NextApiResponse } from "next";
22

33
import { DocumentPage } from "@prisma/client";
4+
import { get } from "@vercel/edge-config";
5+
import { waitUntil } from "@vercel/functions";
46
import * as mupdf from "mupdf";
57

68
import { putFileServer } from "@/lib/files/put-file-server";
@@ -142,6 +144,43 @@ export default async (req: NextApiRequest, res: NextApiResponse) => {
142144
return { href: link.getURI(), coords: link.getBounds().join(",") };
143145
});
144146

147+
// Check embedded links for blocked keywords
148+
if (embeddedLinks.length > 0) {
149+
try {
150+
const keywords = await get("keywords");
151+
if (Array.isArray(keywords) && keywords.length > 0) {
152+
for (const link of embeddedLinks) {
153+
if (link.href) {
154+
const matchedKeyword = keywords.find(
155+
(keyword) =>
156+
typeof keyword === "string" && link.href.includes(keyword),
157+
);
158+
159+
if (matchedKeyword) {
160+
waitUntil(
161+
log({
162+
message: `Document processing blocked: ${matchedKeyword} \n\n \`Metadata: {teamId: ${teamId}, documentVersionId: ${documentVersionId}, pageNumber: ${pageNumber}}\``,
163+
type: "error",
164+
mention: true,
165+
}),
166+
);
167+
res.status(400).json({
168+
error: "Document processing blocked",
169+
matchedUrl: link.href,
170+
matchedKeyword: matchedKeyword,
171+
pageNumber: pageNumber,
172+
});
173+
return;
174+
}
175+
}
176+
}
177+
}
178+
} catch (error) {
179+
// Log error but continue processing if check fails
180+
console.log("Failed to check keywords:", error);
181+
}
182+
}
183+
145184
// Will be updated if we use a reduced scale factor
146185
let actualScaleFactor = scaleFactor;
147186

0 commit comments

Comments
 (0)