From 30038544d9328a5c8ee9f16e0225f9132fcc156e Mon Sep 17 00:00:00 2001 From: bkellam Date: Tue, 26 Aug 2025 22:04:52 -0400 Subject: [PATCH 01/14] Add data model for user <> repo permission link --- .../migration.sql | 14 ++++ packages/db/prisma/schema.prisma | 67 +++++++++++-------- packages/web/src/actions.ts | 49 ++++++++++++-- packages/web/src/env.mjs | 2 + packages/web/src/features/fileTree/actions.ts | 33 +++++++-- packages/web/src/features/search/searchApi.ts | 23 +++++-- 6 files changed, 140 insertions(+), 48 deletions(-) create mode 100644 packages/db/prisma/migrations/20250827010055_repo_to_user_join_table/migration.sql diff --git a/packages/db/prisma/migrations/20250827010055_repo_to_user_join_table/migration.sql b/packages/db/prisma/migrations/20250827010055_repo_to_user_join_table/migration.sql new file mode 100644 index 000000000..62a56d496 --- /dev/null +++ b/packages/db/prisma/migrations/20250827010055_repo_to_user_join_table/migration.sql @@ -0,0 +1,14 @@ +-- CreateTable +CREATE TABLE "UserToRepoPermission" ( + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "repoId" INTEGER NOT NULL, + "userId" TEXT NOT NULL, + + CONSTRAINT "UserToRepoPermission_pkey" PRIMARY KEY ("repoId","userId") +); + +-- AddForeignKey +ALTER TABLE "UserToRepoPermission" ADD CONSTRAINT "UserToRepoPermission_repoId_fkey" FOREIGN KEY ("repoId") REFERENCES "Repo"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "UserToRepoPermission" ADD CONSTRAINT "UserToRepoPermission_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index d3304af44..a50a4eb25 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -56,7 +56,8 @@ model Repo { webUrl String? connections RepoToConnection[] imageUrl String? 
- repoIndexingStatus RepoIndexingStatus @default(NEW) + repoIndexingStatus RepoIndexingStatus @default(NEW) + permittedUsers UserToRepoPermission[] // The id of the repo in the external service external_id String @@ -77,9 +78,9 @@ model Repo { model SearchContext { id Int @id @default(autoincrement()) - name String + name String description String? - repos Repo[] + repos Repo[] org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) orgId Int @@ -149,7 +150,7 @@ model AccountRequest { createdAt DateTime @default(now()) - requestedBy User @relation(fields: [requestedById], references: [id], onDelete: Cascade) + requestedBy User @relation(fields: [requestedById], references: [id], onDelete: Cascade) requestedById String @unique org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) @@ -171,7 +172,7 @@ model Org { apiKeys ApiKey[] isOnboarded Boolean @default(false) imageUrl String? - metadata Json? // For schema see orgMetadataSchema in packages/web/src/types.ts + metadata Json? // For schema see orgMetadataSchema in packages/web/src/types.ts memberApprovalRequired Boolean @default(true) @@ -181,10 +182,10 @@ model Org { /// List of pending invites to this organization invites Invite[] - + /// The invite id for this organization inviteLinkEnabled Boolean @default(false) - inviteLinkId String? + inviteLinkId String? audits Audit[] @@ -231,55 +232,53 @@ model Secret { } model ApiKey { - name String - hash String @id @unique + name String + hash String @id @unique - createdAt DateTime @default(now()) + createdAt DateTime @default(now()) lastUsedAt DateTime? 
org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) orgId Int - createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade) + createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade) createdById String - } model Audit { - id String @id @default(cuid()) + id String @id @default(cuid()) timestamp DateTime @default(now()) - - action String - actorId String - actorType String - targetId String - targetType String + + action String + actorId String + actorType String + targetId String + targetType String sourcebotVersion String - metadata Json? + metadata Json? org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) orgId Int @@index([actorId, actorType, targetId, targetType, orgId]) - // Fast path for analytics queries – orgId is first because we assume most deployments are single tenant @@index([orgId, timestamp, action, actorId], map: "idx_audit_core_actions_full") - // Fast path for analytics queries for a specific user @@index([actorId, timestamp], map: "idx_audit_actor_time_full") } // @see : https://authjs.dev/concepts/database-models#user model User { - id String @id @default(cuid()) + id String @id @default(cuid()) name String? - email String? @unique + email String? @unique hashedPassword String? emailVerified DateTime? image String? accounts Account[] orgs UserToOrg[] accountRequest AccountRequest? 
+ accessibleRepos UserToRepoPermission[] /// List of pending invites that the user has created invites Invite[] @@ -292,6 +291,18 @@ model User { updatedAt DateTime @updatedAt } +model UserToRepoPermission { + createdAt DateTime @default(now()) + + repo Repo @relation(fields: [repoId], references: [id], onDelete: Cascade) + repoId Int + + user User @relation(fields: [userId], references: [id], onDelete: Cascade) + userId String + + @@id([repoId, userId]) +} + // @see : https://authjs.dev/concepts/database-models#account model Account { id String @id @default(cuid()) @@ -329,17 +340,17 @@ model Chat { name String? - createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade) + createdBy User @relation(fields: [createdById], references: [id], onDelete: Cascade) createdById String createdAt DateTime @default(now()) updatedAt DateTime @updatedAt - + org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) orgId Int visibility ChatVisibility @default(PRIVATE) - isReadonly Boolean @default(false) + isReadonly Boolean @default(false) messages Json // This is a JSON array of `Message` types from @ai-sdk/ui-utils. -} \ No newline at end of file +} diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 25e62a7f9..1ba66b599 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -640,7 +640,7 @@ export const getConnectionInfo = async (connectionId: number, domain: string) => }))); export const getRepos = async (filter: { status?: RepoIndexingStatus[], connectionId?: number } = {}) => sew(() => - withOptionalAuthV2(async ({ org }) => { + withOptionalAuthV2(async ({ org, user }) => { const repos = await prisma.repo.findMany({ where: { orgId: org.id, @@ -654,6 +654,13 @@ export const getRepos = async (filter: { status?: RepoIndexingStatus[], connecti } } } : {}), + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? 
{ + permittedUsers: { + some: { + userId: user?.id, + } + } + } : {}) }, include: { connections: { @@ -723,6 +730,13 @@ export const getRepoInfoByName = async (repoName: string, domain: string) => sew where: { name: repoName, orgId: org.id, + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { + permittedUsers: { + some: { + userId: userId, + } + } + } : {}) }, }); @@ -805,7 +819,7 @@ export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: strin // Parse repository URL to extract owner/repo const repoInfo = (() => { const url = repositoryUrl.trim(); - + // Handle various GitHub URL formats const patterns = [ // https://github.com/owner/repo or https://github.com/owner/repo.git @@ -815,7 +829,7 @@ export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: strin // owner/repo /^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/ ]; - + for (const pattern of patterns) { const match = url.match(pattern); if (match) { @@ -825,7 +839,7 @@ export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: strin }; } } - + return null; })(); @@ -838,7 +852,7 @@ export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: strin } const { owner, repo } = repoInfo; - + // Use GitHub API to fetch repository information and get the external_id const octokit = new Octokit({ auth: env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN @@ -867,7 +881,7 @@ export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: strin message: `Access to repository '${owner}/${repo}' is forbidden. 
Only public repositories can be added.`, } satisfies ServiceError; } - + return { statusCode: StatusCodes.INTERNAL_SERVER_ERROR, errorCode: ErrorCode.INVALID_REQUEST_BODY, @@ -890,6 +904,13 @@ export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: strin external_id: githubRepo.id.toString(), external_codeHostType: 'github', external_codeHostUrl: 'https://github.com', + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { + permittedUsers: { + some: { + userId: userId, + } + } + } : {}) } }); @@ -1040,6 +1061,13 @@ export const flagReposForIndex = async (repoIds: number[], domain: string) => se where: { id: { in: repoIds }, orgId: org.id, + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { + permittedUsers: { + some: { + userId: userId, + } + } + } : {}) }, data: { repoIndexingStatus: RepoIndexingStatus.NEW, @@ -2022,6 +2050,13 @@ export const getRepoImage = async (repoId: number, domain: string): Promise sew(() => - withAuth((session) => - withOrgMembership(session, domain, async ({ org }) => { + withAuth((userId) => + withOrgMembership(userId, domain, async ({ org }) => { const { repoName, revisionName } = params; const repo = await prisma.repo.findFirst({ where: { name: repoName, orgId: org.id, + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { + permittedUsers: { + some: { + userId: userId, + } + } + } : {}) }, }); @@ -85,13 +92,20 @@ export const getTree = async (params: { repoName: string, revisionName: string } * at a given revision. */ export const getFolderContents = async (params: { repoName: string, revisionName: string, path: string }, domain: string) => sew(() => - withAuth((session) => - withOrgMembership(session, domain, async ({ org }) => { + withAuth((userId) => + withOrgMembership(userId, domain, async ({ org }) => { const { repoName, revisionName, path } = params; const repo = await prisma.repo.findFirst({ where: { name: repoName, orgId: org.id, + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? 
{ + permittedUsers: { + some: { + userId: userId, + } + } + } : {}) }, }); @@ -158,14 +172,21 @@ export const getFolderContents = async (params: { repoName: string, revisionName ); export const getFiles = async (params: { repoName: string, revisionName: string }, domain: string) => sew(() => - withAuth((session) => - withOrgMembership(session, domain, async ({ org }) => { + withAuth((userId) => + withOrgMembership(userId, domain, async ({ org }) => { const { repoName, revisionName } = params; const repo = await prisma.repo.findFirst({ where: { name: repoName, orgId: org.id, + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { + permittedUsers: { + some: { + userId: userId, + } + } + } : {}) }, }); diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 6d006bbdb..65640fe60 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -10,7 +10,6 @@ import { StatusCodes } from "http-status-codes"; import { zoektSearchResponseSchema } from "./zoektSchema"; import { SearchRequest, SearchResponse, SourceRange } from "./types"; import { OrgRole, Repo } from "@sourcebot/db"; -import * as Sentry from "@sentry/nextjs"; import { sew, withAuth, withOrgMembership } from "@/actions"; import { base64Decode } from "@sourcebot/shared"; @@ -204,6 +203,13 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ in: Array.from(repoIdentifiers).filter((id) => typeof id === "number"), }, orgId: org.id, + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { + permittedUsers: { + some: { + userId, + } + } + } : {}) } })).forEach(repo => repos.set(repo.id, repo)); @@ -213,6 +219,13 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ in: Array.from(repoIdentifiers).filter((id) => typeof id === "string"), }, orgId: org.id, + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? 
{ + permittedUsers: { + some: { + userId, + } + } + } : {}) } })).forEach(repo => repos.set(repo.name, repo)); @@ -234,12 +247,8 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ const identifier = file.RepositoryID ?? file.Repository; const repo = repos.get(identifier); - // This should never happen... but if it does, we skip the file. + // This can happen if the user doesn't have access to the repository. if (!repo) { - Sentry.captureMessage( - `Repository not found for identifier: ${identifier}; skipping file "${file.FileName}"`, - 'warning' - ); return undefined; } @@ -349,4 +358,4 @@ export const search = async ({ query, matches, contextLines, whole }: SearchRequ return parser.parseAsync(searchBody); }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true, apiKey ? { apiKey, domain } : undefined) - ); +); From 0b03f94f67906664d744b5032720aad62c5f89ca Mon Sep 17 00:00:00 2001 From: bkellam Date: Tue, 16 Sep 2025 22:02:04 -0700 Subject: [PATCH 02/14] permission syncer --- packages/backend/src/connectionManager.ts | 13 +- packages/backend/src/env.ts | 2 + packages/backend/src/github.ts | 77 +++++---- packages/backend/src/index.ts | 120 +++++++++----- packages/backend/src/main.ts | 49 ------ packages/backend/src/permissionSyncer.ts | 181 ++++++++++++++++++++++ packages/backend/src/repoManager.ts | 17 +- packages/backend/src/types.ts | 5 +- 8 files changed, 322 insertions(+), 142 deletions(-) delete mode 100644 packages/backend/src/main.ts create mode 100644 packages/backend/src/permissionSyncer.ts diff --git a/packages/backend/src/connectionManager.ts b/packages/backend/src/connectionManager.ts index 5cf119b64..d1cb0b891 100644 --- a/packages/backend/src/connectionManager.ts +++ b/packages/backend/src/connectionManager.ts @@ -11,12 +11,6 @@ import { env } from "./env.js"; import * as Sentry from "@sentry/node"; import { loadConfig, syncSearchContexts } from "@sourcebot/shared"; -interface IConnectionManager { 
- scheduleConnectionSync: (connection: Connection) => Promise; - registerPollingCallback: () => void; - dispose: () => void; -} - const QUEUE_NAME = 'connectionSyncQueue'; type JobPayload = { @@ -30,7 +24,7 @@ type JobResult = { repoCount: number, } -export class ConnectionManager implements IConnectionManager { +export class ConnectionManager { private worker: Worker; private queue: Queue; private logger = createLogger('connection-manager'); @@ -75,8 +69,9 @@ export class ConnectionManager implements IConnectionManager { }); } - public async registerPollingCallback() { - setInterval(async () => { + public startScheduler() { + this.logger.debug('Starting scheduler'); + return setInterval(async () => { const thresholdDate = new Date(Date.now() - this.settings.resyncConnectionIntervalMs); const connections = await this.db.connection.findMany({ where: { diff --git a/packages/backend/src/env.ts b/packages/backend/src/env.ts index 0a533db00..5d0562844 100644 --- a/packages/backend/src/env.ts +++ b/packages/backend/src/env.ts @@ -52,6 +52,8 @@ export const env = createEnv({ REPO_SYNC_RETRY_BASE_SLEEP_SECONDS: numberSchema.default(60), GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS: numberSchema.default(60 * 10), + + EXPERIMENT_PERMISSION_SYNC_ENABLED: booleanSchema.default("false"), }, runtimeEnv: process.env, emptyStringAsUndefined: true, diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts index 34f01724e..730c891c9 100644 --- a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -30,35 +30,21 @@ export type OctokitRepository = { size?: number, owner: { avatar_url: string, + login: string, } } const isHttpError = (error: unknown, status: number): boolean => { - return error !== null + return error !== null && typeof error === 'object' - && 'status' in error + && 'status' in error && error.status === status; } export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: 
AbortSignal) => { - const hostname = config.url ? - new URL(config.url).hostname : - GITHUB_CLOUD_HOSTNAME; - - const token = config.token ? - await getTokenFromConfig(config.token, orgId, db, logger) : - hostname === GITHUB_CLOUD_HOSTNAME ? - env.FALLBACK_GITHUB_CLOUD_TOKEN : - undefined; - - const octokit = new Octokit({ - auth: token, - ...(config.url ? { - baseUrl: `${config.url}/api/v3` - } : {}), - }); + const { octokit, isAuthenticated } = await createOctokitFromConfig(config, orgId, db); - if (token) { + if (isAuthenticated) { try { await octokit.rest.users.getAuthenticated(); } catch (error) { @@ -127,16 +113,51 @@ export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, o logger.debug(`Found ${repos.length} total repositories.`); return { - validRepos: repos, + validRepos: repos, notFound, }; } +export const getUserIdsWithReadAccessToRepo = async (owner: string, repo: string, octokit: Octokit) => { + const fetchFn = () => octokit.paginate(octokit.repos.listCollaborators, { + owner, + repo, + per_page: 100, + }); + + const collaborators = await fetchWithRetry(fetchFn, `repo ${owner}/${repo}`, logger); + return collaborators.map(collaborator => collaborator.id.toString()); +} + +export const createOctokitFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient): Promise<{ octokit: Octokit, isAuthenticated: boolean }> => { + const hostname = config.url ? + new URL(config.url).hostname : + GITHUB_CLOUD_HOSTNAME; + + const token = config.token ? + await getTokenFromConfig(config.token, orgId, db, logger) : + hostname === GITHUB_CLOUD_HOSTNAME ? + env.FALLBACK_GITHUB_CLOUD_TOKEN : + undefined; + + const octokit = new Octokit({ + auth: token, + ...(config.url ? 
{ + baseUrl: `${config.url}/api/v3` + } : {}), + }); + + return { + octokit, + isAuthenticated: !!token, + }; +} + export const shouldExcludeRepo = ({ repo, include, exclude -} : { +}: { repo: OctokitRepository, include?: { topics?: GithubConnectionConfig['topics'] @@ -156,23 +177,23 @@ export const shouldExcludeRepo = ({ reason = `\`exclude.forks\` is true`; return true; } - + if (!!exclude?.archived && !!repo.archived) { reason = `\`exclude.archived\` is true`; return true; } - + if (exclude?.repos) { if (micromatch.isMatch(repoName, exclude.repos)) { reason = `\`exclude.repos\` contains ${repoName}`; return true; } } - + if (exclude?.topics) { const configTopics = exclude.topics.map(topic => topic.toLowerCase()); const repoTopics = repo.topics ?? []; - + const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics)); if (matchingTopics.length > 0) { reason = `\`exclude.topics\` matches the following topics: ${matchingTopics.join(', ')}`; @@ -190,17 +211,17 @@ export const shouldExcludeRepo = ({ return true; } } - + const repoSizeInBytes = repo.size ? 
repo.size * 1000 : undefined; if (exclude?.size && repoSizeInBytes) { const min = exclude.size.min; const max = exclude.size.max; - + if (min && repoSizeInBytes < min) { reason = `repo is less than \`exclude.size.min\`=${min} bytes.`; return true; } - + if (max && repoSizeInBytes > max) { reason = `repo is greater than \`exclude.size.max\`=${max} bytes.`; return true; diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index c93622d61..0f0aa14bf 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -1,44 +1,36 @@ import "./instrument.js"; -import * as Sentry from "@sentry/node"; +import { PrismaClient } from "@sourcebot/db"; +import { createLogger } from "@sourcebot/logger"; +import { loadConfig } from '@sourcebot/shared'; import { existsSync } from 'fs'; import { mkdir } from 'fs/promises'; +import { Redis } from 'ioredis'; import path from 'path'; -import { AppContext } from "./types.js"; -import { main } from "./main.js" -import { PrismaClient } from "@sourcebot/db"; +import { ConnectionManager } from './connectionManager.js'; +import { DEFAULT_SETTINGS } from './constants.js'; import { env } from "./env.js"; -import { createLogger } from "@sourcebot/logger"; - -const logger = createLogger('backend-entrypoint'); +import { RepoPermissionSyncer } from './permissionSyncer.js'; +import { PromClient } from './promClient.js'; +import { RepoManager } from './repoManager.js'; +import { AppContext } from "./types.js"; -// Register handler for normal exit -process.on('exit', (code) => { - logger.info(`Process is exiting with code: ${code}`); -}); +const logger = createLogger('backend-entrypoint'); -// Register handlers for abnormal terminations -process.on('SIGINT', () => { - logger.info('Process interrupted (SIGINT)'); - process.exit(0); -}); +const getSettings = async (configPath?: string) => { + if (!configPath) { + return DEFAULT_SETTINGS; + } -process.on('SIGTERM', () => { - logger.info('Process terminated 
(SIGTERM)'); - process.exit(0); -}); + const config = await loadConfig(configPath); -// Register handlers for uncaught exceptions and unhandled rejections -process.on('uncaughtException', (err) => { - logger.error(`Uncaught exception: ${err.message}`); - process.exit(1); -}); + return { + ...DEFAULT_SETTINGS, + ...config.settings, + } +} -process.on('unhandledRejection', (reason, promise) => { - logger.error(`Unhandled rejection at: ${promise}, reason: ${reason}`); - process.exit(1); -}); const cacheDir = env.DATA_CACHE_DIR; const reposPath = path.join(cacheDir, 'repos'); @@ -59,18 +51,60 @@ const context: AppContext = { const prisma = new PrismaClient(); -main(prisma, context) - .then(async () => { - await prisma.$disconnect(); - }) - .catch(async (e) => { - logger.error(e); - Sentry.captureException(e); - - await prisma.$disconnect(); - process.exit(1); - }) - .finally(() => { - logger.info("Shutting down..."); - }); +const redis = new Redis(env.REDIS_URL, { + maxRetriesPerRequest: null +}); +redis.ping().then(() => { + logger.info('Connected to redis'); +}).catch((err: unknown) => { + logger.error('Failed to connect to redis'); + logger.error(err); + process.exit(1); +}); + +const promClient = new PromClient(); + +const settings = await getSettings(env.CONFIG_PATH); + +const connectionManager = new ConnectionManager(prisma, settings, redis); +const repoManager = new RepoManager(prisma, settings, redis, promClient, context); +const permissionSyncer = new RepoPermissionSyncer(prisma, redis); + +await repoManager.validateIndexedReposHaveShards(); + +const connectionManagerInterval = connectionManager.startScheduler(); +const repoManagerInterval = repoManager.startScheduler(); +const permissionSyncerInterval = env.EXPERIMENT_PERMISSION_SYNC_ENABLED ? 
permissionSyncer.startScheduler() : null; + + +const cleanup = async (signal: string) => { + logger.info(`Recieved ${signal}, cleaning up...`); + + if (permissionSyncerInterval) { + clearInterval(permissionSyncerInterval); + } + + clearInterval(connectionManagerInterval); + clearInterval(repoManagerInterval); + + connectionManager.dispose(); + repoManager.dispose(); + permissionSyncer.dispose(); + await prisma.$disconnect(); + await redis.quit(); +} + +process.on('SIGINT', () => cleanup('SIGINT').finally(() => process.exit(0))); +process.on('SIGTERM', () => cleanup('SIGTERM').finally(() => process.exit(0))); + +// Register handlers for uncaught exceptions and unhandled rejections +process.on('uncaughtException', (err) => { + logger.error(`Uncaught exception: ${err.message}`); + cleanup('uncaughtException').finally(() => process.exit(1)); +}); + +process.on('unhandledRejection', (reason, promise) => { + logger.error(`Unhandled rejection at: ${promise}, reason: ${reason}`); + cleanup('unhandledRejection').finally(() => process.exit(1)); +}); diff --git a/packages/backend/src/main.ts b/packages/backend/src/main.ts deleted file mode 100644 index f3cf00501..000000000 --- a/packages/backend/src/main.ts +++ /dev/null @@ -1,49 +0,0 @@ -import { PrismaClient } from '@sourcebot/db'; -import { createLogger } from "@sourcebot/logger"; -import { AppContext } from "./types.js"; -import { DEFAULT_SETTINGS } from './constants.js'; -import { Redis } from 'ioredis'; -import { ConnectionManager } from './connectionManager.js'; -import { RepoManager } from './repoManager.js'; -import { env } from './env.js'; -import { PromClient } from './promClient.js'; -import { loadConfig } from '@sourcebot/shared'; - -const logger = createLogger('backend-main'); - -const getSettings = async (configPath?: string) => { - if (!configPath) { - return DEFAULT_SETTINGS; - } - - const config = await loadConfig(configPath); - - return { - ...DEFAULT_SETTINGS, - ...config.settings, - } -} - -export const 
main = async (db: PrismaClient, context: AppContext) => { - const redis = new Redis(env.REDIS_URL, { - maxRetriesPerRequest: null - }); - redis.ping().then(() => { - logger.info('Connected to redis'); - }).catch((err: unknown) => { - logger.error('Failed to connect to redis'); - logger.error(err); - process.exit(1); - }); - - const settings = await getSettings(env.CONFIG_PATH); - - const promClient = new PromClient(); - - const connectionManager = new ConnectionManager(db, settings, redis); - connectionManager.registerPollingCallback(); - - const repoManager = new RepoManager(db, settings, redis, promClient, context); - await repoManager.validateIndexedReposHaveShards(); - await repoManager.blockingPollLoop(); -} diff --git a/packages/backend/src/permissionSyncer.ts b/packages/backend/src/permissionSyncer.ts new file mode 100644 index 000000000..d20592670 --- /dev/null +++ b/packages/backend/src/permissionSyncer.ts @@ -0,0 +1,181 @@ +import { PrismaClient } from "@sourcebot/db"; +import { createLogger } from "@sourcebot/logger"; +import { BitbucketConnectionConfig } from "@sourcebot/schemas/v3/bitbucket.type"; +import { GiteaConnectionConfig } from "@sourcebot/schemas/v3/gitea.type"; +import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; +import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type"; +import { Job, Queue, Worker } from 'bullmq'; +import { Redis } from 'ioredis'; +import { createOctokitFromConfig, getUserIdsWithReadAccessToRepo } from "./github.js"; +import { RepoWithConnections } from "./types.js"; + +type RepoPermissionSyncJob = { + repoId: number; +} + +const QUEUE_NAME = 'repoPermissionSyncQueue'; + +const logger = createLogger('permission-syncer'); + +export class RepoPermissionSyncer { + private queue: Queue; + private worker: Worker; + + constructor( + private db: PrismaClient, + redis: Redis, + ) { + this.queue = new Queue(QUEUE_NAME, { + connection: redis, + }); + this.worker = new Worker(QUEUE_NAME, 
this.runJob.bind(this), { + connection: redis, + }); + this.worker.on('completed', this.onJobCompleted.bind(this)); + this.worker.on('failed', this.onJobFailed.bind(this)); + } + + public async scheduleJob(repoId: number) { + await this.queue.add(QUEUE_NAME, { + repoId, + }); + } + + public startScheduler() { + logger.debug('Starting scheduler'); + + // @todo: we should only sync permissions for a repository if it has been at least ~24 hours since the last sync. + return setInterval(async () => { + const repos = await this.db.repo.findMany({ + where: { + external_codeHostType: { + in: ['github'], + } + } + }); + + for (const repo of repos) { + await this.scheduleJob(repo.id); + } + + // @todo: make this configurable + }, 1000 * 5); + } + + public dispose() { + this.worker.close(); + this.queue.close(); + } + + private async runJob(job: Job) { + const id = job.data.repoId; + const repo = await this.db.repo.findUnique({ + where: { + id, + }, + include: { + connections: { + include: { + connection: true, + }, + }, + }, + }); + + if (!repo) { + throw new Error(`Repo ${id} not found`); + } + + const connection = getFirstConnectionWithToken(repo); + if (!connection) { + throw new Error(`No connection with token found for repo ${id}`); + } + + const userIds = await (async () => { + if (connection.connectionType === 'github') { + const config = connection.config as unknown as GithubConnectionConfig; + const { octokit } = await createOctokitFromConfig(config, repo.orgId, this.db); + + // @nocheckin - need to handle when repo displayName is not set. 
+ const [owner, repoName] = repo.displayName!.split('/'); + + const githubUserIds = await getUserIdsWithReadAccessToRepo(owner, repoName, octokit); + + const accounts = await this.db.account.findMany({ + where: { + provider: 'github', + providerAccountId: { + in: githubUserIds, + } + }, + select: { + userId: true, + }, + }); + + return accounts.map(account => account.userId); + } + + return []; + })(); + + logger.info(`User IDs with read access to repo ${id}: ${userIds}`); + + await this.db.repo.update({ + where: { + id: repo.id, + }, + data: { + permittedUsers: { + deleteMany: {}, + } + } + }); + + await this.db.userToRepoPermission.createMany({ + data: userIds.map(userId => ({ + userId, + repoId: repo.id, + })), + }); + } + + private async onJobCompleted(job: Job) { + logger.info(`Repo permission sync job completed for repo ${job.data.repoId}`); + } + + private async onJobFailed(job: Job | undefined, err: Error) { + logger.error(`Repo permission sync job failed for repo ${job?.data.repoId}: ${err}`); + } +} + +const getFirstConnectionWithToken = (repo: RepoWithConnections) => { + for (const { connection } of repo.connections) { + if (connection.connectionType === 'github') { + const config = connection.config as unknown as GithubConnectionConfig; + if (config.token) { + return connection; + } + } + if (connection.connectionType === 'gitlab') { + const config = connection.config as unknown as GitlabConnectionConfig; + if (config.token) { + return connection; + } + } + if (connection.connectionType === 'gitea') { + const config = connection.config as unknown as GiteaConnectionConfig; + if (config.token) { + return connection; + } + } + if (connection.connectionType === 'bitbucket') { + const config = connection.config as unknown as BitbucketConnectionConfig; + if (config.token) { + return connection; + } + } + } + + return undefined; +} \ No newline at end of file diff --git a/packages/backend/src/repoManager.ts b/packages/backend/src/repoManager.ts index 
cc277a18a..ea122abf1 100644 --- a/packages/backend/src/repoManager.ts +++ b/packages/backend/src/repoManager.ts @@ -12,12 +12,6 @@ import { PromClient } from './promClient.js'; import * as Sentry from "@sentry/node"; import { env } from './env.js'; -interface IRepoManager { - validateIndexedReposHaveShards: () => Promise; - blockingPollLoop: () => void; - dispose: () => void; -} - const REPO_INDEXING_QUEUE = 'repoIndexingQueue'; const REPO_GC_QUEUE = 'repoGarbageCollectionQueue'; @@ -32,7 +26,7 @@ type RepoGarbageCollectionPayload = { const logger = createLogger('repo-manager'); -export class RepoManager implements IRepoManager { +export class RepoManager { private indexWorker: Worker; private indexQueue: Queue; private gcWorker: Worker; @@ -68,14 +62,13 @@ export class RepoManager implements IRepoManager { this.gcWorker.on('failed', this.onGarbageCollectionJobFailed.bind(this)); } - public async blockingPollLoop() { - while (true) { + public startScheduler() { + logger.debug('Starting scheduler'); + return setInterval(async () => { await this.fetchAndScheduleRepoIndexing(); await this.fetchAndScheduleRepoGarbageCollection(); await this.fetchAndScheduleRepoTimeouts(); - - await new Promise(resolve => setTimeout(resolve, this.settings.reindexRepoPollingIntervalMs)); - } + }, this.settings.reindexRepoPollingIntervalMs); } /////////////////////////// diff --git a/packages/backend/src/types.ts b/packages/backend/src/types.ts index 58674f494..737720b4e 100644 --- a/packages/backend/src/types.ts +++ b/packages/backend/src/types.ts @@ -1,3 +1,4 @@ +import { Connection, Repo, RepoToConnection } from "@sourcebot/db"; import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type"; import { z } from "zod"; @@ -50,4 +51,6 @@ export type DeepPartial = T extends object ? 
{ } : T; // @see: https://stackoverflow.com/a/69328045 -export type WithRequired = T & { [P in K]-?: T[P] }; \ No newline at end of file +export type WithRequired = T & { [P in K]-?: T[P] }; + +export type RepoWithConnections = Repo & { connections: (RepoToConnection & { connection: Connection })[] }; From 671fd78360c6718beacf65949b27e3639a36116f Mon Sep 17 00:00:00 2001 From: bkellam Date: Wed, 17 Sep 2025 18:31:42 -0700 Subject: [PATCH 03/14] wip: move permissions check to Prisma extension --- packages/backend/src/index.ts | 2 +- packages/backend/src/permissionSyncer.ts | 2 +- packages/web/src/actions.ts | 519 ++++++++---------- .../[...path]/components/codePreviewPanel.tsx | 2 +- .../[...path]/components/treePreviewPanel.tsx | 7 +- .../app/[domain]/browse/[...path]/page.tsx | 1 - .../components/fileSearchCommandDialog.tsx | 6 +- .../repos/components/addRepositoryDialog.tsx | 4 +- .../[domain]/repos/[repoId]/image/route.ts | 6 +- packages/web/src/features/fileTree/actions.ts | 333 ++++++----- .../fileTree/components/fileTreePanel.tsx | 6 +- packages/web/src/features/search/searchApi.ts | 424 +++++++------- packages/web/src/prisma.ts | 45 +- packages/web/src/withAuthV2.ts | 29 +- 14 files changed, 673 insertions(+), 713 deletions(-) diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index 0f0aa14bf..f80340344 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -74,7 +74,7 @@ await repoManager.validateIndexedReposHaveShards(); const connectionManagerInterval = connectionManager.startScheduler(); const repoManagerInterval = repoManager.startScheduler(); -const permissionSyncerInterval = env.EXPERIMENT_PERMISSION_SYNC_ENABLED ? permissionSyncer.startScheduler() : null; +const permissionSyncerInterval = env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? 
permissionSyncer.startScheduler() : null; const cleanup = async (signal: string) => { diff --git a/packages/backend/src/permissionSyncer.ts b/packages/backend/src/permissionSyncer.ts index d20592670..bb93d2b06 100644 --- a/packages/backend/src/permissionSyncer.ts +++ b/packages/backend/src/permissionSyncer.ts @@ -59,7 +59,7 @@ export class RepoPermissionSyncer { } // @todo: make this configurable - }, 1000 * 5); + }, 1000 * 60); } public dispose() { diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 1ba66b599..215ebef6b 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -1,46 +1,45 @@ 'use server'; +import { getAuditService } from "@/ee/features/audit/factory"; import { env } from "@/env.mjs"; +import { addUserToOrganization, orgHasAvailability } from "@/lib/authUtils"; import { ErrorCode } from "@/lib/errorCodes"; import { notAuthenticated, notFound, orgNotFound, secretAlreadyExists, ServiceError, ServiceErrorException, unexpectedError } from "@/lib/serviceError"; -import { CodeHostType, isHttpError, isServiceError } from "@/lib/utils"; +import { CodeHostType, getOrgMetadata, isHttpError, isServiceError } from "@/lib/utils"; import { prisma } from "@/prisma"; import { render } from "@react-email/components"; import * as Sentry from '@sentry/nextjs'; -import { decrypt, encrypt, generateApiKey, hashSecret, getTokenFromConfig } from "@sourcebot/crypto"; -import { ConnectionSyncStatus, OrgRole, Prisma, RepoIndexingStatus, StripeSubscriptionStatus, Org, ApiKey } from "@sourcebot/db"; +import { decrypt, encrypt, generateApiKey, getTokenFromConfig, hashSecret } from "@sourcebot/crypto"; +import { ApiKey, ConnectionSyncStatus, Org, OrgRole, Prisma, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db"; +import { createLogger } from "@sourcebot/logger"; +import { azuredevopsSchema } from "@sourcebot/schemas/v3/azuredevops.schema"; +import { bitbucketSchema } from "@sourcebot/schemas/v3/bitbucket.schema"; 
import { ConnectionConfig } from "@sourcebot/schemas/v3/connection.type"; +import { genericGitHostSchema } from "@sourcebot/schemas/v3/genericGitHost.schema"; import { gerritSchema } from "@sourcebot/schemas/v3/gerrit.schema"; import { giteaSchema } from "@sourcebot/schemas/v3/gitea.schema"; +import { GiteaConnectionConfig } from "@sourcebot/schemas/v3/gitea.type"; import { githubSchema } from "@sourcebot/schemas/v3/github.schema"; -import { gitlabSchema } from "@sourcebot/schemas/v3/gitlab.schema"; -import { azuredevopsSchema } from "@sourcebot/schemas/v3/azuredevops.schema"; import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; +import { gitlabSchema } from "@sourcebot/schemas/v3/gitlab.schema"; import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type"; -import { GiteaConnectionConfig } from "@sourcebot/schemas/v3/gitea.type"; +import { getPlan, hasEntitlement } from "@sourcebot/shared"; import Ajv from "ajv"; import { StatusCodes } from "http-status-codes"; import { cookies, headers } from "next/headers"; import { createTransport } from "nodemailer"; -import { auth } from "./auth"; import { Octokit } from "octokit"; +import { auth } from "./auth"; import { getConnection } from "./data/connection"; +import { getOrgFromDomain } from "./data/org"; +import { decrementOrgSeatCount, getSubscriptionForOrg } from "./ee/features/billing/serverUtils"; import { IS_BILLING_ENABLED } from "./ee/features/billing/stripe"; import InviteUserEmail from "./emails/inviteUserEmail"; +import JoinRequestApprovedEmail from "./emails/joinRequestApprovedEmail"; +import JoinRequestSubmittedEmail from "./emails/joinRequestSubmittedEmail"; import { AGENTIC_SEARCH_TUTORIAL_DISMISSED_COOKIE_NAME, MOBILE_UNSUPPORTED_SPLASH_SCREEN_DISMISSED_COOKIE_NAME, SEARCH_MODE_COOKIE_NAME, SINGLE_TENANT_ORG_DOMAIN, SOURCEBOT_GUEST_USER_ID, SOURCEBOT_SUPPORT_EMAIL } from "./lib/constants"; import { orgDomainSchema, orgNameSchema, repositoryQuerySchema } from 
"./lib/schemas"; -import { TenancyMode, ApiKeyPayload } from "./lib/types"; -import { decrementOrgSeatCount, getSubscriptionForOrg } from "./ee/features/billing/serverUtils"; -import { bitbucketSchema } from "@sourcebot/schemas/v3/bitbucket.schema"; -import { genericGitHostSchema } from "@sourcebot/schemas/v3/genericGitHost.schema"; -import { getPlan, hasEntitlement } from "@sourcebot/shared"; -import JoinRequestSubmittedEmail from "./emails/joinRequestSubmittedEmail"; -import JoinRequestApprovedEmail from "./emails/joinRequestApprovedEmail"; -import { createLogger } from "@sourcebot/logger"; -import { getAuditService } from "@/ee/features/audit/factory"; -import { addUserToOrganization, orgHasAvailability } from "@/lib/authUtils"; -import { getOrgMetadata } from "@/lib/utils"; -import { getOrgFromDomain } from "./data/org"; +import { ApiKeyPayload, TenancyMode } from "./lib/types"; import { withOptionalAuthV2 } from "./withAuthV2"; const ajv = new Ajv({ @@ -640,7 +639,7 @@ export const getConnectionInfo = async (connectionId: number, domain: string) => }))); export const getRepos = async (filter: { status?: RepoIndexingStatus[], connectionId?: number } = {}) => sew(() => - withOptionalAuthV2(async ({ org, user }) => { + withOptionalAuthV2(async ({ org, prisma }) => { const repos = await prisma.repo.findMany({ where: { orgId: org.id, @@ -654,13 +653,6 @@ export const getRepos = async (filter: { status?: RepoIndexingStatus[], connecti } } } : {}), - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { - permittedUsers: { - some: { - userId: user?.id, - } - } - } : {}) }, include: { connections: { @@ -688,74 +680,65 @@ export const getRepos = async (filter: { status?: RepoIndexingStatus[], connecti })) })); -export const getRepoInfoByName = async (repoName: string, domain: string) => sew(() => - withAuth((userId) => - withOrgMembership(userId, domain, async ({ org }) => { - // @note: repo names are represented by their remote url - // on the code host. 
E.g.,: - // - github.com/sourcebot-dev/sourcebot - // - gitlab.com/gitlab-org/gitlab - // - gerrit.wikimedia.org/r/mediawiki/extensions/OnionsPorFavor - // etc. - // - // For most purposes, repo names are unique within an org, so using - // findFirst is equivalent to findUnique. Duplicates _can_ occur when - // a repository is specified by its remote url in a generic `git` - // connection. For example: - // - // ```json - // { - // "connections": { - // "connection-1": { - // "type": "github", - // "repos": [ - // "sourcebot-dev/sourcebot" - // ] - // }, - // "connection-2": { - // "type": "git", - // "url": "file:///tmp/repos/sourcebot" - // } - // } - // } - // ``` - // - // In this scenario, both repos will be named "github.com/sourcebot-dev/sourcebot". - // We will leave this as an edge case for now since it's unlikely to happen in practice. - // - // @v4-todo: we could add a unique constraint on repo name + orgId to help de-duplicate - // these cases. - // @see: repoCompileUtils.ts - const repo = await prisma.repo.findFirst({ - where: { - name: repoName, - orgId: org.id, - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { - permittedUsers: { - some: { - userId: userId, - } - } - } : {}) - }, - }); +export const getRepoInfoByName = async (repoName: string) => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + // @note: repo names are represented by their remote url + // on the code host. E.g.,: + // - github.com/sourcebot-dev/sourcebot + // - gitlab.com/gitlab-org/gitlab + // - gerrit.wikimedia.org/r/mediawiki/extensions/OnionsPorFavor + // etc. + // + // For most purposes, repo names are unique within an org, so using + // findFirst is equivalent to findUnique. Duplicates _can_ occur when + // a repository is specified by its remote url in a generic `git` + // connection. 
For example: + // + // ```json + // { + // "connections": { + // "connection-1": { + // "type": "github", + // "repos": [ + // "sourcebot-dev/sourcebot" + // ] + // }, + // "connection-2": { + // "type": "git", + // "url": "file:///tmp/repos/sourcebot" + // } + // } + // } + // ``` + // + // In this scenario, both repos will be named "github.com/sourcebot-dev/sourcebot". + // We will leave this as an edge case for now since it's unlikely to happen in practice. + // + // @v4-todo: we could add a unique constraint on repo name + orgId to help de-duplicate + // these cases. + // @see: repoCompileUtils.ts + const repo = await prisma.repo.findFirst({ + where: { + name: repoName, + orgId: org.id, + }, + }); - if (!repo) { - return notFound(); - } + if (!repo) { + return notFound(); + } - return { - id: repo.id, - name: repo.name, - displayName: repo.displayName ?? undefined, - codeHostType: repo.external_codeHostType, - webUrl: repo.webUrl ?? undefined, - imageUrl: repo.imageUrl ?? undefined, - indexedAt: repo.indexedAt ?? undefined, - repoIndexingStatus: repo.repoIndexingStatus, - } - }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true - )); + return { + id: repo.id, + name: repo.name, + displayName: repo.displayName ?? undefined, + codeHostType: repo.external_codeHostType, + webUrl: repo.webUrl ?? undefined, + imageUrl: repo.imageUrl ?? undefined, + indexedAt: repo.indexedAt ?? 
undefined, + repoIndexingStatus: repo.repoIndexingStatus, + } + })); export const createConnection = async (name: string, type: CodeHostType, connectionConfig: string, domain: string): Promise<{ id: number } | ServiceError> => sew(() => withAuth((userId) => @@ -805,150 +788,141 @@ export const createConnection = async (name: string, type: CodeHostType, connect }, OrgRole.OWNER) )); -export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: string, domain: string): Promise<{ connectionId: number } | ServiceError> => sew(() => - withAuth((userId) => - withOrgMembership(userId, domain, async ({ org }) => { - if (env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_ENABLED !== 'true') { - return { - statusCode: StatusCodes.BAD_REQUEST, - errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: "This feature is not enabled.", - } satisfies ServiceError; - } +export const experimental_addGithubRepositoryByUrl = async (repositoryUrl: string): Promise<{ connectionId: number } | ServiceError> => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + if (env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_ENABLED !== 'true') { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: "This feature is not enabled.", + } satisfies ServiceError; + } - // Parse repository URL to extract owner/repo - const repoInfo = (() => { - const url = repositoryUrl.trim(); - - // Handle various GitHub URL formats - const patterns = [ - // https://github.com/owner/repo or https://github.com/owner/repo.git - /^https?:\/\/github\.com\/([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+?)(?:\.git)?\/?$/, - // github.com/owner/repo - /^github\.com\/([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+?)(?:\.git)?\/?$/, - // owner/repo - /^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/ - ]; - - for (const pattern of patterns) { - const match = url.match(pattern); - if (match) { - return { - owner: match[1], - repo: match[2] - }; - } + // Parse repository URL to extract owner/repo + const 
repoInfo = (() => { + const url = repositoryUrl.trim(); + + // Handle various GitHub URL formats + const patterns = [ + // https://github.com/owner/repo or https://github.com/owner/repo.git + /^https?:\/\/github\.com\/([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+?)(?:\.git)?\/?$/, + // github.com/owner/repo + /^github\.com\/([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+?)(?:\.git)?\/?$/, + // owner/repo + /^([a-zA-Z0-9_.-]+)\/([a-zA-Z0-9_.-]+)$/ + ]; + + for (const pattern of patterns) { + const match = url.match(pattern); + if (match) { + return { + owner: match[1], + repo: match[2] + }; } - - return null; - })(); - - if (!repoInfo) { - return { - statusCode: StatusCodes.BAD_REQUEST, - errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: "Invalid repository URL format. Please use 'owner/repo' or 'https://github.com/owner/repo' format.", - } satisfies ServiceError; } - const { owner, repo } = repoInfo; + return null; + })(); - // Use GitHub API to fetch repository information and get the external_id - const octokit = new Octokit({ - auth: env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN - }); + if (!repoInfo) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: "Invalid repository URL format. Please use 'owner/repo' or 'https://github.com/owner/repo' format.", + } satisfies ServiceError; + } - let githubRepo; - try { - const response = await octokit.rest.repos.get({ - owner, - repo, - }); - githubRepo = response.data; - } catch (error) { - if (isHttpError(error, 404)) { - return { - statusCode: StatusCodes.NOT_FOUND, - errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: `Repository '${owner}/${repo}' not found or is private. Only public repositories can be added.`, - } satisfies ServiceError; - } + const { owner, repo } = repoInfo; - if (isHttpError(error, 403)) { - return { - statusCode: StatusCodes.FORBIDDEN, - errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: `Access to repository '${owner}/${repo}' is forbidden. 
Only public repositories can be added.`, - } satisfies ServiceError; - } + // Use GitHub API to fetch repository information and get the external_id + const octokit = new Octokit({ + auth: env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN + }); + let githubRepo; + try { + const response = await octokit.rest.repos.get({ + owner, + repo, + }); + githubRepo = response.data; + } catch (error) { + if (isHttpError(error, 404)) { return { - statusCode: StatusCodes.INTERNAL_SERVER_ERROR, + statusCode: StatusCodes.NOT_FOUND, errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: `Failed to fetch repository information: ${error instanceof Error ? error.message : 'Unknown error'}`, + message: `Repository '${owner}/${repo}' not found or is private. Only public repositories can be added.`, } satisfies ServiceError; } - if (githubRepo.private) { + if (isHttpError(error, 403)) { return { - statusCode: StatusCodes.BAD_REQUEST, + statusCode: StatusCodes.FORBIDDEN, errorCode: ErrorCode.INVALID_REQUEST_BODY, - message: "Only public repositories can be added.", + message: `Access to repository '${owner}/${repo}' is forbidden. Only public repositories can be added.`, } satisfies ServiceError; } - // Check if this repository is already connected using the external_id - const existingRepo = await prisma.repo.findFirst({ - where: { - orgId: org.id, - external_id: githubRepo.id.toString(), - external_codeHostType: 'github', - external_codeHostUrl: 'https://github.com', - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { - permittedUsers: { - some: { - userId: userId, - } - } - } : {}) - } - }); + return { + statusCode: StatusCodes.INTERNAL_SERVER_ERROR, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: `Failed to fetch repository information: ${error instanceof Error ? 
error.message : 'Unknown error'}`, + } satisfies ServiceError; + } - if (existingRepo) { - return { - statusCode: StatusCodes.BAD_REQUEST, - errorCode: ErrorCode.CONNECTION_ALREADY_EXISTS, - message: "This repository already exists.", - } satisfies ServiceError; + if (githubRepo.private) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.INVALID_REQUEST_BODY, + message: "Only public repositories can be added.", + } satisfies ServiceError; + } + + // Check if this repository is already connected using the external_id + const existingRepo = await prisma.repo.findFirst({ + where: { + orgId: org.id, + external_id: githubRepo.id.toString(), + external_codeHostType: 'github', + external_codeHostUrl: 'https://github.com', } + }); - const connectionName = `${owner}-${repo}-${Date.now()}`; + if (existingRepo) { + return { + statusCode: StatusCodes.BAD_REQUEST, + errorCode: ErrorCode.CONNECTION_ALREADY_EXISTS, + message: "This repository already exists.", + } satisfies ServiceError; + } - // Create GitHub connection config - const connectionConfig: GithubConnectionConfig = { - type: "github" as const, - repos: [`${owner}/${repo}`], - ...(env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN ? { - token: { - env: 'EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN' - } - } : {}) - }; + const connectionName = `${owner}-${repo}-${Date.now()}`; - const connection = await prisma.connection.create({ - data: { - orgId: org.id, - name: connectionName, - config: connectionConfig as unknown as Prisma.InputJsonValue, - connectionType: 'github', + // Create GitHub connection config + const connectionConfig: GithubConnectionConfig = { + type: "github" as const, + repos: [`${owner}/${repo}`], + ...(env.EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN ? 
{ + token: { + env: 'EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN' } - }); + } : {}) + }; - return { - connectionId: connection.id, + const connection = await prisma.connection.create({ + data: { + orgId: org.id, + name: connectionName, + config: connectionConfig as unknown as Prisma.InputJsonValue, + connectionType: 'github', } - }, OrgRole.GUEST), /* allowAnonymousAccess = */ true - )); + }); + + return { + connectionId: connection.id, + } + })); export const updateConnectionDisplayName = async (connectionId: number, name: string, domain: string): Promise<{ success: boolean } | ServiceError> => sew(() => withAuth((userId) => @@ -2043,82 +2017,73 @@ export const getSearchContexts = async (domain: string) => sew(() => }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true )); -export const getRepoImage = async (repoId: number, domain: string): Promise => sew(async () => { - return await withAuth(async (userId) => { - return await withOrgMembership(userId, domain, async ({ org }) => { - const repo = await prisma.repo.findUnique({ - where: { - id: repoId, - orgId: org.id, - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? 
{ - permittedUsers: { - some: { - userId: userId, - } - } - } : {}) - }, - include: { - connections: { - include: { - connection: true, - } +export const getRepoImage = async (repoId: number): Promise => sew(async () => { + return await withOptionalAuthV2(async ({ org, prisma }) => { + const repo = await prisma.repo.findUnique({ + where: { + id: repoId, + orgId: org.id, + }, + include: { + connections: { + include: { + connection: true, } - }, - }); + } + }, + }); - if (!repo || !repo.imageUrl) { - return notFound(); - } + if (!repo || !repo.imageUrl) { + return notFound(); + } - const authHeaders: Record = {}; - for (const { connection } of repo.connections) { - try { - if (connection.connectionType === 'github') { - const config = connection.config as unknown as GithubConnectionConfig; - if (config.token) { - const token = await getTokenFromConfig(config.token, connection.orgId, prisma); - authHeaders['Authorization'] = `token ${token}`; - break; - } - } else if (connection.connectionType === 'gitlab') { - const config = connection.config as unknown as GitlabConnectionConfig; - if (config.token) { - const token = await getTokenFromConfig(config.token, connection.orgId, prisma); - authHeaders['PRIVATE-TOKEN'] = token; - break; - } - } else if (connection.connectionType === 'gitea') { - const config = connection.config as unknown as GiteaConnectionConfig; - if (config.token) { - const token = await getTokenFromConfig(config.token, connection.orgId, prisma); - authHeaders['Authorization'] = `token ${token}`; - break; - } + const authHeaders: Record = {}; + for (const { connection } of repo.connections) { + try { + if (connection.connectionType === 'github') { + const config = connection.config as unknown as GithubConnectionConfig; + if (config.token) { + const token = await getTokenFromConfig(config.token, connection.orgId, prisma); + authHeaders['Authorization'] = `token ${token}`; + break; + } + } else if (connection.connectionType === 'gitlab') { + const config = 
connection.config as unknown as GitlabConnectionConfig; + if (config.token) { + const token = await getTokenFromConfig(config.token, connection.orgId, prisma); + authHeaders['PRIVATE-TOKEN'] = token; + break; + } + } else if (connection.connectionType === 'gitea') { + const config = connection.config as unknown as GiteaConnectionConfig; + if (config.token) { + const token = await getTokenFromConfig(config.token, connection.orgId, prisma); + authHeaders['Authorization'] = `token ${token}`; + break; } - } catch (error) { - logger.warn(`Failed to get token for connection ${connection.id}:`, error); } + } catch (error) { + logger.warn(`Failed to get token for connection ${connection.id}:`, error); } + } - try { - const response = await fetch(repo.imageUrl, { - headers: authHeaders, - }); - - if (!response.ok) { - logger.warn(`Failed to fetch image from ${repo.imageUrl}: ${response.status}`); - return notFound(); - } + try { + const response = await fetch(repo.imageUrl, { + headers: authHeaders, + }); - const imageBuffer = await response.arrayBuffer(); - return imageBuffer; - } catch (error) { - logger.error(`Error proxying image for repo ${repoId}:`, error); + if (!response.ok) { + logger.warn(`Failed to fetch image from ${repo.imageUrl}: ${response.status}`); return notFound(); } - }, /* minRequiredRole = */ OrgRole.GUEST); - }, /* allowAnonymousAccess = */ true); + + const imageBuffer = await response.arrayBuffer(); + return imageBuffer; + } catch (error) { + logger.error(`Error proxying image for repo ${repoId}:`, error); + return notFound(); + } + }) }); export const getAnonymousAccessStatus = async (domain: string): Promise => sew(async () => { diff --git a/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx b/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx index 317e13c3d..091122f82 100644 --- a/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx +++ 
b/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx @@ -20,7 +20,7 @@ export const CodePreviewPanel = async ({ path, repoName, revisionName, domain }: repository: repoName, branch: revisionName, }, domain), - getRepoInfoByName(repoName, domain), + getRepoInfoByName(repoName), ]); if (isServiceError(fileSourceResponse) || isServiceError(repoInfoResponse)) { diff --git a/packages/web/src/app/[domain]/browse/[...path]/components/treePreviewPanel.tsx b/packages/web/src/app/[domain]/browse/[...path]/components/treePreviewPanel.tsx index bd7cc4634..4a0c38576 100644 --- a/packages/web/src/app/[domain]/browse/[...path]/components/treePreviewPanel.tsx +++ b/packages/web/src/app/[domain]/browse/[...path]/components/treePreviewPanel.tsx @@ -10,17 +10,16 @@ interface TreePreviewPanelProps { path: string; repoName: string; revisionName?: string; - domain: string; } -export const TreePreviewPanel = async ({ path, repoName, revisionName, domain }: TreePreviewPanelProps) => { +export const TreePreviewPanel = async ({ path, repoName, revisionName }: TreePreviewPanelProps) => { const [repoInfoResponse, folderContentsResponse] = await Promise.all([ - getRepoInfoByName(repoName, domain), + getRepoInfoByName(repoName), getFolderContents({ repoName, revisionName: revisionName ?? 
'HEAD', path, - }, domain) + }) ]); if (isServiceError(folderContentsResponse) || isServiceError(repoInfoResponse)) { diff --git a/packages/web/src/app/[domain]/browse/[...path]/page.tsx b/packages/web/src/app/[domain]/browse/[...path]/page.tsx index 81c27be5a..20e543279 100644 --- a/packages/web/src/app/[domain]/browse/[...path]/page.tsx +++ b/packages/web/src/app/[domain]/browse/[...path]/page.tsx @@ -42,7 +42,6 @@ export default async function BrowsePage(props: BrowsePageProps) { path={path} repoName={repoName} revisionName={revisionName} - domain={domain} /> )} diff --git a/packages/web/src/app/[domain]/browse/components/fileSearchCommandDialog.tsx b/packages/web/src/app/[domain]/browse/components/fileSearchCommandDialog.tsx index d87eab85d..0cfe720a4 100644 --- a/packages/web/src/app/[domain]/browse/components/fileSearchCommandDialog.tsx +++ b/packages/web/src/app/[domain]/browse/components/fileSearchCommandDialog.tsx @@ -6,7 +6,6 @@ import { useHotkeys } from "react-hotkeys-hook"; import { useQuery } from "@tanstack/react-query"; import { unwrapServiceError } from "@/lib/utils"; import { FileTreeItem, getFiles } from "@/features/fileTree/actions"; -import { useDomain } from "@/hooks/useDomain"; import { Dialog, DialogContent, DialogDescription, DialogTitle } from "@/components/ui/dialog"; import { useBrowseNavigation } from "../hooks/useBrowseNavigation"; import { useBrowseState } from "../hooks/useBrowseState"; @@ -28,7 +27,6 @@ type SearchResult = { export const FileSearchCommandDialog = () => { const { repoName, revisionName } = useBrowseParams(); - const domain = useDomain(); const { state: { isFileSearchOpen }, updateBrowseState } = useBrowseState(); const commandListRef = useRef(null); @@ -57,8 +55,8 @@ export const FileSearchCommandDialog = () => { }, [isFileSearchOpen]); const { data: files, isLoading, isError } = useQuery({ - queryKey: ['files', repoName, revisionName, domain], - queryFn: () => unwrapServiceError(getFiles({ repoName, revisionName: 
revisionName ?? 'HEAD' }, domain)), + queryKey: ['files', repoName, revisionName], + queryFn: () => unwrapServiceError(getFiles({ repoName, revisionName: revisionName ?? 'HEAD' })), enabled: isFileSearchOpen, }); diff --git a/packages/web/src/app/[domain]/repos/components/addRepositoryDialog.tsx b/packages/web/src/app/[domain]/repos/components/addRepositoryDialog.tsx index 79bfc1c81..45a8b6c60 100644 --- a/packages/web/src/app/[domain]/repos/components/addRepositoryDialog.tsx +++ b/packages/web/src/app/[domain]/repos/components/addRepositoryDialog.tsx @@ -8,7 +8,6 @@ import { zodResolver } from "@hookform/resolvers/zod"; import { useForm } from "react-hook-form"; import { z } from "zod"; import { experimental_addGithubRepositoryByUrl } from "@/actions"; -import { useDomain } from "@/hooks/useDomain"; import { isServiceError } from "@/lib/utils"; import { useToast } from "@/components/hooks/use-toast"; import { useRouter } from "next/navigation"; @@ -37,7 +36,6 @@ const formSchema = z.object({ }); export const AddRepositoryDialog = ({ isOpen, onOpenChange }: AddRepositoryDialogProps) => { - const domain = useDomain(); const { toast } = useToast(); const router = useRouter(); @@ -52,7 +50,7 @@ export const AddRepositoryDialog = ({ isOpen, onOpenChange }: AddRepositoryDialo const onSubmit = async (data: z.infer) => { - const result = await experimental_addGithubRepositoryByUrl(data.repositoryUrl.trim(), domain); + const result = await experimental_addGithubRepositoryByUrl(data.repositoryUrl.trim()); if (isServiceError(result)) { toast({ title: "Error adding repository", diff --git a/packages/web/src/app/api/[domain]/repos/[repoId]/image/route.ts b/packages/web/src/app/api/[domain]/repos/[repoId]/image/route.ts index f5f9bb535..b235d5723 100644 --- a/packages/web/src/app/api/[domain]/repos/[repoId]/image/route.ts +++ b/packages/web/src/app/api/[domain]/repos/[repoId]/image/route.ts @@ -3,18 +3,18 @@ import { isServiceError } from "@/lib/utils"; import { NextRequest } 
from "next/server"; export async function GET( - request: NextRequest, + _request: NextRequest, props: { params: Promise<{ domain: string; repoId: string }> } ) { const params = await props.params; - const { domain, repoId } = params; + const { repoId } = params; const repoIdNum = parseInt(repoId); if (isNaN(repoIdNum)) { return new Response("Invalid repo ID", { status: 400 }); } - const result = await getRepoImage(repoIdNum, domain); + const result = await getRepoImage(repoIdNum); if (isServiceError(result)) { return new Response(result.message, { status: result.statusCode }); } diff --git a/packages/web/src/features/fileTree/actions.ts b/packages/web/src/features/fileTree/actions.ts index fa83ddb38..003b82b2e 100644 --- a/packages/web/src/features/fileTree/actions.ts +++ b/packages/web/src/features/fileTree/actions.ts @@ -1,13 +1,13 @@ 'use server'; -import { sew, withAuth, withOrgMembership } from '@/actions'; +import { sew } from '@/actions'; import { env } from '@/env.mjs'; -import { OrgRole, Repo } from '@sourcebot/db'; -import { prisma } from '@/prisma'; import { notFound, unexpectedError } from '@/lib/serviceError'; -import { simpleGit } from 'simple-git'; -import path from 'path'; +import { withOptionalAuthV2 } from '@/withAuthV2'; +import { Repo } from '@sourcebot/db'; import { createLogger } from '@sourcebot/logger'; +import path from 'path'; +import { simpleGit } from 'simple-git'; const logger = createLogger('file-tree'); @@ -25,209 +25,182 @@ export type FileTreeNode = FileTreeItem & { * Returns the tree of files (blobs) and directories (trees) for a given repository, * at a given revision. 
*/ -export const getTree = async (params: { repoName: string, revisionName: string }, domain: string) => sew(() => - withAuth((userId) => - withOrgMembership(userId, domain, async ({ org }) => { - const { repoName, revisionName } = params; - const repo = await prisma.repo.findFirst({ - where: { - name: repoName, - orgId: org.id, - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { - permittedUsers: { - some: { - userId: userId, - } - } - } : {}) - }, - }); - - if (!repo) { - return notFound(); - } - - const { path: repoPath } = getRepoPath(repo); - - const git = simpleGit().cwd(repoPath); - - let result: string; - try { - result = await git.raw([ - 'ls-tree', - revisionName, - // recursive - '-r', - // include trees when recursing - '-t', - // format as output as {type},{path} - '--format=%(objecttype),%(path)', - ]); - } catch (error) { - logger.error('git ls-tree failed.', { error }); - return unexpectedError('git ls-tree command failed.'); - } - - const lines = result.split('\n').filter(line => line.trim()); +export const getTree = async (params: { repoName: string, revisionName: string }) => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + const { repoName, revisionName } = params; + const repo = await prisma.repo.findFirst({ + where: { + name: repoName, + orgId: org.id, + }, + }); + + if (!repo) { + return notFound(); + } - const flatList = lines.map(line => { - const [type, path] = line.split(','); - return { - type, - path, - } - }); + const { path: repoPath } = getRepoPath(repo); + + const git = simpleGit().cwd(repoPath); + + let result: string; + try { + result = await git.raw([ + 'ls-tree', + revisionName, + // recursive + '-r', + // include trees when recursing + '-t', + // format as output as {type},{path} + '--format=%(objecttype),%(path)', + ]); + } catch (error) { + logger.error('git ls-tree failed.', { error }); + return unexpectedError('git ls-tree command failed.'); + } - const tree = buildFileTree(flatList); + const lines = 
result.split('\n').filter(line => line.trim()); + const flatList = lines.map(line => { + const [type, path] = line.split(','); return { - tree, + type, + path, } + }); - }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true) -); + const tree = buildFileTree(flatList); + + return { + tree, + } + + })); /** * Returns the contents of a folder at a given path in a given repository, * at a given revision. */ -export const getFolderContents = async (params: { repoName: string, revisionName: string, path: string }, domain: string) => sew(() => - withAuth((userId) => - withOrgMembership(userId, domain, async ({ org }) => { - const { repoName, revisionName, path } = params; - const repo = await prisma.repo.findFirst({ - where: { - name: repoName, - orgId: org.id, - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { - permittedUsers: { - some: { - userId: userId, - } - } - } : {}) - }, - }); +export const getFolderContents = async (params: { repoName: string, revisionName: string, path: string }) => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + const { repoName, revisionName, path } = params; + const repo = await prisma.repo.findFirst({ + where: { + name: repoName, + orgId: org.id, + }, + }); + + if (!repo) { + return notFound(); + } - if (!repo) { - return notFound(); - } + const { path: repoPath } = getRepoPath(repo); - const { path: repoPath } = getRepoPath(repo); + // @note: we don't allow directory traversal + // or null bytes in the path. + if (path.includes('..') || path.includes('\0')) { + return notFound(); + } - // @note: we don't allow directory traversal - // or null bytes in the path. - if (path.includes('..') || path.includes('\0')) { - return notFound(); - } + // Normalize the path by... + let normalizedPath = path; - // Normalize the path by... - let normalizedPath = path; + // ... adding a trailing slash if it doesn't have one. 
+ // This is important since ls-tree won't return the contents + // of a directory if it doesn't have a trailing slash. + if (!normalizedPath.endsWith('/')) { + normalizedPath = `${normalizedPath}/`; + } - // ... adding a trailing slash if it doesn't have one. - // This is important since ls-tree won't return the contents - // of a directory if it doesn't have a trailing slash. - if (!normalizedPath.endsWith('/')) { - normalizedPath = `${normalizedPath}/`; - } + // ... removing any leading slashes. This is needed since + // the path is relative to the repository's root, so we + // need a relative path. + if (normalizedPath.startsWith('/')) { + normalizedPath = normalizedPath.slice(1); + } - // ... removing any leading slashes. This is needed since - // the path is relative to the repository's root, so we - // need a relative path. - if (normalizedPath.startsWith('/')) { - normalizedPath = normalizedPath.slice(1); - } + const git = simpleGit().cwd(repoPath); + + let result: string; + try { + result = await git.raw([ + 'ls-tree', + revisionName, + // format as output as {type},{path} + '--format=%(objecttype),%(path)', + ...(normalizedPath.length === 0 ? [] : [normalizedPath]), + ]); + } catch (error) { + logger.error('git ls-tree failed.', { error }); + return unexpectedError('git ls-tree command failed.'); + } - const git = simpleGit().cwd(repoPath); - - let result: string; - try { - result = await git.raw([ - 'ls-tree', - revisionName, - // format as output as {type},{path} - '--format=%(objecttype),%(path)', - ...(normalizedPath.length === 0 ? [] : [normalizedPath]), - ]); - } catch (error) { - logger.error('git ls-tree failed.', { error }); - return unexpectedError('git ls-tree command failed.'); - } + const lines = result.split('\n').filter(line => line.trim()); - const lines = result.split('\n').filter(line => line.trim()); + const contents: FileTreeItem[] = lines.map(line => { + const [type, path] = line.split(','); + const name = path.split('/').pop() ?? 
''; - const contents: FileTreeItem[] = lines.map(line => { - const [type, path] = line.split(','); - const name = path.split('/').pop() ?? ''; + return { + type, + path, + name, + } + }); - return { - type, - path, - name, - } - }); + return contents; + })); - return contents; - }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true) -); - -export const getFiles = async (params: { repoName: string, revisionName: string }, domain: string) => sew(() => - withAuth((userId) => - withOrgMembership(userId, domain, async ({ org }) => { - const { repoName, revisionName } = params; - - const repo = await prisma.repo.findFirst({ - where: { - name: repoName, - orgId: org.id, - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { - permittedUsers: { - some: { - userId: userId, - } - } - } : {}) - }, - }); +export const getFiles = async (params: { repoName: string, revisionName: string }) => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + const { repoName, revisionName } = params; - if (!repo) { - return notFound(); - } + const repo = await prisma.repo.findFirst({ + where: { + name: repoName, + orgId: org.id, + }, + }); - const { path: repoPath } = getRepoPath(repo); - - const git = simpleGit().cwd(repoPath); - - let result: string; - try { - result = await git.raw([ - 'ls-tree', - revisionName, - // recursive - '-r', - // only return the names of the files - '--name-only', - ]); - } catch (error) { - logger.error('git ls-tree failed.', { error }); - return unexpectedError('git ls-tree command failed.'); - } + if (!repo) { + return notFound(); + } + + const { path: repoPath } = getRepoPath(repo); + + const git = simpleGit().cwd(repoPath); + + let result: string; + try { + result = await git.raw([ + 'ls-tree', + revisionName, + // recursive + '-r', + // only return the names of the files + '--name-only', + ]); + } catch (error) { + logger.error('git ls-tree failed.', { error }); + return unexpectedError('git ls-tree command failed.'); + } 
- const paths = result.split('\n').filter(line => line.trim()); + const paths = result.split('\n').filter(line => line.trim()); - const files: FileTreeItem[] = paths.map(path => { - const name = path.split('/').pop() ?? ''; - return { - type: 'blob', - path, - name, - } - }); + const files: FileTreeItem[] = paths.map(path => { + const name = path.split('/').pop() ?? ''; + return { + type: 'blob', + path, + name, + } + }); - return files; + return files; - }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true) -); + })); const buildFileTree = (flatList: { type: string, path: string }[]): FileTreeNode => { const root: FileTreeNode = { diff --git a/packages/web/src/features/fileTree/components/fileTreePanel.tsx b/packages/web/src/features/fileTree/components/fileTreePanel.tsx index fffabeba7..a7579628d 100644 --- a/packages/web/src/features/fileTree/components/fileTreePanel.tsx +++ b/packages/web/src/features/fileTree/components/fileTreePanel.tsx @@ -3,7 +3,6 @@ import { getTree } from "../actions"; import { useQuery } from "@tanstack/react-query"; import { unwrapServiceError } from "@/lib/utils"; -import { useDomain } from "@/hooks/useDomain"; import { ResizablePanel } from "@/components/ui/resizable"; import { Skeleton } from "@/components/ui/skeleton"; import { useBrowseState } from "@/app/[domain]/browse/hooks/useBrowseState"; @@ -41,17 +40,16 @@ export const FileTreePanel = ({ order }: FileTreePanelProps) => { updateBrowseState, } = useBrowseState(); - const domain = useDomain(); const { repoName, revisionName, path } = useBrowseParams(); const fileTreePanelRef = useRef(null); const { data, isPending, isError } = useQuery({ - queryKey: ['tree', repoName, revisionName, domain], + queryKey: ['tree', repoName, revisionName], queryFn: () => unwrapServiceError( getTree({ repoName, revisionName: revisionName ?? 
'HEAD', - }, domain) + }) ), }); diff --git a/packages/web/src/features/search/searchApi.ts b/packages/web/src/features/search/searchApi.ts index 65640fe60..60d04dec4 100644 --- a/packages/web/src/features/search/searchApi.ts +++ b/packages/web/src/features/search/searchApi.ts @@ -4,14 +4,14 @@ import { env } from "@/env.mjs"; import { invalidZoektResponse, ServiceError } from "../../lib/serviceError"; import { isServiceError } from "../../lib/utils"; import { zoektFetch } from "./zoektClient"; -import { prisma } from "@/prisma"; import { ErrorCode } from "../../lib/errorCodes"; import { StatusCodes } from "http-status-codes"; import { zoektSearchResponseSchema } from "./zoektSchema"; import { SearchRequest, SearchResponse, SourceRange } from "./types"; -import { OrgRole, Repo } from "@sourcebot/db"; -import { sew, withAuth, withOrgMembership } from "@/actions"; +import { PrismaClient, Repo } from "@sourcebot/db"; +import { sew } from "@/actions"; import { base64Decode } from "@sourcebot/shared"; +import { withOptionalAuthV2 } from "@/withAuthV2"; // List of supported query prefixes in zoekt. 
// @see : https://github.com/sourcebot-dev/zoekt/blob/main/query/parse.go#L417 @@ -36,7 +36,7 @@ enum zoektPrefixes { reposet = "reposet:", } -const transformZoektQuery = async (query: string, orgId: number): Promise => { +const transformZoektQuery = async (query: string, orgId: number, prisma: PrismaClient): Promise => { const prevQueryParts = query.split(" "); const newQueryParts = []; @@ -127,235 +127,219 @@ const getFileWebUrl = (template: string, branch: string, fileName: string): stri return encodeURI(url + optionalQueryParams); } -export const search = async ({ query, matches, contextLines, whole }: SearchRequest, domain: string, apiKey: string | undefined = undefined) => sew(() => - withAuth((userId, _apiKeyHash) => - withOrgMembership(userId, domain, async ({ org }) => { - const transformedQuery = await transformZoektQuery(query, org.id); - if (isServiceError(transformedQuery)) { - return transformedQuery; - } - query = transformedQuery; +export const search = async ({ query, matches, contextLines, whole }: SearchRequest) => sew(() => + withOptionalAuthV2(async ({ org, prisma }) => { + const transformedQuery = await transformZoektQuery(query, org.id, prisma); + if (isServiceError(transformedQuery)) { + return transformedQuery; + } + query = transformedQuery; - const isBranchFilteringEnabled = ( - query.includes(zoektPrefixes.branch) || - query.includes(zoektPrefixes.branchShort) - ); + const isBranchFilteringEnabled = ( + query.includes(zoektPrefixes.branch) || + query.includes(zoektPrefixes.branchShort) + ); - // We only want to show matches for the default branch when - // the user isn't explicitly filtering by branch. - if (!isBranchFilteringEnabled) { - query = query.concat(` branch:HEAD`); + // We only want to show matches for the default branch when + // the user isn't explicitly filtering by branch. 
+ if (!isBranchFilteringEnabled) { + query = query.concat(` branch:HEAD`); + } + + const body = JSON.stringify({ + q: query, + // @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892 + opts: { + ChunkMatches: true, + MaxMatchDisplayCount: matches, + NumContextLines: contextLines, + Whole: !!whole, + TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT, + ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT, + MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds } + }); + + let header: Record = {}; + header = { + "X-Tenant-ID": org.id.toString() + }; + + const searchResponse = await zoektFetch({ + path: "/api/search", + body, + header, + method: "POST", + }); + + if (!searchResponse.ok) { + return invalidZoektResponse(searchResponse); + } - const body = JSON.stringify({ - q: query, - // @see: https://github.com/sourcebot-dev/zoekt/blob/main/api.go#L892 - opts: { - ChunkMatches: true, - MaxMatchDisplayCount: matches, - NumContextLines: contextLines, - Whole: !!whole, - TotalMaxMatchCount: env.TOTAL_MAX_MATCH_COUNT, - ShardMaxMatchCount: env.SHARD_MAX_MATCH_COUNT, - MaxWallTime: env.ZOEKT_MAX_WALL_TIME_MS * 1000 * 1000, // zoekt expects a duration in nanoseconds + const searchBody = await searchResponse.json(); + + const parser = zoektSearchResponseSchema.transform(async ({ Result }) => { + // @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field + // which corresponds to the `id` in the Repo table. In order to efficiently fetch repository + // metadata when transforming (potentially thousands) of file matches, we aggregate a unique + // set of repository ids* and map them to their corresponding Repo record. + // + // *Q: Why is `RepositoryID` optional? And why are we falling back to `Repository`? + // A: Prior to this change, the repository id was not plumbed into zoekt, so RepositoryID was + // always undefined. 
To make this a non-breaking change, we fallback to using the repository's name + // (`Repository`) as the identifier in these cases. This is not guaranteed to be unique, but in + // practice it is since the repository name includes the host and path (e.g., 'github.com/org/repo', + // 'gitea.com/org/repo', etc.). + // + // Note: When a repository is re-indexed (every hour) this ID will be populated. + // @see: https://github.com/sourcebot-dev/zoekt/pull/6 + const repoIdentifiers = new Set(Result.Files?.map((file) => file.RepositoryID ?? file.Repository) ?? []); + const repos = new Map(); + + (await prisma.repo.findMany({ + where: { + id: { + in: Array.from(repoIdentifiers).filter((id) => typeof id === "number"), + }, + orgId: org.id, } - }); - - let header: Record = {}; - header = { - "X-Tenant-ID": org.id.toString() - }; + })).forEach(repo => repos.set(repo.id, repo)); - const searchResponse = await zoektFetch({ - path: "/api/search", - body, - header, - method: "POST", - }); + (await prisma.repo.findMany({ + where: { + name: { + in: Array.from(repoIdentifiers).filter((id) => typeof id === "string"), + }, + orgId: org.id, + } + })).forEach(repo => repos.set(repo.name, repo)); - if (!searchResponse.ok) { - return invalidZoektResponse(searchResponse); - } + const files = Result.Files?.map((file) => { + const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName); - const searchBody = await searchResponse.json(); - - const parser = zoektSearchResponseSchema.transform(async ({ Result }) => { - // @note (2025-05-12): in zoekt, repositories are identified by the `RepositoryID` field - // which corresponds to the `id` in the Repo table. In order to efficiently fetch repository - // metadata when transforming (potentially thousands) of file matches, we aggregate a unique - // set of repository ids* and map them to their corresponding Repo record. - // - // *Q: Why is `RepositoryID` optional? And why are we falling back to `Repository`? 
- // A: Prior to this change, the repository id was not plumbed into zoekt, so RepositoryID was - // always undefined. To make this a non-breaking change, we fallback to using the repository's name - // (`Repository`) as the identifier in these cases. This is not guaranteed to be unique, but in - // practice it is since the repository name includes the host and path (e.g., 'github.com/org/repo', - // 'gitea.com/org/repo', etc.). - // - // Note: When a repository is re-indexed (every hour) this ID will be populated. - // @see: https://github.com/sourcebot-dev/zoekt/pull/6 - const repoIdentifiers = new Set(Result.Files?.map((file) => file.RepositoryID ?? file.Repository) ?? []); - const repos = new Map(); - - (await prisma.repo.findMany({ - where: { - id: { - in: Array.from(repoIdentifiers).filter((id) => typeof id === "number"), - }, - orgId: org.id, - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { - permittedUsers: { - some: { - userId, - } - } - } : {}) - } - })).forEach(repo => repos.set(repo.id, repo)); - - (await prisma.repo.findMany({ - where: { - name: { - in: Array.from(repoIdentifiers).filter((id) => typeof id === "string"), - }, - orgId: org.id, - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { - permittedUsers: { - some: { - userId, - } - } - } : {}) + const webUrl = (() => { + const template: string | undefined = Result.RepoURLs[file.Repository]; + if (!template) { + return undefined; } - })).forEach(repo => repos.set(repo.name, repo)); - const files = Result.Files?.map((file) => { - const fileNameChunks = file.ChunkMatches.filter((chunk) => chunk.FileName); + // If there are multiple branches pointing to the same revision of this file, it doesn't + // matter which branch we use here, so use the first one. + const branch = file.Branches && file.Branches.length > 0 ? 
file.Branches[0] : "HEAD"; + return getFileWebUrl(template, branch, file.FileName); + })(); - const webUrl = (() => { - const template: string | undefined = Result.RepoURLs[file.Repository]; - if (!template) { - return undefined; - } + const identifier = file.RepositoryID ?? file.Repository; + const repo = repos.get(identifier); - // If there are multiple branches pointing to the same revision of this file, it doesn't - // matter which branch we use here, so use the first one. - const branch = file.Branches && file.Branches.length > 0 ? file.Branches[0] : "HEAD"; - return getFileWebUrl(template, branch, file.FileName); - })(); - - const identifier = file.RepositoryID ?? file.Repository; - const repo = repos.get(identifier); - - // This can happen if the user doesn't have access to the repository. - if (!repo) { - return undefined; - } - - return { - fileName: { - text: file.FileName, - matchRanges: fileNameChunks.length === 1 ? fileNameChunks[0].Ranges.map((range) => ({ - start: { - byteOffset: range.Start.ByteOffset, - column: range.Start.Column, - lineNumber: range.Start.LineNumber, - }, - end: { - byteOffset: range.End.ByteOffset, - column: range.End.Column, - lineNumber: range.End.LineNumber, - } - })) : [], - }, - repository: repo.name, - repositoryId: repo.id, - webUrl: webUrl, - language: file.Language, - chunks: file.ChunkMatches - .filter((chunk) => !chunk.FileName) // Filter out filename chunks. 
- .map((chunk) => { - return { - content: base64Decode(chunk.Content), - matchRanges: chunk.Ranges.map((range) => ({ - start: { - byteOffset: range.Start.ByteOffset, - column: range.Start.Column, - lineNumber: range.Start.LineNumber, - }, - end: { - byteOffset: range.End.ByteOffset, - column: range.End.Column, - lineNumber: range.End.LineNumber, - } - }) satisfies SourceRange), - contentStart: { - byteOffset: chunk.ContentStart.ByteOffset, - column: chunk.ContentStart.Column, - lineNumber: chunk.ContentStart.LineNumber, - }, - symbols: chunk.SymbolInfo?.map((symbol) => { - return { - symbol: symbol.Sym, - kind: symbol.Kind, - parent: symbol.Parent.length > 0 ? { - symbol: symbol.Parent, - kind: symbol.ParentKind, - } : undefined, - } - }) ?? undefined, - } - }), - branches: file.Branches, - content: file.Content ? base64Decode(file.Content) : undefined, - } - }).filter((file) => file !== undefined) ?? []; + // This can happen if the user doesn't have access to the repository. + if (!repo) { + return undefined; + } return { - zoektStats: { - duration: Result.Duration, - fileCount: Result.FileCount, - matchCount: Result.MatchCount, - filesSkipped: Result.FilesSkipped, - contentBytesLoaded: Result.ContentBytesLoaded, - indexBytesLoaded: Result.IndexBytesLoaded, - crashes: Result.Crashes, - shardFilesConsidered: Result.ShardFilesConsidered, - filesConsidered: Result.FilesConsidered, - filesLoaded: Result.FilesLoaded, - shardsScanned: Result.ShardsScanned, - shardsSkipped: Result.ShardsSkipped, - shardsSkippedFilter: Result.ShardsSkippedFilter, - ngramMatches: Result.NgramMatches, - ngramLookups: Result.NgramLookups, - wait: Result.Wait, - matchTreeConstruction: Result.MatchTreeConstruction, - matchTreeSearch: Result.MatchTreeSearch, - regexpsConsidered: Result.RegexpsConsidered, - flushReason: Result.FlushReason, + fileName: { + text: file.FileName, + matchRanges: fileNameChunks.length === 1 ? 
fileNameChunks[0].Ranges.map((range) => ({ + start: { + byteOffset: range.Start.ByteOffset, + column: range.Start.Column, + lineNumber: range.Start.LineNumber, + }, + end: { + byteOffset: range.End.ByteOffset, + column: range.End.Column, + lineNumber: range.End.LineNumber, + } + })) : [], }, - files, - repositoryInfo: Array.from(repos.values()).map((repo) => ({ - id: repo.id, - codeHostType: repo.external_codeHostType, - name: repo.name, - displayName: repo.displayName ?? undefined, - webUrl: repo.webUrl ?? undefined, - })), - isBranchFilteringEnabled: isBranchFilteringEnabled, - stats: { - matchCount: files.reduce( - (acc, file) => - acc + file.chunks.reduce( - (acc, chunk) => acc + chunk.matchRanges.length, - 0, - ), - 0, - ) - } - } satisfies SearchResponse; - }); + repository: repo.name, + repositoryId: repo.id, + webUrl: webUrl, + language: file.Language, + chunks: file.ChunkMatches + .filter((chunk) => !chunk.FileName) // Filter out filename chunks. + .map((chunk) => { + return { + content: base64Decode(chunk.Content), + matchRanges: chunk.Ranges.map((range) => ({ + start: { + byteOffset: range.Start.ByteOffset, + column: range.Start.Column, + lineNumber: range.Start.LineNumber, + }, + end: { + byteOffset: range.End.ByteOffset, + column: range.End.Column, + lineNumber: range.End.LineNumber, + } + }) satisfies SourceRange), + contentStart: { + byteOffset: chunk.ContentStart.ByteOffset, + column: chunk.ContentStart.Column, + lineNumber: chunk.ContentStart.LineNumber, + }, + symbols: chunk.SymbolInfo?.map((symbol) => { + return { + symbol: symbol.Sym, + kind: symbol.Kind, + parent: symbol.Parent.length > 0 ? { + symbol: symbol.Parent, + kind: symbol.ParentKind, + } : undefined, + } + }) ?? undefined, + } + }), + branches: file.Branches, + content: file.Content ? base64Decode(file.Content) : undefined, + } + }).filter((file) => file !== undefined) ?? 
[]; + + return { + zoektStats: { + duration: Result.Duration, + fileCount: Result.FileCount, + matchCount: Result.MatchCount, + filesSkipped: Result.FilesSkipped, + contentBytesLoaded: Result.ContentBytesLoaded, + indexBytesLoaded: Result.IndexBytesLoaded, + crashes: Result.Crashes, + shardFilesConsidered: Result.ShardFilesConsidered, + filesConsidered: Result.FilesConsidered, + filesLoaded: Result.FilesLoaded, + shardsScanned: Result.ShardsScanned, + shardsSkipped: Result.ShardsSkipped, + shardsSkippedFilter: Result.ShardsSkippedFilter, + ngramMatches: Result.NgramMatches, + ngramLookups: Result.NgramLookups, + wait: Result.Wait, + matchTreeConstruction: Result.MatchTreeConstruction, + matchTreeSearch: Result.MatchTreeSearch, + regexpsConsidered: Result.RegexpsConsidered, + flushReason: Result.FlushReason, + }, + files, + repositoryInfo: Array.from(repos.values()).map((repo) => ({ + id: repo.id, + codeHostType: repo.external_codeHostType, + name: repo.name, + displayName: repo.displayName ?? undefined, + webUrl: repo.webUrl ?? undefined, + })), + isBranchFilteringEnabled: isBranchFilteringEnabled, + stats: { + matchCount: files.reduce( + (acc, file) => + acc + file.chunks.reduce( + (acc, chunk) => acc + chunk.matchRanges.length, + 0, + ), + 0, + ) + } + } satisfies SearchResponse; + }); - return parser.parseAsync(searchBody); - }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true, apiKey ? 
{ apiKey, domain } : undefined) -); + return parser.parseAsync(searchBody); + })); diff --git a/packages/web/src/prisma.ts b/packages/web/src/prisma.ts index bbd1bc46c..7807d9c97 100644 --- a/packages/web/src/prisma.ts +++ b/packages/web/src/prisma.ts @@ -1,7 +1,48 @@ import 'server-only'; -import { PrismaClient } from "@sourcebot/db"; +import { env } from "@/env.mjs"; +import { Prisma, PrismaClient } from "@sourcebot/db"; // @see: https://authjs.dev/getting-started/adapters/prisma const globalForPrisma = globalThis as unknown as { prisma: PrismaClient } + +// @NOTE: In almost all cases, the userScopedPrismaClientExtension should be used +// (since actions & queries are scoped to a particular user). There are some exceptions +// (e.g., in initialize.ts). +// +// @todo: we can mark this as `__unsafePrisma` in the future once we've migrated +// all of the actions & queries to use the userScopedPrismaClientExtension to avoid +// accidental misuse. export const prisma = globalForPrisma.prisma || new PrismaClient() -if (process.env.NODE_ENV !== "production") globalForPrisma.prisma = prisma \ No newline at end of file +if (env.NODE_ENV !== "production") globalForPrisma.prisma = prisma + +/** + * Creates a prisma client extension that scopes queries strictly to information + * a given user should be able to access. + */ +export const userScopedPrismaClientExtension = (userId?: string) => { + return Prisma.defineExtension( + (prisma) => { + return prisma.$extends({ + query: { + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ?
{ + repo: { + $allOperations({ args, query }) { + if ('where' in args) { + args.where = { + ...args.where, + permittedUsers: { + some: { + userId, + } + } + } + } + + return query(args); + } + } + } : {}) + } + }) + }) +} \ No newline at end of file diff --git a/packages/web/src/withAuthV2.ts b/packages/web/src/withAuthV2.ts index aba1a5d47..1b0a723f3 100644 --- a/packages/web/src/withAuthV2.ts +++ b/packages/web/src/withAuthV2.ts @@ -1,6 +1,6 @@ -import { prisma } from "@/prisma"; +import { prisma as __unsafePrisma, userScopedPrismaClientExtension } from "@/prisma"; import { hashSecret } from "@sourcebot/crypto"; -import { ApiKey, Org, OrgRole, User } from "@sourcebot/db"; +import { ApiKey, Org, OrgRole, PrismaClient, User } from "@sourcebot/db"; import { headers } from "next/headers"; import { auth } from "./auth"; import { notAuthenticated, notFound, ServiceError } from "./lib/serviceError"; @@ -14,12 +14,14 @@ interface OptionalAuthContext { user?: User; org: Org; role: OrgRole; + prisma: PrismaClient; } interface RequiredAuthContext { user: User; org: Org; role: Omit; + prisma: PrismaClient; } export const withAuthV2 = async (fn: (params: RequiredAuthContext) => Promise) => { @@ -29,13 +31,13 @@ export const withAuthV2 = async (fn: (params: RequiredAuthContext) => Promise return authContext; } - const { user, org, role } = authContext; + const { user, org, role, prisma } = authContext; if (!user || role === OrgRole.GUEST) { return notAuthenticated(); } - return fn({ user, org, role }); + return fn({ user, org, role, prisma }); }; export const withOptionalAuthV2 = async (fn: (params: OptionalAuthContext) => Promise) => { @@ -44,7 +46,7 @@ export const withOptionalAuthV2 = async (fn: (params: OptionalAuthContext) => return authContext; } - const { user, org, role } = authContext; + const { user, org, role, prisma } = authContext; const hasAnonymousAccessEntitlement = hasEntitlement("anonymous-access"); const orgMetadata = getOrgMetadata(org); @@ -61,13 +63,13 @@ 
export const withOptionalAuthV2 = async (fn: (params: OptionalAuthContext) => return notAuthenticated(); } - return fn({ user, org, role }); + return fn({ user, org, role, prisma }); }; export const getAuthContext = async (): Promise => { const user = await getAuthenticatedUser(); - const org = await prisma.org.findUnique({ + const org = await __unsafePrisma.org.findUnique({ where: { id: SINGLE_TENANT_ORG_ID, } @@ -77,7 +79,7 @@ export const getAuthContext = async (): Promise { const session = await auth(); if (session) { const userId = session.user.id; - const user = await prisma.user.findUnique({ + const user = await __unsafePrisma.user.findUnique({ where: { id: userId, } @@ -116,7 +121,7 @@ export const getAuthenticatedUser = async () => { } // Attempt to find the user associated with this api key. - const user = await prisma.user.findUnique({ + const user = await __unsafePrisma.user.findUnique({ where: { id: apiKey.createdById, }, @@ -127,7 +132,7 @@ export const getAuthenticatedUser = async () => { } // Update the last used at timestamp for this api key. 
- await prisma.apiKey.update({ + await __unsafePrisma.apiKey.update({ where: { hash: apiKey.hash, }, @@ -152,7 +157,7 @@ const getVerifiedApiObject = async (apiKeyString: string): Promise Date: Wed, 17 Sep 2025 20:34:10 -0700 Subject: [PATCH 04/14] wip --- packages/backend/src/permissionSyncer.ts | 128 ++++++++++++++---- .../migration.sql | 6 + packages/db/prisma/schema.prisma | 12 ++ .../[...path]/components/codePreviewPanel.tsx | 5 +- .../app/[domain]/browse/[...path]/page.tsx | 3 - .../components/codePreviewPanel/index.tsx | 6 +- .../web/src/app/api/(server)/search/route.ts | 14 +- .../web/src/app/api/(server)/source/route.ts | 14 +- .../review-agent/nodes/fetchFileContent.ts | 3 +- packages/web/src/features/chat/agent.ts | 3 +- packages/web/src/features/chat/tools.ts | 4 +- packages/web/src/features/codeNav/actions.ts | 4 +- .../web/src/features/search/fileSourceApi.ts | 104 +++++++------- 13 files changed, 186 insertions(+), 120 deletions(-) create mode 100644 packages/db/prisma/migrations/20250918030650_add_permission_sync_tracking_to_repo_table/migration.sql diff --git a/packages/backend/src/permissionSyncer.ts b/packages/backend/src/permissionSyncer.ts index bb93d2b06..baf85f059 100644 --- a/packages/backend/src/permissionSyncer.ts +++ b/packages/backend/src/permissionSyncer.ts @@ -1,4 +1,5 @@ -import { PrismaClient } from "@sourcebot/db"; +import * as Sentry from "@sentry/node"; +import { PrismaClient, Repo, RepoPermissionSyncStatus } from "@sourcebot/db"; import { createLogger } from "@sourcebot/logger"; import { BitbucketConnectionConfig } from "@sourcebot/schemas/v3/bitbucket.type"; import { GiteaConnectionConfig } from "@sourcebot/schemas/v3/gitea.type"; @@ -6,6 +7,7 @@ import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type"; import { Job, Queue, Worker } from 'bullmq'; import { Redis } from 'ioredis'; +import { env } from "./env.js"; import { 
createOctokitFromConfig, getUserIdsWithReadAccessToRepo } from "./github.js"; import { RepoWithConnections } from "./types.js"; @@ -17,6 +19,8 @@ const QUEUE_NAME = 'repoPermissionSyncQueue'; const logger = createLogger('permission-syncer'); +const SUPPORTED_CODE_HOST_TYPES = ['github']; + export class RepoPermissionSyncer { private queue: Queue; private worker: Worker; @@ -30,36 +34,59 @@ export class RepoPermissionSyncer { }); this.worker = new Worker(QUEUE_NAME, this.runJob.bind(this), { connection: redis, + concurrency: 1, }); this.worker.on('completed', this.onJobCompleted.bind(this)); this.worker.on('failed', this.onJobFailed.bind(this)); } - public async scheduleJob(repoId: number) { - await this.queue.add(QUEUE_NAME, { - repoId, - }); - } - public startScheduler() { logger.debug('Starting scheduler'); - // @todo: we should only sync permissions for a repository if it has been at least ~24 hours since the last sync. return setInterval(async () => { + // @todo: make this configurable + const thresholdDate = new Date(Date.now() - 1000 * 60 * 60 * 24); const repos = await this.db.repo.findMany({ + // Repos need their permissions to be synced against the code host when... 
where: { - external_codeHostType: { - in: ['github'], - } + // They belong to a code host that supports permissions syncing + AND: [ + { + external_codeHostType: { + in: SUPPORTED_CODE_HOST_TYPES, + } + }, + // and, they either require a sync (SYNC_NEEDED) or have been in a completed state (SYNCED or FAILED) + // for > some duration (default 24 hours) + { + OR: [ + { + permissionSyncStatus: RepoPermissionSyncStatus.SYNC_NEEDED + }, + { + AND: [ + { + OR: [ + { permissionSyncStatus: RepoPermissionSyncStatus.SYNCED }, + { permissionSyncStatus: RepoPermissionSyncStatus.FAILED }, + ] + }, + { + OR: [ + { permissionSyncJobLastCompletedAt: null }, + { permissionSyncJobLastCompletedAt: { lt: thresholdDate } } + ] + } + ] + } + ] + }, + ] } }); - for (const repo of repos) { - await this.scheduleJob(repo.id); - } - - // @todo: make this configurable - }, 1000 * 60); + await this.schedulePermissionSync(repos); + }, 1000 * 30); } public dispose() { @@ -67,11 +94,34 @@ export class RepoPermissionSyncer { this.queue.close(); } + private async schedulePermissionSync(repos: Repo[]) { + await this.db.$transaction(async (tx) => { + await tx.repo.updateMany({ + where: { id: { in: repos.map(repo => repo.id) } }, + data: { permissionSyncStatus: RepoPermissionSyncStatus.IN_SYNC_QUEUE }, + }); + + await this.queue.addBulk(repos.map(repo => ({ + name: 'repoPermissionSyncJob', + data: { + repoId: repo.id, + }, + opts: { + removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE, + removeOnFail: env.REDIS_REMOVE_ON_FAIL, + } + }))) + }); + } + private async runJob(job: Job) { const id = job.data.repoId; - const repo = await this.db.repo.findUnique({ + const repo = await this.db.repo.update({ where: { - id, + id + }, + data: { + permissionSyncStatus: RepoPermissionSyncStatus.SYNCING, }, include: { connections: { @@ -86,6 +136,8 @@ export class RepoPermissionSyncer { throw new Error(`Repo ${id} not found`); } + logger.info(`Syncing permissions for repo ${repo.displayName}...`); + const connection = 
getFirstConnectionWithToken(repo); if (!connection) { throw new Error(`No connection with token found for repo ${id}`); @@ -119,8 +171,6 @@ export class RepoPermissionSyncer { return []; })(); - logger.info(`User IDs with read access to repo ${id}: ${userIds}`); - await this.db.repo.update({ where: { id: repo.id, @@ -141,11 +191,43 @@ export class RepoPermissionSyncer { } private async onJobCompleted(job: Job) { - logger.info(`Repo permission sync job completed for repo ${job.data.repoId}`); + const repo = await this.db.repo.update({ + where: { + id: job.data.repoId, + }, + data: { + permissionSyncStatus: RepoPermissionSyncStatus.SYNCED, + permissionSyncJobLastCompletedAt: new Date(), + }, + }); + + logger.info(`Permissions synced for repo ${repo.displayName ?? repo.name}`); } private async onJobFailed(job: Job | undefined, err: Error) { - logger.error(`Repo permission sync job failed for repo ${job?.data.repoId}: ${err}`); + Sentry.captureException(err, { + tags: { + repoId: job?.data.repoId, + queue: QUEUE_NAME, + } + }); + + const errorMessage = (repoName: string) => `Repo permission sync job failed for repo ${repoName}: ${err}`; + + if (job) { + const repo = await this.db.repo.update({ + where: { + id: job?.data.repoId, + }, + data: { + permissionSyncStatus: RepoPermissionSyncStatus.FAILED, + permissionSyncJobLastCompletedAt: new Date(), + }, + }); + logger.error(errorMessage(repo.displayName ?? 
repo.name)); + } else { + logger.error(errorMessage('unknown repo (id not found)')); + } } } diff --git a/packages/db/prisma/migrations/20250918030650_add_permission_sync_tracking_to_repo_table/migration.sql b/packages/db/prisma/migrations/20250918030650_add_permission_sync_tracking_to_repo_table/migration.sql new file mode 100644 index 000000000..ffbe376e3 --- /dev/null +++ b/packages/db/prisma/migrations/20250918030650_add_permission_sync_tracking_to_repo_table/migration.sql @@ -0,0 +1,6 @@ +-- CreateEnum +CREATE TYPE "RepoPermissionSyncStatus" AS ENUM ('SYNC_NEEDED', 'IN_SYNC_QUEUE', 'SYNCING', 'SYNCED', 'FAILED'); + +-- AlterTable +ALTER TABLE "Repo" ADD COLUMN "permissionSyncJobLastCompletedAt" TIMESTAMP(3), +ADD COLUMN "permissionSyncStatus" "RepoPermissionSyncStatus" NOT NULL DEFAULT 'SYNC_NEEDED'; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index a50a4eb25..feaaff024 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -30,6 +30,14 @@ enum ConnectionSyncStatus { FAILED } +enum RepoPermissionSyncStatus { + SYNC_NEEDED + IN_SYNC_QUEUE + SYNCING + SYNCED + FAILED +} + enum StripeSubscriptionStatus { ACTIVE INACTIVE @@ -59,6 +67,10 @@ model Repo { repoIndexingStatus RepoIndexingStatus @default(NEW) permittedUsers UserToRepoPermission[] + permissionSyncStatus RepoPermissionSyncStatus @default(SYNC_NEEDED) + /// When the repo permissions were last synced, either successfully or unsuccessfully. + permissionSyncJobLastCompletedAt DateTime? + // The id of the repo in the external service external_id String // The type of the external service (e.g., github, gitlab, etc.) 
diff --git a/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx b/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx index 091122f82..01a84447e 100644 --- a/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx +++ b/packages/web/src/app/[domain]/browse/[...path]/components/codePreviewPanel.tsx @@ -10,16 +10,15 @@ interface CodePreviewPanelProps { path: string; repoName: string; revisionName?: string; - domain: string; } -export const CodePreviewPanel = async ({ path, repoName, revisionName, domain }: CodePreviewPanelProps) => { +export const CodePreviewPanel = async ({ path, repoName, revisionName }: CodePreviewPanelProps) => { const [fileSourceResponse, repoInfoResponse] = await Promise.all([ getFileSource({ fileName: path, repository: repoName, branch: revisionName, - }, domain), + }), getRepoInfoByName(repoName), ]); diff --git a/packages/web/src/app/[domain]/browse/[...path]/page.tsx b/packages/web/src/app/[domain]/browse/[...path]/page.tsx index 20e543279..84c879129 100644 --- a/packages/web/src/app/[domain]/browse/[...path]/page.tsx +++ b/packages/web/src/app/[domain]/browse/[...path]/page.tsx @@ -7,7 +7,6 @@ import { TreePreviewPanel } from "./components/treePreviewPanel"; interface BrowsePageProps { params: Promise<{ path: string[]; - domain: string; }>; } @@ -16,7 +15,6 @@ export default async function BrowsePage(props: BrowsePageProps) { const { path: _rawPath, - domain } = params; const rawPath = _rawPath.join('/'); @@ -35,7 +33,6 @@ export default async function BrowsePage(props: BrowsePageProps) { path={path} repoName={repoName} revisionName={revisionName} - domain={domain} /> ) : ( { - const domain = useDomain(); // If there are multiple branches pointing to the same revision of this file, it doesn't // matter which branch we use here, so use the first one. 
@@ -31,13 +29,13 @@ export const CodePreviewPanel = ({ }, [previewedFile]); const { data: file, isLoading, isPending, isError } = useQuery({ - queryKey: ["source", previewedFile, branch, domain], + queryKey: ["source", previewedFile, branch], queryFn: () => unwrapServiceError( getFileSource({ fileName: previewedFile.fileName.text, repository: previewedFile.repository, branch, - }, domain) + }) ), select: (data) => { return { diff --git a/packages/web/src/app/api/(server)/search/route.ts b/packages/web/src/app/api/(server)/search/route.ts index 145d3fa95..83a5e6a0c 100644 --- a/packages/web/src/app/api/(server)/search/route.ts +++ b/packages/web/src/app/api/(server)/search/route.ts @@ -5,20 +5,8 @@ import { isServiceError } from "@/lib/utils"; import { NextRequest } from "next/server"; import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError"; import { searchRequestSchema } from "@/features/search/schemas"; -import { ErrorCode } from "@/lib/errorCodes"; -import { StatusCodes } from "http-status-codes"; export const POST = async (request: NextRequest) => { - const domain = request.headers.get("X-Org-Domain"); - const apiKey = request.headers.get("X-Sourcebot-Api-Key") ?? 
undefined; - if (!domain) { - return serviceErrorResponse({ - statusCode: StatusCodes.BAD_REQUEST, - errorCode: ErrorCode.MISSING_ORG_DOMAIN_HEADER, - message: "Missing X-Org-Domain header", - }); - } - const body = await request.json(); const parsed = await searchRequestSchema.safeParseAsync(body); if (!parsed.success) { @@ -27,7 +15,7 @@ export const POST = async (request: NextRequest) => { ); } - const response = await search(parsed.data, domain, apiKey); + const response = await search(parsed.data); if (isServiceError(response)) { return serviceErrorResponse(response); } diff --git a/packages/web/src/app/api/(server)/source/route.ts b/packages/web/src/app/api/(server)/source/route.ts index a6364b361..d64d701d5 100644 --- a/packages/web/src/app/api/(server)/source/route.ts +++ b/packages/web/src/app/api/(server)/source/route.ts @@ -5,20 +5,8 @@ import { schemaValidationError, serviceErrorResponse } from "@/lib/serviceError" import { isServiceError } from "@/lib/utils"; import { NextRequest } from "next/server"; import { fileSourceRequestSchema } from "@/features/search/schemas"; -import { ErrorCode } from "@/lib/errorCodes"; -import { StatusCodes } from "http-status-codes"; export const POST = async (request: NextRequest) => { - const domain = request.headers.get("X-Org-Domain"); - const apiKey = request.headers.get("X-Sourcebot-Api-Key") ?? 
undefined; - if (!domain) { - return serviceErrorResponse({ - statusCode: StatusCodes.BAD_REQUEST, - errorCode: ErrorCode.MISSING_ORG_DOMAIN_HEADER, - message: "Missing X-Org-Domain header", - }); - } - const body = await request.json(); const parsed = await fileSourceRequestSchema.safeParseAsync(body); if (!parsed.success) { @@ -27,7 +15,7 @@ export const POST = async (request: NextRequest) => { ); } - const response = await getFileSource(parsed.data, domain, apiKey); + const response = await getFileSource(parsed.data); if (isServiceError(response)) { return serviceErrorResponse(response); } diff --git a/packages/web/src/features/agents/review-agent/nodes/fetchFileContent.ts b/packages/web/src/features/agents/review-agent/nodes/fetchFileContent.ts index 06cc7a44f..7617c959a 100644 --- a/packages/web/src/features/agents/review-agent/nodes/fetchFileContent.ts +++ b/packages/web/src/features/agents/review-agent/nodes/fetchFileContent.ts @@ -2,7 +2,6 @@ import { sourcebot_context, sourcebot_pr_payload } from "@/features/agents/revie import { getFileSource } from "@/features/search/fileSourceApi"; import { fileSourceResponseSchema } from "@/features/search/schemas"; import { isServiceError } from "@/lib/utils"; -import { env } from "@/env.mjs"; import { createLogger } from "@sourcebot/logger"; const logger = createLogger('fetch-file-content'); @@ -17,7 +16,7 @@ export const fetchFileContent = async (pr_payload: sourcebot_pr_payload, filenam } logger.debug(JSON.stringify(fileSourceRequest, null, 2)); - const response = await getFileSource(fileSourceRequest, "~", env.REVIEW_AGENT_API_KEY); + const response = await getFileSource(fileSourceRequest); if (isServiceError(response)) { throw new Error(`Failed to fetch file content for ${filename} from ${repoPath}: ${response.message}`); } diff --git a/packages/web/src/features/chat/agent.ts b/packages/web/src/features/chat/agent.ts index 7010151fc..7df5a6d48 100644 --- a/packages/web/src/features/chat/agent.ts +++ 
b/packages/web/src/features/chat/agent.ts @@ -1,6 +1,5 @@ import { env } from "@/env.mjs"; import { getFileSource } from "@/features/search/fileSourceApi"; -import { SINGLE_TENANT_ORG_DOMAIN } from "@/lib/constants"; import { isServiceError } from "@/lib/utils"; import { ProviderOptions } from "@ai-sdk/provider-utils"; import { createLogger } from "@sourcebot/logger"; @@ -252,7 +251,7 @@ const resolveFileSource = async ({ path, repo, revision }: FileSource) => { repository: repo, branch: revision, // @todo: handle multi-tenancy. - }, SINGLE_TENANT_ORG_DOMAIN); + }); if (isServiceError(fileSource)) { // @todo: handle this diff --git a/packages/web/src/features/chat/tools.ts b/packages/web/src/features/chat/tools.ts index 608c1213f..f69c5f341 100644 --- a/packages/web/src/features/chat/tools.ts +++ b/packages/web/src/features/chat/tools.ts @@ -114,7 +114,7 @@ export const readFilesTool = tool({ repository, branch: revision, // @todo(mt): handle multi-tenancy. - }, SINGLE_TENANT_ORG_DOMAIN); + }); })); if (responses.some(isServiceError)) { @@ -187,7 +187,7 @@ Multiple expressions can be or'd together with or, negated with -, or grouped wi contextLines: 3, whole: false, // @todo(mt): handle multi-tenancy. 
- }, SINGLE_TENANT_ORG_DOMAIN); + }); if (isServiceError(response)) { return response; diff --git a/packages/web/src/features/codeNav/actions.ts b/packages/web/src/features/codeNav/actions.ts index f342de6eb..b55cfa30d 100644 --- a/packages/web/src/features/codeNav/actions.ts +++ b/packages/web/src/features/codeNav/actions.ts @@ -34,7 +34,7 @@ export const findSearchBasedSymbolReferences = async ( query, matches: MAX_REFERENCE_COUNT, contextLines: 0, - }, domain); + }); if (isServiceError(searchResult)) { return searchResult; @@ -67,7 +67,7 @@ export const findSearchBasedSymbolDefinitions = async ( query, matches: MAX_REFERENCE_COUNT, contextLines: 0, - }, domain); + }); if (isServiceError(searchResult)) { return searchResult; diff --git a/packages/web/src/features/search/fileSourceApi.ts b/packages/web/src/features/search/fileSourceApi.ts index aa3ae07a3..249abb420 100644 --- a/packages/web/src/features/search/fileSourceApi.ts +++ b/packages/web/src/features/search/fileSourceApi.ts @@ -5,60 +5,58 @@ import { fileNotFound, ServiceError, unexpectedError } from "../../lib/serviceEr import { FileSourceRequest, FileSourceResponse } from "./types"; import { isServiceError } from "../../lib/utils"; import { search } from "./searchApi"; -import { sew, withAuth, withOrgMembership } from "@/actions"; -import { OrgRole } from "@sourcebot/db"; +import { sew } from "@/actions"; +import { withOptionalAuthV2 } from "@/withAuthV2"; // @todo (bkellam) : We should really be using `git show :` to fetch file contents here. // This will allow us to support permalinks to files at a specific revision that may not be indexed // by zoekt. 
-export const getFileSource = async ({ fileName, repository, branch }: FileSourceRequest, domain: string, apiKey: string | undefined = undefined): Promise => sew(() => - withAuth((userId, _apiKeyHash) => - withOrgMembership(userId, domain, async () => { - const escapedFileName = escapeStringRegexp(fileName); - const escapedRepository = escapeStringRegexp(repository); - - let query = `file:${escapedFileName} repo:^${escapedRepository}$`; - if (branch) { - query = query.concat(` branch:${branch}`); - } - - const searchResponse = await search({ - query, - matches: 1, - whole: true, - }, domain, apiKey); - - if (isServiceError(searchResponse)) { - return searchResponse; - } - - const files = searchResponse.files; - - if (!files || files.length === 0) { - return fileNotFound(fileName, repository); - } - - const file = files[0]; - const source = file.content ?? ''; - const language = file.language; - - const repoInfo = searchResponse.repositoryInfo.find((repo) => repo.id === file.repositoryId); - if (!repoInfo) { - // This should never happen. - return unexpectedError("Repository info not found"); - } - - return { - source, - language, - path: fileName, - repository, - repositoryCodeHostType: repoInfo.codeHostType, - repositoryDisplayName: repoInfo.displayName, - repositoryWebUrl: repoInfo.webUrl, - branch, - webUrl: file.webUrl, - } satisfies FileSourceResponse; - - }, /* minRequiredRole = */ OrgRole.GUEST), /* allowAnonymousAccess = */ true, apiKey ? 
{ apiKey, domain } : undefined) -); +export const getFileSource = async ({ fileName, repository, branch }: FileSourceRequest): Promise => sew(() => + withOptionalAuthV2(async () => { + const escapedFileName = escapeStringRegexp(fileName); + const escapedRepository = escapeStringRegexp(repository); + + let query = `file:${escapedFileName} repo:^${escapedRepository}$`; + if (branch) { + query = query.concat(` branch:${branch}`); + } + + const searchResponse = await search({ + query, + matches: 1, + whole: true, + }); + + if (isServiceError(searchResponse)) { + return searchResponse; + } + + const files = searchResponse.files; + + if (!files || files.length === 0) { + return fileNotFound(fileName, repository); + } + + const file = files[0]; + const source = file.content ?? ''; + const language = file.language; + + const repoInfo = searchResponse.repositoryInfo.find((repo) => repo.id === file.repositoryId); + if (!repoInfo) { + // This should never happen. + return unexpectedError("Repository info not found"); + } + + return { + source, + language, + path: fileName, + repository, + repositoryCodeHostType: repoInfo.codeHostType, + repositoryDisplayName: repoInfo.displayName, + repositoryWebUrl: repoInfo.webUrl, + branch, + webUrl: file.webUrl, + } satisfies FileSourceResponse; + + })); From e5c8caadb819154fd1ed9b93615c965e1e2810ab Mon Sep 17 00:00:00 2001 From: bkellam Date: Thu, 18 Sep 2025 23:56:39 -0700 Subject: [PATCH 05/14] user syncing + represent sync job status in a separate table --- package.json | 1 + packages/backend/src/constants.ts | 4 + packages/backend/src/github.ts | 11 + packages/backend/src/index.ts | 19 +- ...ssionSyncer.ts => repoPermissionSyncer.ts} | 162 +++++++----- packages/backend/src/userPermissionSyncer.ts | 249 ++++++++++++++++++ .../migration.sql | 14 - .../migration.sql | 6 - .../migration.sql | 58 ++++ packages/db/prisma/schema.prisma | 57 +++- packages/web/src/ee/features/sso/sso.tsx | 14 +- 11 files changed, 488 insertions(+), 107
deletions(-) rename packages/backend/src/{permissionSyncer.ts => repoPermissionSyncer.ts} (60%) create mode 100644 packages/backend/src/userPermissionSyncer.ts delete mode 100644 packages/db/prisma/migrations/20250827010055_repo_to_user_join_table/migration.sql delete mode 100644 packages/db/prisma/migrations/20250918030650_add_permission_sync_tracking_to_repo_table/migration.sql create mode 100644 packages/db/prisma/migrations/20250919065312_add_permission_sync_tables/migration.sql diff --git a/package.json b/package.json index 7420579f2..c5909e76c 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,7 @@ "watch:mcp": "yarn workspace @sourcebot/mcp build:watch", "watch:schemas": "yarn workspace @sourcebot/schemas watch", "dev:prisma:migrate:dev": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:dev", + "dev:prisma:generate": "yarn with-env yarn workspace @sourcebot/db prisma:generate", "dev:prisma:studio": "yarn with-env yarn workspace @sourcebot/db prisma:studio", "dev:prisma:migrate:reset": "yarn with-env yarn workspace @sourcebot/db prisma:migrate:reset", "dev:prisma:db:push": "yarn with-env yarn workspace @sourcebot/db prisma:db:push", diff --git a/packages/backend/src/constants.ts b/packages/backend/src/constants.ts index c0d77f05e..3329f3d8b 100644 --- a/packages/backend/src/constants.ts +++ b/packages/backend/src/constants.ts @@ -17,3 +17,7 @@ export const DEFAULT_SETTINGS: Settings = { repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours enablePublicAccess: false // deprected, use FORCE_ENABLE_ANONYMOUS_ACCESS instead } + +export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [ + 'github', +]; \ No newline at end of file diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts index 730c891c9..a489c55c4 100644 --- a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -129,6 +129,17 @@ export const getUserIdsWithReadAccessToRepo = async (owner: string, repo: string return collaborators.map(collaborator 
=> collaborator.id.toString()); } +export const getReposThatAuthenticatedUserHasReadAccessTo = async (octokit: Octokit) => { + const fetchFn = () => octokit.paginate(octokit.repos.listForAuthenticatedUser, { + per_page: 100, + // @todo: do we need to set a visibility to private only? + // visibility: 'private' + }); + + const repos = await fetchWithRetry(fetchFn, `authenticated user`, logger); + return repos.map(repo => repo.id.toString()); +} + export const createOctokitFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient): Promise<{ octokit: Octokit, isAuthenticated: boolean }> => { const hostname = config.url ? new URL(config.url).hostname : diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index f80340344..0210b3ba0 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -10,10 +10,11 @@ import path from 'path'; import { ConnectionManager } from './connectionManager.js'; import { DEFAULT_SETTINGS } from './constants.js'; import { env } from "./env.js"; -import { RepoPermissionSyncer } from './permissionSyncer.js'; +import { RepoPermissionSyncer } from './repoPermissionSyncer.js'; import { PromClient } from './promClient.js'; import { RepoManager } from './repoManager.js'; import { AppContext } from "./types.js"; +import { UserPermissionSyncer } from "./userPermissionSyncer.js"; const logger = createLogger('backend-entrypoint'); @@ -68,20 +69,25 @@ const settings = await getSettings(env.CONFIG_PATH); const connectionManager = new ConnectionManager(prisma, settings, redis); const repoManager = new RepoManager(prisma, settings, redis, promClient, context); -const permissionSyncer = new RepoPermissionSyncer(prisma, redis); +const repoPermissionSyncer = new RepoPermissionSyncer(prisma, redis); +const userPermissionSyncer = new UserPermissionSyncer(prisma, redis); await repoManager.validateIndexedReposHaveShards(); const connectionManagerInterval = connectionManager.startScheduler(); 
const repoManagerInterval = repoManager.startScheduler(); -const permissionSyncerInterval = env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? permissionSyncer.startScheduler() : null; +const repoPermissionSyncerInterval = env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? repoPermissionSyncer.startScheduler() : null; +const userPermissionSyncerInterval = env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? userPermissionSyncer.startScheduler() : null; const cleanup = async (signal: string) => { logger.info(`Recieved ${signal}, cleaning up...`); - if (permissionSyncerInterval) { - clearInterval(permissionSyncerInterval); + if (userPermissionSyncerInterval) { + clearInterval(userPermissionSyncerInterval); + } + if (repoPermissionSyncerInterval) { + clearInterval(repoPermissionSyncerInterval); } clearInterval(connectionManagerInterval); @@ -89,7 +95,8 @@ const cleanup = async (signal: string) => { connectionManager.dispose(); repoManager.dispose(); - permissionSyncer.dispose(); + repoPermissionSyncer.dispose(); + userPermissionSyncer.dispose(); await prisma.$disconnect(); await redis.quit(); diff --git a/packages/backend/src/permissionSyncer.ts b/packages/backend/src/repoPermissionSyncer.ts similarity index 60% rename from packages/backend/src/permissionSyncer.ts rename to packages/backend/src/repoPermissionSyncer.ts index baf85f059..b6d8be8dd 100644 --- a/packages/backend/src/permissionSyncer.ts +++ b/packages/backend/src/repoPermissionSyncer.ts @@ -1,5 +1,5 @@ import * as Sentry from "@sentry/node"; -import { PrismaClient, Repo, RepoPermissionSyncStatus } from "@sourcebot/db"; +import { PrismaClient, Repo, RepoPermissionSyncJobStatus } from "@sourcebot/db"; import { createLogger } from "@sourcebot/logger"; import { BitbucketConnectionConfig } from "@sourcebot/schemas/v3/bitbucket.type"; import { GiteaConnectionConfig } from "@sourcebot/schemas/v3/gitea.type"; @@ -10,16 +10,16 @@ import { Redis } from 'ioredis'; import { env } from "./env.js"; import { 
createOctokitFromConfig, getUserIdsWithReadAccessToRepo } from "./github.js"; import { RepoWithConnections } from "./types.js"; +import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "./constants.js"; type RepoPermissionSyncJob = { - repoId: number; + jobId: string; } const QUEUE_NAME = 'repoPermissionSyncQueue'; -const logger = createLogger('permission-syncer'); +const logger = createLogger('repo-permission-syncer'); -const SUPPORTED_CODE_HOST_TYPES = ['github']; export class RepoPermissionSyncer { private queue: Queue; @@ -46,6 +46,7 @@ export class RepoPermissionSyncer { return setInterval(async () => { // @todo: make this configurable const thresholdDate = new Date(Date.now() - 1000 * 60 * 60 * 24); + const repos = await this.db.repo.findMany({ // Repos need their permissions to be synced against the code host when... where: { @@ -53,40 +54,47 @@ export class RepoPermissionSyncer { AND: [ { external_codeHostType: { - in: SUPPORTED_CODE_HOST_TYPES, + in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES, } }, - // and, they either require a sync (SYNC_NEEDED) or have been in a completed state (SYNCED or FAILED) - // for > some duration (default 24 hours) { OR: [ - { - permissionSyncStatus: RepoPermissionSyncStatus.SYNC_NEEDED - }, - { - AND: [ - { - OR: [ - { permissionSyncStatus: RepoPermissionSyncStatus.SYNCED }, - { permissionSyncStatus: RepoPermissionSyncStatus.FAILED }, - ] - }, - { - OR: [ - { permissionSyncJobLastCompletedAt: null }, - { permissionSyncJobLastCompletedAt: { lt: thresholdDate } } - ] - } - ] + { permissionSyncedAt: null }, + { permissionSyncedAt: { lt: thresholdDate } }, + ], + }, + { + NOT: { + permissionSyncJobs: { + some: { + OR: [ + // Don't schedule if there are active jobs + { + status: { + in: [ + RepoPermissionSyncJobStatus.PENDING, + RepoPermissionSyncJobStatus.IN_PROGRESS, + ], + } + }, + // Don't schedule if there are recent failed jobs (within the threshold date). Note `gt` is used here since this is a inverse condition. 
+ { + AND: [ + { status: RepoPermissionSyncJobStatus.FAILED }, + { completedAt: { gt: thresholdDate } }, + ] + } + ] + } } - ] + } }, ] } }); await this.schedulePermissionSync(repos); - }, 1000 * 30); + }, 1000 * 5); } public dispose() { @@ -96,15 +104,16 @@ export class RepoPermissionSyncer { private async schedulePermissionSync(repos: Repo[]) { await this.db.$transaction(async (tx) => { - await tx.repo.updateMany({ - where: { id: { in: repos.map(repo => repo.id) } }, - data: { permissionSyncStatus: RepoPermissionSyncStatus.IN_SYNC_QUEUE }, + const jobs = await tx.repoPermissionSyncJob.createManyAndReturn({ + data: repos.map(repo => ({ + repoId: repo.id, + })), }); - await this.queue.addBulk(repos.map(repo => ({ + await this.queue.addBulk(jobs.map((job) => ({ name: 'repoPermissionSyncJob', data: { - repoId: repo.id, + jobId: job.id, }, opts: { removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE, @@ -115,21 +124,25 @@ export class RepoPermissionSyncer { } private async runJob(job: Job) { - const id = job.data.repoId; - const repo = await this.db.repo.update({ + const id = job.data.jobId; + const { repo } = await this.db.repoPermissionSyncJob.update({ where: { - id + id, }, data: { - permissionSyncStatus: RepoPermissionSyncStatus.SYNCING, + status: RepoPermissionSyncJobStatus.IN_PROGRESS, }, - include: { - connections: { + select: { + repo: { include: { - connection: true, - }, - }, - }, + connections: { + include: { + connection: true, + } + } + } + } + } }); if (!repo) { @@ -171,34 +184,43 @@ export class RepoPermissionSyncer { return []; })(); - await this.db.repo.update({ - where: { - id: repo.id, - }, - data: { - permittedUsers: { - deleteMany: {}, + await this.db.$transaction([ + this.db.repo.update({ + where: { + id: repo.id, + }, + data: { + permittedUsers: { + deleteMany: {}, + } } - } - }); - - await this.db.userToRepoPermission.createMany({ - data: userIds.map(userId => ({ - userId, - repoId: repo.id, - })), - }); + }), + 
this.db.userToRepoPermission.createMany({ + data: userIds.map(userId => ({ + userId, + repoId: repo.id, + })), + }) + ]); } private async onJobCompleted(job: Job) { - const repo = await this.db.repo.update({ + const { repo } = await this.db.repoPermissionSyncJob.update({ where: { - id: job.data.repoId, + id: job.data.jobId, }, data: { - permissionSyncStatus: RepoPermissionSyncStatus.SYNCED, - permissionSyncJobLastCompletedAt: new Date(), + status: RepoPermissionSyncJobStatus.COMPLETED, + repo: { + update: { + permissionSyncedAt: new Date(), + } + }, + completedAt: new Date(), }, + select: { + repo: true + } }); logger.info(`Permissions synced for repo ${repo.displayName ?? repo.name}`); @@ -207,21 +229,25 @@ export class RepoPermissionSyncer { private async onJobFailed(job: Job | undefined, err: Error) { Sentry.captureException(err, { tags: { - repoId: job?.data.repoId, + jobId: job?.data.jobId, queue: QUEUE_NAME, } }); - const errorMessage = (repoName: string) => `Repo permission sync job failed for repo ${repoName}: ${err}`; + const errorMessage = (repoName: string) => `Repo permission sync job failed for repo ${repoName}: ${err.message}`; if (job) { - const repo = await this.db.repo.update({ + const { repo } = await this.db.repoPermissionSyncJob.update({ where: { - id: job?.data.repoId, + id: job.data.jobId, }, data: { - permissionSyncStatus: RepoPermissionSyncStatus.FAILED, - permissionSyncJobLastCompletedAt: new Date(), + status: RepoPermissionSyncJobStatus.FAILED, + completedAt: new Date(), + errorMessage: err.message, + }, + select: { + repo: true }, }); logger.error(errorMessage(repo.displayName ?? 
repo.name)); diff --git a/packages/backend/src/userPermissionSyncer.ts b/packages/backend/src/userPermissionSyncer.ts new file mode 100644 index 000000000..872b1f7a7 --- /dev/null +++ b/packages/backend/src/userPermissionSyncer.ts @@ -0,0 +1,249 @@ +import { Octokit } from "@octokit/rest"; +import * as Sentry from "@sentry/node"; +import { PrismaClient, User, UserPermissionSyncJobStatus } from "@sourcebot/db"; +import { createLogger } from "@sourcebot/logger"; +import { Job, Queue, Worker } from "bullmq"; +import { Redis } from "ioredis"; +import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "./constants.js"; +import { env } from "./env.js"; +import { getReposThatAuthenticatedUserHasReadAccessTo } from "./github.js"; + +const logger = createLogger('user-permission-syncer'); + +const QUEUE_NAME = 'userPermissionSyncQueue'; + +type UserPermissionSyncJob = { + jobId: string; +} + + +export class UserPermissionSyncer { + private queue: Queue; + private worker: Worker; + + constructor( + private db: PrismaClient, + redis: Redis, + ) { + this.queue = new Queue(QUEUE_NAME, { + connection: redis, + }); + this.worker = new Worker(QUEUE_NAME, this.runJob.bind(this), { + connection: redis, + concurrency: 1, + }); + this.worker.on('completed', this.onJobCompleted.bind(this)); + this.worker.on('failed', this.onJobFailed.bind(this)); + } + + public startScheduler() { + logger.debug('Starting scheduler'); + + return setInterval(async () => { + const thresholdDate = new Date(Date.now() - 1000 * 60 * 60 * 24); + + const users = await this.db.user.findMany({ + where: { + AND: [ + { + accounts: { + some: { + provider: { + in: PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES + } + } + } + }, + { + OR: [ + { permissionSyncedAt: null }, + { permissionSyncedAt: { lt: thresholdDate } }, + ] + }, + { + NOT: { + permissionSyncJobs: { + some: { + OR: [ + // Don't schedule if there are active jobs + { + status: { + in: [ + UserPermissionSyncJobStatus.PENDING, + 
UserPermissionSyncJobStatus.IN_PROGRESS, + ], + } + }, + // Don't schedule if there are recent failed jobs (within the threshold date). Note `gt` is used here since this is a inverse condition. + { + AND: [ + { status: UserPermissionSyncJobStatus.FAILED }, + { completedAt: { gt: thresholdDate } }, + ] + } + ] + } + } + } + }, + ] + } + }); + + await this.schedulePermissionSync(users); + }, 1000 * 5); + } + + public dispose() { + this.worker.close(); + this.queue.close(); + } + + private async schedulePermissionSync(users: User[]) { + await this.db.$transaction(async (tx) => { + const jobs = await tx.userPermissionSyncJob.createManyAndReturn({ + data: users.map(user => ({ + userId: user.id, + })), + }); + + await this.queue.addBulk(jobs.map((job) => ({ + name: 'userPermissionSyncJob', + data: { + jobId: job.id, + }, + opts: { + removeOnComplete: env.REDIS_REMOVE_ON_COMPLETE, + removeOnFail: env.REDIS_REMOVE_ON_FAIL, + } + }))) + }); + } + + private async runJob(job: Job) { + const id = job.data.jobId; + const { user } = await this.db.userPermissionSyncJob.update({ + where: { + id, + }, + data: { + status: UserPermissionSyncJobStatus.IN_PROGRESS, + }, + select: { + user: { + include: { + accounts: true, + } + } + } + }); + + if (!user) { + throw new Error(`User ${id} not found`); + } + + logger.info(`Syncing permissions for user ${user.email}...`); + + for (const account of user.accounts) { + const repoIds = await (async () => { + if (account.provider === 'github') { + // @todo: we will need to provide some mechanism for the user to provide a custom + // URL here. This will correspond to the host URL they are using for their GitHub + // instance. 
+ const octokit = new Octokit({ + auth: account.access_token, + // baseUrl: /* todo */ + }); + + const repoIds = await getReposThatAuthenticatedUserHasReadAccessTo(octokit); + + const repos = await this.db.repo.findMany({ + where: { + external_codeHostType: 'github', + external_id: { + in: repoIds, + } + } + }); + + return repos.map(repo => repo.id); + } + + return []; + })(); + + + await this.db.$transaction([ + this.db.user.update({ + where: { + id: user.id, + }, + data: { + accessibleRepos: { + deleteMany: {}, + } + } + }), + this.db.userToRepoPermission.createMany({ + data: repoIds.map(repoId => ({ + userId: user.id, + repoId, + })) + }) + ]); + } + } + + private async onJobCompleted(job: Job) { + const { user } = await this.db.userPermissionSyncJob.update({ + where: { + id: job.data.jobId, + }, + data: { + status: UserPermissionSyncJobStatus.COMPLETED, + user: { + update: { + permissionSyncedAt: new Date(), + } + }, + completedAt: new Date(), + }, + select: { + user: true + } + }); + + logger.info(`Permissions synced for user ${user.email}`); + } + + private async onJobFailed(job: Job | undefined, err: Error) { + Sentry.captureException(err, { + tags: { + jobId: job?.data.jobId, + queue: QUEUE_NAME, + } + }); + + const errorMessage = (email: string) => `User permission sync job failed for user ${email}: ${err.message}`; + + if (job) { + const { user } = await this.db.userPermissionSyncJob.update({ + where: { + id: job.data.jobId, + }, + data: { + status: UserPermissionSyncJobStatus.FAILED, + completedAt: new Date(), + errorMessage: err.message, + }, + select: { + user: true, + } + }); + + logger.error(errorMessage(user.email ?? 
user.id)); + } else { + logger.error(errorMessage('unknown user (id not found)')); + } + } +} \ No newline at end of file diff --git a/packages/db/prisma/migrations/20250827010055_repo_to_user_join_table/migration.sql b/packages/db/prisma/migrations/20250827010055_repo_to_user_join_table/migration.sql deleted file mode 100644 index 62a56d496..000000000 --- a/packages/db/prisma/migrations/20250827010055_repo_to_user_join_table/migration.sql +++ /dev/null @@ -1,14 +0,0 @@ --- CreateTable -CREATE TABLE "UserToRepoPermission" ( - "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, - "repoId" INTEGER NOT NULL, - "userId" TEXT NOT NULL, - - CONSTRAINT "UserToRepoPermission_pkey" PRIMARY KEY ("repoId","userId") -); - --- AddForeignKey -ALTER TABLE "UserToRepoPermission" ADD CONSTRAINT "UserToRepoPermission_repoId_fkey" FOREIGN KEY ("repoId") REFERENCES "Repo"("id") ON DELETE CASCADE ON UPDATE CASCADE; - --- AddForeignKey -ALTER TABLE "UserToRepoPermission" ADD CONSTRAINT "UserToRepoPermission_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/packages/db/prisma/migrations/20250918030650_add_permission_sync_tracking_to_repo_table/migration.sql b/packages/db/prisma/migrations/20250918030650_add_permission_sync_tracking_to_repo_table/migration.sql deleted file mode 100644 index ffbe376e3..000000000 --- a/packages/db/prisma/migrations/20250918030650_add_permission_sync_tracking_to_repo_table/migration.sql +++ /dev/null @@ -1,6 +0,0 @@ --- CreateEnum -CREATE TYPE "RepoPermissionSyncStatus" AS ENUM ('SYNC_NEEDED', 'IN_SYNC_QUEUE', 'SYNCING', 'SYNCED', 'FAILED'); - --- AlterTable -ALTER TABLE "Repo" ADD COLUMN "permissionSyncJobLastCompletedAt" TIMESTAMP(3), -ADD COLUMN "permissionSyncStatus" "RepoPermissionSyncStatus" NOT NULL DEFAULT 'SYNC_NEEDED'; diff --git a/packages/db/prisma/migrations/20250919065312_add_permission_sync_tables/migration.sql 
b/packages/db/prisma/migrations/20250919065312_add_permission_sync_tables/migration.sql new file mode 100644 index 000000000..1a8f05972 --- /dev/null +++ b/packages/db/prisma/migrations/20250919065312_add_permission_sync_tables/migration.sql @@ -0,0 +1,58 @@ +-- CreateEnum +CREATE TYPE "RepoPermissionSyncJobStatus" AS ENUM ('PENDING', 'IN_PROGRESS', 'COMPLETED', 'FAILED'); + +-- CreateEnum +CREATE TYPE "UserPermissionSyncJobStatus" AS ENUM ('PENDING', 'IN_PROGRESS', 'COMPLETED', 'FAILED'); + +-- AlterTable +ALTER TABLE "Repo" ADD COLUMN "permissionSyncedAt" TIMESTAMP(3); + +-- AlterTable +ALTER TABLE "User" ADD COLUMN "permissionSyncedAt" TIMESTAMP(3); + +-- CreateTable +CREATE TABLE "RepoPermissionSyncJob" ( + "id" TEXT NOT NULL, + "status" "RepoPermissionSyncJobStatus" NOT NULL DEFAULT 'PENDING', + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + "completedAt" TIMESTAMP(3), + "errorMessage" TEXT, + "repoId" INTEGER NOT NULL, + + CONSTRAINT "RepoPermissionSyncJob_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "UserPermissionSyncJob" ( + "id" TEXT NOT NULL, + "status" "UserPermissionSyncJobStatus" NOT NULL DEFAULT 'PENDING', + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "updatedAt" TIMESTAMP(3) NOT NULL, + "completedAt" TIMESTAMP(3), + "errorMessage" TEXT, + "userId" TEXT NOT NULL, + + CONSTRAINT "UserPermissionSyncJob_pkey" PRIMARY KEY ("id") +); + +-- CreateTable +CREATE TABLE "UserToRepoPermission" ( + "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP, + "repoId" INTEGER NOT NULL, + "userId" TEXT NOT NULL, + + CONSTRAINT "UserToRepoPermission_pkey" PRIMARY KEY ("repoId","userId") +); + +-- AddForeignKey +ALTER TABLE "RepoPermissionSyncJob" ADD CONSTRAINT "RepoPermissionSyncJob_repoId_fkey" FOREIGN KEY ("repoId") REFERENCES "Repo"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "UserPermissionSyncJob" ADD CONSTRAINT 
"UserPermissionSyncJob_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "UserToRepoPermission" ADD CONSTRAINT "UserToRepoPermission_repoId_fkey" FOREIGN KEY ("repoId") REFERENCES "Repo"("id") ON DELETE CASCADE ON UPDATE CASCADE; + +-- AddForeignKey +ALTER TABLE "UserToRepoPermission" ADD CONSTRAINT "UserToRepoPermission_userId_fkey" FOREIGN KEY ("userId") REFERENCES "User"("id") ON DELETE CASCADE ON UPDATE CASCADE; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index feaaff024..db0ed9062 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -30,14 +30,6 @@ enum ConnectionSyncStatus { FAILED } -enum RepoPermissionSyncStatus { - SYNC_NEEDED - IN_SYNC_QUEUE - SYNCING - SYNCED - FAILED -} - enum StripeSubscriptionStatus { ACTIVE INACTIVE @@ -67,9 +59,9 @@ model Repo { repoIndexingStatus RepoIndexingStatus @default(NEW) permittedUsers UserToRepoPermission[] - permissionSyncStatus RepoPermissionSyncStatus @default(SYNC_NEEDED) - /// When the repo permissions were last synced, either successfully or unsuccessfully. - permissionSyncJobLastCompletedAt DateTime? + permissionSyncJobs RepoPermissionSyncJob[] + /// When the permissions were last synced successfully. + permissionSyncedAt DateTime? // The id of the repo in the external service external_id String @@ -87,6 +79,26 @@ model Repo { @@index([orgId]) } +enum RepoPermissionSyncJobStatus { + PENDING + IN_PROGRESS + COMPLETED + FAILED +} + +model RepoPermissionSyncJob { + id String @id @default(cuid()) + status RepoPermissionSyncJobStatus @default(PENDING) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + completedAt DateTime? + + errorMessage String? 
+ + repo Repo @relation(fields: [repoId], references: [id], onDelete: Cascade) + repoId Int +} + model SearchContext { id Int @id @default(autoincrement()) @@ -301,6 +313,29 @@ model User { createdAt DateTime @default(now()) updatedAt DateTime @updatedAt + + permissionSyncJobs UserPermissionSyncJob[] + permissionSyncedAt DateTime? +} + +enum UserPermissionSyncJobStatus { + PENDING + IN_PROGRESS + COMPLETED + FAILED +} + +model UserPermissionSyncJob { + id String @id @default(cuid()) + status UserPermissionSyncJobStatus @default(PENDING) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + completedAt DateTime? + + errorMessage String? + + user User @relation(fields: [userId], references: [id], onDelete: Cascade) + userId String } model UserToRepoPermission { diff --git a/packages/web/src/ee/features/sso/sso.tsx b/packages/web/src/ee/features/sso/sso.tsx index 966f9c79f..07332b63c 100644 --- a/packages/web/src/ee/features/sso/sso.tsx +++ b/packages/web/src/ee/features/sso/sso.tsx @@ -27,7 +27,17 @@ export const getSSOProviders = (): Provider[] => { authorization: { url: `${baseUrl}/login/oauth/authorize`, params: { - scope: "read:user user:email", + scope: [ + 'read:user', + 'user:email', + // Permission syncing requires the `repo` in order to fetch repositories + // for the authenticated user. + // @see: https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user + ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? 
+ ['repo'] : + [] + ), + ].join(' '), }, }, token: { @@ -103,7 +113,7 @@ export const getSSOProviders = (): Provider[] => { } const oauth2Client = new OAuth2Client(); - + const { pubkeys } = await oauth2Client.getIapPublicKeys(); const ticket = await oauth2Client.verifySignedJwtWithCertsAsync( iapAssertion, From ca4999838387689a7c4bac2fbe8f8b9f68e2c183 Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 19 Sep 2025 15:46:40 -0700 Subject: [PATCH 06/14] resolve migration --- .../migration.sql | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename packages/db/prisma/migrations/{20250919065312_add_permission_sync_tables => 20250919224623_add_permission_sync_tables}/migration.sql (100%) diff --git a/packages/db/prisma/migrations/20250919065312_add_permission_sync_tables/migration.sql b/packages/db/prisma/migrations/20250919224623_add_permission_sync_tables/migration.sql similarity index 100% rename from packages/db/prisma/migrations/20250919065312_add_permission_sync_tables/migration.sql rename to packages/db/prisma/migrations/20250919224623_add_permission_sync_tables/migration.sql From 344ff5f10059582ee4b2f942c016d8be033ac60c Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 19 Sep 2025 16:22:33 -0700 Subject: [PATCH 07/14] allow visibility on public repositories --- packages/backend/src/repoCompileUtils.ts | 145 ++++++++++-------- .../migration.sql | 2 + packages/db/prisma/schema.prisma | 1 + packages/web/src/prisma.ts | 17 +- 4 files changed, 94 insertions(+), 71 deletions(-) create mode 100644 packages/db/prisma/migrations/20250919230022_add_is_public_column_to_repo_table/migration.sql diff --git a/packages/backend/src/repoCompileUtils.ts b/packages/backend/src/repoCompileUtils.ts index ab162ffee..098f39c9a 100644 --- a/packages/backend/src/repoCompileUtils.ts +++ b/packages/backend/src/repoCompileUtils.ts @@ -50,6 +50,7 @@ export const compileGithubConfig = async ( const repoDisplayName = repo.full_name; const repoName = path.join(repoNameRoot, repoDisplayName); 
const cloneUrl = new URL(repo.clone_url!); + const isPublic = repo.private === false; logger.debug(`Found github repo ${repoDisplayName} with webUrl: ${repo.html_url}`); @@ -64,6 +65,7 @@ export const compileGithubConfig = async ( imageUrl: repo.owner.avatar_url, isFork: repo.fork, isArchived: !!repo.archived, + isPublic: isPublic, org: { connect: { id: orgId, @@ -85,7 +87,7 @@ export const compileGithubConfig = async ( 'zoekt.github-forks': (repo.forks_count ?? 0).toString(), 'zoekt.archived': marshalBool(repo.archived), 'zoekt.fork': marshalBool(repo.fork), - 'zoekt.public': marshalBool(repo.private === false), + 'zoekt.public': marshalBool(isPublic), 'zoekt.display-name': repoDisplayName, }, branches: config.revisions?.branches ?? undefined, @@ -121,6 +123,8 @@ export const compileGitlabConfig = async ( const projectUrl = `${hostUrl}/${project.path_with_namespace}`; const cloneUrl = new URL(project.http_url_to_repo); const isFork = project.forked_from_project !== undefined; + // @todo: we will need to double check whether 'internal' should also be considered public or not. + const isPublic = project.visibility === 'public'; const repoDisplayName = project.path_with_namespace; const repoName = path.join(repoNameRoot, repoDisplayName); // project.avatar_url is not directly accessible with tokens; use the avatar API endpoint if available @@ -139,6 +143,7 @@ export const compileGitlabConfig = async ( displayName: repoDisplayName, imageUrl: avatarUrl, isFork: isFork, + isPublic: isPublic, isArchived: !!project.archived, org: { connect: { @@ -159,7 +164,7 @@ export const compileGitlabConfig = async ( 'zoekt.gitlab-forks': (project.forks_count ?? 0).toString(), 'zoekt.archived': marshalBool(project.archived), 'zoekt.fork': marshalBool(isFork), - 'zoekt.public': marshalBool(project.private === false), + 'zoekt.public': marshalBool(isPublic), 'zoekt.display-name': repoDisplayName, }, branches: config.revisions?.branches ?? 
undefined, @@ -197,6 +202,7 @@ export const compileGiteaConfig = async ( cloneUrl.host = configUrl.host const repoDisplayName = repo.full_name!; const repoName = path.join(repoNameRoot, repoDisplayName); + const isPublic = repo.internal === false && repo.private === false; logger.debug(`Found gitea repo ${repoDisplayName} with webUrl: ${repo.html_url}`); @@ -210,6 +216,7 @@ export const compileGiteaConfig = async ( displayName: repoDisplayName, imageUrl: repo.owner?.avatar_url, isFork: repo.fork!, + isPublic: isPublic, isArchived: !!repo.archived, org: { connect: { @@ -228,7 +235,7 @@ export const compileGiteaConfig = async ( 'zoekt.name': repoName, 'zoekt.archived': marshalBool(repo.archived), 'zoekt.fork': marshalBool(repo.fork!), - 'zoekt.public': marshalBool(repo.internal === false && repo.private === false), + 'zoekt.public': marshalBool(isPublic), 'zoekt.display-name': repoDisplayName, }, branches: config.revisions?.branches ?? undefined, @@ -411,6 +418,7 @@ export const compileBitbucketConfig = async ( name: repoName, displayName: displayName, isFork: isFork, + isPublic: isPublic, isArchived: isArchived, org: { connect: { @@ -546,6 +554,70 @@ export const compileGenericGitHostConfig_file = async ( } } + +export const compileGenericGitHostConfig_url = async ( + config: GenericGitHostConnectionConfig, + orgId: number, + connectionId: number, +) => { + const remoteUrl = new URL(config.url); + assert(remoteUrl.protocol === 'http:' || remoteUrl.protocol === 'https:', 'config.url must be a http:// or https:// URL'); + + const notFound: { + users: string[], + orgs: string[], + repos: string[], + } = { + users: [], + orgs: [], + repos: [], + }; + + // Validate that we are dealing with a valid git repo. 
+ const isGitRepo = await isUrlAValidGitRepo(remoteUrl.toString()); + if (!isGitRepo) { + notFound.repos.push(remoteUrl.toString()); + return { + repoData: [], + notFound, + } + } + + // @note: matches the naming here: + // https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293 + const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, '')); + + const repo: RepoData = { + external_codeHostType: 'generic-git-host', + external_codeHostUrl: remoteUrl.origin, + external_id: remoteUrl.toString(), + cloneUrl: remoteUrl.toString(), + name: repoName, + displayName: repoName, + isFork: false, + isArchived: false, + org: { + connect: { + id: orgId, + }, + }, + connections: { + create: { + connectionId: connectionId, + } + }, + metadata: { + branches: config.revisions?.branches ?? undefined, + tags: config.revisions?.tags ?? undefined, + } + }; + + return { + repoData: [repo], + notFound, + } +} + export const compileAzureDevOpsConfig = async ( config: AzureDevOpsConnectionConfig, connectionId: number, @@ -569,6 +641,7 @@ export const compileAzureDevOpsConfig = async ( const repoDisplayName = `${repo.project.name}/${repo.name}`; const repoName = path.join(repoNameRoot, repoDisplayName); + const isPublic = repo.project.visibility === ProjectVisibility.Public; if (!repo.remoteUrl) { throw new Error(`No remoteUrl found for repository ${repoDisplayName}`); @@ -593,6 +666,7 @@ export const compileAzureDevOpsConfig = async ( imageUrl: null, isFork: !!repo.isFork, isArchived: false, + isPublic: isPublic, org: { connect: { id: orgId, @@ -610,7 +684,7 @@ export const compileAzureDevOpsConfig = async ( 'zoekt.name': repoName, 'zoekt.archived': marshalBool(false), 'zoekt.fork': marshalBool(!!repo.isFork), - 'zoekt.public': marshalBool(repo.project.visibility === ProjectVisibility.Public), + 'zoekt.public': marshalBool(isPublic), 'zoekt.display-name': repoDisplayName, }, branches: config.revisions?.branches ?? 
undefined, @@ -626,66 +700,3 @@ export const compileAzureDevOpsConfig = async ( notFound, }; } - -export const compileGenericGitHostConfig_url = async ( - config: GenericGitHostConnectionConfig, - orgId: number, - connectionId: number, -) => { - const remoteUrl = new URL(config.url); - assert(remoteUrl.protocol === 'http:' || remoteUrl.protocol === 'https:', 'config.url must be a http:// or https:// URL'); - - const notFound: { - users: string[], - orgs: string[], - repos: string[], - } = { - users: [], - orgs: [], - repos: [], - }; - - // Validate that we are dealing with a valid git repo. - const isGitRepo = await isUrlAValidGitRepo(remoteUrl.toString()); - if (!isGitRepo) { - notFound.repos.push(remoteUrl.toString()); - return { - repoData: [], - notFound, - } - } - - // @note: matches the naming here: - // https://github.com/sourcebot-dev/zoekt/blob/main/gitindex/index.go#L293 - const repoName = path.join(remoteUrl.host, remoteUrl.pathname.replace(/\.git$/, '')); - - const repo: RepoData = { - external_codeHostType: 'generic-git-host', - external_codeHostUrl: remoteUrl.origin, - external_id: remoteUrl.toString(), - cloneUrl: remoteUrl.toString(), - name: repoName, - displayName: repoName, - isFork: false, - isArchived: false, - org: { - connect: { - id: orgId, - }, - }, - connections: { - create: { - connectionId: connectionId, - } - }, - metadata: { - branches: config.revisions?.branches ?? undefined, - tags: config.revisions?.tags ?? 
undefined, - } - }; - - return { - repoData: [repo], - notFound, - } -} \ No newline at end of file diff --git a/packages/db/prisma/migrations/20250919230022_add_is_public_column_to_repo_table/migration.sql b/packages/db/prisma/migrations/20250919230022_add_is_public_column_to_repo_table/migration.sql new file mode 100644 index 000000000..909a6d466 --- /dev/null +++ b/packages/db/prisma/migrations/20250919230022_add_is_public_column_to_repo_table/migration.sql @@ -0,0 +1,2 @@ +-- AlterTable +ALTER TABLE "Repo" ADD COLUMN "isPublic" BOOLEAN; diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index db0ed9062..45d9452ca 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -51,6 +51,7 @@ model Repo { indexedAt DateTime? isFork Boolean isArchived Boolean + isPublic Boolean? metadata Json // For schema see repoMetadataSchema in packages/backend/src/types.ts cloneUrl String webUrl String? diff --git a/packages/web/src/prisma.ts b/packages/web/src/prisma.ts index 7807d9c97..5904c5227 100644 --- a/packages/web/src/prisma.ts +++ b/packages/web/src/prisma.ts @@ -30,11 +30,20 @@ export const userScopedPrismaClientExtension = (userId?: string) => { if ('where' in args) { args.where = { ...args.where, - permittedUsers: { - some: { - userId, + OR: [ + // Only include repos that are permitted to the user, + { + permittedUsers: { + some: { + userId, + } + } + }, + // or are public. 
+ { + isPublic: true, } - } + ] } } From c7e2f5ae06787483c8493966a600216c3498937b Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 19 Sep 2025 17:27:36 -0700 Subject: [PATCH 08/14] fix tests --- packages/web/src/__mocks__/prisma.ts | 6 ++++-- packages/web/src/withAuthV2.test.ts | 4 ++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/packages/web/src/__mocks__/prisma.ts b/packages/web/src/__mocks__/prisma.ts index 66470017c..4db4de460 100644 --- a/packages/web/src/__mocks__/prisma.ts +++ b/packages/web/src/__mocks__/prisma.ts @@ -1,6 +1,6 @@ import { SINGLE_TENANT_ORG_DOMAIN, SINGLE_TENANT_ORG_ID, SINGLE_TENANT_ORG_NAME } from '@/lib/constants'; import { ApiKey, Org, PrismaClient, User } from '@prisma/client'; -import { beforeEach } from 'vitest'; +import { beforeEach, vi } from 'vitest'; import { mockDeep, mockReset } from 'vitest-mock-extended'; beforeEach(() => { @@ -43,6 +43,8 @@ export const MOCK_USER: User = { updatedAt: new Date(), hashedPassword: null, emailVerified: null, - image: null + image: null, + permissionSyncedAt: null } +export const userScopedPrismaClientExtension = vi.fn(); \ No newline at end of file diff --git a/packages/web/src/withAuthV2.test.ts b/packages/web/src/withAuthV2.test.ts index 7056cbefd..23d7d05b7 100644 --- a/packages/web/src/withAuthV2.test.ts +++ b/packages/web/src/withAuthV2.test.ts @@ -188,6 +188,7 @@ describe('getAuthContext', () => { }, org: MOCK_ORG, role: OrgRole.MEMBER, + prisma: undefined, }); }); @@ -217,6 +218,7 @@ describe('getAuthContext', () => { }, org: MOCK_ORG, role: OrgRole.OWNER, + prisma: undefined, }); }); @@ -241,6 +243,7 @@ describe('getAuthContext', () => { }, org: MOCK_ORG, role: OrgRole.GUEST, + prisma: undefined, }); }); @@ -256,6 +259,7 @@ describe('getAuthContext', () => { user: undefined, org: MOCK_ORG, role: OrgRole.GUEST, + prisma: undefined, }); }); }); From 0e527f4e08634e318b007aeaf7685f3d878ac679 Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 19 Sep 2025 18:05:18 -0700 
Subject: [PATCH 09/14] move feature to EE --- LICENSE.md | 2 +- .../src/{ => ee}/repoPermissionSyncer.ts | 13 +++++++++---- .../src/{ => ee}/userPermissionSyncer.ts | 11 ++++++++--- packages/backend/src/env.ts | 2 +- packages/backend/src/index.ts | 19 ++++++++++++++----- packages/shared/src/entitlements.ts | 7 ++++--- packages/web/src/actions.ts | 2 +- packages/web/src/ee/features/sso/sso.tsx | 5 +++-- packages/web/src/env.mjs | 2 +- packages/web/src/prisma.ts | 3 ++- 10 files changed, 44 insertions(+), 22 deletions(-) rename packages/backend/src/{ => ee}/repoPermissionSyncer.ts (96%) rename packages/backend/src/{ => ee}/userPermissionSyncer.ts (95%) diff --git a/LICENSE.md b/LICENSE.md index 93c142545..315bde810 100644 --- a/LICENSE.md +++ b/LICENSE.md @@ -2,7 +2,7 @@ Copyright (c) 2025 Taqla Inc. Portions of this software are licensed as follows: -- All content that resides under the "ee/", "packages/web/src/ee/", and "packages/shared/src/ee/" directories of this repository, if these directories exist, is licensed under the license defined in "ee/LICENSE". +- All content that resides under the "ee/", "packages/web/src/ee/", "packages/backend/src/ee/", and "packages/shared/src/ee/" directories of this repository, if these directories exist, is licensed under the license defined in "ee/LICENSE". - All third party components incorporated into the Sourcebot Software are licensed under the original license provided by the owner of the applicable component. - Content outside of the above mentioned directories or restrictions above is available under the "Functional Source License" as defined below. 
diff --git a/packages/backend/src/repoPermissionSyncer.ts b/packages/backend/src/ee/repoPermissionSyncer.ts similarity index 96% rename from packages/backend/src/repoPermissionSyncer.ts rename to packages/backend/src/ee/repoPermissionSyncer.ts index b6d8be8dd..e4bec4c46 100644 --- a/packages/backend/src/repoPermissionSyncer.ts +++ b/packages/backend/src/ee/repoPermissionSyncer.ts @@ -7,10 +7,11 @@ import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type"; import { Job, Queue, Worker } from 'bullmq'; import { Redis } from 'ioredis'; -import { env } from "./env.js"; -import { createOctokitFromConfig, getUserIdsWithReadAccessToRepo } from "./github.js"; -import { RepoWithConnections } from "./types.js"; -import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "./constants.js"; +import { env } from "../env.js"; +import { createOctokitFromConfig, getUserIdsWithReadAccessToRepo } from "../github.js"; +import { RepoWithConnections } from "../types.js"; +import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; +import { hasEntitlement } from "@sourcebot/shared"; type RepoPermissionSyncJob = { jobId: string; @@ -41,6 +42,10 @@ export class RepoPermissionSyncer { } public startScheduler() { + if (!hasEntitlement('permission-syncing')) { + throw new Error('Permission syncing is not supported in current plan.'); + } + logger.debug('Starting scheduler'); return setInterval(async () => { diff --git a/packages/backend/src/userPermissionSyncer.ts b/packages/backend/src/ee/userPermissionSyncer.ts similarity index 95% rename from packages/backend/src/userPermissionSyncer.ts rename to packages/backend/src/ee/userPermissionSyncer.ts index 872b1f7a7..f8b39f457 100644 --- a/packages/backend/src/userPermissionSyncer.ts +++ b/packages/backend/src/ee/userPermissionSyncer.ts @@ -4,9 +4,10 @@ import { PrismaClient, User, UserPermissionSyncJobStatus } from "@sourcebot/db"; import 
{ createLogger } from "@sourcebot/logger"; import { Job, Queue, Worker } from "bullmq"; import { Redis } from "ioredis"; -import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "./constants.js"; -import { env } from "./env.js"; -import { getReposThatAuthenticatedUserHasReadAccessTo } from "./github.js"; +import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; +import { env } from "../env.js"; +import { getReposThatAuthenticatedUserHasReadAccessTo } from "../github.js"; +import { hasEntitlement } from "@sourcebot/shared"; const logger = createLogger('user-permission-syncer'); @@ -37,6 +38,10 @@ export class UserPermissionSyncer { } public startScheduler() { + if (!hasEntitlement('permission-syncing')) { + throw new Error('Permission syncing is not supported in current plan.'); + } + logger.debug('Starting scheduler'); return setInterval(async () => { diff --git a/packages/backend/src/env.ts b/packages/backend/src/env.ts index 5d0562844..4715b635e 100644 --- a/packages/backend/src/env.ts +++ b/packages/backend/src/env.ts @@ -53,7 +53,7 @@ export const env = createEnv({ GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS: numberSchema.default(60 * 10), - EXPERIMENT_PERMISSION_SYNC_ENABLED: booleanSchema.default("false"), + EXPERIMENT_EE_PERMISSION_SYNC_ENABLED: booleanSchema.default('false'), }, runtimeEnv: process.env, emptyStringAsUndefined: true, diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index 0210b3ba0..2182fbad9 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -2,7 +2,7 @@ import "./instrument.js"; import { PrismaClient } from "@sourcebot/db"; import { createLogger } from "@sourcebot/logger"; -import { loadConfig } from '@sourcebot/shared'; +import { hasEntitlement, loadConfig } from '@sourcebot/shared'; import { existsSync } from 'fs'; import { mkdir } from 'fs/promises'; import { Redis } from 'ioredis'; @@ -10,11 +10,11 @@ import path from 'path'; import { ConnectionManager } from 
'./connectionManager.js'; import { DEFAULT_SETTINGS } from './constants.js'; import { env } from "./env.js"; -import { RepoPermissionSyncer } from './repoPermissionSyncer.js'; +import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js'; import { PromClient } from './promClient.js'; import { RepoManager } from './repoManager.js'; import { AppContext } from "./types.js"; -import { UserPermissionSyncer } from "./userPermissionSyncer.js"; +import { UserPermissionSyncer } from "./ee/userPermissionSyncer.js"; const logger = createLogger('backend-entrypoint'); @@ -76,9 +76,18 @@ await repoManager.validateIndexedReposHaveShards(); const connectionManagerInterval = connectionManager.startScheduler(); const repoManagerInterval = repoManager.startScheduler(); -const repoPermissionSyncerInterval = env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? repoPermissionSyncer.startScheduler() : null; -const userPermissionSyncerInterval = env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? userPermissionSyncer.startScheduler() : null; +let repoPermissionSyncerInterval: NodeJS.Timeout | null = null; +let userPermissionSyncerInterval: NodeJS.Timeout | null = null; + +if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('permission-syncing')) { + logger.error('Permission syncing is not supported in current plan. 
Please contact support@sourcebot.dev for assistance.'); + process.exit(1); +} +else if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing')) { + repoPermissionSyncerInterval = repoPermissionSyncer.startScheduler(); + userPermissionSyncerInterval = userPermissionSyncer.startScheduler(); +} const cleanup = async (signal: string) => { logger.info(`Recieved ${signal}, cleaning up...`); diff --git a/packages/shared/src/entitlements.ts b/packages/shared/src/entitlements.ts index 965989c14..be40b9275 100644 --- a/packages/shared/src/entitlements.ts +++ b/packages/shared/src/entitlements.ts @@ -38,15 +38,16 @@ const entitlements = [ "sso", "code-nav", "audit", - "analytics" + "analytics", + "permission-syncing" ] as const; export type Entitlement = (typeof entitlements)[number]; const entitlementsByPlan: Record = { oss: ["anonymous-access"], "cloud:team": ["billing", "multi-tenancy", "sso", "code-nav"], - "self-hosted:enterprise": ["search-contexts", "sso", "code-nav", "audit", "analytics"], - "self-hosted:enterprise-unlimited": ["search-contexts", "anonymous-access", "sso", "code-nav", "audit", "analytics"], + "self-hosted:enterprise": ["search-contexts", "sso", "code-nav", "audit", "analytics", "permission-syncing"], + "self-hosted:enterprise-unlimited": ["search-contexts", "anonymous-access", "sso", "code-nav", "audit", "analytics", "permission-syncing"], // Special entitlement for https://demo.sourcebot.dev "cloud:demo": ["anonymous-access", "code-nav", "search-contexts"], } as const; diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 215ebef6b..77301c771 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -1035,7 +1035,7 @@ export const flagReposForIndex = async (repoIds: number[], domain: string) => se where: { id: { in: repoIds }, orgId: org.id, - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { + ...(env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' ? 
{ permittedUsers: { some: { userId: userId, diff --git a/packages/web/src/ee/features/sso/sso.tsx b/packages/web/src/ee/features/sso/sso.tsx index 07332b63c..0f14a364f 100644 --- a/packages/web/src/ee/features/sso/sso.tsx +++ b/packages/web/src/ee/features/sso/sso.tsx @@ -12,6 +12,7 @@ import Credentials from "next-auth/providers/credentials"; import type { User as AuthJsUser } from "next-auth"; import { onCreateUser } from "@/lib/authUtils"; import { createLogger } from "@sourcebot/logger"; +import { hasEntitlement } from "@sourcebot/shared"; const logger = createLogger('web-sso'); @@ -30,10 +31,10 @@ export const getSSOProviders = (): Provider[] => { scope: [ 'read:user', 'user:email', - // Permission syncing requires the `repo` in order to fetch repositories + // Permission syncing requires the `repo` scope in order to fetch repositories // for the authenticated user. // @see: https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? + ...(env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing') ? ['repo'] : [] ), diff --git a/packages/web/src/env.mjs b/packages/web/src/env.mjs index b6fcb6ff7..922b2b840 100644 --- a/packages/web/src/env.mjs +++ b/packages/web/src/env.mjs @@ -137,7 +137,7 @@ export const env = createEnv({ // @NOTE: Take care to update actions.ts when changing the name of this. 
EXPERIMENT_SELF_SERVE_REPO_INDEXING_GITHUB_TOKEN: z.string().optional(), - EXPERIMENT_PERMISSION_SYNC_ENABLED: booleanSchema.default('false'), + EXPERIMENT_EE_PERMISSION_SYNC_ENABLED: booleanSchema.default('false'), }, // @NOTE: Please make sure of the following: // - Make sure you destructure all client variables in diff --git a/packages/web/src/prisma.ts b/packages/web/src/prisma.ts index 5904c5227..1908c5fb3 100644 --- a/packages/web/src/prisma.ts +++ b/packages/web/src/prisma.ts @@ -1,6 +1,7 @@ import 'server-only'; import { env } from "@/env.mjs"; import { Prisma, PrismaClient } from "@sourcebot/db"; +import { hasEntitlement } from "@sourcebot/shared"; // @see: https://authjs.dev/getting-started/adapters/prisma const globalForPrisma = globalThis as unknown as { prisma: PrismaClient } @@ -24,7 +25,7 @@ export const userScopedPrismaClientExtension = (userId?: string) => { (prisma) => { return prisma.$extends({ query: { - ...(env.EXPERIMENT_PERMISSION_SYNC_ENABLED === 'true' ? { + ...(env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing') ? 
{ repo: { $allOperations({ args, query }) { if ('where' in args) { From f90db801cf0cba05f88e568be2c5e9f20a159d64 Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 19 Sep 2025 21:29:05 -0700 Subject: [PATCH 10/14] docs and minor tweaks --- docs/docs.json | 1 + docs/docs/configuration/config-file.mdx | 30 ++++---- .../configuration/environment-variables.mdx | 1 + docs/docs/connections/github.mdx | 6 +- docs/docs/features/agents/overview.mdx | 6 +- docs/docs/features/permission-syncing.mdx | 72 +++++++++++++++++++ .../snippets/experimental-feature-warning.mdx | 4 ++ docs/snippets/schemas/v3/index.schema.mdx | 20 ++++++ packages/backend/src/constants.ts | 4 +- .../backend/src/ee/repoPermissionSyncer.ts | 15 ++-- .../backend/src/ee/userPermissionSyncer.ts | 20 +++--- packages/backend/src/env.ts | 1 + packages/backend/src/github.ts | 13 +++- packages/backend/src/index.ts | 4 +- packages/schemas/src/v3/index.schema.ts | 20 ++++++ packages/schemas/src/v3/index.type.ts | 8 +++ .../src/ee/features/sso/{sso.tsx => sso.ts} | 0 schemas/v3/index.json | 10 +++ 18 files changed, 196 insertions(+), 39 deletions(-) create mode 100644 docs/docs/features/permission-syncing.mdx create mode 100644 docs/snippets/experimental-feature-warning.mdx rename packages/web/src/ee/features/sso/{sso.tsx => sso.ts} (100%) diff --git a/docs/docs.json b/docs/docs.json index 4710e6d04..d15ab85cb 100644 --- a/docs/docs.json +++ b/docs/docs.json @@ -46,6 +46,7 @@ "docs/features/code-navigation", "docs/features/analytics", "docs/features/mcp-server", + "docs/features/permission-syncing", { "group": "Agents", "tag": "experimental", diff --git a/docs/docs/configuration/config-file.mdx b/docs/docs/configuration/config-file.mdx index c0e89c772..58d7a1b13 100644 --- a/docs/docs/configuration/config-file.mdx +++ b/docs/docs/configuration/config-file.mdx @@ -33,17 +33,19 @@ Sourcebot syncs the config file on startup, and automatically whenever a change The following are settings that can be provided in your 
config file to modify Sourcebot's behavior -| Setting | Type | Default | Minimum | Description / Notes | -|-------------------------------------------|---------|------------|---------|----------------------------------------------------------------------------------------| -| `maxFileSize` | number | 2 MB | 1 | Maximum size (bytes) of a file to index. Files exceeding this are skipped. | -| `maxTrigramCount` | number | 20 000 | 1 | Maximum trigrams per document. Larger files are skipped. | -| `reindexIntervalMs` | number | 1 hour | 1 | Interval at which all repositories are re‑indexed. | -| `resyncConnectionIntervalMs` | number | 24 hours | 1 | Interval for checking connections that need re‑syncing. | -| `resyncConnectionPollingIntervalMs` | number | 1 second | 1 | DB polling rate for connections that need re‑syncing. | -| `reindexRepoPollingIntervalMs` | number | 1 second | 1 | DB polling rate for repos that should be re‑indexed. | -| `maxConnectionSyncJobConcurrency` | number | 8 | 1 | Concurrent connection‑sync jobs. | -| `maxRepoIndexingJobConcurrency` | number | 8 | 1 | Concurrent repo‑indexing jobs. | -| `maxRepoGarbageCollectionJobConcurrency` | number | 8 | 1 | Concurrent repo‑garbage‑collection jobs. | -| `repoGarbageCollectionGracePeriodMs` | number | 10 seconds | 1 | Grace period to avoid deleting shards while loading. | -| `repoIndexTimeoutMs` | number | 2 hours | 1 | Timeout for a single repo‑indexing run. | -| `enablePublicAccess` **(deprecated)** | boolean | false | — | Use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead. | +| Setting | Type | Default | Minimum | Description / Notes | +|-------------------------------------------------|---------|------------|---------|----------------------------------------------------------------------------------------| +| `maxFileSize` | number | 2 MB | 1 | Maximum size (bytes) of a file to index. Files exceeding this are skipped. 
| +| `maxTrigramCount` | number | 20 000 | 1 | Maximum trigrams per document. Larger files are skipped. | +| `reindexIntervalMs` | number | 1 hour | 1 | Interval at which all repositories are re‑indexed. | +| `resyncConnectionIntervalMs` | number | 24 hours | 1 | Interval for checking connections that need re‑syncing. | +| `resyncConnectionPollingIntervalMs` | number | 1 second | 1 | DB polling rate for connections that need re‑syncing. | +| `reindexRepoPollingIntervalMs` | number | 1 second | 1 | DB polling rate for repos that should be re‑indexed. | +| `maxConnectionSyncJobConcurrency` | number | 8 | 1 | Concurrent connection‑sync jobs. | +| `maxRepoIndexingJobConcurrency` | number | 8 | 1 | Concurrent repo‑indexing jobs. | +| `maxRepoGarbageCollectionJobConcurrency` | number | 8 | 1 | Concurrent repo‑garbage‑collection jobs. | +| `repoGarbageCollectionGracePeriodMs` | number | 10 seconds | 1 | Grace period to avoid deleting shards while loading. | +| `repoIndexTimeoutMs` | number | 2 hours | 1 | Timeout for a single repo‑indexing run. | +| `enablePublicAccess` **(deprecated)** | boolean | false | — | Use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead. | +| `experiment_repoDrivenPermissionSyncIntervalMs` | number | 24 hours | 1 | Interval at which the repo permission syncer should run. | +| `experiment_userDrivenPermissionSyncIntervalMs` | number | 24 hours | 1 | Interval at which the user permission syncer should run. | diff --git a/docs/docs/configuration/environment-variables.mdx b/docs/docs/configuration/environment-variables.mdx index d6aab9ebc..70da72d8a 100644 --- a/docs/docs/configuration/environment-variables.mdx +++ b/docs/docs/configuration/environment-variables.mdx @@ -59,6 +59,7 @@ The following environment variables allow you to configure your Sourcebot deploy | `AUTH_EE_OKTA_ISSUER` | `-` |

The issuer URL for Okta SSO authentication.

| | `AUTH_EE_GCP_IAP_ENABLED` | `false` |

When enabled, allows Sourcebot to automatically register/login from a successful GCP IAP redirect

| | `AUTH_EE_GCP_IAP_AUDIENCE` | - |

The GCP IAP audience to use when verifying JWT tokens. Must be set to enable GCP IAP JIT provisioning

| +| `EXPERIMENT_EE_PERMISSION_SYNC_ENABLED` | `false` |

Enables [permission syncing](/docs/features/permission-syncing).

| ### Review Agent Environment Variables diff --git a/docs/docs/connections/github.mdx b/docs/docs/connections/github.mdx index c6e5f5c60..98fc5b507 100644 --- a/docs/docs/connections/github.mdx +++ b/docs/docs/connections/github.mdx @@ -196,4 +196,8 @@ To connect to a GitHub host other than `github.com`, provide the `url` property - \ No newline at end of file + + +## See also + +- [Syncing GitHub Access permissions to Sourcebot](/docs/features/permission-syncing#github) diff --git a/docs/docs/features/agents/overview.mdx b/docs/docs/features/agents/overview.mdx index 8c3e9335f..5b3bea6f5 100644 --- a/docs/docs/features/agents/overview.mdx +++ b/docs/docs/features/agents/overview.mdx @@ -3,9 +3,9 @@ title: "Agents Overview" sidebarTitle: "Overview" --- - -Agents are currently a experimental feature. Have an idea for an agent that we haven't built? Submit a [feature request](https://github.com/sourcebot-dev/sourcebot/issues/new?template=feature_request.md) on our GitHub. - +import ExperimentalFeatureWarning from '/snippets/experimental-feature-warning.mdx' + + Agents are automations that leverage the code indexed on Sourcebot to perform a specific task. Once you've setup Sourcebot, check out the guides below to configure additional agents. diff --git a/docs/docs/features/permission-syncing.mdx b/docs/docs/features/permission-syncing.mdx new file mode 100644 index 000000000..527b81f01 --- /dev/null +++ b/docs/docs/features/permission-syncing.mdx @@ -0,0 +1,72 @@ +--- +title: "Permission syncing" +sidebarTitle: "Permission syncing" +tag: "experimental" +--- + +import LicenseKeyRequired from '/snippets/license-key-required.mdx' +import ExperimentalFeatureWarning from '/snippets/experimental-feature-warning.mdx' + + + + +# Overview + +Permission syncing allows you to sync Access Control Lists (ACLs) from a code host to Sourcebot. 
When configured, users signed into Sourcebot (via the code host's OAuth provider) will only be able to access repositories that they have access to on the code host. Practically, this means: + +- Code Search results will only include repositories that the user has access to. +- Code navigation results will only include repositories that the user has access to. +- Ask Sourcebot (and the underlying LLM) will only have access to repositories that the user has access to. +- File browsing is scoped to the repositories that the user has access to. + +Permission syncing can be enabled by setting the `EXPERIMENT_EE_PERMISSION_SYNC_ENABLED` environment variable to `true`. + +```bash +docker run \ + -e EXPERIMENT_EE_PERMISSION_SYNC_ENABLED=true \ + /* additional args */ \ + ghcr.io/sourcebot-dev/sourcebot:latest +``` + +## Platform support + +We are actively working on supporting more code hosts. If you'd like to see a specific code host supported, please [reach out](https://www.sourcebot.dev/contact). + +| Platform | Permission syncing | +|:----------|------------------------------| +| [GitHub (GitHub.com, GHEC & GHES)](/docs/features/permission-syncing#github) | ✅ | +| GitLab | 🛑 | +| Bitbucket Cloud | 🛑 | +| Bitbucket Data Center | 🛑 | +| Gitea | 🛑 | +| Gerrit | 🛑 | +| Generic git host | 🛑 | + +# Getting started + +## GitHub + +Prerequisite: [Add GitHub as an OAuth provider](/docs/configuration/auth/providers#github). + +Permission syncing works with **GitHub.com**, **GitHub Enterprise Cloud**, and **GitHub Enterprise Server**. For organization-owned repositories, users that have **read-only** access (or above) via the following methods will have their access synced to Sourcebot: +- Outside collaborators +- Organization members that are direct collaborators +- Organization members with access through team memberships +- Organization members with access through default organization permissions +- Organization owners. 
+ +**Notes:** +- A GitHub OAuth provider must be configured to (1) correlate a Sourcebot user with a GitHub user, and (2) list repositories that the user has access to for [User driven syncing](/docs/features/permission-syncing#how-it-works). +- OAuth tokens must include the `repo` scope in order to use the [List repositories for the authenticated user API](https://docs.github.com/en/rest/repos/repos?apiVersion=2022-11-28#list-repositories-for-the-authenticated-user) during [User driven syncing](/docs/features/permission-syncing#how-it-works). Sourcebot **will only** use this token for **reads**. + +# How it works + +Permission syncing works by periodically syncing ACLs from the code host(s) to Sourcebot to build an internal mapping between Users and Repositories. This mapping is hydrated in two directions: +- **User driven** : fetches the list of all repositories that a given user has access to. +- **Repo driven** : fetches the list of all users that have access to a given repository. + +User driven and repo driven syncing occurs every 24 hours by default. These intervals can be configured using the following settings in the [config file](/docs/configuration/config-file): +| Setting | Type | Default | Minimum | +|-------------------------------------------------|---------|------------|---------| +| `experiment_repoDrivenPermissionSyncIntervalMs` | number | 24 hours | 1 | +| `experiment_userDrivenPermissionSyncIntervalMs` | number | 24 hours | 1 | \ No newline at end of file diff --git a/docs/snippets/experimental-feature-warning.mdx b/docs/snippets/experimental-feature-warning.mdx new file mode 100644 index 000000000..b00db5d88 --- /dev/null +++ b/docs/snippets/experimental-feature-warning.mdx @@ -0,0 +1,4 @@ + + +This is an experimental feature. Certain functionality may be buggy or incomplete, and breaking changes may ship in non-major releases. Have feedback? Submit an [issue](https://github.com/sourcebot-dev/sourcebot/issues) on GitHub. 
+ diff --git a/docs/snippets/schemas/v3/index.schema.mdx b/docs/snippets/schemas/v3/index.schema.mdx index 8f3a4e6f0..82e18d114 100644 --- a/docs/snippets/schemas/v3/index.schema.mdx +++ b/docs/snippets/schemas/v3/index.schema.mdx @@ -69,6 +69,16 @@ "deprecated": true, "description": "This setting is deprecated. Please use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead.", "default": false + }, + "experiment_repoDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the repo permission syncer should run. Defaults to 24 hours.", + "minimum": 1 + }, + "experiment_userDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the user permission syncer should run. Defaults to 24 hours.", + "minimum": 1 } }, "additionalProperties": false @@ -195,6 +205,16 @@ "deprecated": true, "description": "This setting is deprecated. Please use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead.", "default": false + }, + "experiment_repoDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the repo permission syncer should run. Defaults to 24 hours.", + "minimum": 1 + }, + "experiment_userDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the user permission syncer should run. 
Defaults to 24 hours.", + "minimum": 1 } }, "additionalProperties": false diff --git a/packages/backend/src/constants.ts b/packages/backend/src/constants.ts index 3329f3d8b..89778fb2e 100644 --- a/packages/backend/src/constants.ts +++ b/packages/backend/src/constants.ts @@ -15,7 +15,9 @@ export const DEFAULT_SETTINGS: Settings = { maxRepoGarbageCollectionJobConcurrency: 8, repoGarbageCollectionGracePeriodMs: 10 * 1000, // 10 seconds repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours - enablePublicAccess: false // deprected, use FORCE_ENABLE_ANONYMOUS_ACCESS instead + enablePublicAccess: false, // deprected, use FORCE_ENABLE_ANONYMOUS_ACCESS instead + experiment_repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours + experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours } export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [ diff --git a/packages/backend/src/ee/repoPermissionSyncer.ts b/packages/backend/src/ee/repoPermissionSyncer.ts index e4bec4c46..08a0cde30 100644 --- a/packages/backend/src/ee/repoPermissionSyncer.ts +++ b/packages/backend/src/ee/repoPermissionSyncer.ts @@ -9,7 +9,7 @@ import { Job, Queue, Worker } from 'bullmq'; import { Redis } from 'ioredis'; import { env } from "../env.js"; import { createOctokitFromConfig, getUserIdsWithReadAccessToRepo } from "../github.js"; -import { RepoWithConnections } from "../types.js"; +import { RepoWithConnections, Settings } from "../types.js"; import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; import { hasEntitlement } from "@sourcebot/shared"; @@ -28,6 +28,7 @@ export class RepoPermissionSyncer { constructor( private db: PrismaClient, + private settings: Settings, redis: Redis, ) { this.queue = new Queue(QUEUE_NAME, { @@ -50,7 +51,7 @@ export class RepoPermissionSyncer { return setInterval(async () => { // @todo: make this configurable - const thresholdDate = new Date(Date.now() - 1000 * 60 * 60 * 24); + const thresholdDate = new 
Date(Date.now() - this.settings.experiment_repoDrivenPermissionSyncIntervalMs); const repos = await this.db.repo.findMany({ // Repos need their permissions to be synced against the code host when... @@ -166,8 +167,14 @@ export class RepoPermissionSyncer { const config = connection.config as unknown as GithubConnectionConfig; const { octokit } = await createOctokitFromConfig(config, repo.orgId, this.db); - // @nocheckin - need to handle when repo displayName is not set. - const [owner, repoName] = repo.displayName!.split('/'); + // @note: this is a bit of a hack since the displayName _might_ not be set.. + // however, this property was introduced many versions ago and _should_ be set + // on each connection sync. Let's throw an error just in case. + if (!repo.displayName) { + throw new Error(`Repo ${id} does not have a displayName`); + } + + const [owner, repoName] = repo.displayName.split('/'); const githubUserIds = await getUserIdsWithReadAccessToRepo(owner, repoName, octokit); diff --git a/packages/backend/src/ee/userPermissionSyncer.ts b/packages/backend/src/ee/userPermissionSyncer.ts index f8b39f457..5e6fc2d1b 100644 --- a/packages/backend/src/ee/userPermissionSyncer.ts +++ b/packages/backend/src/ee/userPermissionSyncer.ts @@ -6,8 +6,9 @@ import { Job, Queue, Worker } from "bullmq"; import { Redis } from "ioredis"; import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; import { env } from "../env.js"; -import { getReposThatAuthenticatedUserHasReadAccessTo } from "../github.js"; +import { createOctokitFromOAuthToken, getReposForAuthenticatedUser } from "../github.js"; import { hasEntitlement } from "@sourcebot/shared"; +import { Settings } from "../types.js"; const logger = createLogger('user-permission-syncer'); @@ -24,6 +25,7 @@ export class UserPermissionSyncer { constructor( private db: PrismaClient, + private settings: Settings, redis: Redis, ) { this.queue = new Queue(QUEUE_NAME, { @@ -45,7 +47,7 @@ export class UserPermissionSyncer { 
logger.debug('Starting scheduler'); return setInterval(async () => { - const thresholdDate = new Date(Date.now() - 1000 * 60 * 60 * 24); + const thresholdDate = new Date(Date.now() - this.settings.experiment_userDrivenPermissionSyncIntervalMs); const users = await this.db.user.findMany({ where: { @@ -152,15 +154,11 @@ export class UserPermissionSyncer { for (const account of user.accounts) { const repoIds = await (async () => { if (account.provider === 'github') { - // @todo: we will need to provide some mechanism for the user to provide a custom - // URL here. This will correspond to the host URL they are using for their GitHub - // instance. - const octokit = new Octokit({ - auth: account.access_token, - // baseUrl: /* todo */ - }); - - const repoIds = await getReposThatAuthenticatedUserHasReadAccessTo(octokit); + const octokit = await createOctokitFromOAuthToken(account.access_token); + // @note: we only care about the private repos since we don't need to build a mapping + // for public repos. 
+ // @see: packages/web/src/prisma.ts + const repoIds = await getReposForAuthenticatedUser(/* visibility = */ 'private', octokit); const repos = await this.db.repo.findMany({ where: { diff --git a/packages/backend/src/env.ts b/packages/backend/src/env.ts index 4715b635e..80bbba5e9 100644 --- a/packages/backend/src/env.ts +++ b/packages/backend/src/env.ts @@ -54,6 +54,7 @@ export const env = createEnv({ GITLAB_CLIENT_QUERY_TIMEOUT_SECONDS: numberSchema.default(60 * 10), EXPERIMENT_EE_PERMISSION_SYNC_ENABLED: booleanSchema.default('false'), + AUTH_EE_GITHUB_BASE_URL: z.string().optional(), }, runtimeEnv: process.env, emptyStringAsUndefined: true, diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts index a489c55c4..dc5114543 100644 --- a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -129,11 +129,10 @@ export const getUserIdsWithReadAccessToRepo = async (owner: string, repo: string return collaborators.map(collaborator => collaborator.id.toString()); } -export const getReposThatAuthenticatedUserHasReadAccessTo = async (octokit: Octokit) => { +export const getReposForAuthenticatedUser = async (visibility: 'all' | 'private' | 'public' = 'all', octokit: Octokit) => { const fetchFn = () => octokit.paginate(octokit.repos.listForAuthenticatedUser, { per_page: 100, - // @todo: do we need to set a visibility to private only? - // visibility: 'private' + visibility, }); const repos = await fetchWithRetry(fetchFn, `authenticated user`, logger); @@ -164,6 +163,14 @@ export const createOctokitFromConfig = async (config: GithubConnectionConfig, or }; } +export const createOctokitFromOAuthToken = async (token: string | null): Promise => { + const apiUrl = env.AUTH_EE_GITHUB_BASE_URL ? 
`${env.AUTH_EE_GITHUB_BASE_URL}/api/v3` : "https://api.github.com"; + return new Octokit({ + auth: token, + baseUrl: apiUrl, + }); +} + export const shouldExcludeRepo = ({ repo, include, diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index 2182fbad9..3664024aa 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -69,8 +69,8 @@ const settings = await getSettings(env.CONFIG_PATH); const connectionManager = new ConnectionManager(prisma, settings, redis); const repoManager = new RepoManager(prisma, settings, redis, promClient, context); -const repoPermissionSyncer = new RepoPermissionSyncer(prisma, redis); -const userPermissionSyncer = new UserPermissionSyncer(prisma, redis); +const repoPermissionSyncer = new RepoPermissionSyncer(prisma, settings, redis); +const userPermissionSyncer = new UserPermissionSyncer(prisma, settings, redis); await repoManager.validateIndexedReposHaveShards(); diff --git a/packages/schemas/src/v3/index.schema.ts b/packages/schemas/src/v3/index.schema.ts index 02b9f3613..c8fe48e89 100644 --- a/packages/schemas/src/v3/index.schema.ts +++ b/packages/schemas/src/v3/index.schema.ts @@ -68,6 +68,16 @@ const schema = { "deprecated": true, "description": "This setting is deprecated. Please use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead.", "default": false + }, + "experiment_repoDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the repo permission syncer should run. Defaults to 24 hours.", + "minimum": 1 + }, + "experiment_userDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the user permission syncer should run. Defaults to 24 hours.", + "minimum": 1 } }, "additionalProperties": false @@ -194,6 +204,16 @@ const schema = { "deprecated": true, "description": "This setting is deprecated. 
Please use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead.", "default": false + }, + "experiment_repoDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the repo permission syncer should run. Defaults to 24 hours.", + "minimum": 1 + }, + "experiment_userDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the user permission syncer should run. Defaults to 24 hours.", + "minimum": 1 } }, "additionalProperties": false diff --git a/packages/schemas/src/v3/index.type.ts b/packages/schemas/src/v3/index.type.ts index 68417ba79..2bea94538 100644 --- a/packages/schemas/src/v3/index.type.ts +++ b/packages/schemas/src/v3/index.type.ts @@ -102,6 +102,14 @@ export interface Settings { * This setting is deprecated. Please use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead. */ enablePublicAccess?: boolean; + /** + * The interval (in milliseconds) at which the repo permission syncer should run. Defaults to 24 hours. + */ + experiment_repoDrivenPermissionSyncIntervalMs?: number; + /** + * The interval (in milliseconds) at which the user permission syncer should run. Defaults to 24 hours. + */ + experiment_userDrivenPermissionSyncIntervalMs?: number; } /** * Search context diff --git a/packages/web/src/ee/features/sso/sso.tsx b/packages/web/src/ee/features/sso/sso.ts similarity index 100% rename from packages/web/src/ee/features/sso/sso.tsx rename to packages/web/src/ee/features/sso/sso.ts diff --git a/schemas/v3/index.json b/schemas/v3/index.json index f0bf0f1ac..b697e619d 100644 --- a/schemas/v3/index.json +++ b/schemas/v3/index.json @@ -67,6 +67,16 @@ "deprecated": true, "description": "This setting is deprecated. 
Please use the `FORCE_ENABLE_ANONYMOUS_ACCESS` environment variable instead.", "default": false + }, + "experiment_repoDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the repo permission syncer should run. Defaults to 24 hours.", + "minimum": 1 + }, + "experiment_userDrivenPermissionSyncIntervalMs": { + "type": "number", + "description": "The interval (in milliseconds) at which the user permission syncer should run. Defaults to 24 hours.", + "minimum": 1 } }, "additionalProperties": false From 9e72365f17bb44f1a0ef1be141e0b331a7896bfe Mon Sep 17 00:00:00 2001 From: bkellam Date: Fri, 19 Sep 2025 22:04:37 -0700 Subject: [PATCH 11/14] changelog --- CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f3bbad3c4..2bac309a7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added +- [Experimental][Sourcebot EE] Added permission syncing repository Access Control Lists (ACLs) between Sourcebot and GitHub. [#508](https://github.com/sourcebot-dev/sourcebot/pull/508) + ### Changed - Improved repository query performance by adding db indices. 
[#526](https://github.com/sourcebot-dev/sourcebot/pull/526) From 46084c3a07fd8f9ec80810080678f8fdafcc68bc Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 20 Sep 2025 16:00:11 -0700 Subject: [PATCH 12/14] feedback --- .../snippets/experimental-feature-warning.mdx | 2 +- packages/backend/src/connectionManager.ts | 6 +- .../backend/src/ee/repoPermissionSyncer.ts | 67 ++--- .../backend/src/ee/userPermissionSyncer.ts | 76 +++-- packages/backend/src/git.ts | 28 +- packages/backend/src/github.ts | 273 +++++++++--------- packages/backend/src/index.ts | 21 +- packages/backend/src/repoManager.ts | 123 ++------ packages/backend/src/types.ts | 7 + packages/backend/src/utils.ts | 114 +++++++- packages/web/src/actions.ts | 43 ++- .../connections/[id]/components/repoList.tsx | 4 +- .../[id]/components/repoListItem.tsx | 2 +- .../[id]/components/repoRetryIndexButton.tsx | 5 +- packages/web/src/withAuthV2.ts | 2 +- 15 files changed, 403 insertions(+), 370 deletions(-) diff --git a/docs/snippets/experimental-feature-warning.mdx b/docs/snippets/experimental-feature-warning.mdx index b00db5d88..cdae892cd 100644 --- a/docs/snippets/experimental-feature-warning.mdx +++ b/docs/snippets/experimental-feature-warning.mdx @@ -1,4 +1,4 @@ -This is an experimental feature. Certain functionality may be buggy or incomplete, and breaking changes may ship in non-major releases. Have feedback? Submit a [issue](https://github.com/sourcebot-dev/sourcebot/issues) on GitHub. +This is an experimental feature. Certain functionality may be incomplete and breaking changes may ship in non-major releases. Have feedback? Submit a [issue](https://github.com/sourcebot-dev/sourcebot/issues) on GitHub. 
diff --git a/packages/backend/src/connectionManager.ts b/packages/backend/src/connectionManager.ts index d1cb0b891..ebbffe734 100644 --- a/packages/backend/src/connectionManager.ts +++ b/packages/backend/src/connectionManager.ts @@ -28,6 +28,7 @@ export class ConnectionManager { private worker: Worker; private queue: Queue; private logger = createLogger('connection-manager'); + private interval?: NodeJS.Timeout; constructor( private db: PrismaClient, @@ -71,7 +72,7 @@ export class ConnectionManager { public startScheduler() { this.logger.debug('Starting scheduler'); - return setInterval(async () => { + this.interval = setInterval(async () => { const thresholdDate = new Date(Date.now() - this.settings.resyncConnectionIntervalMs); const connections = await this.db.connection.findMany({ where: { @@ -364,6 +365,9 @@ export class ConnectionManager { } public dispose() { + if (this.interval) { + clearInterval(this.interval); + } this.worker.close(); this.queue.close(); } diff --git a/packages/backend/src/ee/repoPermissionSyncer.ts b/packages/backend/src/ee/repoPermissionSyncer.ts index 08a0cde30..f411c3e36 100644 --- a/packages/backend/src/ee/repoPermissionSyncer.ts +++ b/packages/backend/src/ee/repoPermissionSyncer.ts @@ -1,17 +1,14 @@ import * as Sentry from "@sentry/node"; import { PrismaClient, Repo, RepoPermissionSyncJobStatus } from "@sourcebot/db"; import { createLogger } from "@sourcebot/logger"; -import { BitbucketConnectionConfig } from "@sourcebot/schemas/v3/bitbucket.type"; -import { GiteaConnectionConfig } from "@sourcebot/schemas/v3/gitea.type"; -import { GithubConnectionConfig } from "@sourcebot/schemas/v3/github.type"; -import { GitlabConnectionConfig } from "@sourcebot/schemas/v3/gitlab.type"; +import { hasEntitlement } from "@sourcebot/shared"; import { Job, Queue, Worker } from 'bullmq'; import { Redis } from 'ioredis'; -import { env } from "../env.js"; -import { createOctokitFromConfig, getUserIdsWithReadAccessToRepo } from "../github.js"; -import { 
RepoWithConnections, Settings } from "../types.js"; import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; -import { hasEntitlement } from "@sourcebot/shared"; +import { env } from "../env.js"; +import { createOctokitFromToken, getRepoCollaborators } from "../github.js"; +import { Settings } from "../types.js"; +import { getAuthCredentialsForRepo } from "../utils.js"; type RepoPermissionSyncJob = { jobId: string; @@ -25,6 +22,7 @@ const logger = createLogger('repo-permission-syncer'); export class RepoPermissionSyncer { private queue: Queue; private worker: Worker; + private interval?: NodeJS.Timeout; constructor( private db: PrismaClient, @@ -49,7 +47,7 @@ export class RepoPermissionSyncer { logger.debug('Starting scheduler'); - return setInterval(async () => { + this.interval = setInterval(async () => { // @todo: make this configurable const thresholdDate = new Date(Date.now() - this.settings.experiment_repoDrivenPermissionSyncIntervalMs); @@ -104,6 +102,9 @@ export class RepoPermissionSyncer { } public dispose() { + if (this.interval) { + clearInterval(this.interval); + } this.worker.close(); this.queue.close(); } @@ -157,15 +158,17 @@ export class RepoPermissionSyncer { logger.info(`Syncing permissions for repo ${repo.displayName}...`); - const connection = getFirstConnectionWithToken(repo); - if (!connection) { - throw new Error(`No connection with token found for repo ${id}`); + const credentials = await getAuthCredentialsForRepo(repo, this.db, logger); + if (!credentials) { + throw new Error(`No credentials found for repo ${id}`); } const userIds = await (async () => { - if (connection.connectionType === 'github') { - const config = connection.config as unknown as GithubConnectionConfig; - const { octokit } = await createOctokitFromConfig(config, repo.orgId, this.db); + if (repo.external_codeHostType === 'github') { + const { octokit } = await createOctokitFromToken({ + token: credentials.token, + url: credentials.hostUrl, + }); // 
@note: this is a bit of a hack since the displayName _might_ not be set.. // however, this property was introduced many versions ago and _should_ be set @@ -176,7 +179,8 @@ export class RepoPermissionSyncer { const [owner, repoName] = repo.displayName.split('/'); - const githubUserIds = await getUserIdsWithReadAccessToRepo(owner, repoName, octokit); + const collaborators = await getRepoCollaborators(owner, repoName, octokit); + const githubUserIds = collaborators.map(collaborator => collaborator.id.toString()); const accounts = await this.db.account.findMany({ where: { @@ -268,34 +272,3 @@ export class RepoPermissionSyncer { } } } - -const getFirstConnectionWithToken = (repo: RepoWithConnections) => { - for (const { connection } of repo.connections) { - if (connection.connectionType === 'github') { - const config = connection.config as unknown as GithubConnectionConfig; - if (config.token) { - return connection; - } - } - if (connection.connectionType === 'gitlab') { - const config = connection.config as unknown as GitlabConnectionConfig; - if (config.token) { - return connection; - } - } - if (connection.connectionType === 'gitea') { - const config = connection.config as unknown as GiteaConnectionConfig; - if (config.token) { - return connection; - } - } - if (connection.connectionType === 'bitbucket') { - const config = connection.config as unknown as BitbucketConnectionConfig; - if (config.token) { - return connection; - } - } - } - - return undefined; -} \ No newline at end of file diff --git a/packages/backend/src/ee/userPermissionSyncer.ts b/packages/backend/src/ee/userPermissionSyncer.ts index 5e6fc2d1b..90ae86296 100644 --- a/packages/backend/src/ee/userPermissionSyncer.ts +++ b/packages/backend/src/ee/userPermissionSyncer.ts @@ -1,4 +1,3 @@ -import { Octokit } from "@octokit/rest"; import * as Sentry from "@sentry/node"; import { PrismaClient, User, UserPermissionSyncJobStatus } from "@sourcebot/db"; import { createLogger } from "@sourcebot/logger"; @@ 
-6,7 +5,7 @@ import { Job, Queue, Worker } from "bullmq"; import { Redis } from "ioredis"; import { PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES } from "../constants.js"; import { env } from "../env.js"; -import { createOctokitFromOAuthToken, getReposForAuthenticatedUser } from "../github.js"; +import { createOctokitFromToken, getReposForAuthenticatedUser } from "../github.js"; import { hasEntitlement } from "@sourcebot/shared"; import { Settings } from "../types.js"; @@ -22,6 +21,7 @@ type UserPermissionSyncJob = { export class UserPermissionSyncer { private queue: Queue; private worker: Worker; + private interval?: NodeJS.Timeout; constructor( private db: PrismaClient, @@ -46,7 +46,7 @@ export class UserPermissionSyncer { logger.debug('Starting scheduler'); - return setInterval(async () => { + this.interval = setInterval(async () => { const thresholdDate = new Date(Date.now() - this.settings.experiment_userDrivenPermissionSyncIntervalMs); const users = await this.db.user.findMany({ @@ -102,6 +102,9 @@ export class UserPermissionSyncer { } public dispose() { + if (this.interval) { + clearInterval(this.interval); + } this.worker.close(); this.queue.close(); } @@ -151,50 +154,61 @@ export class UserPermissionSyncer { logger.info(`Syncing permissions for user ${user.email}...`); - for (const account of user.accounts) { - const repoIds = await (async () => { + // Get a list of all repos that the user has access to from all connected accounts. 
+ const repoIds = await (async () => { + const aggregatedRepoIds: Set = new Set(); + + for (const account of user.accounts) { if (account.provider === 'github') { - const octokit = await createOctokitFromOAuthToken(account.access_token); + if (!account.access_token) { + throw new Error(`User '${user.email}' does not have an GitHub OAuth access token associated with their GitHub account.`); + } + + const { octokit } = await createOctokitFromToken({ + token: account.access_token, + url: env.AUTH_EE_GITHUB_BASE_URL, + }); // @note: we only care about the private repos since we don't need to build a mapping // for public repos. // @see: packages/web/src/prisma.ts - const repoIds = await getReposForAuthenticatedUser(/* visibility = */ 'private', octokit); + const githubRepos = await getReposForAuthenticatedUser(/* visibility = */ 'private', octokit); + const gitHubRepoIds = githubRepos.map(repo => repo.id.toString()); const repos = await this.db.repo.findMany({ where: { external_codeHostType: 'github', external_id: { - in: repoIds, + in: gitHubRepoIds, } } }); - return repos.map(repo => repo.id); + repos.forEach(repo => aggregatedRepoIds.add(repo.id)); } + } - return []; - })(); - + return Array.from(aggregatedRepoIds); + })(); - await this.db.$transaction([ - this.db.user.update({ - where: { - id: user.id, - }, - data: { - accessibleRepos: { - deleteMany: {}, - } + await this.db.$transaction([ + this.db.user.update({ + where: { + id: user.id, + }, + data: { + accessibleRepos: { + deleteMany: {}, } - }), - this.db.userToRepoPermission.createMany({ - data: repoIds.map(repoId => ({ - userId: user.id, - repoId, - })) - }) - ]); - } + } + }), + this.db.userToRepoPermission.createMany({ + data: repoIds.map(repoId => ({ + userId: user.id, + repoId, + })), + skipDuplicates: true, + }) + ]); } private async onJobCompleted(job: Job) { @@ -226,7 +240,7 @@ export class UserPermissionSyncer { queue: QUEUE_NAME, } }); - + const errorMessage = (email: string) => `User permission sync 
job failed for user ${email}: ${err.message}`; if (job) { diff --git a/packages/backend/src/git.ts b/packages/backend/src/git.ts index 97b6e5883..3f24b00e7 100644 --- a/packages/backend/src/git.ts +++ b/packages/backend/src/git.ts @@ -5,9 +5,15 @@ import { env } from './env.js'; type onProgressFn = (event: SimpleGitProgressEvent) => void; export const cloneRepository = async ( - remoteUrl: URL, - path: string, - onProgress?: onProgressFn + { + cloneUrl, + path, + onProgress, + }: { + cloneUrl: string, + path: string, + onProgress?: onProgressFn + } ) => { try { await mkdir(path, { recursive: true }); @@ -19,7 +25,7 @@ export const cloneRepository = async ( }) await git.clone( - remoteUrl.toString(), + cloneUrl, path, [ "--bare", @@ -42,9 +48,15 @@ export const cloneRepository = async ( }; export const fetchRepository = async ( - remoteUrl: URL, - path: string, - onProgress?: onProgressFn + { + cloneUrl, + path, + onProgress, + }: { + cloneUrl: string, + path: string, + onProgress?: onProgressFn + } ) => { try { const git = simpleGit({ @@ -54,7 +66,7 @@ export const fetchRepository = async ( }) await git.fetch([ - remoteUrl.toString(), + cloneUrl, "+refs/heads/*:refs/heads/*", "--prune", "--progress" diff --git a/packages/backend/src/github.ts b/packages/backend/src/github.ts index dc5114543..2b42eed23 100644 --- a/packages/backend/src/github.ts +++ b/packages/backend/src/github.ts @@ -41,8 +41,35 @@ const isHttpError = (error: unknown, status: number): boolean => { && error.status === status; } +export const createOctokitFromToken = async ({ token, url }: { token?: string, url?: string }): Promise<{ octokit: Octokit, isAuthenticated: boolean }> => { + const octokit = new Octokit({ + auth: token, + ...(url ? 
{ + baseUrl: `${url}/api/v3` + } : {}), + }); + + return { + octokit, + isAuthenticated: !!token, + }; +} + export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient, signal: AbortSignal) => { - const { octokit, isAuthenticated } = await createOctokitFromConfig(config, orgId, db); + const hostname = config.url ? + new URL(config.url).hostname : + GITHUB_CLOUD_HOSTNAME; + + const token = config.token ? + await getTokenFromConfig(config.token, orgId, db, logger) : + hostname === GITHUB_CLOUD_HOSTNAME ? + env.FALLBACK_GITHUB_CLOUD_TOKEN : + undefined; + + const { octokit, isAuthenticated } = await createOctokitFromToken({ + token, + url: config.url, + }); if (isAuthenticated) { try { @@ -118,143 +145,37 @@ export const getGitHubReposFromConfig = async (config: GithubConnectionConfig, o }; } -export const getUserIdsWithReadAccessToRepo = async (owner: string, repo: string, octokit: Octokit) => { - const fetchFn = () => octokit.paginate(octokit.repos.listCollaborators, { - owner, - repo, - per_page: 100, - }); +export const getRepoCollaborators = async (owner: string, repo: string, octokit: Octokit) => { + try { + const fetchFn = () => octokit.paginate(octokit.repos.listCollaborators, { + owner, + repo, + per_page: 100, + }); - const collaborators = await fetchWithRetry(fetchFn, `repo ${owner}/${repo}`, logger); - return collaborators.map(collaborator => collaborator.id.toString()); + const collaborators = await fetchWithRetry(fetchFn, `repo ${owner}/${repo}`, logger); + return collaborators; + } catch (error) { + Sentry.captureException(error); + logger.error(`Failed to fetch collaborators for repo ${owner}/${repo}.`, error); + throw error; + } } export const getReposForAuthenticatedUser = async (visibility: 'all' | 'private' | 'public' = 'all', octokit: Octokit) => { - const fetchFn = () => octokit.paginate(octokit.repos.listForAuthenticatedUser, { - per_page: 100, - visibility, - }); - - const repos = await 
fetchWithRetry(fetchFn, `authenticated user`, logger); - return repos.map(repo => repo.id.toString()); -} - -export const createOctokitFromConfig = async (config: GithubConnectionConfig, orgId: number, db: PrismaClient): Promise<{ octokit: Octokit, isAuthenticated: boolean }> => { - const hostname = config.url ? - new URL(config.url).hostname : - GITHUB_CLOUD_HOSTNAME; - - const token = config.token ? - await getTokenFromConfig(config.token, orgId, db, logger) : - hostname === GITHUB_CLOUD_HOSTNAME ? - env.FALLBACK_GITHUB_CLOUD_TOKEN : - undefined; - - const octokit = new Octokit({ - auth: token, - ...(config.url ? { - baseUrl: `${config.url}/api/v3` - } : {}), - }); - - return { - octokit, - isAuthenticated: !!token, - }; -} - -export const createOctokitFromOAuthToken = async (token: string | null): Promise => { - const apiUrl = env.AUTH_EE_GITHUB_BASE_URL ? `${env.AUTH_EE_GITHUB_BASE_URL}/api/v3` : "https://api.github.com"; - return new Octokit({ - auth: token, - baseUrl: apiUrl, - }); -} - -export const shouldExcludeRepo = ({ - repo, - include, - exclude -}: { - repo: OctokitRepository, - include?: { - topics?: GithubConnectionConfig['topics'] - }, - exclude?: GithubConnectionConfig['exclude'] -}) => { - let reason = ''; - const repoName = repo.full_name; - - const shouldExclude = (() => { - if (!repo.clone_url) { - reason = 'clone_url is undefined'; - return true; - } - - if (!!exclude?.forks && repo.fork) { - reason = `\`exclude.forks\` is true`; - return true; - } - - if (!!exclude?.archived && !!repo.archived) { - reason = `\`exclude.archived\` is true`; - return true; - } - - if (exclude?.repos) { - if (micromatch.isMatch(repoName, exclude.repos)) { - reason = `\`exclude.repos\` contains ${repoName}`; - return true; - } - } - - if (exclude?.topics) { - const configTopics = exclude.topics.map(topic => topic.toLowerCase()); - const repoTopics = repo.topics ?? 
[]; - - const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics)); - if (matchingTopics.length > 0) { - reason = `\`exclude.topics\` matches the following topics: ${matchingTopics.join(', ')}`; - return true; - } - } - - if (include?.topics) { - const configTopics = include.topics.map(topic => topic.toLowerCase()); - const repoTopics = repo.topics ?? []; - - const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics)); - if (matchingTopics.length === 0) { - reason = `\`include.topics\` does not match any of the following topics: ${configTopics.join(', ')}`; - return true; - } - } - - const repoSizeInBytes = repo.size ? repo.size * 1000 : undefined; - if (exclude?.size && repoSizeInBytes) { - const min = exclude.size.min; - const max = exclude.size.max; - - if (min && repoSizeInBytes < min) { - reason = `repo is less than \`exclude.size.min\`=${min} bytes.`; - return true; - } - - if (max && repoSizeInBytes > max) { - reason = `repo is greater than \`exclude.size.max\`=${max} bytes.`; - return true; - } - } - - return false; - })(); + try { + const fetchFn = () => octokit.paginate(octokit.repos.listForAuthenticatedUser, { + per_page: 100, + visibility, + }); - if (shouldExclude) { - logger.debug(`Excluding repo ${repoName}. 
Reason: ${reason}`); - return true; + const repos = await fetchWithRetry(fetchFn, `authenticated user`, logger); + return repos; + } catch (error) { + Sentry.captureException(error); + logger.error(`Failed to fetch repositories for authenticated user.`, error); + throw error; } - - return false; } const getReposOwnedByUsers = async (users: string[], octokit: Octokit, signal: AbortSignal) => { @@ -408,4 +329,90 @@ const getRepos = async (repoList: string[], octokit: Octokit, signal: AbortSigna validRepos, notFoundRepos, }; -} \ No newline at end of file +} + +export const shouldExcludeRepo = ({ + repo, + include, + exclude +}: { + repo: OctokitRepository, + include?: { + topics?: GithubConnectionConfig['topics'] + }, + exclude?: GithubConnectionConfig['exclude'] +}) => { + let reason = ''; + const repoName = repo.full_name; + + const shouldExclude = (() => { + if (!repo.clone_url) { + reason = 'clone_url is undefined'; + return true; + } + + if (!!exclude?.forks && repo.fork) { + reason = `\`exclude.forks\` is true`; + return true; + } + + if (!!exclude?.archived && !!repo.archived) { + reason = `\`exclude.archived\` is true`; + return true; + } + + if (exclude?.repos) { + if (micromatch.isMatch(repoName, exclude.repos)) { + reason = `\`exclude.repos\` contains ${repoName}`; + return true; + } + } + + if (exclude?.topics) { + const configTopics = exclude.topics.map(topic => topic.toLowerCase()); + const repoTopics = repo.topics ?? []; + + const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics)); + if (matchingTopics.length > 0) { + reason = `\`exclude.topics\` matches the following topics: ${matchingTopics.join(', ')}`; + return true; + } + } + + if (include?.topics) { + const configTopics = include.topics.map(topic => topic.toLowerCase()); + const repoTopics = repo.topics ?? 
[]; + + const matchingTopics = repoTopics.filter((topic) => micromatch.isMatch(topic, configTopics)); + if (matchingTopics.length === 0) { + reason = `\`include.topics\` does not match any of the following topics: ${configTopics.join(', ')}`; + return true; + } + } + + const repoSizeInBytes = repo.size ? repo.size * 1000 : undefined; + if (exclude?.size && repoSizeInBytes) { + const min = exclude.size.min; + const max = exclude.size.max; + + if (min && repoSizeInBytes < min) { + reason = `repo is less than \`exclude.size.min\`=${min} bytes.`; + return true; + } + + if (max && repoSizeInBytes > max) { + reason = `repo is greater than \`exclude.size.max\`=${max} bytes.`; + return true; + } + } + + return false; + })(); + + if (shouldExclude) { + logger.debug(`Excluding repo ${repoName}. Reason: ${reason}`); + return true; + } + + return false; +} diff --git a/packages/backend/src/index.ts b/packages/backend/src/index.ts index 3664024aa..c8574492f 100644 --- a/packages/backend/src/index.ts +++ b/packages/backend/src/index.ts @@ -74,34 +74,21 @@ const userPermissionSyncer = new UserPermissionSyncer(prisma, settings, redis); await repoManager.validateIndexedReposHaveShards(); -const connectionManagerInterval = connectionManager.startScheduler(); -const repoManagerInterval = repoManager.startScheduler(); - -let repoPermissionSyncerInterval: NodeJS.Timeout | null = null; -let userPermissionSyncerInterval: NodeJS.Timeout | null = null; +connectionManager.startScheduler(); +repoManager.startScheduler(); if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && !hasEntitlement('permission-syncing')) { logger.error('Permission syncing is not supported in current plan. 
Please contact support@sourcebot.dev for assistance.'); process.exit(1); } else if (env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' && hasEntitlement('permission-syncing')) { - repoPermissionSyncerInterval = repoPermissionSyncer.startScheduler(); - userPermissionSyncerInterval = userPermissionSyncer.startScheduler(); + repoPermissionSyncer.startScheduler(); + userPermissionSyncer.startScheduler(); } const cleanup = async (signal: string) => { logger.info(`Recieved ${signal}, cleaning up...`); - if (userPermissionSyncerInterval) { - clearInterval(userPermissionSyncerInterval); - } - if (repoPermissionSyncerInterval) { - clearInterval(repoPermissionSyncerInterval); - } - - clearInterval(connectionManagerInterval); - clearInterval(repoManagerInterval); - connectionManager.dispose(); repoManager.dispose(); repoPermissionSyncer.dispose(); diff --git a/packages/backend/src/repoManager.ts b/packages/backend/src/repoManager.ts index ea122abf1..12bf71731 100644 --- a/packages/backend/src/repoManager.ts +++ b/packages/backend/src/repoManager.ts @@ -1,21 +1,19 @@ +import * as Sentry from "@sentry/node"; +import { PrismaClient, Repo, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db"; +import { createLogger } from "@sourcebot/logger"; import { Job, Queue, Worker } from 'bullmq'; +import { existsSync, promises, readdirSync } from 'fs'; import { Redis } from 'ioredis'; -import { createLogger } from "@sourcebot/logger"; -import { Connection, PrismaClient, Repo, RepoToConnection, RepoIndexingStatus, StripeSubscriptionStatus } from "@sourcebot/db"; -import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; -import { AppContext, Settings, repoMetadataSchema } from "./types.js"; -import { getRepoPath, getTokenFromConfig, measure, getShardPrefix } from "./utils.js"; +import { env } from './env.js'; import { cloneRepository, fetchRepository, 
unsetGitConfig, upsertGitConfig } from "./git.js"; -import { existsSync, readdirSync, promises } from 'fs'; -import { indexGitRepository } from "./zoekt.js"; import { PromClient } from './promClient.js'; -import * as Sentry from "@sentry/node"; -import { env } from './env.js'; +import { AppContext, RepoWithConnections, Settings, repoMetadataSchema } from "./types.js"; +import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, measure } from "./utils.js"; +import { indexGitRepository } from "./zoekt.js"; const REPO_INDEXING_QUEUE = 'repoIndexingQueue'; const REPO_GC_QUEUE = 'repoGarbageCollectionQueue'; -type RepoWithConnections = Repo & { connections: (RepoToConnection & { connection: Connection })[] }; type RepoIndexingPayload = { repo: RepoWithConnections, } @@ -31,6 +29,7 @@ export class RepoManager { private indexQueue: Queue; private gcWorker: Worker; private gcQueue: Queue; + private interval?: NodeJS.Timeout; constructor( private db: PrismaClient, @@ -64,7 +63,7 @@ export class RepoManager { public startScheduler() { logger.debug('Starting scheduler'); - return setInterval(async () => { + this.interval = setInterval(async () => { await this.fetchAndScheduleRepoIndexing(); await this.fetchAndScheduleRepoGarbageCollection(); await this.fetchAndScheduleRepoTimeouts(); @@ -162,68 +161,6 @@ export class RepoManager { } } - - // TODO: do this better? ex: try using the tokens from all the connections - // We can no longer use repo.cloneUrl directly since it doesn't contain the token for security reasons. As a result, we need to - // fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each - // may have their own token. This method will just pick the first connection that has a token (if one exists) and uses that. This - // may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing. 
- private async getCloneCredentialsForRepo(repo: RepoWithConnections, db: PrismaClient): Promise<{ username?: string, password: string } | undefined> { - - for (const { connection } of repo.connections) { - if (connection.connectionType === 'github') { - const config = connection.config as unknown as GithubConnectionConfig; - if (config.token) { - const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); - return { - password: token, - } - } - } else if (connection.connectionType === 'gitlab') { - const config = connection.config as unknown as GitlabConnectionConfig; - if (config.token) { - const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); - return { - username: 'oauth2', - password: token, - } - } - } else if (connection.connectionType === 'gitea') { - const config = connection.config as unknown as GiteaConnectionConfig; - if (config.token) { - const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); - return { - password: token, - } - } - } else if (connection.connectionType === 'bitbucket') { - const config = connection.config as unknown as BitbucketConnectionConfig; - if (config.token) { - const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); - const username = config.user ?? 'x-token-auth'; - return { - username, - password: token, - } - } - } else if (connection.connectionType === 'azuredevops') { - const config = connection.config as unknown as AzureDevOpsConnectionConfig; - if (config.token) { - const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); - return { - // @note: If we don't provide a username, the password will be set as the username. This seems to work - // for ADO cloud but not for ADO server. 
To fix this, we set a placeholder username to ensure the password - // is set correctly - username: 'user', - password: token, - } - } - } - } - - return undefined; - } - private async syncGitRepository(repo: RepoWithConnections, repoAlreadyInIndexingState: boolean) { const { path: repoPath, isReadOnly } = getRepoPath(repo, this.ctx); @@ -236,21 +173,8 @@ export class RepoManager { await promises.rm(repoPath, { recursive: true, force: true }); } - const credentials = await this.getCloneCredentialsForRepo(repo, this.db); - const remoteUrl = new URL(repo.cloneUrl); - if (credentials) { - // @note: URL has a weird behavior where if you set the password but - // _not_ the username, the ":" delimiter will still be present in the - // URL (e.g., https://:password@example.com). To get around this, if - // we only have a password, we set the username to the password. - // @see: https://www.typescriptlang.org/play/?#code/MYewdgzgLgBArgJwDYwLwzAUwO4wKoBKAMgBQBEAFlFAA4QBcA9I5gB4CGAtjUpgHShOZADQBKANwAoREj412ECNhAIAJmhhl5i5WrJTQkELz5IQAcxIy+UEAGUoCAJZhLo0UA - if (!credentials.username) { - remoteUrl.username = credentials.password; - } else { - remoteUrl.username = credentials.username; - remoteUrl.password = credentials.password; - } - } + const credentials = await getAuthCredentialsForRepo(repo, this.db); + const cloneUrlMaybeWithToken = credentials?.cloneUrlWithToken ?? 
repo.cloneUrl; if (existsSync(repoPath) && !isReadOnly) { // @NOTE: in #483, we changed the cloning method s.t., we _no longer_ @@ -262,13 +186,13 @@ export class RepoManager { await unsetGitConfig(repoPath, ["remote.origin.url"]); logger.info(`Fetching ${repo.displayName}...`); - const { durationMs } = await measure(() => fetchRepository( - remoteUrl, - repoPath, - ({ method, stage, progress }) => { + const { durationMs } = await measure(() => fetchRepository({ + cloneUrl: cloneUrlMaybeWithToken, + path: repoPath, + onProgress: ({ method, stage, progress }) => { logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) } - )); + })); const fetchDuration_s = durationMs / 1000; process.stdout.write('\n'); @@ -277,13 +201,13 @@ export class RepoManager { } else if (!isReadOnly) { logger.info(`Cloning ${repo.displayName}...`); - const { durationMs } = await measure(() => cloneRepository( - remoteUrl, - repoPath, - ({ method, stage, progress }) => { + const { durationMs } = await measure(() => cloneRepository({ + cloneUrl: cloneUrlMaybeWithToken, + path: repoPath, + onProgress: ({ method, stage, progress }) => { logger.debug(`git.${method} ${stage} stage ${progress}% complete for ${repo.displayName}`) } - )); + })); const cloneDuration_s = durationMs / 1000; process.stdout.write('\n'); @@ -628,6 +552,9 @@ export class RepoManager { } public async dispose() { + if (this.interval) { + clearInterval(this.interval); + } this.indexWorker.close(); this.indexQueue.close(); this.gcQueue.close(); diff --git a/packages/backend/src/types.ts b/packages/backend/src/types.ts index 737720b4e..0a9e76ff6 100644 --- a/packages/backend/src/types.ts +++ b/packages/backend/src/types.ts @@ -54,3 +54,10 @@ export type DeepPartial = T extends object ? 
{ export type WithRequired = T & { [P in K]-?: T[P] }; export type RepoWithConnections = Repo & { connections: (RepoToConnection & { connection: Connection })[] }; + + +export type RepoAuthCredentials = { + hostUrl?: string; + token: string; + cloneUrlWithToken: string; +} \ No newline at end of file diff --git a/packages/backend/src/utils.ts b/packages/backend/src/utils.ts index 3245828dc..6ea8e5ff3 100644 --- a/packages/backend/src/utils.ts +++ b/packages/backend/src/utils.ts @@ -1,10 +1,11 @@ import { Logger } from "winston"; -import { AppContext } from "./types.js"; +import { AppContext, RepoAuthCredentials, RepoWithConnections } from "./types.js"; import path from 'path'; import { PrismaClient, Repo } from "@sourcebot/db"; import { getTokenFromConfig as getTokenFromConfigBase } from "@sourcebot/crypto"; import { BackendException, BackendError } from "@sourcebot/error"; import * as Sentry from "@sentry/node"; +import { GithubConnectionConfig, GitlabConnectionConfig, GiteaConnectionConfig, BitbucketConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type'; export const measure = async (cb: () => Promise) => { const start = Date.now(); @@ -116,4 +117,115 @@ export const fetchWithRetry = async ( throw e; } } +} + +// TODO: do this better? ex: try using the tokens from all the connections +// We can no longer use repo.cloneUrl directly since it doesn't contain the token for security reasons. As a result, we need to +// fetch the token here using the connections from the repo. Multiple connections could be referencing this repo, and each +// may have their own token. This method will just pick the first connection that has a token (if one exists) and uses that. This +// may technically cause syncing to fail if that connection's token just so happens to not have access to the repo it's referencing. 
+export const getAuthCredentialsForRepo = async (repo: RepoWithConnections, db: PrismaClient, logger?: Logger): Promise => { + for (const { connection } of repo.connections) { + if (connection.connectionType === 'github') { + const config = connection.config as unknown as GithubConnectionConfig; + if (config.token) { + const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); + return { + hostUrl: config.url, + token, + cloneUrlWithToken: createGitCloneUrlWithToken( + repo.cloneUrl, + { + password: token, + } + ), + } + } + } else if (connection.connectionType === 'gitlab') { + const config = connection.config as unknown as GitlabConnectionConfig; + if (config.token) { + const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); + return { + hostUrl: config.url, + token, + cloneUrlWithToken: createGitCloneUrlWithToken( + repo.cloneUrl, + { + username: 'oauth2', + password: token + } + ), + } + } + } else if (connection.connectionType === 'gitea') { + const config = connection.config as unknown as GiteaConnectionConfig; + if (config.token) { + const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); + return { + hostUrl: config.url, + token, + cloneUrlWithToken: createGitCloneUrlWithToken( + repo.cloneUrl, + { + password: token + } + ), + } + } + } else if (connection.connectionType === 'bitbucket') { + const config = connection.config as unknown as BitbucketConnectionConfig; + if (config.token) { + const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); + const username = config.user ?? 
'x-token-auth'; + return { + hostUrl: config.url, + token, + cloneUrlWithToken: createGitCloneUrlWithToken( + repo.cloneUrl, + { + username, + password: token + } + ), + } + } + } else if (connection.connectionType === 'azuredevops') { + const config = connection.config as unknown as AzureDevOpsConnectionConfig; + if (config.token) { + const token = await getTokenFromConfig(config.token, connection.orgId, db, logger); + return { + hostUrl: config.url, + token, + cloneUrlWithToken: createGitCloneUrlWithToken( + repo.cloneUrl, + { + // @note: If we don't provide a username, the password will be set as the username. This seems to work + // for ADO cloud but not for ADO server. To fix this, we set a placeholder username to ensure the password + // is set correctly + username: 'user', + password: token + } + ), + } + } + } + } + + return undefined; +} + +const createGitCloneUrlWithToken = (cloneUrl: string, credentials: { username?: string, password: string }) => { + const url = new URL(cloneUrl); + // @note: URL has a weird behavior where if you set the password but + // _not_ the username, the ":" delimiter will still be present in the + // URL (e.g., https://:password@example.com). To get around this, if + // we only have a password, we set the username to the password. 
+ // @see: https://www.typescriptlang.org/play/?#code/MYewdgzgLgBArgJwDYwLwzAUwO4wKoBKAMgBQBEAFlFAA4QBcA9I5gB4CGAtjUpgHShOZADQBKANwAoREj412ECNhAIAJmhhl5i5WrJTQkELz5IQAcxIy+UEAGUoCAJZhLo0UA + if (!credentials.username) { + url.username = credentials.password; + } else { + url.username = credentials.username; + url.password = credentials.password; + } + return url.toString(); } \ No newline at end of file diff --git a/packages/web/src/actions.ts b/packages/web/src/actions.ts index 102638dab..5b73922c7 100644 --- a/packages/web/src/actions.ts +++ b/packages/web/src/actions.ts @@ -40,7 +40,7 @@ import JoinRequestSubmittedEmail from "./emails/joinRequestSubmittedEmail"; import { AGENTIC_SEARCH_TUTORIAL_DISMISSED_COOKIE_NAME, MOBILE_UNSUPPORTED_SPLASH_SCREEN_DISMISSED_COOKIE_NAME, SEARCH_MODE_COOKIE_NAME, SINGLE_TENANT_ORG_DOMAIN, SOURCEBOT_GUEST_USER_ID, SOURCEBOT_SUPPORT_EMAIL } from "./lib/constants"; import { orgDomainSchema, orgNameSchema, repositoryQuerySchema } from "./lib/schemas"; import { ApiKeyPayload, TenancyMode } from "./lib/types"; -import { withOptionalAuthV2 } from "./withAuthV2"; +import { withAuthV2, withOptionalAuthV2 } from "./withAuthV2"; const ajv = new Ajv({ validateFormats: false, @@ -1017,31 +1017,22 @@ export const flagConnectionForSync = async (connectionId: number, domain: string }) )); -export const flagReposForIndex = async (repoIds: number[], domain: string) => sew(() => - withAuth((userId) => - withOrgMembership(userId, domain, async ({ org }) => { - await prisma.repo.updateMany({ - where: { - id: { in: repoIds }, - orgId: org.id, - ...(env.EXPERIMENT_EE_PERMISSION_SYNC_ENABLED === 'true' ? 
{ - permittedUsers: { - some: { - userId: userId, - } - } - } : {}) - }, - data: { - repoIndexingStatus: RepoIndexingStatus.NEW, - } - }); - - return { - success: true, +export const flagReposForIndex = async (repoIds: number[]) => sew(() => + withAuthV2(async ({ org, prisma }) => { + await prisma.repo.updateMany({ + where: { + id: { in: repoIds }, + orgId: org.id, + }, + data: { + repoIndexingStatus: RepoIndexingStatus.NEW, } - }) - )); + }); + + return { + success: true, + } + })); export const deleteConnection = async (connectionId: number, domain: string): Promise<{ success: boolean } | ServiceError> => sew(() => withAuth((userId) => @@ -2213,7 +2204,7 @@ const parseConnectionConfig = (config: string) => { switch (connectionType) { case "gitea": case "github": - case "bitbucket": + case "bitbucket": case "azuredevops": { return { numRepos: parsedConfig.repos?.length, diff --git a/packages/web/src/app/[domain]/connections/[id]/components/repoList.tsx b/packages/web/src/app/[domain]/connections/[id]/components/repoList.tsx index 3e91443e2..962b04cb6 100644 --- a/packages/web/src/app/[domain]/connections/[id]/components/repoList.tsx +++ b/packages/web/src/app/[domain]/connections/[id]/components/repoList.tsx @@ -98,7 +98,7 @@ export const RepoList = ({ connectionId }: RepoListProps) => { } setIsRetryAllFailedReposLoading(true); - flagReposForIndex(failedRepos.map((repo) => repo.repoId), domain) + flagReposForIndex(failedRepos.map((repo) => repo.repoId)) .then((response) => { if (isServiceError(response)) { captureEvent('wa_connection_retry_all_failed_repos_fail', {}); @@ -116,7 +116,7 @@ export const RepoList = ({ connectionId }: RepoListProps) => { .finally(() => { setIsRetryAllFailedReposLoading(false); }); - }, [captureEvent, domain, failedRepos, refetchRepos, toast]); + }, [captureEvent, failedRepos, refetchRepos, toast]); const filteredRepos = useMemo(() => { if (isServiceError(unfilteredRepos)) { diff --git 
a/packages/web/src/app/[domain]/connections/[id]/components/repoListItem.tsx b/packages/web/src/app/[domain]/connections/[id]/components/repoListItem.tsx index e2f8c55ca..fd491376f 100644 --- a/packages/web/src/app/[domain]/connections/[id]/components/repoListItem.tsx +++ b/packages/web/src/app/[domain]/connections/[id]/components/repoListItem.tsx @@ -70,7 +70,7 @@ export const RepoListItem = ({
{status === RepoIndexingStatus.FAILED && ( - + )}
{ +export const RetryRepoIndexButton = ({ repoId }: RetryRepoIndexButtonProps) => { const captureEvent = useCaptureEvent(); return ( @@ -21,7 +20,7 @@ export const RetryRepoIndexButton = ({ repoId, domain }: RetryRepoIndexButtonPro size="sm" className="ml-2" onClick={async () => { - const result = await flagReposForIndex([repoId], domain); + const result = await flagReposForIndex([repoId]); if (isServiceError(result)) { toast({ description: `❌ Failed to flag repository for indexing.`, diff --git a/packages/web/src/withAuthV2.ts b/packages/web/src/withAuthV2.ts index 1b0a723f3..c6cbb8bb3 100644 --- a/packages/web/src/withAuthV2.ts +++ b/packages/web/src/withAuthV2.ts @@ -20,7 +20,7 @@ interface OptionalAuthContext { interface RequiredAuthContext { user: User; org: Org; - role: Omit; + role: Exclude; prisma: PrismaClient; } From 8a51fe7d238080e30288b9574c9eae52fd7a6ab7 Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 20 Sep 2025 16:27:15 -0700 Subject: [PATCH 13/14] fix migrations --- .../migration.sql | 2 - .../migration.sql | 3 +- packages/db/prisma/schema.prisma | 50 ++++++++----------- 3 files changed, 24 insertions(+), 31 deletions(-) delete mode 100644 packages/db/prisma/migrations/20250919230022_add_is_public_column_to_repo_table/migration.sql rename packages/db/prisma/migrations/{20250919224623_add_permission_sync_tables => 20250920232318_add_permission_sync_tables}/migration.sql (94%) diff --git a/packages/db/prisma/migrations/20250919230022_add_is_public_column_to_repo_table/migration.sql b/packages/db/prisma/migrations/20250919230022_add_is_public_column_to_repo_table/migration.sql deleted file mode 100644 index 909a6d466..000000000 --- a/packages/db/prisma/migrations/20250919230022_add_is_public_column_to_repo_table/migration.sql +++ /dev/null @@ -1,2 +0,0 @@ --- AlterTable -ALTER TABLE "Repo" ADD COLUMN "isPublic" BOOLEAN; diff --git a/packages/db/prisma/migrations/20250919224623_add_permission_sync_tables/migration.sql 
b/packages/db/prisma/migrations/20250920232318_add_permission_sync_tables/migration.sql similarity index 94% rename from packages/db/prisma/migrations/20250919224623_add_permission_sync_tables/migration.sql rename to packages/db/prisma/migrations/20250920232318_add_permission_sync_tables/migration.sql index 1a8f05972..9e921c6dc 100644 --- a/packages/db/prisma/migrations/20250919224623_add_permission_sync_tables/migration.sql +++ b/packages/db/prisma/migrations/20250920232318_add_permission_sync_tables/migration.sql @@ -5,7 +5,8 @@ CREATE TYPE "RepoPermissionSyncJobStatus" AS ENUM ('PENDING', 'IN_PROGRESS', 'CO CREATE TYPE "UserPermissionSyncJobStatus" AS ENUM ('PENDING', 'IN_PROGRESS', 'COMPLETED', 'FAILED'); -- AlterTable -ALTER TABLE "Repo" ADD COLUMN "permissionSyncedAt" TIMESTAMP(3); +ALTER TABLE "Repo" ADD COLUMN "isPublic" BOOLEAN NOT NULL DEFAULT false, +ADD COLUMN "permissionSyncedAt" TIMESTAMP(3); -- AlterTable ALTER TABLE "User" ADD COLUMN "permissionSyncedAt" TIMESTAMP(3); diff --git a/packages/db/prisma/schema.prisma b/packages/db/prisma/schema.prisma index 45d9452ca..bdebbc690 100644 --- a/packages/db/prisma/schema.prisma +++ b/packages/db/prisma/schema.prisma @@ -41,35 +41,29 @@ enum ChatVisibility { } model Repo { - id Int @id @default(autoincrement()) - name String // Full repo name, including the vcs hostname (ex. github.com/sourcebot-dev/sourcebot) - displayName String? // Display name of the repo for UI (ex. sourcebot-dev/sourcebot) - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt - - /// When the repo was last indexed successfully. - indexedAt DateTime? + id Int @id @default(autoincrement()) + name String /// Full repo name, including the vcs hostname (ex. github.com/sourcebot-dev/sourcebot) + displayName String? /// Display name of the repo for UI (ex. sourcebot-dev/sourcebot) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt + indexedAt DateTime? /// When the repo was last indexed successfully. 
isFork Boolean isArchived Boolean - isPublic Boolean? - metadata Json // For schema see repoMetadataSchema in packages/backend/src/types.ts + isPublic Boolean @default(false) + metadata Json /// For schema see repoMetadataSchema in packages/backend/src/types.ts cloneUrl String webUrl String? connections RepoToConnection[] imageUrl String? repoIndexingStatus RepoIndexingStatus @default(NEW) - permittedUsers UserToRepoPermission[] + permittedUsers UserToRepoPermission[] permissionSyncJobs RepoPermissionSyncJob[] - /// When the permissions were last synced successfully. - permissionSyncedAt DateTime? + permissionSyncedAt DateTime? /// When the permissions were last synced successfully. - // The id of the repo in the external service - external_id String - // The type of the external service (e.g., github, gitlab, etc.) - external_codeHostType String - // The base url of the external service (e.g., https://github.com) - external_codeHostUrl String + external_id String /// The id of the repo in the external service + external_codeHostType String /// The type of the external service (e.g., github, gitlab, etc.) + external_codeHostUrl String /// The base url of the external service (e.g., https://github.com) org Org @relation(fields: [orgId], references: [id], onDelete: Cascade) orgId Int @@ -88,10 +82,10 @@ enum RepoPermissionSyncJobStatus { } model RepoPermissionSyncJob { - id String @id @default(cuid()) - status RepoPermissionSyncJobStatus @default(PENDING) - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt + id String @id @default(cuid()) + status RepoPermissionSyncJobStatus @default(PENDING) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt completedAt DateTime? errorMessage String? 
@@ -327,15 +321,15 @@ enum UserPermissionSyncJobStatus { } model UserPermissionSyncJob { - id String @id @default(cuid()) - status UserPermissionSyncJobStatus @default(PENDING) - createdAt DateTime @default(now()) - updatedAt DateTime @updatedAt + id String @id @default(cuid()) + status UserPermissionSyncJobStatus @default(PENDING) + createdAt DateTime @default(now()) + updatedAt DateTime @updatedAt completedAt DateTime? errorMessage String? - user User @relation(fields: [userId], references: [id], onDelete: Cascade) + user User @relation(fields: [userId], references: [id], onDelete: Cascade) userId String } From a386364473bea27ff7b8582bae717ba87d393d63 Mon Sep 17 00:00:00 2001 From: bkellam Date: Sat, 20 Sep 2025 16:43:48 -0700 Subject: [PATCH 14/14] fix unauthed user case --- packages/web/src/prisma.ts | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/packages/web/src/prisma.ts b/packages/web/src/prisma.ts index 1908c5fb3..1d4b7585b 100644 --- a/packages/web/src/prisma.ts +++ b/packages/web/src/prisma.ts @@ -32,14 +32,16 @@ export const userScopedPrismaClientExtension = (userId?: string) => { args.where = { ...args.where, OR: [ - // Only include repos that are permitted to the user, - { - permittedUsers: { - some: { - userId, + // Only include repos that are permitted to the user + ...(userId ? [ + { + permittedUsers: { + some: { + userId, + } } - } - }, + }, + ] : []), // or are public. { isPublic: true,