Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Improved homepage performance by removing client side polling. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
- Changed navbar indexing indicator to only report progress for first time indexing jobs. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
- Improved repo indexing job stability and robustness. [#563](https://github.com/sourcebot-dev/sourcebot/pull/563)
- Improved repositories table. [#572](https://github.com/sourcebot-dev/sourcebot/pull/572)

### Removed
- Removed spam "login page loaded" log. [#552](https://github.com/sourcebot-dev/sourcebot/pull/552)
Expand Down
21 changes: 0 additions & 21 deletions packages/backend/src/constants.ts
Original file line number Diff line number Diff line change
@@ -1,27 +1,6 @@
import { env } from "./env.js";
import { Settings } from "./types.js";
import path from "path";

/**
 * Default settings.
 *
 * Provides a value for every field of `Settings`. Fields with an `Ms`
 * suffix are durations in milliseconds; `maxFileSize` is in bytes.
 */
export const DEFAULT_SETTINGS: Settings = {
maxFileSize: 2 * 1024 * 1024, // 2MB in bytes
maxTrigramCount: 20000,
reindexIntervalMs: 1000 * 60 * 60, // 1 hour
resyncConnectionIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
resyncConnectionPollingIntervalMs: 1000 * 1, // 1 second
reindexRepoPollingIntervalMs: 1000 * 1, // 1 second
maxConnectionSyncJobConcurrency: 8,
maxRepoIndexingJobConcurrency: 8,
maxRepoGarbageCollectionJobConcurrency: 8,
repoGarbageCollectionGracePeriodMs: 10 * 1000, // 10 seconds
repoIndexTimeoutMs: 1000 * 60 * 60 * 2, // 2 hours
enablePublicAccess: false, // deprecated, use FORCE_ENABLE_ANONYMOUS_ACCESS instead
experiment_repoDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
experiment_userDrivenPermissionSyncIntervalMs: 1000 * 60 * 60 * 24, // 24 hours
}

// NOTE(review): presumably the code host types for which permission syncing
// is supported — consumers are not visible here, confirm against callers.
export const PERMISSION_SYNC_SUPPORTED_CODE_HOST_TYPES = [
'github',
];
Expand Down
22 changes: 22 additions & 0 deletions packages/backend/src/git.ts
Original file line number Diff line number Diff line change
Expand Up @@ -268,4 +268,26 @@ export const getTags = async (path: string) => {
const git = createGitClientForPath(path);
const tags = await git.tags();
return tags.all;
}

/**
 * Resolves a ref name (e.g. `HEAD`) to the commit it points at by running
 * `rev-parse` against the repository at `path`.
 *
 * @param path - Path passed to `createGitClientForPath` to locate the repository.
 * @param refName - The ref to resolve.
 * @returns The `rev-parse` output for `<refName>^{commit}`, or `undefined`
 * when the lookup fails (the error is logged to the console).
 */
export const getCommitHashForRefName = async ({
    path,
    refName,
}: {
    path: string,
    refName: string,
}) => {
    const client = createGitClientForPath(path);

    // Appending `^{commit}` fully dereferences the ref down to a commit hash.
    const commitSpec = `${refName}^{commit}`;

    try {
        return await client.revparse(commitSpec);
    } catch (error: unknown) {
        // @note: rev-parse was observed to error when the repository is
        // empty, so the failure is logged and reported as `undefined`
        // instead of being propagated.
        console.error(error);
        return undefined;
    }
}
21 changes: 3 additions & 18 deletions packages/backend/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ import "./instrument.js";

import { PrismaClient } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { hasEntitlement, loadConfig } from '@sourcebot/shared';
import { getConfigSettings, hasEntitlement } from '@sourcebot/shared';
import { existsSync } from 'fs';
import { mkdir } from 'fs/promises';
import { Redis } from 'ioredis';
import { ConnectionManager } from './connectionManager.js';
import { DEFAULT_SETTINGS, INDEX_CACHE_DIR, REPOS_CACHE_DIR } from './constants.js';
import { INDEX_CACHE_DIR, REPOS_CACHE_DIR } from './constants.js';
import { RepoPermissionSyncer } from './ee/repoPermissionSyncer.js';
import { UserPermissionSyncer } from "./ee/userPermissionSyncer.js";
import { GithubAppManager } from "./ee/githubAppManager.js";
Expand All @@ -18,20 +18,6 @@ import { PromClient } from './promClient.js';

const logger = createLogger('backend-entrypoint');

/**
 * Builds the effective settings for the backend.
 *
 * When `configPath` is missing (or empty) the defaults are returned as-is.
 * Otherwise the config at `configPath` is loaded and its `settings` are
 * layered on top of `DEFAULT_SETTINGS`, with config values taking precedence.
 */
const getSettings = async (configPath?: string) => {
    if (configPath) {
        const { settings: overrides } = await loadConfig(configPath);

        // Later spread wins: anything set in the config overrides the default.
        return {
            ...DEFAULT_SETTINGS,
            ...overrides,
        };
    }

    return DEFAULT_SETTINGS;
}


const reposPath = REPOS_CACHE_DIR;
const indexPath = INDEX_CACHE_DIR;

Expand All @@ -57,8 +43,7 @@ redis.ping().then(() => {

const promClient = new PromClient();

const settings = await getSettings(env.CONFIG_PATH);

const settings = await getConfigSettings(env.CONFIG_PATH);

if (hasEntitlement('github-app')) {
await GithubAppManager.getInstance().init(prisma);
Expand Down
2 changes: 1 addition & 1 deletion packages/backend/src/repoCompileUtils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,12 @@ import { marshalBool } from "./utils.js";
import { createLogger } from '@sourcebot/logger';
import { BitbucketConnectionConfig, GerritConnectionConfig, GiteaConnectionConfig, GitlabConnectionConfig, GenericGitHostConnectionConfig, AzureDevOpsConnectionConfig } from '@sourcebot/schemas/v3/connection.type';
import { ProjectVisibility } from "azure-devops-node-api/interfaces/CoreInterfaces.js";
import { RepoMetadata } from './types.js';
import path from 'path';
import { glob } from 'glob';
import { getOriginUrl, isPathAValidGitRepoRoot, isUrlAValidGitRepo } from './git.js';
import assert from 'assert';
import GitUrlParse from 'git-url-parse';
import { RepoMetadata } from '@sourcebot/shared';

export type RepoData = WithRequired<Prisma.RepoCreateInput, 'connections'>;

Expand Down
86 changes: 79 additions & 7 deletions packages/backend/src/repoIndexManager.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,18 @@
import * as Sentry from '@sentry/node';
import { PrismaClient, Repo, RepoIndexingJobStatus, RepoIndexingJobType } from "@sourcebot/db";
import { createLogger, Logger } from "@sourcebot/logger";
import { repoMetadataSchema, RepoIndexingJobMetadata, repoIndexingJobMetadataSchema, RepoMetadata } from '@sourcebot/shared';
import { existsSync } from 'fs';
import { readdir, rm } from 'fs/promises';
import { Job, Queue, ReservedJob, Worker } from "groupmq";
import { Redis } from 'ioredis';
import micromatch from 'micromatch';
import { INDEX_CACHE_DIR } from './constants.js';
import { env } from './env.js';
import { cloneRepository, fetchRepository, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
import { cloneRepository, fetchRepository, getBranches, getCommitHashForRefName, getTags, isPathAValidGitRepoRoot, unsetGitConfig, upsertGitConfig } from './git.js';
import { captureEvent } from './posthog.js';
import { PromClient } from './promClient.js';
import { repoMetadataSchema, RepoWithConnections, Settings } from "./types.js";
import { RepoWithConnections, Settings } from "./types.js";
import { getAuthCredentialsForRepo, getRepoPath, getShardPrefix, groupmqLifecycleExceptionWrapper, measure } from './utils.js';
import { indexGitRepository } from './zoekt.js';

Expand Down Expand Up @@ -61,7 +64,7 @@ export class RepoIndexManager {
concurrency: this.settings.maxRepoIndexingJobConcurrency,
...(env.DEBUG_ENABLE_GROUPMQ_LOGGING === 'true' ? {
logger: true,
}: {}),
} : {}),
});

this.worker.on('completed', this.onJobCompleted.bind(this));
Expand Down Expand Up @@ -126,7 +129,7 @@ export class RepoIndexManager {
{
AND: [
{ status: RepoIndexingJobStatus.FAILED },
{ completedAt: { gt: timeoutDate } },
{ completedAt: { gt: thresholdDate } },
]
}
]
Expand Down Expand Up @@ -263,7 +266,16 @@ export class RepoIndexManager {

try {
if (jobType === RepoIndexingJobType.INDEX) {
await this.indexRepository(repo, logger, abortController.signal);
const revisions = await this.indexRepository(repo, logger, abortController.signal);

await this.db.repoIndexingJob.update({
where: { id },
data: {
metadata: {
indexedRevisions: revisions,
} satisfies RepoIndexingJobMetadata,
},
});
} else if (jobType === RepoIndexingJobType.CLEANUP) {
await this.cleanupRepository(repo, logger);
}
Expand All @@ -285,7 +297,7 @@ export class RepoIndexManager {
// If the repo path exists but it is not a valid git repository root, this indicates
// that the repository is in a bad state. To fix, we remove the directory and perform
// a fresh clone.
if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot( { path: repoPath } ))) {
if (existsSync(repoPath) && !(await isPathAValidGitRepoRoot({ path: repoPath }))) {
const isValidGitRepo = await isPathAValidGitRepoRoot({
path: repoPath,
signal,
Expand Down Expand Up @@ -354,10 +366,54 @@ export class RepoIndexManager {
});
}

let revisions = [
'HEAD'
];

if (metadata.branches) {
const branchGlobs = metadata.branches
const allBranches = await getBranches(repoPath);
const matchingBranches =
allBranches
.filter((branch) => micromatch.isMatch(branch, branchGlobs))
.map((branch) => `refs/heads/${branch}`);

revisions = [
...revisions,
...matchingBranches
];
}

if (metadata.tags) {
const tagGlobs = metadata.tags;
const allTags = await getTags(repoPath);
const matchingTags =
allTags
.filter((tag) => micromatch.isMatch(tag, tagGlobs))
.map((tag) => `refs/tags/${tag}`);

revisions = [
...revisions,
...matchingTags
];
}

// zoekt has a limit of 64 branches/tags to index.
if (revisions.length > 64) {
logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
captureEvent('backend_revisions_truncated', {
repoId: repo.id,
revisionCount: revisions.length,
});
revisions = revisions.slice(0, 64);
}

logger.info(`Indexing ${repo.name} (id: ${repo.id})...`);
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, signal));
const { durationMs } = await measure(() => indexGitRepository(repo, this.settings, revisions, signal));
const indexDuration_s = durationMs / 1000;
logger.info(`Indexed ${repo.name} (id: ${repo.id}) in ${indexDuration_s}s`);

return revisions;
}

private async cleanupRepository(repo: Repo, logger: Logger) {
Expand All @@ -384,16 +440,32 @@ export class RepoIndexManager {
data: {
status: RepoIndexingJobStatus.COMPLETED,
completedAt: new Date(),
},
include: {
repo: true,
}
});

const jobTypeLabel = getJobTypePrometheusLabel(jobData.type);

if (jobData.type === RepoIndexingJobType.INDEX) {
const { path: repoPath } = getRepoPath(jobData.repo);
const commitHash = await getCommitHashForRefName({
path: repoPath,
refName: 'HEAD',
});

const jobMetadata = repoIndexingJobMetadataSchema.parse(jobData.metadata);

const repo = await this.db.repo.update({
where: { id: jobData.repoId },
data: {
indexedAt: new Date(),
indexedCommitHash: commitHash,
metadata: {
...(jobData.repo.metadata as RepoMetadata),
indexedRevisions: jobMetadata.indexedRevisions,
} satisfies RepoMetadata,
}
});

Expand Down
28 changes: 0 additions & 28 deletions packages/backend/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,36 +1,8 @@
import { Connection, Repo, RepoToConnection } from "@sourcebot/db";
import { Settings as SettingsSchema } from "@sourcebot/schemas/v3/index.type";
import { z } from "zod";

/** The schema's settings type with every optional property made required. */
export type Settings = Required<SettingsSchema>;

// Structure of the `metadata` field in the `Repo` table.
// Every field is optional, so an empty object is a valid value.
//
// @WARNING: If you modify this schema, please make sure it is backwards
// compatible with any prior versions of the schema!!
// @NOTE: If you move this schema, please update the comment in schema.prisma
// to point to the new location.
export const repoMetadataSchema = z.object({
/**
 * A set of key-value pairs that will be used as git config
 * variables when cloning the repo.
 * @see: https://git-scm.com/docs/git-clone#Documentation/git-clone.txt-code--configcodecodeltkeygtltvaluegtcode
 */
gitConfig: z.record(z.string(), z.string()).optional(),

/**
 * A list of branches to index. Glob patterns are supported.
 */
branches: z.array(z.string()).optional(),

/**
 * A list of tags to index. Glob patterns are supported.
 */
tags: z.array(z.string()).optional(),
});

// Static TypeScript type inferred from `repoMetadataSchema`.
export type RepoMetadata = z.infer<typeof repoMetadataSchema>;

// @see : https://stackoverflow.com/a/61132308
export type DeepPartial<T> = T extends object ? {
[P in keyof T]?: DeepPartial<T[P]>;
Expand Down
54 changes: 4 additions & 50 deletions packages/backend/src/zoekt.ts
Original file line number Diff line number Diff line change
@@ -1,62 +1,16 @@
import { Repo } from "@sourcebot/db";
import { createLogger } from "@sourcebot/logger";
import { exec } from "child_process";
import micromatch from "micromatch";
import { INDEX_CACHE_DIR } from "./constants.js";
import { getBranches, getTags } from "./git.js";
import { captureEvent } from "./posthog.js";
import { repoMetadataSchema, Settings } from "./types.js";
import { Settings } from "./types.js";
import { getRepoPath, getShardPrefix } from "./utils.js";

const logger = createLogger('zoekt');

export const indexGitRepository = async (repo: Repo, settings: Settings, signal?: AbortSignal) => {
let revisions = [
'HEAD'
];

export const indexGitRepository = async (repo: Repo, settings: Settings, revisions: string[], signal?: AbortSignal) => {
const { path: repoPath } = getRepoPath(repo);
const shardPrefix = getShardPrefix(repo.orgId, repo.id);
const metadata = repoMetadataSchema.parse(repo.metadata);

if (metadata.branches) {
const branchGlobs = metadata.branches
const allBranches = await getBranches(repoPath);
const matchingBranches =
allBranches
.filter((branch) => micromatch.isMatch(branch, branchGlobs))
.map((branch) => `refs/heads/${branch}`);

revisions = [
...revisions,
...matchingBranches
];
}

if (metadata.tags) {
const tagGlobs = metadata.tags;
const allTags = await getTags(repoPath);
const matchingTags =
allTags
.filter((tag) => micromatch.isMatch(tag, tagGlobs))
.map((tag) => `refs/tags/${tag}`);

revisions = [
...revisions,
...matchingTags
];
}

// zoekt has a limit of 64 branches/tags to index.
if (revisions.length > 64) {
logger.warn(`Too many revisions (${revisions.length}) for repo ${repo.id}, truncating to 64`);
captureEvent('backend_revisions_truncated', {
repoId: repo.id,
revisionCount: revisions.length,
});
revisions = revisions.slice(0, 64);
}

const command = [
'zoekt-git-index',
'-allow_missing_branches',
Expand All @@ -76,7 +30,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, signal?
reject(error);
return;
}

if (stdout) {
stdout.split('\n').filter(line => line.trim()).forEach(line => {
logger.info(line);
Expand All @@ -89,7 +43,7 @@ export const indexGitRepository = async (repo: Repo, settings: Settings, signal?
logger.info(line);
});
}

resolve({
stdout,
stderr
Expand Down
Loading
Loading