From e62e04b4d427f43c201d029d1de8f7ce852427c3 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Mon, 3 Nov 2025 04:34:35 +0530 Subject: [PATCH 01/15] add git utils --- packages/git-utils/README.md | 231 +++++++++++++ packages/git-utils/package.json | 34 ++ packages/git-utils/src/git.js | 499 ++++++++++++++++++++++++++++ packages/git-utils/src/index.js | 11 + packages/git-utils/test/.eslintrc | 4 + packages/git-utils/test/git.test.js | 496 +++++++++++++++++++++++++++ 6 files changed, 1275 insertions(+) create mode 100644 packages/git-utils/README.md create mode 100644 packages/git-utils/package.json create mode 100644 packages/git-utils/src/git.js create mode 100644 packages/git-utils/src/index.js create mode 100644 packages/git-utils/test/.eslintrc create mode 100644 packages/git-utils/test/git.test.js diff --git a/packages/git-utils/README.md b/packages/git-utils/README.md new file mode 100644 index 000000000..053d7f3a8 --- /dev/null +++ b/packages/git-utils/README.md @@ -0,0 +1,231 @@ +# @percy/git-utils + +Utility helpers for interacting with git (used internally by Percy CLI packages). + +This package provides higher-level helpers around common git operations with smart error handling, retry logic, and diagnostic capabilities. + +## Installation + +```bash +npm install @percy/git-utils +# or +yarn add @percy/git-utils +``` + +## Usage + +You can use the package in two ways: + +### Individual Function Imports + +```js +import { isGitRepository, getCurrentCommit } from '@percy/git-utils'; + +const isRepo = await isGitRepository(); +const commit = await getCurrentCommit(); +``` + +### PercyGitUtils Object + +```js +import { PercyGitUtils } from '@percy/git-utils'; + +const isRepo = await PercyGitUtils.isGitRepository(); +const commit = await PercyGitUtils.getCurrentCommit(); +``` + +## API Reference + +### Repository Validation + +#### `isGitRepository()` + +Check if the current directory is a git repository. + +```js +import { isGitRepository } from '@percy/git-utils'; + +const isRepo = await isGitRepository(); +// Returns: true or false +``` + +#### `getRepositoryRoot()` + +Get the root directory of the git repository. + +```js +import { getRepositoryRoot } from '@percy/git-utils'; + +const root = await getRepositoryRoot(); +// Returns: '/path/to/repo' +// Throws: Error if not a git repository +``` + +### Commit & Branch Information + +#### `getCurrentCommit()` + +Get the SHA of the current HEAD commit. + +```js +import { getCurrentCommit } from '@percy/git-utils'; + +const sha = await getCurrentCommit(); +// Returns: 'abc123...' (40-character SHA) +``` + +#### `getCurrentBranch()` + +Get the name of the current branch. + +```js +import { getCurrentBranch } from '@percy/git-utils'; + +const branch = await getCurrentBranch(); +// Returns: 'main' or 'HEAD' (if detached) +``` + +#### `commitExists(commit)` + +Check if a commit exists in the repository. + +```js +import { commitExists } from '@percy/git-utils'; + +const exists = await commitExists('abc123'); +// Returns: true or false +``` + +### Repository State & Diagnostics + +#### `getGitState()` + +Get comprehensive diagnostic information about the repository state. + +```js +import { getGitState } from '@percy/git-utils'; + +const state = await getGitState(); +// Returns: { +// isValid: true, +// isShallow: false, +// isDetached: false, +// isFirstCommit: false, +// hasRemote: true, +// remoteName: 'origin', +// defaultBranch: 'main', +// issues: [] // Array of diagnostic messages +// } +``` + +**State Properties:** +- `isValid`: Whether the directory is a valid git repository +- `isShallow`: Whether the repository is a shallow clone +- `isDetached`: Whether HEAD is in detached state +- `isFirstCommit`: Whether the current commit is the first commit +- `hasRemote`: Whether a remote is configured +- `remoteName`: Name of the first remote (usually 'origin') +- `defaultBranch`: Detected default branch name +- `issues`: Array of diagnostic warning messages + +### Merge Base & Changed Files + +#### `getMergeBase(targetBranch?)` + +Get the merge-base commit between HEAD and a target branch with smart fallback logic. + +```js +import { getMergeBase } from '@percy/git-utils'; + +const result = await getMergeBase('main'); +// Returns: { +// success: true, +// commit: 'abc123...', +// branch: 'main', +// error: null +// } + +// Or on failure: +// { +// success: false, +// commit: null, +// branch: 'main', +// error: { code: 'SHALLOW_CLONE', message: '...' } +// } +``` + +**Error Codes:** +- `NOT_GIT_REPO`: Not a git repository +- `SHALLOW_CLONE`: Repository is shallow +- `NO_MERGE_BASE`: No common ancestor found +- `UNKNOWN_ERROR`: Other error + +The function automatically: +- Detects the default branch if `targetBranch` is not provided +- Tries remote refs before local branches +- Handles detached HEAD state +- Provides helpful error messages + +#### `getChangedFiles(baselineCommit)` + +Get all changed files between a baseline commit and HEAD. + +```js +import { getChangedFiles } from '@percy/git-utils'; + +const files = await getChangedFiles('origin/main'); +// Returns: ['src/file.js', 'package.json', ...] +``` + +**Features:** +- Handles file renames (includes both old and new paths) +- Handles file copies (includes both source and destination) +- Detects submodule changes +- Returns paths relative to repository root + +### File Operations + +#### `checkoutFile(commit, filePath, outputDir)` + +Checkout a file from a specific commit to an output directory. + +```js +import { checkoutFile } from '@percy/git-utils'; + +const outputPath = await checkoutFile( + 'abc123', + 'src/file.js', + '/tmp/checkout' +); +// Returns: '/tmp/checkout/file.js' +``` + +## Advanced Features + +### Retry Logic + +All git commands include automatic retry logic for concurrent operations: +- Detects `index.lock` and similar errors +- Exponential backoff (100ms, 200ms, 400ms) +- Configurable via `retries` and `retryDelay` options + +### Error Handling + +Functions provide detailed error messages with context: +- Diagnostic information about repository state +- Suggestions for fixing common issues +- Specific error codes for programmatic handling + +## Development + +This repository uses Lerna and package-local scripts. From repo root run: + +```bash +yarn build +yarn test +yarn lint packages/git-utils +``` + +## License + +MIT diff --git a/packages/git-utils/package.json b/packages/git-utils/package.json new file mode 100644 index 000000000..131456b13 --- /dev/null +++ b/packages/git-utils/package.json @@ -0,0 +1,34 @@ +{ + "name": "@percy/git-utils", + "version": "1.31.4", + "license": "MIT", + "repository": { + "type": "git", + "url": "https://github.com/percy/cli", + "directory": "packages/git-utils" + }, + "publishConfig": { + "access": "public", + "tag": "latest" + }, + "engines": { + "node": ">=14" + }, + "files": [ + "dist" + ], + "main": "./dist/index.js", + "type": "module", + "exports": { + ".": "./dist/index.js" + }, + "scripts": { + "build": "node ../../scripts/build", + "lint": "eslint --ignore-path ../../.gitignore .", + "test": "node ../../scripts/test", + "test:coverage": "yarn test --coverage" + }, + "dependencies": { + "cross-spawn": "^7.0.3" + } +} diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js new file mode 100644 index 000000000..31d2c73b6 --- /dev/null +++ b/packages/git-utils/src/git.js @@ -0,0 +1,499 @@ +import { spawn } from 'cross-spawn'; +import path from 'path'; +import fs from 'fs'; + +const fsPromises = fs.promises; + +/** + * Execute a git command with retry logic for concurrent operations + * @param {string} command - Git command to execute + * @param {Object} options - Options + * @param {number} options.retries - Number of retries (default: 3) + * @param {number} options.retryDelay - Delay between retries in ms (default: 100) + * @returns {Promise} - Command output + */ +async function execGit(command, options = {}) { + const { retries = 3, retryDelay = 100, ...spawnOptions } = options; + let lastError; + + for (let attempt = 0; attempt <= retries; attempt++) { + try { + return await execGitOnce(command, spawnOptions); + } catch (err) { + lastError = err; + + // Check if error is due to concurrent git operations + const errorMsg = err.message.toLowerCase(); + const isConcurrentError = + errorMsg.includes('index.lock') || + errorMsg.includes('unable to create') || + errorMsg.includes('file exists') || + errorMsg.includes('another git process'); + + // Only retry for concurrent operation errors + if (isConcurrentError && attempt < retries) { + // Exponential backoff + const delay = retryDelay * Math.pow(2, attempt); + await new Promise(resolve => setTimeout(resolve, delay)); + continue; + } + + // For other errors or last attempt, throw + throw err; + } + } + + throw lastError; +} + +/** + * Execute a git command once (no retries) + * @param {string} command - Git command to execute + * @param {Object} options - Spawn options + * @returns {Promise} - Command output + */ +async function execGitOnce(command, options = {}) { + return new Promise((resolve, reject) => { + const [cmd, ...args] = command.split(' '); + const child = spawn(cmd, args, { + ...options, + encoding: 'utf8' + }); + + let stdout = ''; + let stderr = ''; + + if (child.stdout) { + child.stdout.on('data', (data) => { + stdout += data.toString(); + }); + } + + if (child.stderr) { + child.stderr.on('data', (data) => { + stderr += data.toString(); + }); + } + + child.on('error', (err) => { + reject(new Error(`Failed to execute git command: ${err.message}`)); + }); + + child.on('close', (code) => { + if (code !== 0) { + reject(new Error(`Git command failed (exit ${code}): ${stderr || stdout}`)); + } else { + resolve(stdout.trim()); + } + }); + }); +} +export async function isGitRepository() { + try { + await execGit('git rev-parse --git-dir'); + return true; + } catch (err) { + return false; + } +} + +export async function getRepositoryRoot() { + try { + const root = await execGit('git rev-parse --show-toplevel'); + return root; + } catch (err) { + throw new Error('Not a git repository'); + } +} + +export async function getCurrentCommit() { + try { + const commit = await execGit('git rev-parse HEAD'); + return commit; + } catch (err) { + throw new Error(`Failed to get current commit: ${err.message}`); + } +} + +/** + * Get current git branch name + * @returns {Promise} - Current branch name + */ +export async function getCurrentBranch() { + try { + const branch = await execGit('git rev-parse --abbrev-ref HEAD'); + return branch; + } catch (err) { + throw new Error(`Failed to get current branch: ${err.message}`); + } +} + +/** + * Validate git repository state and return diagnostic info + * @returns {Promise} - { isValid, isShallow, isDetached, defaultBranch, issues } + */ +export async function getGitState() { + const state = { + isValid: false, + isShallow: false, + isDetached: false, + isFirstCommit: false, + hasRemote: false, + remoteName: null, + defaultBranch: null, + issues: [] + }; + + try { + // Check if it's a git repository + await execGit('git rev-parse --git-dir'); + state.isValid = true; + } catch { + state.issues.push('Not a git repository'); + return state; + } + + // Check for remote configuration + try { + const remotes = await execGit('git remote -v'); + if (remotes && remotes.trim().length > 0) { + state.hasRemote = true; + // Extract first remote name (usually 'origin') + const match = remotes.match(/^(\S+)\s+/); + if (match) { + state.remoteName = match[1]; + } + } else { + state.hasRemote = false; + state.issues.push("No git remote configured - run 'git remote add origin '"); + } + } catch { + state.hasRemote = false; + state.issues.push('Failed to check git remote configuration'); + } + + // Check shallow clone + try { + const result = await execGit('git rev-parse --is-shallow-repository'); + state.isShallow = result === 'true'; + } catch { + // Fallback: check for .git/shallow file + try { + const repoRoot = await getRepositoryRoot(); + const shallowPath = path.join(repoRoot, '.git', 'shallow'); + await fsPromises.access(shallowPath, fs.constants.F_OK); + state.isShallow = true; + } catch { + state.isShallow = false; + } + } + + if (state.isShallow) { + state.issues.push("Shallow clone detected - use 'git fetch --unshallow' or set fetch-depth: 0 in CI"); + } + + // Check detached HEAD + try { + const branch = await getCurrentBranch(); + state.isDetached = branch === 'HEAD'; + if (state.isDetached) { + state.issues.push('Detached HEAD state - may need to fetch remote branches'); + } + } catch { + state.isDetached = false; + } + + // Check if first commit + try { + const parents = await execGit('git rev-list --parents HEAD'); + const lines = parents.split('\n').filter(Boolean); + if (lines.length > 0) { + const firstLine = lines[lines.length - 1]; + const shas = firstLine.trim().split(/\s+/); + state.isFirstCommit = shas.length === 1; + } + } catch { + state.isFirstCommit = false; + } + + // Try to detect default branch (only if remote exists) + if (state.hasRemote) { + const remoteName = state.remoteName || 'origin'; + const commonBranches = ['main', 'master', 'develop', 'development']; + for (const branch of commonBranches) { + try { + await execGit(`git rev-parse --verify ${remoteName}/${branch}`); + state.defaultBranch = branch; + break; + } catch { + // Try next branch + } + } + + // Try to get from remote HEAD + if (!state.defaultBranch) { + try { + const output = await execGit(`git symbolic-ref refs/remotes/${remoteName}/HEAD`); + const match = output.match(/refs\/remotes\/[^/]+\/(.+)/); + if (match) { + state.defaultBranch = match[1]; + } + } catch { + // Use fallback + state.defaultBranch = 'main'; + } + } + } else { + // No remote configured - try to detect local branches + const localBranches = ['main', 'master', 'develop', 'development']; + for (const branch of localBranches) { + try { + await execGit(`git rev-parse --verify ${branch}`); + state.defaultBranch = branch; + break; + } catch { + // Try next branch + } + } + + // Final fallback for local-only repos + if (!state.defaultBranch) { + state.defaultBranch = 'main'; + } + } + + return state; +} + +/** + * Get merge-base commit with smart error handling and recovery + * @param {string} targetBranch - Target branch (if null, auto-detects) + * @returns {Promise} - { success, commit, branch, error } + */ +export async function getMergeBase(targetBranch = null) { + const result = { success: false, commit: null, branch: null, error: null }; + + try { + // Get git state for diagnostics + const gitState = await getGitState(); + + // Early failures + if (!gitState.isValid) { + result.error = { code: 'NOT_GIT_REPO', message: 'Not a git repository' }; + return result; + } + + if (gitState.isShallow) { + result.error = { + code: 'SHALLOW_CLONE', + message: "Repository is a shallow clone. Use 'git fetch --unshallow' or configure CI with fetch-depth: 0" + }; + return result; + } + + // Disabled: do not bail on first commit for now. Keep diagnostic info but allow + // the merge-base flow to continue so SmartSnap doesn't exit early in repos + // that only contain a single commit (useful for certain CI environments). + // if (gitState.isFirstCommit) { + // result.error = { + // code: "FIRST_COMMIT", + // message: "This is the first commit in the repository" + // }; + // return result; + // } + + // Determine target branch + const branch = targetBranch || gitState.defaultBranch; + result.branch = branch; + + // If detached HEAD, try to fetch the branch (only if remote exists) + if (gitState.isDetached && gitState.hasRemote) { + const remoteName = gitState.remoteName || 'origin'; + try { + await execGit(`git rev-parse --verify ${remoteName}/${branch}`); + } catch { + try { + await execGit(`git fetch ${remoteName} ${branch}:refs/remotes/${remoteName}/${branch} --depth=100`); + } catch { + // Continue anyway, merge-base might still work + } + } + } + + // Try to get merge-base with various fallbacks + const attempts = []; + + // For repos with remote, try remote refs first + if (gitState.hasRemote) { + const remoteName = gitState.remoteName || 'origin'; + attempts.push(`${remoteName}/${branch}`); + } + + // Always try local branch + attempts.push(branch); + + // Try default branch as fallback (if different) + if (branch !== gitState.defaultBranch) { + if (gitState.hasRemote) { + const remoteName = gitState.remoteName || 'origin'; + attempts.push(`${remoteName}/${gitState.defaultBranch}`); + } + attempts.push(gitState.defaultBranch); + } + + for (const attempt of attempts) { + try { + const commit = await execGit(`git merge-base HEAD ${attempt}`); + result.success = true; + result.commit = commit; + return result; + } catch (err) { + // Continue to next attempt + } + } + + // All attempts failed - provide helpful error message + let errorMessage = `Could not find common ancestor with ${branch}.`; + + if (!gitState.hasRemote) { + errorMessage += ` No git remote configured. Tried local branch '${branch}'.`; + errorMessage += ' Use --smart-snap-baseline= to specify a different baseline branch.'; + } else { + errorMessage += ' This might be an orphan branch.'; + errorMessage += ` Tried: ${attempts.join(', ')}.`; + } + + result.error = { + code: 'NO_MERGE_BASE', + message: errorMessage + }; + } catch (err) { + result.error = { + code: 'UNKNOWN_ERROR', + message: `Failed to get merge base: ${err.message}` + }; + } + + return result; +} + +/** + * Get changed files between current commit and baseline + * Handles renames, copies, and submodule changes + * @param {string} baselineCommit - Baseline commit SHA or ref + * @returns {Promise} - Array of changed file paths (relative to repo root) + */ +export async function getChangedFiles(baselineCommit = 'origin/main') { + try { + // Use --name-status to detect renames + // Output format: "R100\told/path\tnew/path" for renames + // "M\tfile/path" for modifications + // "A\tfile/path" for additions + // "D\tfile/path" for deletions + const output = await execGit(`git diff --name-status ${baselineCommit}...HEAD`); + + if (!output) { + return []; + } + + const files = new Set(); + const lines = output.split('\n').filter(Boolean); + + for (const line of lines) { + const parts = line.split('\t'); + const status = parts[0]; + + // Handle renames: R\told\tnew + if (status.startsWith('R')) { + const oldPath = parts[1]; + const newPath = parts[2]; + + if (oldPath) files.add(oldPath); + if (newPath) files.add(newPath); + } else if (status.startsWith('C')) { + // Handle copies: C\tsource\tdest + const sourcePath = parts[1]; + const destPath = parts[2]; + + if (sourcePath) files.add(sourcePath); + if (destPath) files.add(destPath); + } else { + const filePath = parts[1]; + if (filePath) files.add(filePath); + } + } + + // Check for git submodule changes + try { + const submoduleOutput = await execGit(`git diff ${baselineCommit}...HEAD --submodule=short`); + if (submoduleOutput && submoduleOutput.includes('Submodule')) { + files.add('.gitmodules'); + + // Also try to get actual changed files within submodules + try { + const submodulePaths = await execGit('git config --file .gitmodules --get-regexp path'); + const submodules = submodulePaths.split('\n') + .filter(Boolean) + .map(line => line.split(' ')[1]); + + for (const submodulePath of submodules) { + try { + const subOutput = await execGit( + `git -C ${submodulePath} diff --name-only ${baselineCommit}...HEAD`, + { retries: 1 } + ); + if (subOutput) { + const subFiles = subOutput.split('\n').filter(Boolean); + for (const file of subFiles) { + files.add(`${submodulePath}/${file}`); + } + } + } catch { + // Submodule might not exist or be initialized + // .gitmodules addition will trigger bail anyway + } + } + } catch { + // Failed to enumerate submodules, but .gitmodules added + } + } + } catch { + // Continue without submodule tracking + } + + return Array.from(files); + } catch (err) { + throw new Error(`Failed to get changed files: ${err.message}`); + } +} + +/** + * Checkout file from specific commit to output path + * @param {string} commit - Commit SHA or ref + * @param {string} filePath - File path relative to repo root + * @param {string} outputDir - Output directory + * @returns {Promise} - Path to checked out file + */ +export async function checkoutFile(commit, filePath, outputDir) { + try { + await fsPromises.mkdir(outputDir, { recursive: true }); + + const outputPath = path.join(outputDir, path.basename(filePath)); + const contents = await execGit(`git show ${commit}:${filePath}`); + await fsPromises.writeFile(outputPath, contents, 'utf8'); + + return outputPath; + } catch (err) { + throw new Error(`Failed to checkout file ${filePath} from ${commit}: ${err.message}`); + } +} + +export async function commitExists(commit) { + try { + // Use cat-file -e which fails if the object doesn't exist + await execGit(`git cat-file -e ${commit}`); + return true; + } catch (err) { + return false; + } +} diff --git a/packages/git-utils/src/index.js b/packages/git-utils/src/index.js new file mode 100644 index 000000000..249abbb8a --- /dev/null +++ b/packages/git-utils/src/index.js @@ -0,0 +1,11 @@ +// Public entry point for @percy/git-utils +import * as gitUtils from './git.js'; + +// Export all individual functions +export * from './git.js'; + +// Export as a named object for convenient usage +export const PercyGitUtils = gitUtils; + +// Default export for CommonJS consumers (bundlers may handle this) +export default gitUtils; diff --git a/packages/git-utils/test/.eslintrc b/packages/git-utils/test/.eslintrc new file mode 100644 index 000000000..e9b386cb0 --- /dev/null +++ b/packages/git-utils/test/.eslintrc @@ -0,0 +1,4 @@ +env: + jasmine: true +rules: + import/no-extraneous-dependencies: off diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js new file mode 100644 index 000000000..7bdf327d2 --- /dev/null +++ b/packages/git-utils/test/git.test.js @@ -0,0 +1,496 @@ +import { + isGitRepository, + getRepositoryRoot, + getCurrentCommit, + getCurrentBranch, + getGitState, + getMergeBase, + getChangedFiles, + checkoutFile, + commitExists +} from '../src/git.js'; +import fs from 'fs'; +import path from 'path'; +import os from 'os'; + +describe('@percy/git-utils', () => { + describe('isGitRepository', () => { + it('should return true when in a git repository', async () => { + const result = await isGitRepository(); + expect(result).toBe(true); + }); + + it('should return false when not in a git repository', async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-test-')); + const originalCwd = process.cwd(); + + try { + process.chdir(tmpDir); + const result = await isGitRepository(); + expect(result).toBe(false); + } finally { + process.chdir(originalCwd); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + }); + + describe('getRepositoryRoot', () => { + it('should return the repository root path', async () => { + const root = await getRepositoryRoot(); + expect(typeof root).toBe('string'); + expect(root.length).toBeGreaterThan(0); + expect(root).toContain('cli'); + }); + + it('should throw error when not in a git repository', async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-test-')); + const originalCwd = process.cwd(); + + try { + process.chdir(tmpDir); + await expectAsync(getRepositoryRoot()).toBeRejectedWithError(/Not a git repository/); + } finally { + process.chdir(originalCwd); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + }); + + describe('getCurrentCommit', () => { + it('should return current commit SHA', async () => { + const commit = await getCurrentCommit(); + expect(typeof commit).toBe('string'); + expect(commit).toMatch(/^[0-9a-f]{40}$/); + }); + + it('should return valid commit that exists', async () => { + const commit = await getCurrentCommit(); + const exists = await commitExists(commit); + expect(exists).toBe(true); + }); + }); + + describe('getCurrentBranch', () => { + it('should return current branch name', async () => { + const branch = await getCurrentBranch(); + expect(typeof branch).toBe('string'); + expect(branch.length).toBeGreaterThan(0); + }); + + it('should return a valid branch name', async () => { + const branch = await getCurrentBranch(); + // Branch should not be empty and should be valid git ref format + expect(branch).not.toBe(''); + expect(branch).toMatch(/^[a-zA-Z0-9/_-]+$/); + }); + }); + + describe('commitExists', () => { + it('should return true for existing commit (HEAD)', async () => { + const currentCommit = await getCurrentCommit(); + const exists = await commitExists(currentCommit); + expect(exists).toBe(true); + }); + + it('should return true for HEAD reference', async () => { + const exists = await commitExists('HEAD'); + expect(exists).toBe(true); + }); + + it('should return false for non-existing commit', async () => { + // Use a SHA with an unusual pattern that won't match any object + const exists = await commitExists('deadbeefdeadbeefdeadbeefdeadbeefdeadbeef'); + expect(exists).toBe(false); + }); + + it('should return false for invalid commit format', async () => { + const exists = await commitExists('invalid-commit-sha'); + expect(exists).toBe(false); + }); + }); + + describe('getGitState', () => { + it('should return comprehensive git state object', async () => { + const state = await getGitState(); + + // Verify structure + expect(state).toEqual(jasmine.objectContaining({ + isValid: jasmine.any(Boolean), + isShallow: jasmine.any(Boolean), + isDetached: jasmine.any(Boolean), + isFirstCommit: jasmine.any(Boolean), + hasRemote: jasmine.any(Boolean), + defaultBranch: jasmine.any(String), + issues: jasmine.any(Array) + })); + }); + + it('should detect valid git repository', async () => { + const state = await getGitState(); + expect(state.isValid).toBe(true); + }); + + it('should have a default branch set', async () => { + const state = await getGitState(); + expect(state.defaultBranch).toBeTruthy(); + expect(['main', 'master', 'develop', 'development']).toContain(state.defaultBranch); + }); + + it('should detect remote configuration correctly', async () => { + const state = await getGitState(); + expect(state.hasRemote).toBe(true); + if (state.hasRemote) { + expect(state.remoteName).toBeTruthy(); + expect(typeof state.remoteName).toBe('string'); + } + }); + + it('should not be shallow repository in normal clone', async () => { + const state = await getGitState(); + // In a normal development environment, this should not be shallow + // CI environments might be shallow, so we just verify the type + expect(typeof state.isShallow).toBe('boolean'); + }); + + it('should detect non-git repository', async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-test-')); + const originalCwd = process.cwd(); + + try { + process.chdir(tmpDir); + const state = await getGitState(); + expect(state.isValid).toBe(false); + expect(state.issues).toContain('Not a git repository'); + } finally { + process.chdir(originalCwd); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('should include issues array with helpful messages', async () => { + const state = await getGitState(); + expect(Array.isArray(state.issues)).toBe(true); + // In a valid repo with remote, issues should be empty or informational + if (!state.isValid || !state.hasRemote || state.isShallow || state.isDetached) { + expect(state.issues.length).toBeGreaterThan(0); + } + }); + }); + + describe('getMergeBase', () => { + it('should return result object with correct structure', async () => { + const result = await getMergeBase(); + + expect(result).toEqual(jasmine.objectContaining({ + success: jasmine.any(Boolean), + commit: jasmine.any(String), + branch: jasmine.any(String), + error: null + })); + }); + + it('should successfully get merge-base with default branch', async () => { + const result = await getMergeBase(); + + expect(result.success).toBe(true); + expect(result.commit).toMatch(/^[0-9a-f]{40}$/); + expect(result.error).toBe(null); + }); + + it('should return valid commit SHA that exists', async () => { + const result = await getMergeBase(); + + if (result.success) { + const exists = await commitExists(result.commit); + expect(exists).toBe(true); + } + }); + + it('should accept specific target branch', async () => { + const currentBranch = await getCurrentBranch(); + const result = await getMergeBase(currentBranch); + + // Should succeed or provide error + expect(result).toBeDefined(); + expect(typeof result.success).toBe('boolean'); + expect(result.branch).toBe(currentBranch); + }); + + it('should handle non-existent branch gracefully with fallback', async () => { + const result = await getMergeBase('this-branch-definitely-does-not-exist-xyz-12345-nonexistent'); + + // The function has smart fallback logic - it will try default branch if specified branch fails + // So it may succeed OR fail depending on repo state + expect(typeof result.success).toBe('boolean'); + expect(result.branch).toBe('this-branch-definitely-does-not-exist-xyz-12345-nonexistent'); + + if (!result.success) { + expect(result.error).toBeTruthy(); + expect(result.error.code).toBe('NO_MERGE_BASE'); + expect(result.error.message).toContain('Could not find common ancestor'); + } else { + // Fallback succeeded - verify it returned a valid commit + expect(result.commit).toMatch(/^[0-9a-f]{40}$/); + } + }); + + it('should provide helpful error messages', async () => { + const result = await getMergeBase('nonexistent-branch'); + + if (!result.success) { + expect(result.error).toBeTruthy(); + expect(result.error.code).toBeTruthy(); + expect(result.error.message).toBeTruthy(); + expect(typeof result.error.message).toBe('string'); + } + }); + }); + + describe('getChangedFiles', () => { + it('should return an array', async () => { + const files = await getChangedFiles('HEAD'); + expect(Array.isArray(files)).toBe(true); + }); + + it('should return empty array when comparing HEAD to itself', async () => { + const files = await getChangedFiles('HEAD'); + expect(files).toEqual([]); + }); + + it('should detect changes between commits', async () => { + // Try to get changes from HEAD~1 + try { + const files = await getChangedFiles('HEAD~1'); + expect(Array.isArray(files)).toBe(true); + // Files might be empty if HEAD is first commit, which is okay + expect(files.length).toBeGreaterThanOrEqual(0); + } catch (err) { + // HEAD~1 might not exist if this is the first commit + expect(err.message).toContain('Failed to get changed files'); + } + }); + + it('should return file paths as strings', async () => { + try { + const files = await getChangedFiles('HEAD~10'); + files.forEach(file => { + expect(typeof file).toBe('string'); + expect(file.length).toBeGreaterThan(0); + }); + } catch (err) { + // Might fail if repository has fewer than 10 commits + expect(err.message).toContain('Failed to get changed files'); + } + }); + + it('should handle baseline commit reference', async () => { + const state = await getGitState(); + const remote = state.remoteName || 'origin'; + const branch = state.defaultBranch || 'main'; + + try { + const files = await getChangedFiles(`${remote}/${branch}`); + expect(Array.isArray(files)).toBe(true); + } catch (err) { + // Remote branch might not exist in test environment + expect(err.message).toBeTruthy(); + } + }); + }); + + describe('checkoutFile', () => { + let tmpDir; + + beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-checkout-test-')); + }); + + afterEach(() => { + if (fs.existsSync(tmpDir)) { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('should checkout file from current commit', async () => { + const currentCommit = await getCurrentCommit(); + + // Use README.md from repo root as it should exist in history + const readmePath = 'README.md'; + + const outputPath = await checkoutFile( + currentCommit, + readmePath, + tmpDir + ); + + expect(fs.existsSync(outputPath)).toBe(true); + expect(path.basename(outputPath)).toBe('README.md'); + + // Verify content + const content = fs.readFileSync(outputPath, 'utf8'); + expect(content.length).toBeGreaterThan(0); + }); + + it('should checkout file from HEAD reference', async () => { + const readmePath = 'README.md'; + + const outputPath = await checkoutFile( + 'HEAD', + readmePath, + tmpDir + ); + + expect(fs.existsSync(outputPath)).toBe(true); + }); + + it('should create output directory if it does not exist', async () => { + const currentCommit = await getCurrentCommit(); + const readmePath = 'README.md'; + + const nestedDir = path.join(tmpDir, 'nested', 'path'); + + const outputPath = await checkoutFile( + currentCommit, + readmePath, + nestedDir + ); + + expect(fs.existsSync(nestedDir)).toBe(true); + expect(fs.existsSync(outputPath)).toBe(true); + }); + + it('should throw error for non-existent file', async () => { + const currentCommit = await getCurrentCommit(); + + await expectAsync( + checkoutFile(currentCommit, 'this-file-does-not-exist.txt', tmpDir) + ).toBeRejectedWithError(/Failed to checkout file/); + }); + + it('should throw error for invalid commit', async () => { + const repoRoot = await getRepositoryRoot(); + const packageJsonPath = path.relative(repoRoot, path.join(repoRoot, 'package.json')); + + await expectAsync( + checkoutFile('invalid-commit-sha-12345', packageJsonPath, tmpDir) + ).toBeRejectedWithError(/Failed to checkout file/); + }); + + it('should throw error with descriptive message', async () => { + const currentCommit = await getCurrentCommit(); + + try { + await checkoutFile(currentCommit, 'nonexistent.txt', tmpDir); + fail('Should have thrown an error'); + } catch (err) { + expect(err.message).toContain('Failed to checkout file'); + expect(err.message).toContain('nonexistent.txt'); + expect(err.message).toContain(currentCommit); + } + }); + }); + + describe('Edge cases and error handling', () => { + it('should handle concurrent operations with retry', async () => { + // Run multiple operations concurrently + const promises = [ + isGitRepository(), + getCurrentCommit(), + getCurrentBranch(), + getGitState() + ]; + + const results = await Promise.all(promises); + + expect(results[0]).toBe(true); + expect(typeof results[1]).toBe('string'); + expect(typeof results[2]).toBe('string'); + expect(results[3].isValid).toBe(true); + }); + + it('should handle operations in non-git directory gracefully', async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-test-')); + const originalCwd = process.cwd(); + + try { + process.chdir(tmpDir); + + const isRepo = await isGitRepository(); + expect(isRepo).toBe(false); + + await expectAsync(getRepositoryRoot()).toBeRejected(); + await expectAsync(getCurrentCommit()).toBeRejected(); + await expectAsync(getCurrentBranch()).toBeRejected(); + + const state = await getGitState(); + expect(state.isValid).toBe(false); + } finally { + process.chdir(originalCwd); + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + + it('should handle multiple sequential operations', async () => { + const isRepo = await isGitRepository(); + const root = await getRepositoryRoot(); + const commit = await getCurrentCommit(); + const branch = await getCurrentBranch(); + const state = await getGitState(); + + expect(isRepo).toBe(true); + expect(root).toBeTruthy(); + expect(commit).toMatch(/^[0-9a-f]{40}$/); + expect(branch).toBeTruthy(); + expect(state.isValid).toBe(true); + }); + }); + + describe('Integration scenarios', () => { + it('should provide complete workflow: check repo, get state, find merge-base', async () => { + // Step 1: Verify it's a git repository + const isRepo = await isGitRepository(); + expect(isRepo).toBe(true); + + // Step 2: Get comprehensive state + const state = await getGitState(); + expect(state.isValid).toBe(true); + + // Step 3: Get merge-base using detected default branch + const mergeBase = await getMergeBase(state.defaultBranch); + expect(mergeBase.success).toBe(true); + expect(mergeBase.commit).toBeTruthy(); + }); + + it('should support typical CI workflow', async () => { + const state = await getGitState(); + + if (state.isValid && !state.isShallow && state.hasRemote) { + const mergeBase = await getMergeBase(); + expect(mergeBase.success).toBe(true); + + if (mergeBase.success) { + const changedFiles = await getChangedFiles(mergeBase.commit); + expect(Array.isArray(changedFiles)).toBe(true); + } + } + }); + + it('should handle file checkout workflow', async () => { + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-workflow-')); + + try { + const commit = await getCurrentCommit(); + const testFile = 'README.md'; + + const outputPath = await checkoutFile(commit, testFile, tmpDir); + + expect(fs.existsSync(outputPath)).toBe(true); + const content = fs.readFileSync(outputPath, 'utf8'); + expect(content.length).toBeGreaterThan(0); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); + }); +}); From 3b789a426d599bfcb6d1e6d073d10fc93d9b1523 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Mon, 3 Nov 2025 04:40:09 +0530 Subject: [PATCH 02/15] update git utils --- packages/git-utils/src/git.js | 41 +---------------------------- packages/git-utils/src/index.js | 4 --- packages/git-utils/test/git.test.js | 13 --------- 3 files changed, 1 insertion(+), 57 deletions(-) diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index 31d2c73b6..93fbc3391 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -38,7 +38,6 @@ async function execGit(command, options = {}) { continue; } - // For other errors or last attempt, throw throw err; } } @@ -145,7 +144,6 @@ export async function getGitState() { }; try { - // Check if it's a git repository await execGit('git rev-parse --git-dir'); state.isValid = true; } catch { @@ -153,12 +151,10 @@ export async function getGitState() { return state; } - // Check for remote configuration try { const remotes = await execGit('git remote -v'); if (remotes && remotes.trim().length > 0) { state.hasRemote = true; - // Extract first remote name (usually 'origin') const match = remotes.match(/^(\S+)\s+/); if (match) { state.remoteName = match[1]; @@ -172,7 +168,6 @@ export async function getGitState() { state.issues.push('Failed to check git remote configuration'); } - // Check shallow clone try { const result = await execGit('git rev-parse --is-shallow-repository'); state.isShallow = result === 'true'; @@ -203,7 +198,6 @@ export async function getGitState() { state.isDetached = false; } - // Check if first commit try { const parents = await execGit('git rev-list --parents HEAD'); const lines = parents.split('\n').filter(Boolean); @@ -216,7 +210,6 @@ export async function getGitState() { state.isFirstCommit = false; } - // Try to detect default branch (only if remote exists) if (state.hasRemote) { const remoteName = state.remoteName || 'origin'; const commonBranches = ['main', 'master', 'develop', 'development']; @@ -230,7 +223,6 @@ export async function getGitState() { } } - // Try to get from remote HEAD if (!state.defaultBranch) { try { const output = await execGit(`git symbolic-ref refs/remotes/${remoteName}/HEAD`); @@ -239,12 +231,10 @@ export async function getGitState() { state.defaultBranch = match[1]; } } catch { - // Use fallback state.defaultBranch = 'main'; } } } else { - // No remote configured - try to detect local branches const localBranches = ['main', 'master', 'develop', 'development']; for (const branch of localBranches) { try { @@ -256,7 +246,6 @@ export async function getGitState() { } } - // Final fallback for local-only repos if (!state.defaultBranch) { state.defaultBranch = 'main'; } @@ -274,10 +263,8 @@ export async function getMergeBase(targetBranch = null) { const result = { success: false, commit: null, branch: null, error: null }; try { - // Get git state for diagnostics const gitState = await getGitState(); - // Early failures if (!gitState.isValid) { result.error = { code: 'NOT_GIT_REPO', message: 'Not a git repository' }; return result; @@ -291,22 +278,9 @@ export async function getMergeBase(targetBranch = null) { return result; } - // Disabled: do not bail on first commit for now. Keep diagnostic info but allow - // the merge-base flow to continue so SmartSnap doesn't exit early in repos - // that only contain a single commit (useful for certain CI environments). - // if (gitState.isFirstCommit) { - // result.error = { - // code: "FIRST_COMMIT", - // message: "This is the first commit in the repository" - // }; - // return result; - // } - - // Determine target branch const branch = targetBranch || gitState.defaultBranch; result.branch = branch; - // If detached HEAD, try to fetch the branch (only if remote exists) if (gitState.isDetached && gitState.hasRemote) { const remoteName = gitState.remoteName || 'origin'; try { @@ -315,24 +289,19 @@ export async function getMergeBase(targetBranch = null) { try { await execGit(`git fetch ${remoteName} ${branch}:refs/remotes/${remoteName}/${branch} --depth=100`); } catch { - // Continue anyway, merge-base might still work } } } - // Try to get merge-base with various fallbacks const attempts = []; - // For repos with remote, try remote refs first if (gitState.hasRemote) { const remoteName = gitState.remoteName || 'origin'; attempts.push(`${remoteName}/${branch}`); } - // Always try local branch attempts.push(branch); - // Try default branch as fallback (if different) if (branch !== gitState.defaultBranch) { if (gitState.hasRemote) { const remoteName = gitState.remoteName || 'origin'; @@ -352,7 +321,6 @@ export async function getMergeBase(targetBranch = null) { } } - // All attempts failed - provide helpful error message let errorMessage = `Could not find common ancestor with ${branch}.`; if (!gitState.hasRemote) { @@ -385,11 +353,7 @@ export async function getMergeBase(targetBranch = null) { */ export async function getChangedFiles(baselineCommit = 'origin/main') { try { - // Use --name-status to detect renames - // Output format: "R100\told/path\tnew/path" for renames - // "M\tfile/path" for modifications - // "A\tfile/path" for additions - // "D\tfile/path" for deletions + const output = await execGit(`git diff --name-status ${baselineCommit}...HEAD`); if (!output) { @@ -429,7 +393,6 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { if (submoduleOutput && submoduleOutput.includes('Submodule')) { files.add('.gitmodules'); - // Also try to get actual changed files within submodules try { const submodulePaths = await execGit('git config --file .gitmodules --get-regexp path'); const submodules = submodulePaths.split('\n') @@ -450,7 +413,6 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { } } catch { // Submodule might not exist or be initialized - // .gitmodules addition will trigger bail anyway } } } catch { @@ -490,7 +452,6 @@ export async function checkoutFile(commit, filePath, outputDir) { export async function commitExists(commit) { try { - // Use cat-file -e which fails if the object doesn't exist await execGit(`git cat-file -e ${commit}`); return true; } catch (err) { diff --git a/packages/git-utils/src/index.js b/packages/git-utils/src/index.js index 249abbb8a..0000f3aff 100644 --- a/packages/git-utils/src/index.js +++ b/packages/git-utils/src/index.js @@ -1,11 +1,7 @@ -// Public entry point for @percy/git-utils import * as gitUtils from './git.js'; -// Export all individual functions export * from './git.js'; -// Export as a named object for convenient usage export const PercyGitUtils = gitUtils; -// Default export for CommonJS consumers (bundlers may handle this) export default gitUtils; diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js index 7bdf327d2..9b6b89b10 100644 --- a/packages/git-utils/test/git.test.js +++ b/packages/git-utils/test/git.test.js @@ -220,8 +220,6 @@ describe('@percy/git-utils', () => { it('should handle non-existent branch gracefully with fallback', async () => { const result = await getMergeBase('this-branch-definitely-does-not-exist-xyz-12345-nonexistent'); - // The function has smart fallback logic - it will try default branch if specified branch fails - // So it may succeed OR fail depending on repo state expect(typeof result.success).toBe('boolean'); expect(result.branch).toBe('this-branch-definitely-does-not-exist-xyz-12345-nonexistent'); @@ -230,7 +228,6 @@ describe('@percy/git-utils', () => { expect(result.error.code).toBe('NO_MERGE_BASE'); expect(result.error.message).toContain('Could not find common ancestor'); } else { - // Fallback succeeded - verify it returned a valid commit expect(result.commit).toMatch(/^[0-9a-f]{40}$/); } }); @@ -259,14 +256,11 @@ describe('@percy/git-utils', () => { }); it('should detect changes between commits', async () => { - // Try to get changes from HEAD~1 try { const files = await getChangedFiles('HEAD~1'); expect(Array.isArray(files)).toBe(true); - // Files might be empty if HEAD is first commit, which is okay expect(files.length).toBeGreaterThanOrEqual(0); } catch (err) { - // HEAD~1 might not exist if this is the first commit expect(err.message).toContain('Failed to get changed files'); } }); @@ -279,7 +273,6 @@ describe('@percy/git-utils', () => { expect(file.length).toBeGreaterThan(0); }); } catch (err) { - // Might fail if repository has fewer than 10 commits expect(err.message).toContain('Failed to get changed files'); } }); @@ -293,7 +286,6 @@ describe('@percy/git-utils', () => { const files = await getChangedFiles(`${remote}/${branch}`); expect(Array.isArray(files)).toBe(true); } catch (err) { - // Remote branch might not exist in test environment expect(err.message).toBeTruthy(); } }); @@ -315,7 +307,6 @@ describe('@percy/git-utils', () => { it('should checkout file from current commit', async () => { const currentCommit = await getCurrentCommit(); - // Use README.md from repo root as it should exist in history const readmePath = 'README.md'; const outputPath = await checkoutFile( @@ -327,7 +318,6 @@ describe('@percy/git-utils', () => { expect(fs.existsSync(outputPath)).toBe(true); expect(path.basename(outputPath)).toBe('README.md'); - // Verify content const content = fs.readFileSync(outputPath, 'utf8'); expect(content.length).toBeGreaterThan(0); }); @@ -448,15 +438,12 @@ describe('@percy/git-utils', () => { describe('Integration scenarios', () => { it('should provide complete workflow: check repo, get state, find merge-base', async () => { - // Step 1: Verify it's a git repository const isRepo = await isGitRepository(); expect(isRepo).toBe(true); - // Step 2: Get comprehensive state const state = await getGitState(); expect(state.isValid).toBe(true); - // Step 3: Get merge-base using detected default branch const mergeBase = await getMergeBase(state.defaultBranch); expect(mergeBase.success).toBe(true); expect(mergeBase.commit).toBeTruthy(); From d7c91b0e48184fb9ffe1d5f0f30f2eaf39708e5d Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Mon, 3 Nov 2025 04:43:22 +0530 Subject: [PATCH 03/15] fix lint --- packages/git-utils/src/git.js | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index 93fbc3391..11ccc94d9 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -353,7 +353,6 @@ export async function getMergeBase(targetBranch = null) { */ export async function getChangedFiles(baselineCommit = 'origin/main') { try { - const output = await execGit(`git diff --name-status ${baselineCommit}...HEAD`); if (!output) { From b99ab27c321fa638bceb40dc2ca91185bfc36b5a Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Mon, 3 Nov 2025 13:40:01 +0530 Subject: [PATCH 04/15] resolve comments --- packages/git-utils/src/git.js | 48 +++++++++++++++++++++++------ packages/git-utils/test/git.test.js | 25 +++++++++++++++ 2 files changed, 63 insertions(+), 10 deletions(-) diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index 11ccc94d9..48111fb6b 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -53,7 +53,15 @@ async function execGit(command, options = {}) { */ async function execGitOnce(command, options = {}) { return new Promise((resolve, reject) => { - const [cmd, ...args] = command.split(' '); + let cmd; + let args; + + if (Array.isArray(command)) { + [cmd, ...args] = command; + } else { + [cmd, ...args] = command.split(' '); + } + const child = spawn(cmd, args, { ...options, encoding: 'utf8' @@ -353,7 +361,7 @@ export async function getMergeBase(targetBranch = null) { */ export async function getChangedFiles(baselineCommit = 'origin/main') { try { - const output = await execGit(`git diff --name-status ${baselineCommit}...HEAD`); + const output = await execGit(['git', 'diff', '--name-status', `${baselineCommit}..HEAD`]); if (!output) { return []; @@ -388,22 +396,33 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { // Check for git submodule changes try { - const submoduleOutput = await execGit(`git diff ${baselineCommit}...HEAD --submodule=short`); + const submoduleOutput = await execGit(['git', 'diff', `${baselineCommit}..HEAD`, '--submodule=short']); if (submoduleOutput && submoduleOutput.includes('Submodule')) { files.add('.gitmodules'); try { - const submodulePaths = await execGit('git config --file .gitmodules --get-regexp path'); + const submodulePaths = await execGit(['git', 'config', '--file', '.gitmodules', '--get-regexp', 'path']); const submodules = submodulePaths.split('\n') .filter(Boolean) .map(line => line.split(' ')[1]); for (const submodulePath of submodules) { try { - const subOutput = await execGit( - `git -C ${submodulePath} diff --name-only ${baselineCommit}...HEAD`, - { retries: 1 } - ); + // Validate submodule path to avoid path traversal or injection + const normalizedSub = path.normalize(submodulePath); + if (path.isAbsolute(normalizedSub) || normalizedSub.split(path.sep).includes('..')) { + // skip suspicious submodule paths + continue; + } + + const subOutput = await execGit([ + 'git', + '-C', + normalizedSub, + 'diff', + '--name-only', + `${baselineCommit}..HEAD` + ], { retries: 1 }); if (subOutput) { const subFiles = subOutput.split('\n').filter(Boolean); for (const file of subFiles) { @@ -440,7 +459,12 @@ export async function checkoutFile(commit, filePath, outputDir) { await fsPromises.mkdir(outputDir, { recursive: true }); const outputPath = path.join(outputDir, path.basename(filePath)); - const contents = await execGit(`git show ${commit}:${filePath}`); + const normalized = path.normalize(filePath); + if (path.isAbsolute(normalized) || normalized.split(path.sep).includes('..')) { + throw new Error(`Invalid file path: ${filePath}`); + } + + const contents = await execGit(['git', 'show', `${commit}:${normalized}`]); await fsPromises.writeFile(outputPath, contents, 'utf8'); return outputPath; @@ -451,7 +475,11 @@ export async function checkoutFile(commit, filePath, outputDir) { export async function commitExists(commit) { try { - await execGit(`git cat-file -e ${commit}`); + if (!commit || typeof commit !== 'string') return false; + const safeRef = commit === 'HEAD' || /^[0-9a-fA-F]{4,40}$/.test(commit) || /^[\w/ -]+$/.test(commit); + if (!safeRef) return false; + + await execGit(['git', 'cat-file', '-e', commit]); return true; } catch (err) { return false; diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js index 9b6b89b10..d05f94960 100644 --- a/packages/git-utils/test/git.test.js +++ b/packages/git-utils/test/git.test.js @@ -379,6 +379,23 @@ describe('@percy/git-utils', () => { expect(err.message).toContain(currentCommit); } }); + + it('should reject absolute paths and path traversal for filePath', async () => { + const currentCommit = await getCurrentCommit(); + const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-checkout-test-')); + + try { + await expectAsync( + checkoutFile(currentCommit, '/etc/passwd', tmpDir) + ).toBeRejectedWithError(/Invalid file path/); + + await expectAsync( + checkoutFile(currentCommit, '../package.json', tmpDir) + ).toBeRejectedWithError(/Invalid file path/); + } finally { + fs.rmSync(tmpDir, { recursive: true, force: true }); + } + }); }); describe('Edge cases and error handling', () => { @@ -436,6 +453,14 @@ describe('@percy/git-utils', () => { }); }); + describe('security checks', () => { + it('commitExists should reject clearly invalid refs', async () => { + const invalid = 'some$weird;ref`rm -rf /`'; + const exists = await commitExists(invalid); + expect(exists).toBe(false); + }); + }); + describe('Integration scenarios', () => { it('should provide complete workflow: check repo, get state, find merge-base', async () => { const isRepo = await isGitRepository(); From b39b02c6a5e5d0b8dd207c349dab51c20a931c96 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Mon, 3 Nov 2025 13:44:26 +0530 Subject: [PATCH 05/15] update workflow --- .github/workflows/test.yml | 1 + .github/workflows/windows.yml | 1 + 2 files changed, 2 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e7ed4b05f..90b84254e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -57,6 +57,7 @@ jobs: - '@percy/sdk-utils' - '@percy/webdriver-utils' - '@percy/monitoring' + - '@percy/git-utils' runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v5 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index eb7803054..0d43d018b 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -56,6 +56,7 @@ jobs: - '@percy/sdk-utils' - '@percy/webdriver-utils' - '@percy/monitoring' + - '@percy/git-utils' runs-on: windows-latest steps: - uses: actions/checkout@v5 From 965a78e4f6f196fdbeb486dce87453a98f61dee4 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Mon, 3 Nov 2025 13:49:27 +0530 Subject: [PATCH 06/15] update workflow --- .github/workflows/test.yml | 1 - .github/workflows/windows.yml | 1 - 2 files changed, 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 90b84254e..e7ed4b05f 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -57,7 +57,6 @@ jobs: - '@percy/sdk-utils' - '@percy/webdriver-utils' - '@percy/monitoring' - - '@percy/git-utils' runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v5 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 0d43d018b..eb7803054 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -56,7 +56,6 @@ jobs: - '@percy/sdk-utils' - '@percy/webdriver-utils' - '@percy/monitoring' - - '@percy/git-utils' runs-on: windows-latest steps: - uses: actions/checkout@v5 From 1bd7edcf063e048473718fbd25e504d5d9c5f962 Mon Sep 17 00:00:00 2001 From: Pranav Z Date: Mon, 3 Nov 2025 13:55:31 +0530 Subject: [PATCH 07/15] Update packages/git-utils/src/git.js Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/git-utils/src/git.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index 48111fb6b..3817e9770 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -476,7 +476,7 @@ export async function checkoutFile(commit, filePath, outputDir) { export async function commitExists(commit) { try { if (!commit || typeof commit !== 'string') return false; - const safeRef = commit === 'HEAD' || /^[0-9a-fA-F]{4,40}$/.test(commit) || /^[\w/ -]+$/.test(commit); + const safeRef = commit === 'HEAD' || /^[0-9a-fA-F]{4,40}$/.test(commit) || /^(refs\/[A-Za-z0-9._/-]+)$/.test(commit); if (!safeRef) return false; await execGit(['git', 'cat-file', '-e', commit]); From 96235f0827fde9566e7af9039b76be7ac73c144a Mon Sep 17 00:00:00 2001 From: Pranav Z Date: Mon, 3 Nov 2025 13:56:05 +0530 Subject: [PATCH 08/15] Update packages/git-utils/test/git.test.js Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- packages/git-utils/test/git.test.js | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js index d05f94960..3642475a3 100644 --- a/packages/git-utils/test/git.test.js +++ b/packages/git-utils/test/git.test.js @@ -382,19 +382,14 @@ describe('@percy/git-utils', () => { it('should reject absolute paths and path traversal for filePath', async () => { const currentCommit = await getCurrentCommit(); - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-checkout-test-')); - try { - await expectAsync( - checkoutFile(currentCommit, '/etc/passwd', tmpDir) - ).toBeRejectedWithError(/Invalid file path/); + await expectAsync( + checkoutFile(currentCommit, '/etc/passwd', tmpDir) + ).toBeRejectedWithError(/Invalid file path/); - await expectAsync( - checkoutFile(currentCommit, '../package.json', tmpDir) - ).toBeRejectedWithError(/Invalid file path/); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } + await expectAsync( + checkoutFile(currentCommit, '../package.json', tmpDir) + ).toBeRejectedWithError(/Invalid file path/); }); }); From 92f99a5703fedc269b24e24746a71905a6fc4cc3 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Mon, 3 Nov 2025 15:16:49 +0530 Subject: [PATCH 09/15] Add test binary file --- packages/git-utils/test-binary.bin | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 packages/git-utils/test-binary.bin diff --git a/packages/git-utils/test-binary.bin b/packages/git-utils/test-binary.bin new file mode 100644 index 000000000..0dd1608e4 --- /dev/null +++ b/packages/git-utils/test-binary.bin @@ -0,0 +1,2 @@ +�PNG + From b67c5c8deb10f438f760e4d4f67cf26a62f0c0c0 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Mon, 3 Nov 2025 15:30:39 +0530 Subject: [PATCH 10/15] handle utf8 encoding with options --- packages/git-utils/src/git.js | 31 +++++++++++++++++++++-------- packages/git-utils/test/git.test.js | 29 +++++++++++++++++++++++++++ 2 files changed, 52 insertions(+), 8 deletions(-) diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index 3817e9770..d5775da30 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -49,7 +49,8 @@ async function execGit(command, options = {}) { * Execute a git command once (no retries) * @param {string} command - Git command to execute * @param {Object} options - Spawn options - * @returns {Promise} - Command output + * @param {string|null} options.encoding - Output encoding ('utf8' or null for Buffer, default: 'utf8') + * @returns {Promise} - Command output (string if utf8, Buffer if null encoding) */ async function execGitOnce(command, options = {}) { return new Promise((resolve, reject) => { @@ -62,17 +63,25 @@ async function execGitOnce(command, options = {}) { [cmd, ...args] = command.split(' '); } + // Extract encoding option, default to 'utf8' for backward compatibility + const { encoding = 'utf8', ...spawnOptions } = options; + const isBinaryMode = encoding === null || encoding === 'buffer'; + const child = spawn(cmd, args, { - ...options, - encoding: 'utf8' + ...spawnOptions, + encoding: isBinaryMode ? null : encoding }); - let stdout = ''; + let stdout = isBinaryMode ? [] : ''; let stderr = ''; if (child.stdout) { child.stdout.on('data', (data) => { - stdout += data.toString(); + if (isBinaryMode) { + stdout.push(data); + } else { + stdout += data.toString(); + } }); } @@ -90,7 +99,11 @@ async function execGitOnce(command, options = {}) { if (code !== 0) { reject(new Error(`Git command failed (exit ${code}): ${stderr || stdout}`)); } else { - resolve(stdout.trim()); + if (isBinaryMode) { + resolve(Buffer.concat(stdout)); + } else { + resolve(stdout.trim()); + } } }); }); @@ -449,6 +462,7 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { /** * Checkout file from specific commit to output path + * Supports both text and binary files. * @param {string} commit - Commit SHA or ref * @param {string} filePath - File path relative to repo root * @param {string} outputDir - Output directory @@ -464,8 +478,9 @@ export async function checkoutFile(commit, filePath, outputDir) { throw new Error(`Invalid file path: ${filePath}`); } - const contents = await execGit(['git', 'show', `${commit}:${normalized}`]); - await fsPromises.writeFile(outputPath, contents, 'utf8'); + // Use binary mode to support both text and binary files + const contents = await execGit(['git', 'show', `${commit}:${normalized}`], { encoding: null }); + await fsPromises.writeFile(outputPath, contents); return outputPath; } catch (err) { diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js index 3642475a3..e370e0263 100644 --- a/packages/git-utils/test/git.test.js +++ b/packages/git-utils/test/git.test.js @@ -391,6 +391,35 @@ describe('@percy/git-utils', () => { checkoutFile(currentCommit, '../package.json', tmpDir) ).toBeRejectedWithError(/Invalid file path/); }); + + it('should correctly checkout binary files without corruption', async () => { + const currentCommit = await getCurrentCommit(); + const binaryFilePath = 'packages/git-utils/test-binary.bin'; + + const outputPath = await checkoutFile( + currentCommit, + binaryFilePath, + tmpDir + ); + + expect(fs.existsSync(outputPath)).toBe(true); + expect(path.basename(outputPath)).toBe('test-binary.bin'); + + // Read both the original and checked out file as buffers + const repoRoot = await getRepositoryRoot(); + const originalPath = path.join(repoRoot, binaryFilePath); + const originalContent = fs.readFileSync(originalPath); + const checkedOutContent = fs.readFileSync(outputPath); + + // Verify the binary content is identical + expect(Buffer.compare(originalContent, checkedOutContent)).toBe(0); + + // Verify it contains PNG header bytes (binary data) + expect(checkedOutContent[0]).toBe(0x89); + expect(checkedOutContent[1]).toBe(0x50); + expect(checkedOutContent[2]).toBe(0x4e); + expect(checkedOutContent[3]).toBe(0x47); + }); }); describe('Edge cases and error handling', () => { From bcb4dc0654c33fd8ae44bc4ad9665097386e0da6 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Tue, 4 Nov 2025 13:05:11 +0530 Subject: [PATCH 11/15] fix security vulnerability --- packages/git-utils/src/git.js | 24 +++++++++++---- packages/git-utils/test/git.test.js | 47 +++++++++++++++++++++++++++++ 2 files changed, 65 insertions(+), 6 deletions(-) diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index d5775da30..769ee583d 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -470,19 +470,31 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { */ export async function checkoutFile(commit, filePath, outputDir) { try { - await fsPromises.mkdir(outputDir, { recursive: true }); - - const outputPath = path.join(outputDir, path.basename(filePath)); const normalized = path.normalize(filePath); if (path.isAbsolute(normalized) || normalized.split(path.sep).includes('..')) { throw new Error(`Invalid file path: ${filePath}`); } - // Use binary mode to support both text and binary files + await fsPromises.mkdir(outputDir, { recursive: true }); + + const basename = path.basename(filePath); + if (basename.includes('/') || basename.includes('\\')) { + throw new Error(`Invalid filename in path: ${filePath}`); + } + + const outputPath = path.join(outputDir, basename); + + const resolvedOutputDir = path.resolve(outputDir); + const resolvedOutputPath = path.resolve(outputPath); + if (!resolvedOutputPath.startsWith(resolvedOutputDir + path.sep) && + resolvedOutputPath !== resolvedOutputDir) { + throw new Error(`Output path escapes output directory: ${outputPath}`); + } + const contents = await execGit(['git', 'show', `${commit}:${normalized}`], { encoding: null }); - await fsPromises.writeFile(outputPath, contents); + await fsPromises.writeFile(resolvedOutputPath, contents); - return outputPath; + return resolvedOutputPath; } catch (err) { throw new Error(`Failed to checkout file ${filePath} from ${commit}: ${err.message}`); } diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js index e370e0263..a141185a4 100644 --- a/packages/git-utils/test/git.test.js +++ b/packages/git-utils/test/git.test.js @@ -392,6 +392,53 @@ describe('@percy/git-utils', () => { ).toBeRejectedWithError(/Invalid file path/); }); + it('should reject filenames with path separators in basename', async () => { + const currentCommit = await getCurrentCommit(); + + // Unix-style separator + await expectAsync( + checkoutFile(currentCommit, 'foo/bar/../../../etc/passwd', tmpDir) + ).toBeRejectedWithError(/Invalid file path/); + + // Windows-style separator in filename + await expectAsync( + checkoutFile(currentCommit, 'foo\\bar', tmpDir) + ).toBeRejectedWithError(/Invalid filename in path/); + }); + + it('should prevent path traversal attempts via multiple methods', async () => { + const currentCommit = await getCurrentCommit(); + + // Various traversal attempts + const maliciousPaths = [ + '../../etc/passwd', + './../../secrets.txt', + 'foo/../../../bar', + './../parent-dir/file.txt', + 'subdir/../../escape.txt' + ]; + + for (const maliciousPath of maliciousPaths) { + await expectAsync( + checkoutFile(currentCommit, maliciousPath, tmpDir) + ).toBeRejectedWithError(/Invalid file path/); + } + }); + + it('should ensure output stays within output directory', async () => { + const currentCommit = await getCurrentCommit(); + + // Even if somehow a traversal sequence gets through, the final check should catch it + // This test ensures the resolved path validation works + const result = await checkoutFile(currentCommit, 'README.md', tmpDir); + + // Verify the result is actually within tmpDir + const resolvedTmpDir = path.resolve(tmpDir); + const resolvedResult = path.resolve(result); + + expect(resolvedResult.startsWith(resolvedTmpDir)).toBe(true); + }); + it('should correctly checkout binary files without corruption', async () => { const currentCommit = await getCurrentCommit(); const binaryFilePath = 'packages/git-utils/test-binary.bin'; From e96eb8e237e8a64ba2d6f4efc05c2b38413bf875 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Tue, 4 Nov 2025 13:27:42 +0530 Subject: [PATCH 12/15] fix security vulnerability --- packages/git-utils/src/git.js | 21 +++++--- packages/git-utils/test/git.test.js | 75 +++++++++++++++++++++-------- 2 files changed, 70 insertions(+), 26 deletions(-) diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index 769ee583d..6ff6dd9e8 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -470,25 +470,34 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { */ export async function checkoutFile(commit, filePath, outputDir) { try { + const repoRoot = await getRepositoryRoot(); const normalized = path.normalize(filePath); if (path.isAbsolute(normalized) || normalized.split(path.sep).includes('..')) { throw new Error(`Invalid file path: ${filePath}`); } - await fsPromises.mkdir(outputDir, { recursive: true }); - const basename = path.basename(filePath); if (basename.includes('/') || basename.includes('\\')) { throw new Error(`Invalid filename in path: ${filePath}`); } - const outputPath = path.join(outputDir, basename); + let resolvedOutputDir; + if (path.isAbsolute(outputDir)) { + resolvedOutputDir = path.resolve(outputDir); + } else { + resolvedOutputDir = path.resolve(repoRoot, outputDir); + } + + if (!resolvedOutputDir.startsWith(repoRoot + path.sep) && resolvedOutputDir !== repoRoot) { + throw new Error(`Output directory escapes repository: ${outputDir}`); + } + + await fsPromises.mkdir(resolvedOutputDir, { recursive: true }); - const resolvedOutputDir = path.resolve(outputDir); - const resolvedOutputPath = path.resolve(outputPath); + const resolvedOutputPath = path.join(resolvedOutputDir, basename); if (!resolvedOutputPath.startsWith(resolvedOutputDir + path.sep) && resolvedOutputPath !== resolvedOutputDir) { - throw new Error(`Output path escapes output directory: ${outputPath}`); + throw new Error(`Output path escapes output directory: ${resolvedOutputPath}`); } const contents = await execGit(['git', 'show', `${commit}:${normalized}`], { encoding: null }); diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js index a141185a4..8268c6d5f 100644 --- a/packages/git-utils/test/git.test.js +++ b/packages/git-utils/test/git.test.js @@ -293,9 +293,14 @@ describe('@percy/git-utils', () => { describe('checkoutFile', () => { let tmpDir; + let repoRoot; - beforeEach(() => { - tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-checkout-test-')); + beforeEach(async () => { + // Create temp directory within repo for testing + repoRoot = await getRepositoryRoot(); + const tmpDirName = `test-checkout-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`; + tmpDir = path.join(repoRoot, tmpDirName); + fs.mkdirSync(tmpDir, { recursive: true }); }); afterEach(() => { @@ -308,11 +313,12 @@ describe('@percy/git-utils', () => { const currentCommit = await getCurrentCommit(); const readmePath = 'README.md'; + const relativeTmpDir = path.relative(repoRoot, tmpDir); const outputPath = await checkoutFile( currentCommit, readmePath, - tmpDir + relativeTmpDir ); expect(fs.existsSync(outputPath)).toBe(true); @@ -324,11 +330,12 @@ describe('@percy/git-utils', () => { it('should checkout file from HEAD reference', async () => { const readmePath = 'README.md'; + const relativeTmpDir = path.relative(repoRoot, tmpDir); const outputPath = await checkoutFile( 'HEAD', readmePath, - tmpDir + relativeTmpDir ); expect(fs.existsSync(outputPath)).toBe(true); @@ -338,7 +345,9 @@ describe('@percy/git-utils', () => { const currentCommit = await getCurrentCommit(); const readmePath = 'README.md'; - const nestedDir = path.join(tmpDir, 'nested', 'path'); + // Use relative path from repo root + const relativePath = path.relative(repoRoot, tmpDir); + const nestedDir = path.join(relativePath, 'nested', 'path'); const outputPath = await checkoutFile( currentCommit, @@ -346,32 +355,36 @@ describe('@percy/git-utils', () => { nestedDir ); - expect(fs.existsSync(nestedDir)).toBe(true); expect(fs.existsSync(outputPath)).toBe(true); + + const absoluteNestedDir = path.join(repoRoot, nestedDir); + expect(fs.existsSync(absoluteNestedDir)).toBe(true); }); it('should throw error for non-existent file', async () => { const currentCommit = await getCurrentCommit(); + const relativeTmpDir = path.relative(repoRoot, tmpDir); await expectAsync( - checkoutFile(currentCommit, 'this-file-does-not-exist.txt', tmpDir) + checkoutFile(currentCommit, 'this-file-does-not-exist.txt', relativeTmpDir) ).toBeRejectedWithError(/Failed to checkout file/); }); it('should throw error for invalid commit', async () => { - const repoRoot = await getRepositoryRoot(); const packageJsonPath = path.relative(repoRoot, path.join(repoRoot, 'package.json')); + const relativeTmpDir = path.relative(repoRoot, tmpDir); await expectAsync( - checkoutFile('invalid-commit-sha-12345', packageJsonPath, tmpDir) + checkoutFile('invalid-commit-sha-12345', packageJsonPath, relativeTmpDir) ).toBeRejectedWithError(/Failed to checkout file/); }); it('should throw error with descriptive message', async () => { const currentCommit = await getCurrentCommit(); + const relativeTmpDir = path.relative(repoRoot, tmpDir); try { - await checkoutFile(currentCommit, 'nonexistent.txt', tmpDir); + await checkoutFile(currentCommit, 'nonexistent.txt', relativeTmpDir); fail('Should have thrown an error'); } catch (err) { expect(err.message).toContain('Failed to checkout file'); @@ -382,32 +395,35 @@ describe('@percy/git-utils', () => { it('should reject absolute paths and path traversal for filePath', async () => { const currentCommit = await getCurrentCommit(); + const relativeTmpDir = path.relative(repoRoot, tmpDir); await expectAsync( - checkoutFile(currentCommit, '/etc/passwd', tmpDir) + checkoutFile(currentCommit, '/etc/passwd', relativeTmpDir) ).toBeRejectedWithError(/Invalid file path/); await expectAsync( - checkoutFile(currentCommit, '../package.json', tmpDir) + checkoutFile(currentCommit, '../package.json', relativeTmpDir) ).toBeRejectedWithError(/Invalid file path/); }); it('should reject filenames with path separators in basename', async () => { const currentCommit = await getCurrentCommit(); + const relativeTmpDir = path.relative(repoRoot, tmpDir); // Unix-style separator await expectAsync( - checkoutFile(currentCommit, 'foo/bar/../../../etc/passwd', tmpDir) + checkoutFile(currentCommit, 'foo/bar/../../../etc/passwd', relativeTmpDir) ).toBeRejectedWithError(/Invalid file path/); // Windows-style separator in filename await expectAsync( - checkoutFile(currentCommit, 'foo\\bar', tmpDir) + checkoutFile(currentCommit, 'foo\\bar', relativeTmpDir) ).toBeRejectedWithError(/Invalid filename in path/); }); it('should prevent path traversal attempts via multiple methods', async () => { const currentCommit = await getCurrentCommit(); + const relativeTmpDir = path.relative(repoRoot, tmpDir); // Various traversal attempts const maliciousPaths = [ @@ -420,17 +436,18 @@ describe('@percy/git-utils', () => { for (const maliciousPath of maliciousPaths) { await expectAsync( - checkoutFile(currentCommit, maliciousPath, tmpDir) + checkoutFile(currentCommit, maliciousPath, relativeTmpDir) ).toBeRejectedWithError(/Invalid file path/); } }); it('should ensure output stays within output directory', async () => { const currentCommit = await getCurrentCommit(); + const relativeTmpDir = path.relative(repoRoot, tmpDir); // Even if somehow a traversal sequence gets through, the final check should catch it // This test ensures the resolved path validation works - const result = await checkoutFile(currentCommit, 'README.md', tmpDir); + const result = await checkoutFile(currentCommit, 'README.md', relativeTmpDir); // Verify the result is actually within tmpDir const resolvedTmpDir = path.resolve(tmpDir); @@ -439,21 +456,35 @@ describe('@percy/git-utils', () => { expect(resolvedResult.startsWith(resolvedTmpDir)).toBe(true); }); + it('should reject outputDir outside repository boundary', async () => { + const currentCommit = await getCurrentCommit(); + + // Try to write outside repo using absolute path + await expectAsync( + checkoutFile(currentCommit, 'README.md', '/tmp/outside-repo') + ).toBeRejectedWithError(/Output directory escapes repository/); + + // Try to write outside repo using relative path + await expectAsync( + checkoutFile(currentCommit, 'README.md', '../../../tmp') + ).toBeRejectedWithError(/Output directory escapes repository/); + }); + it('should correctly checkout binary files without corruption', async () => { const currentCommit = await getCurrentCommit(); const binaryFilePath = 'packages/git-utils/test-binary.bin'; + const relativeTmpDir = path.relative(repoRoot, tmpDir); const outputPath = await checkoutFile( currentCommit, binaryFilePath, - tmpDir + relativeTmpDir ); expect(fs.existsSync(outputPath)).toBe(true); expect(path.basename(outputPath)).toBe('test-binary.bin'); // Read both the original and checked out file as buffers - const repoRoot = await getRepositoryRoot(); const originalPath = path.join(repoRoot, binaryFilePath); const originalContent = fs.readFileSync(originalPath); const checkedOutContent = fs.readFileSync(outputPath); @@ -560,13 +591,17 @@ describe('@percy/git-utils', () => { }); it('should handle file checkout workflow', async () => { - const tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'git-workflow-')); + const repoRoot = await getRepositoryRoot(); + const tmpDirName = `test-workflow-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`; + const tmpDir = path.join(repoRoot, tmpDirName); + fs.mkdirSync(tmpDir, { recursive: true }); try { const commit = await getCurrentCommit(); const testFile = 'README.md'; + const relativeTmpDir = path.relative(repoRoot, tmpDir); - const outputPath = await checkoutFile(commit, testFile, tmpDir); + const outputPath = await checkoutFile(commit, testFile, relativeTmpDir); expect(fs.existsSync(outputPath)).toBe(true); const content = fs.readFileSync(outputPath, 'utf8'); From 15e40c66d1ad0ffa0987c6036d2c09decb25ca64 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Tue, 4 Nov 2025 13:45:59 +0530 Subject: [PATCH 13/15] fix security vulnerability --- packages/git-utils/src/git.js | 49 ++---- packages/git-utils/test/git.test.js | 233 ++++++++-------------------- 2 files changed, 75 insertions(+), 207 deletions(-) diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index 6ff6dd9e8..b957a0b50 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -461,51 +461,28 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { } /** - * Checkout file from specific commit to output path + * Get file content from a specific commit * Supports both text and binary files. - * @param {string} commit - Commit SHA or ref + * @param {string} commit - Commit SHA or ref (HEAD, branch name, etc.) * @param {string} filePath - File path relative to repo root - * @param {string} outputDir - Output directory - * @returns {Promise} - Path to checked out file + * @param {Object} options - Options + * @param {string|null} options.encoding - Output encoding ('utf8' or null for Buffer, default: 'utf8') + * @returns {Promise} - File contents (string if utf8, Buffer if null encoding) */ -export async function checkoutFile(commit, filePath, outputDir) { +export async function getFileContentFromCommit(commit, filePath, options = {}) { try { - const repoRoot = await getRepositoryRoot(); + if (!commit || typeof commit !== 'string') { + throw new Error('Invalid commit parameter'); + } const normalized = path.normalize(filePath); if (path.isAbsolute(normalized) || normalized.split(path.sep).includes('..')) { throw new Error(`Invalid file path: ${filePath}`); } - - const basename = path.basename(filePath); - if (basename.includes('/') || basename.includes('\\')) { - throw new Error(`Invalid filename in path: ${filePath}`); - } - - let resolvedOutputDir; - if (path.isAbsolute(outputDir)) { - resolvedOutputDir = path.resolve(outputDir); - } else { - resolvedOutputDir = path.resolve(repoRoot, outputDir); - } - - if (!resolvedOutputDir.startsWith(repoRoot + path.sep) && resolvedOutputDir !== repoRoot) { - throw new Error(`Output directory escapes repository: ${outputDir}`); - } - - await fsPromises.mkdir(resolvedOutputDir, { recursive: true }); - - const resolvedOutputPath = path.join(resolvedOutputDir, basename); - if (!resolvedOutputPath.startsWith(resolvedOutputDir + path.sep) && - resolvedOutputPath !== resolvedOutputDir) { - throw new Error(`Output path escapes output directory: ${resolvedOutputPath}`); - } - - const contents = await execGit(['git', 'show', `${commit}:${normalized}`], { encoding: null }); - await fsPromises.writeFile(resolvedOutputPath, contents); - - return resolvedOutputPath; + const { encoding = 'utf8' } = options; + const contents = await execGit(['git', 'show', `${commit}:${normalized}`], { encoding }); + return contents; } catch (err) { - throw new Error(`Failed to checkout file ${filePath} from ${commit}: ${err.message}`); + throw new Error(`Failed to get file ${filePath} from commit ${commit}: ${err.message}`); } } diff --git a/packages/git-utils/test/git.test.js b/packages/git-utils/test/git.test.js index 8268c6d5f..4a8181f61 100644 --- a/packages/git-utils/test/git.test.js +++ b/packages/git-utils/test/git.test.js @@ -6,7 +6,7 @@ import { getGitState, getMergeBase, getChangedFiles, - checkoutFile, + getFileContentFromCommit, commitExists } from '../src/git.js'; import fs from 'fs'; @@ -291,212 +291,124 @@ describe('@percy/git-utils', () => { }); }); - describe('checkoutFile', () => { - let tmpDir; - let repoRoot; - - beforeEach(async () => { - // Create temp directory within repo for testing - repoRoot = await getRepositoryRoot(); - const tmpDirName = `test-checkout-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`; - tmpDir = path.join(repoRoot, tmpDirName); - fs.mkdirSync(tmpDir, { recursive: true }); - }); - - afterEach(() => { - if (fs.existsSync(tmpDir)) { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }); - - it('should checkout file from current commit', async () => { + describe('getFileContentFromCommit', () => { + it('should get text file content from current commit', async () => { const currentCommit = await getCurrentCommit(); + const content = await getFileContentFromCommit(currentCommit, 'README.md'); - const readmePath = 'README.md'; - const relativeTmpDir = path.relative(repoRoot, tmpDir); - - const outputPath = await checkoutFile( - currentCommit, - readmePath, - relativeTmpDir - ); + expect(typeof content).toBe('string'); + expect(content.length).toBeGreaterThan(0); + }); - expect(fs.existsSync(outputPath)).toBe(true); - expect(path.basename(outputPath)).toBe('README.md'); + it('should get file content from HEAD reference', async () => { + const content = await getFileContentFromCommit('HEAD', 'README.md'); - const content = fs.readFileSync(outputPath, 'utf8'); + expect(typeof content).toBe('string'); expect(content.length).toBeGreaterThan(0); }); - it('should checkout file from HEAD reference', async () => { - const readmePath = 'README.md'; - const relativeTmpDir = path.relative(repoRoot, tmpDir); - - const outputPath = await checkoutFile( - 'HEAD', - readmePath, - relativeTmpDir - ); + it('should get file content with relative path', async () => { + const currentCommit = await getCurrentCommit(); + const content = await getFileContentFromCommit(currentCommit, 'packages/git-utils/package.json'); - expect(fs.existsSync(outputPath)).toBe(true); + expect(typeof content).toBe('string'); + expect(content).toContain('"name"'); }); - it('should create output directory if it does not exist', async () => { + it('should get binary file content without corruption', async () => { const currentCommit = await getCurrentCommit(); - const readmePath = 'README.md'; - - // Use relative path from repo root - const relativePath = path.relative(repoRoot, tmpDir); - const nestedDir = path.join(relativePath, 'nested', 'path'); - const outputPath = await checkoutFile( + // Get binary content with encoding: null + const binaryContent = await getFileContentFromCommit( currentCommit, - readmePath, - nestedDir + 'packages/git-utils/test-binary.bin', + { encoding: null } ); - expect(fs.existsSync(outputPath)).toBe(true); + expect(Buffer.isBuffer(binaryContent)).toBe(true); - const absoluteNestedDir = path.join(repoRoot, nestedDir); - expect(fs.existsSync(absoluteNestedDir)).toBe(true); + // Verify PNG header bytes + expect(binaryContent[0]).toBe(0x89); + expect(binaryContent[1]).toBe(0x50); + expect(binaryContent[2]).toBe(0x4e); + expect(binaryContent[3]).toBe(0x47); + + // Compare with actual file + const repoRoot = await getRepositoryRoot(); + const actualContent = fs.readFileSync(path.join(repoRoot, 'packages/git-utils/test-binary.bin')); + expect(Buffer.compare(binaryContent, actualContent)).toBe(0); }); it('should throw error for non-existent file', async () => { const currentCommit = await getCurrentCommit(); - const relativeTmpDir = path.relative(repoRoot, tmpDir); await expectAsync( - checkoutFile(currentCommit, 'this-file-does-not-exist.txt', relativeTmpDir) - ).toBeRejectedWithError(/Failed to checkout file/); + getFileContentFromCommit(currentCommit, 'this-file-does-not-exist.txt') + ).toBeRejectedWithError(/Failed to get file/); }); it('should throw error for invalid commit', async () => { - const packageJsonPath = path.relative(repoRoot, path.join(repoRoot, 'package.json')); - const relativeTmpDir = path.relative(repoRoot, tmpDir); - await expectAsync( - checkoutFile('invalid-commit-sha-12345', packageJsonPath, relativeTmpDir) - ).toBeRejectedWithError(/Failed to checkout file/); + getFileContentFromCommit('invalid-commit-sha-12345', 'README.md') + ).toBeRejectedWithError(/Failed to get file/); }); - it('should throw error with descriptive message', async () => { + it('should reject absolute paths', async () => { const currentCommit = await getCurrentCommit(); - const relativeTmpDir = path.relative(repoRoot, tmpDir); - try { - await checkoutFile(currentCommit, 'nonexistent.txt', relativeTmpDir); - fail('Should have thrown an error'); - } catch (err) { - expect(err.message).toContain('Failed to checkout file'); - expect(err.message).toContain('nonexistent.txt'); - expect(err.message).toContain(currentCommit); - } + await expectAsync( + getFileContentFromCommit(currentCommit, '/etc/passwd') + ).toBeRejectedWithError(/Invalid file path/); }); - it('should reject absolute paths and path traversal for filePath', async () => { + it('should reject path traversal attempts', async () => { const currentCommit = await getCurrentCommit(); - const relativeTmpDir = path.relative(repoRoot, tmpDir); await expectAsync( - checkoutFile(currentCommit, '/etc/passwd', relativeTmpDir) + getFileContentFromCommit(currentCommit, '../../../etc/passwd') ).toBeRejectedWithError(/Invalid file path/); await expectAsync( - checkoutFile(currentCommit, '../package.json', relativeTmpDir) + getFileContentFromCommit(currentCommit, 'foo/../../secrets.txt') ).toBeRejectedWithError(/Invalid file path/); }); - it('should reject filenames with path separators in basename', async () => { - const currentCommit = await getCurrentCommit(); - const relativeTmpDir = path.relative(repoRoot, tmpDir); - - // Unix-style separator + it('should throw error for invalid commit parameter', async () => { await expectAsync( - checkoutFile(currentCommit, 'foo/bar/../../../etc/passwd', relativeTmpDir) - ).toBeRejectedWithError(/Invalid file path/); + getFileContentFromCommit('', 'README.md') + ).toBeRejectedWithError(/Invalid commit parameter/); - // Windows-style separator in filename await expectAsync( - checkoutFile(currentCommit, 'foo\\bar', relativeTmpDir) - ).toBeRejectedWithError(/Invalid filename in path/); + getFileContentFromCommit(null, 'README.md') + ).toBeRejectedWithError(/Invalid commit parameter/); }); - it('should prevent path traversal attempts via multiple methods', async () => { + it('should return string by default for text files', async () => { const currentCommit = await getCurrentCommit(); - const relativeTmpDir = path.relative(repoRoot, tmpDir); - - // Various traversal attempts - const maliciousPaths = [ - '../../etc/passwd', - './../../secrets.txt', - 'foo/../../../bar', - './../parent-dir/file.txt', - 'subdir/../../escape.txt' - ]; - - for (const maliciousPath of maliciousPaths) { - await expectAsync( - checkoutFile(currentCommit, maliciousPath, relativeTmpDir) - ).toBeRejectedWithError(/Invalid file path/); - } - }); - - it('should ensure output stays within output directory', async () => { - const currentCommit = await getCurrentCommit(); - const relativeTmpDir = path.relative(repoRoot, tmpDir); - - // Even if somehow a traversal sequence gets through, the final check should catch it - // This test ensures the resolved path validation works - const result = await checkoutFile(currentCommit, 'README.md', relativeTmpDir); - // Verify the result is actually within tmpDir - const resolvedTmpDir = path.resolve(tmpDir); - const resolvedResult = path.resolve(result); - - expect(resolvedResult.startsWith(resolvedTmpDir)).toBe(true); - }); - - it('should reject outputDir outside repository boundary', async () => { - const currentCommit = await getCurrentCommit(); - - // Try to write outside repo using absolute path - await expectAsync( - checkoutFile(currentCommit, 'README.md', '/tmp/outside-repo') - ).toBeRejectedWithError(/Output directory escapes repository/); + // Default behavior without encoding option + const content = await getFileContentFromCommit(currentCommit, 'package.json'); - // Try to write outside repo using relative path - await expectAsync( - checkoutFile(currentCommit, 'README.md', '../../../tmp') - ).toBeRejectedWithError(/Output directory escapes repository/); + expect(typeof content).toBe('string'); + expect(content).toContain('"private"'); + expect(content).toContain('"workspaces"'); }); - it('should correctly checkout binary files without corruption', async () => { + it('should return Buffer when encoding is null', async () => { const currentCommit = await getCurrentCommit(); - const binaryFilePath = 'packages/git-utils/test-binary.bin'; - const relativeTmpDir = path.relative(repoRoot, tmpDir); - const outputPath = await checkoutFile( + const content = await getFileContentFromCommit( currentCommit, - binaryFilePath, - relativeTmpDir + 'package.json', + { encoding: null } ); - expect(fs.existsSync(outputPath)).toBe(true); - expect(path.basename(outputPath)).toBe('test-binary.bin'); - - // Read both the original and checked out file as buffers - const originalPath = path.join(repoRoot, binaryFilePath); - const originalContent = fs.readFileSync(originalPath); - const checkedOutContent = fs.readFileSync(outputPath); + expect(Buffer.isBuffer(content)).toBe(true); - // Verify the binary content is identical - expect(Buffer.compare(originalContent, checkedOutContent)).toBe(0); - - // Verify it contains PNG header bytes (binary data) - expect(checkedOutContent[0]).toBe(0x89); - expect(checkedOutContent[1]).toBe(0x50); - expect(checkedOutContent[2]).toBe(0x4e); - expect(checkedOutContent[3]).toBe(0x47); + // Can convert back to string + const str = content.toString('utf8'); + expect(str).toContain('"private"'); + expect(str).toContain('"workspaces"'); }); }); @@ -589,26 +501,5 @@ describe('@percy/git-utils', () => { } } }); - - it('should handle file checkout workflow', async () => { - const repoRoot = await getRepositoryRoot(); - const tmpDirName = `test-workflow-${Date.now()}-${Math.random().toString(36).slice(2, 9)}`; - const tmpDir = path.join(repoRoot, tmpDirName); - fs.mkdirSync(tmpDir, { recursive: true }); - - try { - const commit = await getCurrentCommit(); - const testFile = 'README.md'; - const relativeTmpDir = path.relative(repoRoot, tmpDir); - - const outputPath = await checkoutFile(commit, testFile, relativeTmpDir); - - expect(fs.existsSync(outputPath)).toBe(true); - const content = fs.readFileSync(outputPath, 'utf8'); - expect(content.length).toBeGreaterThan(0); - } finally { - fs.rmSync(tmpDir, { recursive: true, force: true }); - } - }); }); }); From 88111a97431209a3b5d03be46749cc358bbc4b13 Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Fri, 7 Nov 2025 10:51:59 +0530 Subject: [PATCH 14/15] fix comment issues --- .github/workflows/test.yml | 1 + .github/workflows/windows.yml | 1 + packages/git-utils/src/git-commands.js | 50 ++++++ packages/git-utils/src/git.js | 225 +++++++++++++++++-------- 4 files changed, 207 insertions(+), 70 deletions(-) create mode 100644 packages/git-utils/src/git-commands.js diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index e7ed4b05f..90b84254e 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -57,6 +57,7 @@ jobs: - '@percy/sdk-utils' - '@percy/webdriver-utils' - '@percy/monitoring' + - '@percy/git-utils' runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v5 diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index eb7803054..0d43d018b 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -56,6 +56,7 @@ jobs: - '@percy/sdk-utils' - '@percy/webdriver-utils' - '@percy/monitoring' + - '@percy/git-utils' runs-on: windows-latest steps: - uses: actions/checkout@v5 diff --git a/packages/git-utils/src/git-commands.js b/packages/git-utils/src/git-commands.js new file mode 100644 index 000000000..28a932ce3 --- /dev/null +++ b/packages/git-utils/src/git-commands.js @@ -0,0 +1,50 @@ +// Basic git queries +export const GIT_REV_PARSE_GIT_DIR = ['git', 'rev-parse', '--git-dir']; +export const GIT_REV_PARSE_SHOW_TOPLEVEL = ['git', 'rev-parse', '--show-toplevel']; +export const GIT_REV_PARSE_HEAD = ['git', 'rev-parse', 'HEAD']; +export const GIT_REV_PARSE_ABBREV_REF_HEAD = ['git', 'rev-parse', '--abbrev-ref', 'HEAD']; +export const GIT_REV_PARSE_IS_SHALLOW = ['git', 'rev-parse', '--is-shallow-repository']; + +// Remote operations +export const GIT_REMOTE_V = ['git', 'remote', '-v']; +export const GIT_REMOTE_SET_HEAD = (remote, ...args) => ['git', 'remote', 'set-head', remote, ...args]; + +// History and commits +export const GIT_REV_LIST_PARENTS_HEAD = ['git', 'rev-list', '--parents', 'HEAD']; + +// Branch operations +export const GIT_REV_PARSE_VERIFY = (ref) => ['git', 'rev-parse', '--verify', ref]; +export const GIT_SYMBOLIC_REF = (ref) => ['git', 'symbolic-ref', ref]; + +// Config operations +export const GIT_CONFIG = (...args) => ['git', 'config', ...args]; +export const GIT_CONFIG_FILE_GET_REGEXP = (file, pattern) => + ['git', 'config', '--file', file, '--get-regexp', pattern]; + +// Merge base +export const GIT_MERGE_BASE = (ref1, ref2) => ['git', 'merge-base', ref1, ref2]; +export const GIT_FETCH = (remote, refspec, ...args) => ['git', 'fetch', remote, refspec, ...args]; + +// Diff operations +export const GIT_DIFF_NAME_STATUS = (baselineCommit, headCommit = 'HEAD') => + ['git', 'diff', '--name-status', `${baselineCommit}..${headCommit}`]; +export const GIT_DIFF_SUBMODULE = (baselineCommit, headCommit = 'HEAD') => + ['git', 'diff', `${baselineCommit}..${headCommit}`, '--submodule=short']; +export const GIT_DIFF_NAME_ONLY_SUBMODULE = (baselineCommit, headCommit = 'HEAD') => + ['git', 'diff', '--name-only', `${baselineCommit}..${headCommit}`]; + +// Submodule operations +export const GIT_SUBMODULE_DIFF = (submodulePath, baselineCommit, headCommit = 'HEAD') => + ['git', '-C', submodulePath, 'diff', '--name-only', `${baselineCommit}..${headCommit}`]; + +// File operations +export const GIT_SHOW = (ref, filePath) => ['git', 'show', `${ref}:${filePath}`]; +export const GIT_CAT_FILE_E = (ref) => ['git', 'cat-file', '-e', ref]; + +// Error patterns for retry logic +export const CONCURRENT_ERROR_PATTERNS = [ + 'index.lock', + 'unable to create', + 'file exists', + 'another git process' +]; diff --git a/packages/git-utils/src/git.js b/packages/git-utils/src/git.js index b957a0b50..a93b1aaf3 100644 --- a/packages/git-utils/src/git.js +++ b/packages/git-utils/src/git.js @@ -1,6 +1,7 @@ import { spawn } from 'cross-spawn'; import path from 'path'; import fs from 'fs'; +import * as GitCommands from './git-commands.js'; const fsPromises = fs.promises; @@ -22,17 +23,14 @@ async function execGit(command, options = {}) { } catch (err) { lastError = err; - // Check if error is due to concurrent git operations + // Check if error is due to concurrent git operations (index lock, file conflicts, etc.) const errorMsg = err.message.toLowerCase(); - const isConcurrentError = - errorMsg.includes('index.lock') || - errorMsg.includes('unable to create') || - errorMsg.includes('file exists') || - errorMsg.includes('another git process'); + const isConcurrentError = GitCommands.CONCURRENT_ERROR_PATTERNS.some( + pattern => errorMsg.includes(pattern) + ); - // Only retry for concurrent operation errors + // Only retry for concurrent operation errors with exponential backoff if (isConcurrentError && attempt < retries) { - // Exponential backoff const delay = retryDelay * Math.pow(2, attempt); await new Promise(resolve => setTimeout(resolve, delay)); continue; @@ -108,27 +106,38 @@ async function execGitOnce(command, options = {}) { }); }); } + +// Check if the current directory is a git repository +// Executes: git rev-parse --git-dir export async function isGitRepository() { try { - await execGit('git rev-parse --git-dir'); + await execGit(GitCommands.GIT_REV_PARSE_GIT_DIR); return true; } catch (err) { return false; } } +/** + * Get the root directory of the git repository + * Executes: git rev-parse --show-toplevel + */ export async function getRepositoryRoot() { try { - const root = await execGit('git rev-parse --show-toplevel'); + const root = await execGit(GitCommands.GIT_REV_PARSE_SHOW_TOPLEVEL); return root; } catch (err) { throw new Error('Not a git repository'); } } +/** + * Get the current commit SHA + * Executes: git rev-parse HEAD + */ export async function getCurrentCommit() { try { - const commit = await execGit('git rev-parse HEAD'); + const commit = await execGit(GitCommands.GIT_REV_PARSE_HEAD); return commit; } catch (err) { throw new Error(`Failed to get current commit: ${err.message}`); @@ -137,11 +146,12 @@ export async function getCurrentCommit() { /** * Get current git branch name + * Executes: git rev-parse --abbrev-ref HEAD * @returns {Promise} - Current branch name */ export async function getCurrentBranch() { try { - const branch = await execGit('git rev-parse --abbrev-ref HEAD'); + const branch = await execGit(GitCommands.GIT_REV_PARSE_ABBREV_REF_HEAD); return branch; } catch (err) { throw new Error(`Failed to get current branch: ${err.message}`); @@ -150,6 +160,7 @@ export async function getCurrentBranch() { /** * Validate git repository state and return diagnostic info + * Checks: repository validity, shallow clone, detached HEAD, remote config, default branch * @returns {Promise} - { isValid, isShallow, isDetached, defaultBranch, issues } */ export async function getGitState() { @@ -164,16 +175,20 @@ export async function getGitState() { issues: [] }; + // Verify this is a valid git repository + // Executes: git rev-parse --git-dir try { - await execGit('git rev-parse --git-dir'); + await execGit(GitCommands.GIT_REV_PARSE_GIT_DIR); state.isValid = true; } catch { state.issues.push('Not a git repository'); return state; } + // Check for remote configuration + // Executes: git remote -v try { - const remotes = await execGit('git remote -v'); + const remotes = await execGit(GitCommands.GIT_REMOTE_V); if (remotes && remotes.trim().length > 0) { state.hasRemote = true; const match = remotes.match(/^(\S+)\s+/); @@ -189,11 +204,13 @@ export async function getGitState() { state.issues.push('Failed to check git remote configuration'); } + // Check if repository is a shallow clone + // Executes: git rev-parse --is-shallow-repository try { - const result = await execGit('git rev-parse --is-shallow-repository'); + const result = await execGit(GitCommands.GIT_REV_PARSE_IS_SHALLOW); state.isShallow = result === 'true'; } catch { - // Fallback: check for .git/shallow file + // Fallback: check for .git/shallow file existence try { const repoRoot = await getRepositoryRoot(); const shallowPath = path.join(repoRoot, '.git', 'shallow'); @@ -204,11 +221,12 @@ export async function getGitState() { } } + // Warn about shallow clone as it affects history operations if (state.isShallow) { state.issues.push("Shallow clone detected - use 'git fetch --unshallow' or set fetch-depth: 0 in CI"); } - // Check detached HEAD + // Check if HEAD is detached (not on a branch) try { const branch = await getCurrentBranch(); state.isDetached = branch === 'HEAD'; @@ -219,64 +237,109 @@ export async function getGitState() { state.isDetached = false; } + // Check if this is the first commit (no parent commits) + // Executes: git rev-parse HEAD~1 (simplified approach) try { - const parents = await execGit('git rev-list --parents HEAD'); - const lines = parents.split('\n').filter(Boolean); - if (lines.length > 0) { - const firstLine = lines[lines.length - 1]; - const shas = firstLine.trim().split(/\s+/); - state.isFirstCommit = shas.length === 1; - } - } catch { + await execGit(GitCommands.GIT_REV_PARSE_VERIFY('HEAD~1')); state.isFirstCommit = false; + } catch { + // If HEAD~1 doesn't exist, this is the first commit + state.isFirstCommit = true; } - if (state.hasRemote) { - const remoteName = state.remoteName || 'origin'; + // Determine default branch by checking common branch names + state.defaultBranch = await findDefaultBranch(state.hasRemote, state.remoteName); + + return state; +} + +/** + * Helper function to find the default branch + * Uses git symbolic-ref to detect the actual default branch instead of guessing + * @param {boolean} hasRemote - Whether repository has a remote configured + * @param {string|null} remoteName - Name of the remote (e.g., 'origin') + * @returns {Promise} - Default branch name + */ +async function findDefaultBranch(hasRemote, remoteName) { + if (hasRemote) { + const remote = remoteName || 'origin'; + // Executes: git symbolic-ref refs/remotes//HEAD + // This returns the branch that the remote considers as default (e.g., refs/remotes/origin/main) + try { + const output = await execGit(GitCommands.GIT_SYMBOLIC_REF(`refs/remotes/${remote}/HEAD`)); + const match = output.match(/refs\/remotes\/[^/]+\/(.+)/); + if (match) { + return match[1]; + } + } catch { + // If symbolic-ref fails, the remote HEAD might not be set + // This can happen in shallow clones or if remote HEAD was never fetched + } + + // Fallback: Try to set the remote HEAD by fetching it, then retry + try { + // Executes: git remote set-head --auto + // This queries the remote and sets the symbolic-ref locally + await execGit(GitCommands.GIT_REMOTE_SET_HEAD(remote, '--auto')); + + // Retry getting the symbolic ref + const output = await execGit(GitCommands.GIT_SYMBOLIC_REF(`refs/remotes/${remote}/HEAD`)); + const match = output.match(/refs\/remotes\/[^/]+\/(.+)/); + if (match) { + return match[1]; + } + } catch { + // Remote set-head failed, continue to manual detection + } + + // Last resort for remote: Check common branch names const commonBranches = ['main', 'master', 'develop', 'development']; for (const branch of commonBranches) { try { - await execGit(`git rev-parse --verify ${remoteName}/${branch}`); - state.defaultBranch = branch; - break; + await execGit(GitCommands.GIT_REV_PARSE_VERIFY(`${remote}/${branch}`)); + return branch; } catch { // Try next branch } } + } else { + // No remote configured - detect local default branch + // For local repos, we check which branch was used during git init - if (!state.defaultBranch) { - try { - const output = await execGit(`git symbolic-ref refs/remotes/${remoteName}/HEAD`); - const match = output.match(/refs\/remotes\/[^/]+\/(.+)/); - if (match) { - state.defaultBranch = match[1]; + try { + // Executes: git config init.defaultBranch + const configBranch = await execGit(GitCommands.GIT_CONFIG('init.defaultBranch')); + if (configBranch) { + // Verify this branch actually exists locally + try { + await execGit(GitCommands.GIT_REV_PARSE_VERIFY(configBranch)); + return configBranch; + } catch { + // Config branch doesn't exist, continue } - } catch { - state.defaultBranch = 'main'; } + } catch { + // init.defaultBranch not set, continue } - } else { - const localBranches = ['main', 'master', 'develop', 'development']; - for (const branch of localBranches) { + + // Fallback: Check common local branch names + const commonBranches = ['main', 'master', 'develop', 'development']; + for (const branch of commonBranches) { try { - await execGit(`git rev-parse --verify ${branch}`); - state.defaultBranch = branch; - break; + await execGit(GitCommands.GIT_REV_PARSE_VERIFY(branch)); + return branch; } catch { // Try next branch } } - - if (!state.defaultBranch) { - state.defaultBranch = 'main'; - } } - - return state; + return 'main'; } /** * Get merge-base commit with smart error handling and recovery + * Finds the common ancestor between HEAD and a target branch + * Executes: git merge-base HEAD * @param {string} targetBranch - Target branch (if null, auto-detects) * @returns {Promise} - { success, commit, branch, error } */ @@ -302,18 +365,24 @@ export async function getMergeBase(targetBranch = null) { const branch = targetBranch || gitState.defaultBranch; result.branch = branch; + // If in detached HEAD state with remote, try to fetch the branch if (gitState.isDetached && gitState.hasRemote) { const remoteName = gitState.remoteName || 'origin'; try { - await execGit(`git rev-parse --verify ${remoteName}/${branch}`); + // Check if remote branch exists + await execGit(GitCommands.GIT_REV_PARSE_VERIFY(`${remoteName}/${branch}`)); } catch { try { - await execGit(`git fetch ${remoteName} ${branch}:refs/remotes/${remoteName}/${branch} --depth=100`); + // Fetch remote branch with limited depth + // Executes: git fetch :refs/remotes// --depth=100 + await execGit(GitCommands.GIT_FETCH(remoteName, `${branch}:refs/remotes/${remoteName}/${branch}`, '--depth=100')); } catch { + // Fetch failed, continue with available refs } } } + // Build list of branch references to try for merge-base const attempts = []; if (gitState.hasRemote) { @@ -323,6 +392,7 @@ export async function getMergeBase(targetBranch = null) { attempts.push(branch); + // Also try default branch if different from target if (branch !== gitState.defaultBranch) { if (gitState.hasRemote) { const remoteName = gitState.remoteName || 'origin'; @@ -331,9 +401,11 @@ export async function getMergeBase(targetBranch = null) { attempts.push(gitState.defaultBranch); } + // Try each reference until one succeeds + // Executes: git merge-base HEAD for (const attempt of attempts) { try { - const commit = await execGit(`git merge-base HEAD ${attempt}`); + const commit = await execGit(GitCommands.GIT_MERGE_BASE('HEAD', attempt)); result.success = true; result.commit = commit; return result; @@ -342,11 +414,11 @@ export async function getMergeBase(targetBranch = null) { } } + // No merge-base found - build helpful error message let errorMessage = `Could not find common ancestor with ${branch}.`; if (!gitState.hasRemote) { errorMessage += ` No git remote configured. Tried local branch '${branch}'.`; - errorMessage += ' Use --smart-snap-baseline= to specify a different baseline branch.'; } else { errorMessage += ' This might be an orphan branch.'; errorMessage += ` Tried: ${attempts.join(', ')}.`; @@ -369,12 +441,14 @@ export async function getMergeBase(targetBranch = null) { /** * Get changed files between current commit and baseline * Handles renames, copies, and submodule changes + * Executes: git diff --name-status ..HEAD * @param {string} baselineCommit - Baseline commit SHA or ref * @returns {Promise} - Array of changed file paths (relative to repo root) */ export async function getChangedFiles(baselineCommit = 'origin/main') { try { - const output = await execGit(['git', 'diff', '--name-status', `${baselineCommit}..HEAD`]); + // Get list of changed files with status indicators + const output = await execGit(GitCommands.GIT_DIFF_NAME_STATUS(baselineCommit)); if (!output) { return []; @@ -383,6 +457,7 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { const files = new Set(); const lines = output.split('\n').filter(Boolean); + // Parse each line of git diff output for (const line of lines) { const parts = line.split('\t'); const status = parts[0]; @@ -408,34 +483,35 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { } // Check for git submodule changes + // Executes: git diff ..HEAD --submodule=short try { - const submoduleOutput = await execGit(['git', 'diff', `${baselineCommit}..HEAD`, '--submodule=short']); + const submoduleOutput = await execGit(GitCommands.GIT_DIFF_SUBMODULE(baselineCommit)); if (submoduleOutput && submoduleOutput.includes('Submodule')) { files.add('.gitmodules'); try { - const submodulePaths = await execGit(['git', 'config', '--file', '.gitmodules', '--get-regexp', 'path']); + // Get list of submodule paths from .gitmodules + // Executes: git config --file .gitmodules --get-regexp path + const submodulePaths = await execGit(GitCommands.GIT_CONFIG_FILE_GET_REGEXP('.gitmodules', 'path')); const submodules = submodulePaths.split('\n') .filter(Boolean) .map(line => line.split(' ')[1]); for (const submodulePath of submodules) { try { - // Validate submodule path to avoid path traversal or injection + // Validate submodule path to prevent path traversal attacks const normalizedSub = path.normalize(submodulePath); if (path.isAbsolute(normalizedSub) || normalizedSub.split(path.sep).includes('..')) { - // skip suspicious submodule paths + // Skip suspicious submodule paths continue; } - const subOutput = await execGit([ - 'git', - '-C', - normalizedSub, - 'diff', - '--name-only', - `${baselineCommit}..HEAD` - ], { retries: 1 }); + // Get changed files within the submodule + // Executes: git -C diff --name-only ..HEAD + const subOutput = await execGit( + GitCommands.GIT_SUBMODULE_DIFF(normalizedSub, baselineCommit), + { retries: 1 } + ); if (subOutput) { const subFiles = subOutput.split('\n').filter(Boolean); for (const file of subFiles) { @@ -462,7 +538,8 @@ export async function getChangedFiles(baselineCommit = 'origin/main') { /** * Get file content from a specific commit - * Supports both text and binary files. + * Supports both text and binary files + * Executes: git show : * @param {string} commit - Commit SHA or ref (HEAD, branch name, etc.) * @param {string} filePath - File path relative to repo root * @param {Object} options - Options @@ -474,25 +551,33 @@ export async function getFileContentFromCommit(commit, filePath, options = {}) { if (!commit || typeof commit !== 'string') { throw new Error('Invalid commit parameter'); } + // Sanitize file path to prevent path traversal attacks const normalized = path.normalize(filePath); if (path.isAbsolute(normalized) || normalized.split(path.sep).includes('..')) { throw new Error(`Invalid file path: ${filePath}`); } const { encoding = 'utf8' } = options; - const contents = await execGit(['git', 'show', `${commit}:${normalized}`], { encoding }); + const contents = await execGit(GitCommands.GIT_SHOW(commit, normalized), { encoding }); return contents; } catch (err) { throw new Error(`Failed to get file ${filePath} from commit ${commit}: ${err.message}`); } } +/** + * Check if a commit exists in the repository + * Executes: git cat-file -e + * @param {string} commit - Commit SHA or ref to check + * @returns {Promise} - True if commit exists + */ export async function commitExists(commit) { try { if (!commit || typeof commit !== 'string') return false; + // Validate commit reference format for security const safeRef = commit === 'HEAD' || /^[0-9a-fA-F]{4,40}$/.test(commit) || /^(refs\/[A-Za-z0-9._/-]+)$/.test(commit); if (!safeRef) return false; - await execGit(['git', 'cat-file', '-e', commit]); + await execGit(GitCommands.GIT_CAT_FILE_E(commit)); return true; } catch (err) { return false; From 98652d0b8e03fd66eb4d8f57982e593b91060d5a Mon Sep 17 00:00:00 2001 From: Pranav Zinzurde Date: Fri, 7 Nov 2025 10:58:11 +0530 Subject: [PATCH 15/15] add depth 0 --- .github/workflows/test.yml | 2 ++ .github/workflows/windows.yml | 2 ++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 90b84254e..18dc61af5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -61,6 +61,8 @@ jobs: runs-on: ${{ matrix.os }} steps: - uses: actions/checkout@v5 + with: + fetch-depth: ${{ matrix.package == '@percy/git-utils' && 0 || 1 }} - uses: actions/setup-node@v3 with: node-version: ${{ matrix.node }} diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml index 0d43d018b..0da249d10 100644 --- a/.github/workflows/windows.yml +++ b/.github/workflows/windows.yml @@ -60,6 +60,8 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v5 + with: + fetch-depth: ${{ matrix.package == '@percy/git-utils' && 0 || 1 }} - uses: actions/setup-node@v3 with: node-version: 14