Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions .github/workflows/chained_e2e.yml
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,7 @@ jobs:
with:
ref: '${{ needs.parse_run_context.outputs.sha }}'
repository: '${{ needs.parse_run_context.outputs.repository }}'
fetch-depth: 0

- name: 'Set up Node.js 20.x'
uses: 'actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020' # ratchet:actions-node@v4
Expand All @@ -302,7 +303,14 @@ jobs:
- name: 'Build project'
run: 'npm run build'

- name: 'Run Evals (ALWAYS_PASSING)'
- name: 'Check if evals should run'
id: 'check_evals'
run: |
SHOULD_RUN=$(node scripts/changed_prompt.js)
echo "should_run=$SHOULD_RUN" >> "$GITHUB_OUTPUT"

- name: 'Run Evals (Required to pass)'
if: "${{ steps.check_evals.outputs.should_run == 'true' }}"
env:
GEMINI_API_KEY: '${{ secrets.GEMINI_API_KEY }}'
run: 'npm run test:always_passing_evals'
Expand All @@ -315,14 +323,16 @@ jobs:
- 'e2e_linux'
- 'e2e_mac'
- 'e2e_windows'
- 'evals'
- 'merge_queue_skipper'
runs-on: 'gemini-cli-ubuntu-16-core'
steps:
- name: 'Check E2E test results'
run: |
if [[ ${NEEDS_E2E_LINUX_RESULT} != 'success' || \
${NEEDS_E2E_MAC_RESULT} != 'success' || \
${NEEDS_E2E_WINDOWS_RESULT} != 'success' ]]; then
${NEEDS_E2E_WINDOWS_RESULT} != 'success' || \
${NEEDS_EVALS_RESULT} != 'success' ]]; then
echo "One or more E2E jobs failed."
exit 1
fi
Expand All @@ -331,6 +341,7 @@ jobs:
NEEDS_E2E_LINUX_RESULT: '${{ needs.e2e_linux.result }}'
NEEDS_E2E_MAC_RESULT: '${{ needs.e2e_mac.result }}'
NEEDS_E2E_WINDOWS_RESULT: '${{ needs.e2e_windows.result }}'
NEEDS_EVALS_RESULT: '${{ needs.evals.result }}'

set_workflow_status:
runs-on: 'gemini-cli-ubuntu-16-core'
Expand Down
47 changes: 47 additions & 0 deletions scripts/changed_prompt.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/
import { execSync } from 'node:child_process';

// Path prefixes that gate the eval run. main() compares each path printed by
// `git diff --name-only` against these with String.prototype.startsWith; a
// single match is enough for it to print 'true'.
// NOTE(review): the review thread embedded in this list says 'evals/' was
// added, but no such entry appears here — confirm against the latest commit.
const EVALS_FILE_PREFIXES = [
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

consider adding the evals/ directory too

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ok, added. Note that as written, only the ALWAYS_PASSES evals will end up getting run during the CI. Breaking changes to USUALLY_PASSES ones will not.

I guess we could make it smart enough to compute the delta, but I'd rather aspire to stabilizing as many tests as possible and running them during the CI by default.

'packages/core/src/prompts/',
'packages/core/src/tools/',
];

/**
 * Decides whether the evals need to run for the current checkout.
 *
 * Prints exactly one of 'true' / 'false' via console.log: 'true' when any
 * file changed between the merge base (of HEAD and the freshly fetched
 * `origin main`) and HEAD starts with one of EVALS_FILE_PREFIXES, otherwise
 * 'false'. If any git command throws, the error is reported through
 * console.warn / console.error and 'true' is printed so evals still run.
 *
 * Takes no arguments and returns nothing; the result is the printed line.
 */
function main() {
try {
// Fetch the main branch from origin. The command's own output is discarded
// (stdio: 'ignore'); git records the fetched tip as FETCH_HEAD, which the
// merge-base call below relies on.
execSync('git fetch origin main', {
stdio: 'ignore',
});

// Find the merge base of HEAD with main: FETCH_HEAD here is the tip of
// `origin main` fetched just above, not some other target branch.
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it looks like the code find the merge base with the target branch instead of main?

const mergeBase = execSync('git merge-base HEAD FETCH_HEAD', {
encoding: 'utf-8',
}).trim();

// Get changed files: paths that differ between the merge base and HEAD,
// one per output line; filter(Boolean) drops the empty trailing entry.
const changedFiles = execSync(`git diff --name-only ${mergeBase} HEAD`, {
encoding: 'utf-8',
})
.split('\n')
.filter(Boolean);

// True as soon as one changed path begins with any watched prefix.
const shouldRun = changedFiles.some((file) =>
EVALS_FILE_PREFIXES.some((prefix) => file.startsWith(prefix)),
);

// The only console.log on the success path, so the printed result is a
// single 'true' or 'false' line.
console.log(shouldRun ? 'true' : 'false');
} catch (error) {
// If anything fails (e.g., no git history), run evals to be safe.
// Diagnostics use console.warn/console.error; only the final 'true' goes
// through console.log, matching the success path's single result line.
console.warn(
'Warning: Failed to determine if evals should run. Defaulting to true.',
);
console.error(error);
console.log('true');
}
}

// Script entry point: the check runs as soon as the module is executed.
main();
Loading