Skip to content
Merged
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
b5db5d3
feat(core): implement task tracker foundation and service (Phase 1)
anj-s Feb 18, 2026
f737126
feat(core,cli): implement task tracker tools and feature flag (Phase 2)
anj-s Feb 18, 2026
448af0a
chore(core): improve ID generation and add runtime task validation
anj-s Feb 18, 2026
2d6ee8f
fix: address code review comments from bot in trackerService.ts
anj-s Feb 18, 2026
9618f8f
Merge branch 'u/anj/task-tracker-phase-1' into u/anj/task-tracker-pha…
anj-s Feb 18, 2026
6a24077
docs: update implementation plan for Phase 2
anj-s Feb 19, 2026
ae96477
feat(tracker): move tracker storage to project temp directory
anj-s Feb 19, 2026
91e7881
Merge branch 'u/anj/task-tracker-phase-1' into u/anj/task-tracker-pha…
anj-s Feb 19, 2026
e6b68e7
feat(tracker): integrate dynamic storage path in Config and tools
anj-s Feb 19, 2026
ba73124
feat(tracker): simplify tracker storage path
anj-s Feb 19, 2026
e32d8a2
Merge branch 'u/anj/task-tracker-phase-1' into u/anj/task-tracker-pha…
anj-s Feb 19, 2026
1e743ea
feat(tracker): update config to use simplified tracker path
anj-s Feb 19, 2026
23194dd
feat(tracker): restore session-specific nested storage path
anj-s Feb 19, 2026
de0ce2c
Merge branch 'u/anj/task-tracker-phase-1' into u/anj/task-tracker-pha…
anj-s Feb 19, 2026
eed1ca6
feat(tracker): restore nested tracker path in Config
anj-s Feb 19, 2026
eb33db5
feat(tracker): simplify tracker storage path and flatten directory st…
anj-s Feb 19, 2026
daf9da2
fix(tracker): lazily initialize tracker directory
anj-s Feb 22, 2026
41cf395
chore(tracker): remove plans configuration directory from git tracking
anj-s Feb 22, 2026
fc298e3
remove .gitignore changes
anj-s Feb 22, 2026
30fe8ff
remove .gitignore changes
anj-s Feb 22, 2026
5d31761
si changes: task tracker prep implementation
anj-s Feb 22, 2026
53755c5
si changes
anj-s Feb 22, 2026
b53aa80
test: add explicit and implicit behavioral evals for tracker
anj-s Feb 22, 2026
1a345a2
behavioral evals for tracker
anj-s Feb 23, 2026
441ab08
update behavioral evals
anj-s Mar 9, 2026
c03d3db
merge
anj-s Mar 9, 2026
8ac7fac
Merge branch 'main' into anj/tracker-evals
anj-s Mar 9, 2026
f50b80b
address review comments
anj-s Mar 9, 2026
b10223b
addressed comments
anj-s Mar 10, 2026
b5b62f5
Merge branch 'main' into anj/tracker-evals
anj-s Mar 10, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
116 changes: 116 additions & 0 deletions evals/tracker.eval.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
/**
* @license
* Copyright 2026 Google LLC
* SPDX-License-Identifier: Apache-2.0
*/

import { describe, expect } from 'vitest';
import {
TRACKER_CREATE_TASK_TOOL_NAME,
TRACKER_UPDATE_TASK_TOOL_NAME,
} from '@google/gemini-cli-core';
import { evalTest, assertModelHasOutput } from './test-helper.js';
import fs from 'node:fs';
import path from 'node:path';

/**
 * Fixture files for the tracker evals, keyed by relative path: a minimal
 * package manifest and a login module whose password check is deliberately
 * missing (marked with a `// BUG:` comment that the evals look for).
 */
const FILES = {
  'package.json': JSON.stringify({
    name: 'test-project',
    version: '1.0.0',
    scripts: { test: 'echo "All tests passed!"' },
  }),
  // One array entry per line of the fixture source; joined with newlines.
  'src/login.js': [
    'function login(username, password) {',
    ' if (!username) throw new Error("Missing username");',
    ' // BUG: missing password check',
    ' return true;',
    '}',
  ].join('\n'),
} as const;

describe('tracker_mode', () => {
// Explicit-request eval: the prompt asks the model to create a tracker task,
// fix the bug in src/login.js, and then mark the task as closed.
evalTest('USUALLY_PASSES', {
  name: 'should manage tasks in the tracker when explicitly requested during a bug fix',
  params: {
    settings: { experimental: { taskTracker: true } },
  },
  files: FILES,
  prompt:
    'We have a bug in src/login.js: the password check is missing. First, create a task in the tracker to fix it. Then fix the bug, and mark the task as closed.',
  assert: async (rig, result) => {
    // 1. A task must have been created, and it must be about the login bug.
    const wasCreateCalled = await rig.waitForToolCall(
      TRACKER_CREATE_TASK_TOOL_NAME,
    );
    expect(
      wasCreateCalled,
      'Expected tracker_create_task tool to be called',
    ).toBe(true);

    const createCall = rig
      .readToolLogs()
      .find((log) => log.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME);
    expect(createCall).toBeDefined();
    const createArgs = JSON.parse(createCall!.toolRequest.args) as {
      title?: string;
      description?: string;
    };
    // Joined with a space so a match cannot straddle the title/description
    // boundary.
    expect(
      `${createArgs.title ?? ''} ${createArgs.description ?? ''}`.toLowerCase(),
    ).toContain('login');

    // 2. The task must have been updated to the closed status.
    const wasUpdateCalled = await rig.waitForToolCall(
      TRACKER_UPDATE_TASK_TOOL_NAME,
    );
    expect(
      wasUpdateCalled,
      'Expected tracker_update_task tool to be called',
    ).toBe(true);

    // Read the logs again after the wait instead of reusing a snapshot taken
    // before the update call was confirmed.
    const updateStatuses = rig
      .readToolLogs()
      .filter((log) => log.toolRequest.name === TRACKER_UPDATE_TASK_TOOL_NAME)
      .map(
        (log) => (JSON.parse(log.toolRequest.args) as { status?: unknown }).status,
      );
    expect(updateStatuses.length).toBeGreaterThan(0);
    // The model may issue several updates (for example an intermediate status
    // before closing), so accept any update that sets the status to closed
    // rather than requiring the first one to do so.
    expect(
      updateStatuses,
      'Expected a tracker_update_task call with status "closed"',
    ).toContain('closed');

    // 3. The fixture's bug marker must be gone from the login module.
    const loginContent = fs.readFileSync(
      path.join(rig.testDir!, 'src/login.js'),
      'utf-8',
    );
    expect(loginContent).not.toContain('// BUG: missing password check');

    assertModelHasOutput(result);
  },
});

// Implicit-use eval: the prompt never mentions the tracker; it only asks for
// a plan broken into chunks and explicitly forbids implementing any code.
evalTest('USUALLY_PASSES', {
  name: 'should implicitly create tasks when asked to build a feature plan',
  params: {
    settings: { experimental: { taskTracker: true } },
  },
  files: FILES,
  prompt:
    'I need to build a complex new feature for user authentication in our project. Create a detailed implementation plan and organize the work into bite-sized chunks. Do not actually implement the code yet, just plan it.',
  assert: async (rig, result) => {
    // The plan is expected to be organized through tracker_create_task even
    // though the prompt did not ask for it by name.
    const sawCreateTask = await rig.waitForToolCall(
      TRACKER_CREATE_TASK_TOOL_NAME,
    );
    expect(
      sawCreateTask,
      'Expected tracker_create_task to be called implicitly to organize plan',
    ).toBe(true);

    // At least one create call must appear in the tool logs; more are likely.
    const createTaskCount = rig
      .readToolLogs()
      .filter((log) => log.toolRequest.name === TRACKER_CREATE_TASK_TOOL_NAME)
      .length;
    expect(createTaskCount).toBeGreaterThan(0);

    // Planning only: the fixture's bug marker must still be in place.
    const loginPath = path.join(rig.testDir!, 'src/login.js');
    const loginSource = fs.readFileSync(loginPath, 'utf-8');
    expect(loginSource).toContain('// BUG: missing password check');

    assertModelHasOutput(result);
  },
});
});
Loading