Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions integration-tests/extensions-reload.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ describe('extension reloading', () => {
return (
output.includes(
'test-server (from test-extension) - Ready (1 tool)',
) && output.includes('- hello')
) && output.includes('- mcp_test-server_hello')
);
},
30000, // 30s timeout
Expand Down Expand Up @@ -148,7 +148,7 @@ describe('extension reloading', () => {
return (
output.includes(
'test-server (from test-extension) - Ready (1 tool)',
) && output.includes('- goodbye')
) && output.includes('- mcp_test-server_goodbye')
);
},
30000,
Expand Down
17 changes: 15 additions & 2 deletions integration-tests/policy-headless.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ async function verifyToolExecution(
promptCommand: PromptCommand,
result: string,
expectAllowed: boolean,
expectedDenialString?: string,
) {
const log = await waitForToolCallLog(
rig,
Expand All @@ -78,10 +79,13 @@ async function verifyToolExecution(
if (expectAllowed) {
expect(log!.toolRequest.success).toBe(true);
expect(result).not.toContain('Tool execution denied by policy');
expect(result).not.toContain(`Tool "${promptCommand.tool}" not found`);
expect(result).toContain(promptCommand.expectedSuccessResult);
} else {
expect(log!.toolRequest.success).toBe(false);
expect(result).toContain('Tool execution denied by policy');
expect(result).toContain(
expectedDenialString || 'Tool execution denied by policy',
);
expect(result).toContain(promptCommand.expectedFailureResult);
}
}
Expand All @@ -92,6 +96,7 @@ interface TestCase {
promptCommand: PromptCommand;
policyContent?: string;
expectAllowed: boolean;
expectedDenialString?: string;
}

describe('Policy Engine Headless Mode', () => {
Expand Down Expand Up @@ -125,7 +130,13 @@ describe('Policy Engine Headless Mode', () => {
approvalMode: 'default',
});

await verifyToolExecution(rig, tc.promptCommand, result, tc.expectAllowed);
await verifyToolExecution(
rig,
tc.promptCommand,
result,
tc.expectAllowed,
tc.expectedDenialString,
);
};

const testCases = [
Expand All @@ -134,6 +145,7 @@ describe('Policy Engine Headless Mode', () => {
responsesFile: 'policy-headless-shell-denied.responses',
promptCommand: ECHO_PROMPT,
expectAllowed: false,
expectedDenialString: 'Tool "run_shell_command" not found',
},
{
name: 'should allow ASK_USER tools in headless mode if explicitly allowed via policy file',
Expand Down Expand Up @@ -178,6 +190,7 @@ describe('Policy Engine Headless Mode', () => {
priority = 100
`,
expectAllowed: false,
expectedDenialString: 'Tool execution denied by policy',
},
];

Expand Down
73 changes: 42 additions & 31 deletions packages/cli/src/config/policy-engine.integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -93,45 +93,45 @@ describe('Policy Engine Integration Tests', () => {
// Tools from allowed server should be allowed
// Tools from allowed server should be allowed
expect(
(await engine.check({ name: 'allowed-server__tool1' }, undefined))
(await engine.check({ name: 'mcp_allowed-server_tool1' }, undefined))
.decision,
).toBe(PolicyDecision.ALLOW);
expect(
(
await engine.check(
{ name: 'allowed-server__another_tool' },
{ name: 'mcp_allowed-server_another_tool' },
undefined,
)
).decision,
).toBe(PolicyDecision.ALLOW);

// Tools from trusted server should be allowed
expect(
(await engine.check({ name: 'trusted-server__tool1' }, undefined))
(await engine.check({ name: 'mcp_trusted-server_tool1' }, undefined))
.decision,
).toBe(PolicyDecision.ALLOW);
expect(
(
await engine.check(
{ name: 'trusted-server__special_tool' },
{ name: 'mcp_trusted-server_special_tool' },
undefined,
)
).decision,
).toBe(PolicyDecision.ALLOW);

// Tools from blocked server should be denied
expect(
(await engine.check({ name: 'blocked-server__tool1' }, undefined))
(await engine.check({ name: 'mcp_blocked-server_tool1' }, undefined))
.decision,
).toBe(PolicyDecision.DENY);
expect(
(await engine.check({ name: 'blocked-server__any_tool' }, undefined))
(await engine.check({ name: 'mcp_blocked-server_any_tool' }, undefined))
.decision,
).toBe(PolicyDecision.DENY);

// Tools from unknown servers should use default
expect(
(await engine.check({ name: 'unknown-server__tool' }, undefined))
(await engine.check({ name: 'mcp_unknown-server_tool' }, undefined))
.decision,
).toBe(PolicyDecision.ASK_USER);
});
Expand All @@ -151,12 +151,16 @@ describe('Policy Engine Integration Tests', () => {

// ANY tool with a server name should be allowed
expect(
(await engine.check({ name: 'mcp-server__tool' }, 'mcp-server'))
(await engine.check({ name: 'mcp_mcp-server_tool' }, 'mcp-server'))
.decision,
).toBe(PolicyDecision.ALLOW);
expect(
(await engine.check({ name: 'another-server__tool' }, 'another-server'))
.decision,
(
await engine.check(
{ name: 'mcp_another-server_tool' },
'another-server',
)
).decision,
).toBe(PolicyDecision.ALLOW);

// Built-in tools should NOT be allowed by the MCP wildcard
Expand All @@ -171,7 +175,7 @@ describe('Policy Engine Integration Tests', () => {
allowed: ['my-server'],
},
tools: {
exclude: ['my-server__dangerous-tool'],
exclude: ['mcp_my-server_dangerous-tool'],
},
};

Expand All @@ -184,20 +188,24 @@ describe('Policy Engine Integration Tests', () => {
// MCP server allowed (priority 4.1) provides general allow for server
// MCP server allowed (priority 4.1) provides general allow for server
expect(
(await engine.check({ name: 'my-server__safe-tool' }, undefined))
(await engine.check({ name: 'mcp_my-server_safe-tool' }, undefined))
.decision,
).toBe(PolicyDecision.ALLOW);
// But specific tool exclude (priority 4.4) wins over server allow
expect(
(await engine.check({ name: 'my-server__dangerous-tool' }, undefined))
.decision,
(
await engine.check(
{ name: 'mcp_my-server_dangerous-tool' },
undefined,
)
).decision,
).toBe(PolicyDecision.DENY);
});

it('should handle complex mixed configurations', async () => {
const settings: Settings = {
tools: {
allowed: ['custom-tool', 'my-server__special-tool'],
allowed: ['custom-tool', 'mcp_my-server_special-tool'],
exclude: ['glob', 'dangerous-tool'],
},
mcp: {
Expand Down Expand Up @@ -242,21 +250,21 @@ describe('Policy Engine Integration Tests', () => {
(await engine.check({ name: 'custom-tool' }, undefined)).decision,
).toBe(PolicyDecision.ALLOW);
expect(
(await engine.check({ name: 'my-server__special-tool' }, undefined))
(await engine.check({ name: 'mcp_my-server_special-tool' }, undefined))
.decision,
).toBe(PolicyDecision.ALLOW);

// MCP server tools
expect(
(await engine.check({ name: 'allowed-server__tool' }, undefined))
(await engine.check({ name: 'mcp_allowed-server_tool' }, undefined))
.decision,
).toBe(PolicyDecision.ALLOW);
expect(
(await engine.check({ name: 'trusted-server__tool' }, undefined))
(await engine.check({ name: 'mcp_trusted-server_tool' }, undefined))
.decision,
).toBe(PolicyDecision.ALLOW);
expect(
(await engine.check({ name: 'blocked-server__tool' }, undefined))
(await engine.check({ name: 'mcp_blocked-server_tool' }, undefined))
.decision,
).toBe(PolicyDecision.DENY);

Expand Down Expand Up @@ -483,7 +491,7 @@ describe('Policy Engine Integration Tests', () => {
expect(blockedToolRule?.priority).toBe(4.4); // Command line exclude

const blockedServerRule = rules.find(
(r) => r.toolName === 'blocked-server__*',
(r) => r.toolName === 'mcp_blocked-server_*',
);
expect(blockedServerRule?.priority).toBe(4.9); // MCP server exclude

Expand All @@ -493,11 +501,13 @@ describe('Policy Engine Integration Tests', () => {
expect(specificToolRule?.priority).toBe(4.3); // Command line allow

const trustedServerRule = rules.find(
(r) => r.toolName === 'trusted-server__*',
(r) => r.toolName === 'mcp_trusted-server_*',
);
expect(trustedServerRule?.priority).toBe(4.2); // MCP trusted server

const mcpServerRule = rules.find((r) => r.toolName === 'mcp-server__*');
const mcpServerRule = rules.find(
(r) => r.toolName === 'mcp_mcp-server_*',
);
expect(mcpServerRule?.priority).toBe(4.1); // MCP allowed server

const readOnlyToolRule = rules.find((r) => r.toolName === 'glob');
Expand All @@ -509,18 +519,19 @@ describe('Policy Engine Integration Tests', () => {
(await engine.check({ name: 'blocked-tool' }, undefined)).decision,
).toBe(PolicyDecision.DENY);
expect(
(await engine.check({ name: 'blocked-server__any' }, undefined))
(await engine.check({ name: 'mcp_blocked-server_any' }, undefined))
.decision,
).toBe(PolicyDecision.DENY);
expect(
(await engine.check({ name: 'specific-tool' }, undefined)).decision,
).toBe(PolicyDecision.ALLOW);
expect(
(await engine.check({ name: 'trusted-server__any' }, undefined))
(await engine.check({ name: 'mcp_trusted-server_any' }, undefined))
.decision,
).toBe(PolicyDecision.ALLOW);
expect(
(await engine.check({ name: 'mcp-server__any' }, undefined)).decision,
(await engine.check({ name: 'mcp_mcp-server_any' }, undefined))
.decision,
).toBe(PolicyDecision.ALLOW);
expect((await engine.check({ name: 'glob' }, undefined)).decision).toBe(
PolicyDecision.ALLOW,
Expand Down Expand Up @@ -549,7 +560,7 @@ describe('Policy Engine Integration Tests', () => {

// Exclusion (195) should win over trust (90)
expect(
(await engine.check({ name: 'conflicted-server__tool' }, undefined))
(await engine.check({ name: 'mcp_conflicted-server_tool' }, undefined))
.decision,
).toBe(PolicyDecision.DENY);
});
Expand All @@ -560,7 +571,7 @@ describe('Policy Engine Integration Tests', () => {
excluded: ['my-server'], // Priority 195 - DENY
},
tools: {
allowed: ['my-server__special-tool'], // Priority 100 - ALLOW
allowed: ['mcp_my-server_special-tool'], // Priority 100 - ALLOW
},
};

Expand All @@ -573,11 +584,11 @@ describe('Policy Engine Integration Tests', () => {
// Server exclusion (195) wins over specific tool allow (100)
// This might be counterintuitive but follows the priority system
expect(
(await engine.check({ name: 'my-server__special-tool' }, undefined))
(await engine.check({ name: 'mcp_my-server_special-tool' }, undefined))
.decision,
).toBe(PolicyDecision.DENY);
expect(
(await engine.check({ name: 'my-server__other-tool' }, undefined))
(await engine.check({ name: 'mcp_my-server_other-tool' }, undefined))
.decision,
).toBe(PolicyDecision.DENY);
});
Expand Down Expand Up @@ -647,13 +658,13 @@ describe('Policy Engine Integration Tests', () => {
const tool3Rule = rules.find((r) => r.toolName === 'tool3');
expect(tool3Rule?.priority).toBe(4.4); // Excluded tools (user tier)

const server2Rule = rules.find((r) => r.toolName === 'server2__*');
const server2Rule = rules.find((r) => r.toolName === 'mcp_server2_*');
expect(server2Rule?.priority).toBe(4.9); // Excluded servers (user tier)

const tool1Rule = rules.find((r) => r.toolName === 'tool1');
expect(tool1Rule?.priority).toBe(4.3); // Allowed tools (user tier)

const server1Rule = rules.find((r) => r.toolName === 'server1__*');
const server1Rule = rules.find((r) => r.toolName === 'mcp_server1_*');
expect(server1Rule?.priority).toBe(4.1); // Allowed servers (user tier)

const globRule = rules.find((r) => r.toolName === 'glob');
Expand Down
7 changes: 2 additions & 5 deletions packages/core/src/agents/local-executor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,7 @@ import { debugLogger } from '../utils/debugLogger.js';
import { LocalAgentExecutor, type ActivityCallback } from './local-executor.js';
import { makeFakeConfig } from '../test-utils/config.js';
import { ToolRegistry } from '../tools/tool-registry.js';
import {
DiscoveredMCPTool,
MCP_QUALIFIED_NAME_SEPARATOR,
} from '../tools/mcp-tool.js';
import { DiscoveredMCPTool } from '../tools/mcp-tool.js';
import { LSTool } from '../tools/ls.js';
import { LS_TOOL_NAME, READ_FILE_TOOL_NAME } from '../tools/tool-names.js';
import {
Expand Down Expand Up @@ -503,7 +500,7 @@ describe('LocalAgentExecutor', () => {
it('should automatically qualify MCP tools in agent definitions', async () => {
const serverName = 'mcp-server';
const toolName = 'mcp-tool';
const qualifiedName = `${serverName}${MCP_QUALIFIED_NAME_SEPARATOR}${toolName}`;
const qualifiedName = `mcp_${serverName}_${toolName}`;

const mockMcpTool = {
tool: vi.fn(),
Expand Down
6 changes: 3 additions & 3 deletions packages/core/src/core/__snapshots__/prompts.test.ts.snap
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ The following tools are available in Plan Mode:
<tool>\`exit_plan_mode\`</tool>
<tool>\`write_file\`</tool>
<tool>\`replace\`</tool>
<tool>\`read_data\` (readonly-server)</tool>
<tool>\`mcp_readonly-server_read_data\` (readonly-server)</tool>
</available_tools>

## Rules
Expand Down Expand Up @@ -275,7 +275,7 @@ The following tools are available in Plan Mode:
<tool>\`exit_plan_mode\`</tool>
<tool>\`write_file\`</tool>
<tool>\`replace\`</tool>
<tool>\`read_data\` (readonly-server)</tool>
<tool>\`mcp_readonly-server_read_data\` (readonly-server)</tool>
</available_tools>

## Rules
Expand Down Expand Up @@ -564,7 +564,7 @@ The following tools are available in Plan Mode:
<tool>\`exit_plan_mode\`</tool>
<tool>\`write_file\`</tool>
<tool>\`replace\`</tool>
<tool>\`read_data\` (readonly-server)</tool>
<tool>\`mcp_readonly-server_read_data\` (readonly-server)</tool>
</available_tools>

## Rules
Expand Down
Loading
Loading